# Fake news Detection

![I-Newspaper2.jpg](attachment:I-Newspaper2.jpg)

### Importing required library
Here I am going to importing some of the required library, if extra library is required to install It will be install later on.

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

### Inserting fake and real dataset

In [None]:
df_fake = pd.read_csv("/content/drive/MyDrive/ML Projects/Fake News Detection/New folder/Fake_news1/Fake.csv")
df_true = pd.read_csv("/content/drive/MyDrive/ML Projects/Fake News Detection/New folder/Fake_news1/True.csv")

In [None]:
df_fake.head(5)

In [None]:
df_true.head(5)

Inserting a column called "class" for fake and real news dataset to categories fake and true news.

In [None]:
df_fake["class"] = 0
df_true["class"] = 1

Removing last 10 rows from both the dataset, for manual testing  

In [None]:
df_fake.shape, df_true.shape

In [None]:
df_fake_manual_testing = df_fake.tail(10)
for i in range(23480,23470,-1):
    df_fake.drop([i], axis = 0, inplace = True)
df_true_manual_testing = df_true.tail(10)
for i in range(21416,21406,-1):
    df_true.drop([i], axis = 0, inplace = True)

In [None]:
df_fake.shape, df_true.shape

Merging the manual testing dataframe in single dataset and save it in a csv file

In [None]:
df_fake_manual_testing["class"] = 0
df_true_manual_testing["class"] = 1

In [None]:
df_fake_manual_testing.head(10)

In [None]:
df_true_manual_testing.head(10)

In [None]:
df_manual_testing = pd.concat([df_fake_manual_testing,df_true_manual_testing], axis = 0)
df_manual_testing.to_csv("manual_testing.csv")

Merging the main fake and true dataframe

In [None]:
df_marge = pd.concat([df_fake, df_true], axis =0 )
df_marge.head(10)

In [None]:
df_marge.columns

#### "title",  "subject" and "date" columns is not required for detecting the fake news, so I am going to drop the columns.

In [None]:
df = df_marge.drop(["title", "subject","date"], axis = 1)

In [None]:
df.isnull().sum()

#### Randomly shuffling the dataframe

In [None]:
df = df.sample(frac = 1)

In [None]:
df.head()

In [None]:
df.reset_index(inplace = True)
df.drop(["index"], axis = 1, inplace = True)

In [None]:
df.columns

In [None]:
df.head()

#### Creating a function to convert the text in lowercase, remove the extra space, special chr., ulr and links.

In [None]:
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W"," ",text)
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)
    return text

In [76]:
df["text"] = df["text"].apply(wordopt)

#### Defining dependent and independent variable as x and y

In [77]:
x = df["text"]
y = df["class"]

#### Splitting the dataset into training set and testing set.

In [78]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

#### Convert text to vectors

In [79]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [80]:
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)
#vectorization.fit(df["text"])

### 1. Logistic Regression

In [81]:
from sklearn.linear_model import LogisticRegression

In [82]:
LR = LogisticRegression()
LR.fit(xv_train,y_train)

In [83]:
pred_lr=LR.predict(xv_test)

In [84]:
LR.score(xv_test, y_test)

0.9869875222816399

In [85]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5840
           1       0.99      0.99      0.99      5380

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



### 2. Decision Tree Classification

In [86]:
from sklearn.tree import DecisionTreeClassifier

In [87]:
DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)

In [88]:
pred_dt = DT.predict(xv_test)

In [89]:
DT.score(xv_test, y_test)

0.9961675579322639

In [90]:
print(classification_report(y_test, pred_dt))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5840
           1       1.00      1.00      1.00      5380

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



### 3. Gradient Boosting Classifier

In [91]:
from sklearn.ensemble import GradientBoostingClassifier

In [92]:
GBC = GradientBoostingClassifier(random_state=0)
GBC.fit(xv_train, y_train)

####Model Dumping

In [93]:
import joblib
joblib.dump(GBC, r"/content/drive/MyDrive/ML Projects/Pickle files/model.pkl")
joblib.dump(vectorization, r"/content/drive/MyDrive/ML Projects/Pickle files/vectorizer.pkl")

['/content/drive/MyDrive/ML Projects/Pickle files/vectorizer.jb']

In [94]:
pred_gbc = GBC.predict(xv_test)

In [95]:
GBC.score(xv_test, y_test)

0.9957219251336898

In [96]:
print(classification_report(y_test, pred_gbc))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5840
           1       0.99      1.00      1.00      5380

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



### 4. Random Forest Classifier

In [97]:
from sklearn.ensemble import RandomForestClassifier

In [98]:
RFC = RandomForestClassifier(random_state=0)
RFC.fit(xv_train, y_train)

In [99]:
pred_rfc = RFC.predict(xv_test)

In [100]:
RFC.score(xv_test, y_test)

0.9887700534759358

In [101]:
print(classification_report(y_test, pred_rfc))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5840
           1       0.99      0.99      0.99      5380

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



# Model Testing With Manual Entry

### News

In [102]:
def output_lable(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Not A Fake News"

def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test["text"].apply(wordopt)
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GBC = GBC.predict(new_xv_test)
    pred_RFC = RFC.predict(new_xv_test)

    return print("\n\nLR Prediction: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction: {}".format(output_lable(pred_LR[0]),
                                                                                                              output_lable(pred_DT[0]),
                                                                                                              output_lable(pred_GBC[0]),
                                                                                                              output_lable(pred_RFC[0])))

In [None]:
  news = str(input())
manual_testing(news)