# **Fake News Detection**
*(Using Machine Learning, Numpy, Pandas, Matplotlib, Seaborn)*

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
true = pd.read_csv('/content/True (3).csv')
fake = pd.read_csv('/content/Fake.csv')

In [4]:
fake.head(3)

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"


In [5]:
true.head(3)

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"


In [6]:
true['label'] = 1

In [7]:
fake['label'] = 0

In [8]:
news = pd.concat([fake, true], axis=0)

In [9]:
news.head(3)

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0


In [10]:
news.tail(3)

Unnamed: 0,title,text,subject,date,label
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",1
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",1
21416,Indonesia to buy $1.14 billion worth of Russia...,JAKARTA (Reuters) - Indonesia will buy 11 Sukh...,worldnews,"August 22, 2017",1


In [11]:
news.isnull().sum()

Unnamed: 0,0
title,0
text,0
subject,0
date,0
label,0


In [12]:
news = news.drop(['title', 'subject', 'date'], axis=1)

In [13]:
news.head(3)

Unnamed: 0,text,label
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0


In [14]:
news = news.sample(frac=1)

In [15]:
news.head(3)

Unnamed: 0,text,label
14307,BEIJING (Reuters) - China s Foreign Ministry s...,1
4537,NEW YORK (Reuters) - The U.S. Environmental Pr...,1
13517,PARIS (Reuters) - French President Emmanuel Ma...,1


In [16]:
news.reset_index(inplace=True)

In [17]:
news.head()

Unnamed: 0,index,text,label
0,14307,BEIJING (Reuters) - China s Foreign Ministry s...,1
1,4537,NEW YORK (Reuters) - The U.S. Environmental Pr...,1
2,13517,PARIS (Reuters) - French President Emmanuel Ma...,1
3,8680,"On Sunday, publishing company Scholastic annou...",0
4,19192,President Donald Trump s Senior Counselor Kell...,0


In [18]:
news.drop(['index'],axis=1, inplace=True)

In [19]:
news.head()

Unnamed: 0,text,label
0,BEIJING (Reuters) - China s Foreign Ministry s...,1
1,NEW YORK (Reuters) - The U.S. Environmental Pr...,1
2,PARIS (Reuters) - French President Emmanuel Ma...,1
3,"On Sunday, publishing company Scholastic annou...",0
4,President Donald Trump s Senior Counselor Kell...,0


In [20]:
import re

In [21]:
def wordopt(text):
  text = text.lower()

  text = re.sub(r'https?://\S+|www.\.\S+','',text)

  text = re.sub(r'<.*?>', '', text)

  text = re.sub(r'[^\w\s]', '', text)

  text = re.sub(r'\d', '', text)

  text =  re.sub(r'\n', ' ', text)


  return text

In [22]:
news['text'] = news['text'].apply(wordopt)

In [23]:
news['text']

Unnamed: 0,text
0,beijing reuters china s foreign ministry said...
1,new york reuters the us environmental protect...
2,paris reuters french president emmanuel macro...
3,on sunday publishing company scholastic announ...
4,president donald trump s senior counselor kell...
...,...
44893,best known for her roll in the tv show charmed...
44894,berlin reuters turnout in germany s national ...
44895,arizona speaker of the house david gowan r ena...
44896,bangkok reuters authorities in thailand s sou...


In [24]:
x = news['text']
y = news['label']

In [25]:
y

Unnamed: 0,label
0,1
1,1
2,1
3,0
4,0
...,...
44893,0
44894,1
44895,0
44896,1


In [26]:
from sklearn.model_selection import train_test_split

In [28]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [29]:
x_train.shape

(31428,)

In [30]:
x_test.shape

(13470,)

In [34]:
from sklearn.feature_extraction.text import TfidfVectorizer


In [35]:
vectorization = TfidfVectorizer()

In [38]:
xv_train = vectorization.fit_transform(x_train)

In [37]:
xv_test = vectorization.transform(x_test)

In [39]:
xv_train

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 6463778 stored elements and shape (31428, 175113)>

In [40]:
xv_test

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 2716949 stored elements and shape (13470, 175113)>

In [41]:
from sklearn.linear_model import LogisticRegression

In [42]:
lr = LogisticRegression()

In [43]:
lr.fit(xv_train, y_train)

In [44]:
prep_lr = lr.predict(xv_test)

In [56]:
lr.score(xv_test, y_test)


0.987305122494432

In [58]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score

In [59]:
print(classification_report(y_test, prep_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      7046
           1       0.98      0.99      0.99      6424

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



In [61]:
from sklearn.tree import DecisionTreeClassifier

In [64]:
DTC = DecisionTreeClassifier()

In [70]:
DTC.fit(xv_train, y_train)

In [71]:
pred_dtc = DTC.predict(xv_test)

In [66]:
DTC.score(xv_test, y_test)

0.9962138084632517

In [72]:
print(classification_report(y_test, pred_dtc))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7046
           1       1.00      1.00      1.00      6424

    accuracy                           1.00     13470
   macro avg       1.00      1.00      1.00     13470
weighted avg       1.00      1.00      1.00     13470



In [73]:
from sklearn.ensemble import RandomForestClassifier

In [74]:
rfc = RandomForestClassifier()

In [75]:
rfc.fit(xv_train, y_train)

In [77]:
predict_rfc = rfc.predict(xv_test)

In [78]:
rfc.score(xv_test, y_test)

0.9855233853006682

In [80]:
print(classification_report(y_test, predict_rfc))

              precision    recall  f1-score   support

           0       0.99      0.98      0.99      7046
           1       0.98      0.99      0.98      6424

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



In [81]:
from sklearn.ensemble import GradientBoostingClassifier

In [83]:
gbc = GradientBoostingClassifier()

In [85]:
gbc.fit(xv_train, y_train)

In [88]:
print(classification_report(y_test, gbc.predict(xv_test)))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      7046
           1       0.99      1.00      1.00      6424

    accuracy                           1.00     13470
   macro avg       1.00      1.00      1.00     13470
weighted avg       1.00      1.00      1.00     13470



In [89]:
def output_label(n):
  if n==0:
    print("It is a Fake news")
  elif n==1:
    print("It is a Geniune news")

In [95]:
def manual_testing(news):
  testing_news = {"text": [news]}
  new_df_test = pd.DataFrame(testing_news)
  new_df_test['text'] = new_df_test['text'].apply(wordopt)
  new_x_test = new_df_test['text']
  new_xv_test = vectorization.transform(new_x_test)
  pred_lr = lr.predict(new_xv_test)
  pred_gbc = gbc.predict(new_xv_test)
  pred_rfc = rfc.predict(new_xv_test)
  return "\n\nlr Prediction: {} \nGBC Prediction: {} \nRFC Prediction: {}".format(output_label(pred_lr[0]), output_label(pred_gbc[0]), output_label(pred_rfc[0]))

In [100]:
news_article = str(input())

Barcelona Femení started 2026 strongly with a 12–1 victory over Madrid CFF, with standout performances, including four goals from Ewa Pajor. The men’s team is preparing for the Spanish Super Cup final vs. Real Madrid in Jeddah.


In [101]:
manual_testing(news_article)

It is a Fake news
It is a Fake news
It is a Fake news


'\n\nlr Prediction: None \nGBC Prediction: None \nRFC Prediction: None'