Fake News Detection

In [1]:
# Importing required libraries and machine learning model

import numpy as np
import pandas as pd
import itertools as it
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Load the news dataset from the CSV file in the working directory

df = pd.read_csv(filepath_or_buffer = 'news.csv')

In [3]:
# Preview the first three rows of the loaded frame
df.head(n = 3)

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL


In [4]:
# The 'label' column is the prediction target
target = df['label']

In [5]:
# Hold out 30% of the articles for testing and train on the remaining 70%.
# The article body ('text') is the only feature; random_state fixes the split.

x_train, x_test, y_train, y_test = train_test_split(
    df['text'],
    target,
    test_size = 0.3,
    random_state = 7,
)

In [6]:
# Build the TF-IDF vectorizer: English stop words are removed and terms that
# occur in more than 70% of the documents are ignored
tfidf_vectorizer = TfidfVectorizer(
    stop_words = 'english',
    max_df = 0.7,
)

# Fit on the training texts only, then reuse the fitted vectorizer to
# transform the test texts
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

In [7]:
# Initializing a PassiveAggressiveClassifier.
# random_state is pinned (same seed as the train/test split) because the
# classifier shuffles the training data between epochs by default; without a
# seed the fitted model, and therefore the accuracy printed below, changes
# from one Restart & Run All to the next.
pac = PassiveAggressiveClassifier(max_iter = 50, random_state = 7)
pac.fit(tfidf_train,y_train)

# Predicting on the test set and calculating accuracy
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_test,y_pred)
print(f'Accuracy: {round(score * 100, 2)} %')

Accuracy: 92.21 %


In [8]:
# Confusion matrix on the test set; rows are true labels and columns are
# predicted labels, both ordered FAKE then REAL
confusion_matrix(
    y_true = y_test,
    y_pred = y_pred,
    labels = ['FAKE', 'REAL'],
)

array([[895,  79],
       [ 69, 858]], dtype=int64)

In [9]:
# Cross-check the accuracy from the confusion matrix itself instead of from
# numbers copied by hand out of a previous output, so the check stays valid
# when the data, split or model changes.
cm = confusion_matrix(y_test, y_pred, labels = ['FAKE', 'REAL'])

# Correct predictions sit on the diagonal: accuracy = trace / total count.
# float() keeps the displayed value a plain Python float.
float(cm.trace() / cm.sum())

0.9221462388216728