In [1]:
import numpy as np                                                  #importing necessary libraries and functionalities.
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.linear_model import PassiveAggressiveClassifier 



In [2]:
# Read the news CSV into a DataFrame, report its (rows, columns) shape, and
# leave the frame as the cell's last expression so it is rendered as a table.
# NOTE(review): this is an absolute path on one machine; adjust it when running elsewhere.
news_csv_path = "C:/Users/ASUS/Desktop/news.csv"
df = pd.read_csv(news_csv_path)
print(df.shape)
df

(6335, 4)


Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
...,...,...,...,...
6330,4490,State Department says it can't find emails fro...,The State Department told the Republican Natio...,REAL
6331,8062,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,FAKE
6332,8622,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,FAKE
6333,4021,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",REAL


In [3]:
# Take the target column and print how many articles carry each label value.
labels = df["label"]
label_counts = labels.value_counts()
print(label_counts)


REAL    3171
FAKE    3164
Name: label, dtype: int64


In [4]:
# Split article text and labels into train/test parts: 20% is held out for
# evaluation, and the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df["text"],
    labels,
    test_size=0.2,
    random_state=7,
)

In [5]:
# TF-IDF featurizer configured to drop English stop words and to ignore terms
# whose document frequency exceeds 70% of the documents (max_df=0.7).
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words="english",
)

# The vocabulary is learned from the training text only; the test text is
# transformed with the already-fitted vectorizer.
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

In [6]:
# Train a Passive-Aggressive classifier on the TF-IDF training features.
# random_state is pinned because the estimator shuffles the training data
# between epochs by default; without a seed the fitted weights, and therefore
# the accuracy and confusion matrix reported below, change from run to run.
pac = PassiveAggressiveClassifier(max_iter=300, random_state=7)
pac.fit(tfidf_train, y_train)

# Predict on the held-out split and report accuracy as a percentage.
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_test, y_pred)
print(f'Accuracy:{round(score*100,2)}%')

Accuracy:92.66%


In [7]:
# Confusion matrix of the test predictions. Rows are the true classes and
# columns the predicted classes, both ordered FAKE first, then REAL.
confusion_matrix(y_true=y_test, y_pred=y_pred, labels=['FAKE', 'REAL'])

array([[589,  49],
       [ 44, 585]], dtype=int64)

In [8]:
def fake_news_det(news):
    """Classify one piece of news text with the in-memory model and print the result.

    The text is wrapped in a one-element list, vectorized with the fitted
    module-level ``tfidf_vectorizer`` and passed to ``pac.predict``. The
    prediction is printed; nothing is returned.
    """
    features = tfidf_vectorizer.transform([news])
    print(pac.predict(features))

In [9]:
# Example 1: an excerpt that resembles a REAL article from the dataset.
# The apostrophe in "Sunday’s" was previously stored as the mis-decoded sequence
# "â€™", which the vectorizer tokenizes into a spurious term instead of "sunday".
fake_news_det(
    "U.S. Secretary of State John F. Kerry said Monday that he will stop in Paris "
    "later this week, amid criticism that no top American officials attended "
    "Sunday’s unity march against terrorism.Kerry said he expects to arrive in "
    "Paris Thursday evening, as he heads home after a week abroad. He said he "
    "will fly to France at the conclusion of a series of meetings scheduled for "
    "Thursday in Sofia, Bulgaria. He plans to meet the next day with Foreign "
    "Minister Laurent Fabius and President Francois Hollan"
)

['REAL']


In [10]:
# Example 2: a short opinionated excerpt.
# The apostrophe in "she’s" was previously stored as the mis-decoded sequence
# "â€™", which the vectorizer tokenizes into a spurious term.
fake_news_det(
    "Go to Article President Barack Obama has been campaigning hard for the woman "
    "who is supposedly going to extend his legacy four more years. The only "
    "problem with stumping for Hillary Clinton, however, is she’s not exactly a "
    "candidate easy to get too enthused about."
)

['FAKE']


In [11]:
import pickle

# Serialize the trained classifier to model.pkl. The context manager ensures the
# file handle is flushed and closed even if pickle.dump raises.
# NOTE(review): only the classifier is saved; predicting in a fresh session also
# needs the fitted tfidf_vectorizer, which is not persisted here.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(pac, model_file)

In [12]:
# Read the pickled classifier back from model.pkl; the context manager closes
# the file as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code, so only load files you created yourself.
with open('model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [13]:
def fake_news_det1(news):
    """Print the prediction that ``loaded_model`` makes for a single news text.

    The text is placed in a one-element list, vectorized with the fitted
    module-level ``tfidf_vectorizer``, and scored by ``loaded_model`` (the
    unpickled classifier). The prediction is printed; nothing is returned.
    """
    batch = [news]
    label = loaded_model.predict(tfidf_vectorizer.transform(batch))
    print(label)