# Fake News Detection

This notebook contains code that takes data from a csv file and checks whether the journalised news is fake or not based on various parameters. 

In [18]:
    import numpy as np
    import pandas as pd
    import itertools
    from sklearn.model_selection import train_test_split
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import PassiveAggressiveClassifier
    from sklearn.metrics import accuracy_score, confusion_matrix

In [19]:
    #Read the data
    df=pd.read_csv('https://raw.githubusercontent.com/sajalsuhane/fakenewsdetection/master/news.csv')
    #Get shape and head
    df.shape
    df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 131,Unnamed: 132,Unnamed: 133,Unnamed: 134,Unnamed: 135,Unnamed: 136,Unnamed: 137,Unnamed: 138,Unnamed: 139,Unnamed: 140
0,384,Debate Goals for the Democratic Candidates,Five candidates will be on stage Tuesday at th...,REAL,,,,,,,...,,,,,,,,,,
1,4492,'Spinning up as we speak': Email shows Pentago...,As the attack on the U.S. consulate in Benghaz...,REAL,,,,,,,...,,,,,,,,,,
2,2432,A Case Study On Why The Obamacare Lawsuit Is B...,"The Bentley administration, the Alabama legisl...",REAL,,,,,,,...,,,,,,,,,,
3,5307,Trump Proudly Declares: Most Of The People I’v...,Trump Proudly Declares: Most Of The People I’v...,FAKE,,,,,,,...,,,,,,,,,,
4,8484,Dems sue GOP over Trump's 'rigged' complaints,Dems sue GOP over Trump's 'rigged' complaints ...,FAKE,,,,,,,...,,,,,,,,,,


In [20]:
df.drop(df.iloc[:, 5:141], inplace = True, axis = 1)

In [21]:
df.head()


Unnamed: 0.1,Unnamed: 0,title,text,label,Unnamed: 4
0,384,Debate Goals for the Democratic Candidates,Five candidates will be on stage Tuesday at th...,REAL,
1,4492,'Spinning up as we speak': Email shows Pentago...,As the attack on the U.S. consulate in Benghaz...,REAL,
2,2432,A Case Study On Why The Obamacare Lawsuit Is B...,"The Bentley administration, the Alabama legisl...",REAL,
3,5307,Trump Proudly Declares: Most Of The People I’v...,Trump Proudly Declares: Most Of The People I’v...,FAKE,
4,8484,Dems sue GOP over Trump's 'rigged' complaints,Dems sue GOP over Trump's 'rigged' complaints ...,FAKE,


In [22]:
df.drop(['Unnamed: 4'],axis=1)

Unnamed: 0.1,Unnamed: 0,title,text,label
0,384,Debate Goals for the Democratic Candidates,Five candidates will be on stage Tuesday at th...,REAL
1,4492,'Spinning up as we speak': Email shows Pentago...,As the attack on the U.S. consulate in Benghaz...,REAL
2,2432,A Case Study On Why The Obamacare Lawsuit Is B...,"The Bentley administration, the Alabama legisl...",REAL
3,5307,Trump Proudly Declares: Most Of The People I’v...,Trump Proudly Declares: Most Of The People I’v...,FAKE
4,8484,Dems sue GOP over Trump's 'rigged' complaints,Dems sue GOP over Trump's 'rigged' complaints ...,FAKE
5,9948,US Airstrikes on Iraqi Army Slowing Advance on...,"By Gordon Duff, Senior Editor on October 29, 2...",FAKE
6,2916,Iran letter blowback startles GOP,The Manhattan billionaire made the announcemen...,REAL
7,6058,"Clinton Camp Desperate, Russia Trains for WWIII","Clinton Camp Desperate, Russia Trains for WWII...",FAKE
8,4713,Donald Trump's GOP civil war,"Panama City, Florida (CNN) Donald Trump is tea...",REAL
9,8767,Comment on Tainted Measles Vaccine Kills More ...,Photos Credits: Social Media The so called “ o...,FAKE


In [23]:
labels=df.label
labels.head()

0    REAL
1    REAL
2    REAL
3    FAKE
4    FAKE
Name: label, dtype: object

In [26]:
df['text']= df.text.astype(str)
x_train,x_test,y_train,y_test=train_test_split(df['text'], labels, test_size=0.2, random_state=7)
labels.head()

0    REAL
1    REAL
2    REAL
3    FAKE
4    FAKE
Name: label, dtype: object

In [27]:
    #DataFlair - Initialize a TfidfVectorizer
    tfidf_vectorizer=TfidfVectorizer(stop_words='english', max_df=0.7)
    #DataFlair - Fit and transform train set, transform test set
    tfidf_train=tfidf_vectorizer.fit_transform(x_train) 
    tfidf_test=tfidf_vectorizer.transform(x_test)

In [36]:
    #DataFlair - Initialize a PassiveAggressiveClassifier
    pac=PassiveAggressiveClassifier(max_iter=50)
    pac.fit(tfidf_train,y_train)
    #DataFlair - Predict on the test set and calculate accuracy
    y_pred=pac.predict(tfidf_test)
    score=accuracy_score(y_test,y_pred)
   
    print(f'Accuracy: {round(score*100,2)}%')

Accuracy: 88.41%


In [40]:
#DataFlair - Build confusion matrix
if score>=0.85:
    print("The prediction is most likely correct. Please check the csv file for results")
elif score>=0.60 and score<0.85 :
    print("The prediction may be correct. Please check the csv file for results")
else:
    print("Please re-run the model for better accuracy results")
    
confusion_matrix(y_test,y_pred, labels=['FAKE','REAL'])

The prediction is most likely correct. Please check the csv file for results


array([[478,  30],
       [ 32, 492]])

In [31]:
y_pred

array(['FAKE', 'FAKE', 'FAKE', ..., 'FAKE', 'REAL', 'REAL'],
      dtype='<U1091')