Importing the dependencies


In [39]:
import numpy as np
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [40]:
import csv

def read_csv_robust(filepath, encoding='utf-8', lineterminator='\n'):
    """Reads a CSV file line by line and handles potential parsing errors."""
    data = []
    with open(filepath, 'r', encoding=encoding, errors='ignore') as f:
        reader = csv.reader(f, lineterminator=lineterminator)
        for i, row in enumerate(reader):
            try:
                data.append(row)
            except Exception as e:
                print(f"Error reading row {i+1}: {e}")
                # Depending on the error and data, you might try to clean the row
                # or skip it: continue
    return data

# Try reading the files using the robust function
try:
    true_data = read_csv_robust('/content/True.csv', encoding='latin-1', lineterminator='\n')
    fake_data = read_csv_robust('/content/Fake.csv', encoding='latin-1', lineterminator='\n')

    # Convert the lists of rows back into DataFrames
    df_true = pd.DataFrame(true_data[1:], columns=true_data[0])
    df_fake = pd.DataFrame(fake_data[1:], columns=fake_data[0])

    print("Files read successfully.")
    display(df_true.head())
    display(df_fake.head())

except Exception as e:
    print(f"An error occurred during the robust reading process: {e}")

Files read successfully.


Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Yearâ...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obamaâs Na...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [41]:
df_true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [42]:
df_fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Yearâ...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obamaâs Na...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


Both true and Fake data set have missing output column

In [43]:
df_true['lable'] = 1
df_fake['lable'] = 0

In [44]:
df_true

Unnamed: 0,title,text,subject,date,lable
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1
...,...,...,...,...,...
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017",1
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017",1
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",1
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",1


Combining both the data set together

In [45]:
df_combined = pd.concat([df_true, df_fake], axis=0, ignore_index=True)

In [46]:
df_combined

Unnamed: 0,title,text,subject,date,lable
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1
...,...,...,...,...,...
44893,McPain: John McCain Furious That Iran Treated ...,21st Century Wire says As 21WIRE reported earl...,Middle-east,"January 16, 2016",0
44894,JUSTICE? Yahoo Settles E-mail Privacy Class-ac...,21st Century Wire says It s a familiar theme. ...,Middle-east,"January 16, 2016",0
44895,Sunnistan: US and Allied âSafe Zoneâ Plan ...,Patrick Henningsen 21st Century WireRemember ...,Middle-east,"January 15, 2016",0
44896,How to Blow $700 Million: Al Jazeera America F...,21st Century Wire says Al Jazeera America will...,Middle-east,"January 14, 2016",0


In [47]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [48]:
print(stopwords.words('english'))

['a', 'about', 'above', 'after', 'again', 'against', 'ain', 'all', 'am', 'an', 'and', 'any', 'are', 'aren', "aren't", 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can', 'couldn', "couldn't", 'd', 'did', 'didn', "didn't", 'do', 'does', 'doesn', "doesn't", 'doing', 'don', "don't", 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadn', "hadn't", 'has', 'hasn', "hasn't", 'have', 'haven', "haven't", 'having', 'he', "he'd", "he'll", 'her', 'here', 'hers', 'herself', "he's", 'him', 'himself', 'his', 'how', 'i', "i'd", 'if', "i'll", "i'm", 'in', 'into', 'is', 'isn', "isn't", 'it', "it'd", "it'll", "it's", 'its', 'itself', "i've", 'just', 'll', 'm', 'ma', 'me', 'mightn', "mightn't", 'more', 'most', 'mustn', "mustn't", 'my', 'myself', 'needn', "needn't", 'no', 'nor', 'not', 'now', 'o', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 're', 's', 'same', 'shan', "shan't", 'she

**Data Preprocessing**

In [49]:
df_combined.shape

(44898, 5)

In [50]:
df_combined.sample(5)

Unnamed: 0,title,text,subject,date,lable
1642,Republicans plan healthcare vote; Obama and TV...,WASHINGTON (Reuters) - Senate Republicans anno...,politicsNews,"September 20, 2017",1
40102,FBI DIRECTOR CONFIRMS Hillaryâs Worst Nightm...,Be careful what you ask for Watch: FBI Direct...,left-news,"May 5, 2017",0
11281,Israel ambassador asks to meet New Zealand pop...,WELLINGTON (Reuters) - Israel s ambassador to ...,worldnews,"December 27, 2017",1
1580,New Jersey mayor resigns after admitting corru...,"NEW YORK (Reuters) - The mayor of Paterson, Ne...",politicsNews,"September 25, 2017",1
35628,WHY PICTURE OF CHE-OBAMA Was Much Worse Than A...,"Oh the irony, of our Nobel-Peace-Prize-Recipie...",politics,"Mar 25, 2016",0


In [51]:
#counting the number of missing values in the dataset

df_combined.isnull().sum()


Unnamed: 0,0
title,0
text,0
subject,0
date,0
lable,0


In [52]:
#replacing the null value by empty strings

news_dataset = df_combined.fillna('')

In [53]:
news_dataset = news_dataset.sample(frac=1, random_state=100).reset_index(drop=True)


In [54]:
news_dataset.head(10)

Unnamed: 0,title,text,subject,date,lable
0,"China, Pakistan to look at including Afghanist...",BEIJING (Reuters) - China and Pakistan will lo...,worldnews,"December 26, 2017",1
1,10 REASONS A VOTE RECOUNT Is A Really Bad Idea...,Watch Jill Stein explain why she s working to ...,left-news,"Nov 27, 2016",0
2,DEBBIE WASSERMAN SCHULTZ Planned To Continue P...,Democratic Rep. Debbie Wasserman Schultz seemi...,politics,"Jul 29, 2017",0
3,FINALLY: Sheriff Joe Ruled In Contempt Over R...,A Maricopa Country judge has finally stood up ...,News,"May 14, 2016",0
4,Obama says he does not think FBI's Comey is tr...,WASHINGTON (Reuters) - President Barack Obama ...,politicsNews,"November 5, 2016",1
5,TRUMP WAS RIGHT! Why The Heck Is This Liberal ...,If you watch CNBC at all you know John Harwood...,politics,"Oct 28, 2015",0
6,LATE NIGHT HOST Goes Low In Anti-Trump Rant Wi...,COLBERT LANGUAGE WARNING! This late night ho...,politics,"May 2, 2017",0
7,South Korea's Moon asks Russia to continue sup...,"VLADIVOSTOK, Russia (Reuters) - South Korean P...",worldnews,"September 7, 2017",1
8,YOUâLL NEVER BELIEVE WHICH REPUBLICAN JUST C...,What kind of leader speaks like this? It s obv...,politics,"Aug 27, 2015",0
9,Alabama Senate election winner due to be certi...,WASHINGTON (Reuters) - The outcome of Alabamaâ...,politicsNews,"November 18, 2017",1


In [55]:
X = news_dataset.drop(columns = ['subject', 'date', 'lable'], axis= 1)
Y = news_dataset['lable']

In [56]:
print(X)
print(Y)

                                                   title  \
0      China, Pakistan to look at including Afghanist...   
1      10 REASONS A VOTE RECOUNT Is A Really Bad Idea...   
2      DEBBIE WASSERMAN SCHULTZ Planned To Continue P...   
3       FINALLY: Sheriff Joe Ruled In Contempt Over R...   
4      Obama says he does not think FBI's Comey is tr...   
...                                                  ...   
44893  Hungary's Jobbik supports EU deepening with vo...   
44894  Trump aides hope win on taxes will stem slide ...   
44895  South Africa's Zuma says influence-peddling in...   
44896  Iran's Rouhani says foreign interference in Sy...   
44897  GOVERNMENT GONE WILD: NY Man Return From Hospi...   

                                                    text  
0      BEIJING (Reuters) - China and Pakistan will lo...  
1      Watch Jill Stein explain why she s working to ...  
2      Democratic Rep. Debbie Wasserman Schultz seemi...  
3      A Maricopa Country judge has finally

Stemming
i.e-- reducing words to its Root word


In [57]:
port_stem = PorterStemmer()

def stemming(content):
    stemmed_content = re.sub('[^a-zA-Z]',' ', content)
    stemmed_content = stemmed_content.lower()
    stemmed_content = stemmed_content.split()
    stemmed_content = [port_stem.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
    stemmed_content = ' '.join(stemmed_content)
    return stemmed_content

In [58]:
news_dataset['content'] = news_dataset['title'] + ' ' + news_dataset['text']
news_dataset['content'] = news_dataset['content'].apply(stemming)

In [59]:
print(news_dataset['content'])

0        china pakistan look includ afghanistan billion...
1        reason vote recount realli bad idea america wa...
2        debbi wasserman schultz plan continu pay musli...
3        final sheriff joe rule contempt racial profil ...
4        obama say think fbi comey tri influenc elect w...
                               ...                        
44893    hungari jobbik support eu deepen voter bless b...
44894    trump aid hope win tax stem slide poll number ...
44895    south africa zuma say influenc peddl govern in...
44896    iran rouhani say foreign interfer syria must e...
44897    govern gone wild ny man return hospit find hom...
Name: content, Length: 44898, dtype: object


In [61]:
#seperating the data and the lables

X = news_dataset['content'].values
Y = news_dataset['lable'].values

In [62]:
print(X)

['china pakistan look includ afghanistan billion econom corridor beij reuter china pakistan look extend billion china pakistan econom corridor afghanistan chines foreign minist wang yi said tuesday part china ambiti belt road plan link china asia europ beyond china tri posit help parti promot talk pakistan afghanistan uneasi neighbor ever sinc pakistan independ tie poison recent year afghan accus pakistan support taliban insurg fight u back kabul order limit influenc old rival india afghanistan pakistan deni say want see peac stabl afghanistan speak first trilater meet foreign minist china pakistan afghanistan wang said china hope econom corridor could benefit whole region act impetu develop afghanistan urgent need develop improv peopl live hope join inter connect initi wang told report announc pakistan afghanistan agre mend strain relat china pakistan will look afghanistan basi win win mutual benefici principl use appropri mean extend china pakistan econom corridor afghanistan ad coul

In [63]:
#converting textual data to the numerical data

vectorizer = TfidfVectorizer()
vectorizer.fit(X)

X = vectorizer.transform(X)

**Train Test Split**

In [64]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, stratify=Y, random_state=2)

**Training the model**

In [66]:
model = LogisticRegression()

In [67]:
model.fit(X_train, Y_train)

**Evaluation**



Accuracy Score

In [68]:
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

In [69]:
print('Accuracy score of the training data : ', training_data_accuracy)

Accuracy score of the training data :  0.9917311654323737


In [70]:
#accuracy score on test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)

In [71]:
print('Accuracy score of the test data : ', test_data_accuracy)

Accuracy score of the test data :  0.9861915367483296


**Making a predictive system**

In [72]:
X_new = X_test[3]

prediction = model.predict(X_new)
print(prediction)

if (prediction[0]==0):
  print('The news is Fake')
else:
  print('The news is True')

[0]
The news is Fake


In [74]:
print(Y_test[3])

0
