In [1]:
import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings("ignore")
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier

In [2]:
fake_n=pd.read_csv("fake.csv")
fake_n.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [3]:
true_n=pd.read_csv("true.csv")
true_n.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [4]:
fake_n["class"]=0
true_n["class"]=1

In [5]:
true_n.head(2)

Unnamed: 0,title,text,subject,date,class
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1


In [6]:
fake_n.head(2)

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0


In [7]:
merge_df=pd.concat([true_n,fake_n])

In [8]:
merge_df.head(2)

Unnamed: 0,title,text,subject,date,class
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1


In [9]:
df =merge_df.drop(["title", "subject","date"], axis = 1)

In [10]:
df.head(2)

Unnamed: 0,text,class
0,WASHINGTON (Reuters) - The head of a conservat...,1
1,WASHINGTON (Reuters) - Transgender people will...,1


In [11]:
df.isnull().sum()

text     0
class    0
dtype: int64

In [12]:
df = df.sample(frac = 1)

In [13]:
df.head(2)

Unnamed: 0,text,class
17152,It s time to stop hitting the snooze button Am...,0
3650,A Republican member of the Electoral College r...,0


In [14]:
df.reset_index(inplace = True)
df.drop(["index"], axis = 1, inplace = True)

In [15]:
df.head(2)

Unnamed: 0,text,class
0,It s time to stop hitting the snooze button Am...,0
1,A Republican member of the Electoral College r...,0


## Preprocessing data

In [16]:
def preprocess(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W"," ",text) 
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)    
    return text
df["text"] = df["text"].apply(preprocess)

In [17]:
x=df["text"]
y=df["class"]

## splitting dataset

In [18]:
x_train,x_test,y_train,y_test=train_test_split(x,y,stratify=y,test_size=0.3)

In [19]:
print(x_train.shape,y_train.shape)
print(x_test.shape,y_test.shape)

(31428,) (31428,)
(13470,) (13470,)


## TFIDF Vectorizer

In [20]:
tfidf=TfidfVectorizer()
x_train_tfidf=tfidf.fit_transform(x_train)
print(type(x_train_tfidf))

<class 'scipy.sparse._csr.csr_matrix'>


In [21]:
x_test_tfidf=tfidf.transform(x_test)
print(type(x_train_tfidf))
print(type(x_test_tfidf))

<class 'scipy.sparse._csr.csr_matrix'>
<class 'scipy.sparse._csr.csr_matrix'>


## Applying Logistic Regression 

In [22]:
lr=LogisticRegression()
lr.fit(x_train_tfidf,y_train)

LogisticRegression()

In [23]:
predict_lr=lr.predict(x_test_tfidf)

In [24]:
lr.score(x_test_tfidf,y_test)

0.985003711952487

In [26]:
result=classification_report(y_test,predict_lr)

In [27]:
print(result)

              precision    recall  f1-score   support

           0       0.99      0.98      0.99      7045
           1       0.98      0.99      0.98      6425

    accuracy                           0.99     13470
   macro avg       0.98      0.99      0.98     13470
weighted avg       0.99      0.99      0.99     13470



## Bag of words

In [28]:
bow=CountVectorizer()
x_train_bow=bow.fit_transform(x_train)
x_test_bow=bow.transform(x_test)
print(type(x_train_bow))
print(type(x_test_bow))

<class 'scipy.sparse._csr.csr_matrix'>
<class 'scipy.sparse._csr.csr_matrix'>


In [29]:
lr.fit(x_train_bow,y_train)

LogisticRegression()

In [30]:
lr.score(x_test_tfidf,y_test)

0.5230141054194506

In [31]:
predict=lr.predict(x_test_tfidf)

In [32]:
print(classification_report(y_test,predict_lr))

              precision    recall  f1-score   support

           0       0.52      1.00      0.69      7045
           1       0.00      0.00      0.00      6425

    accuracy                           0.52     13470
   macro avg       0.26      0.50      0.34     13470
weighted avg       0.27      0.52      0.36     13470



#### from above observation bow giving very low score than tfidf vectorizer

## Decision Tree Classifier

In [83]:
model=DecisionTreeClassifier()

In [84]:
model.fit(x_train_tfidf,y_train)

DecisionTreeClassifier()

In [85]:
predict_dtc=model.predict(x_test_tfidf)

In [86]:
model.score(x_test_tfidf,y_test)

0.9953971789161099

In [87]:
print(classification_report(y_test,predict_dtc))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7045
           1       1.00      1.00      1.00      6425

    accuracy                           1.00     13470
   macro avg       1.00      1.00      1.00     13470
weighted avg       1.00      1.00      1.00     13470



## Gradient Boosting Classifier

In [39]:
clf=GradientBoostingClassifier(random_state=42)

In [40]:
clf.fit(x_train_tfidf,y_train)

GradientBoostingClassifier(random_state=42)

In [41]:
predict_gbc=clf.predict(x_test_tfidf)

In [42]:
clf.score(x_test_tfidf,y_test)

0.9948775055679288

In [43]:
print(classification_report(y_test,predict_gbc))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      7045
           1       0.99      1.00      0.99      6425

    accuracy                           0.99     13470
   macro avg       0.99      1.00      0.99     13470
weighted avg       0.99      0.99      0.99     13470



## RandomForestClassifier

In [44]:
rfc=RandomForestClassifier(random_state=42)

In [45]:
rfc.fit(x_train_tfidf,y_train)
predict_rfc=rfc.predict(x_test_tfidf)

In [46]:
rfc.score(x_test_tfidf,y_test)

0.9899034892353378

In [48]:
print(classification_report(y_test,predict_rfc))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      7045
           1       0.99      0.99      0.99      6425

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



## Model Testing

In [60]:
def news_label(n):
    if n==0:
        return "fake news"
    elif n==1:
        return "True News"

In [90]:
def testing(n):
    testing_news={"text":[n]}
    new_def_test=pd.DataFrame(testing_news)
    new_def_test["text"]=new_def_test["text"].apply(preprocess) 
    new_x_test=new_def_test["text"]
    new_xv_test=tfidf.transform(new_x_test)
    pred_lr=lr.predict(new_xv_test)
    pred_dt=model.predict(new_xv_test)
    pred_gbc=clf.predict(new_xv_test)
    pred_rfc=rfc.predict(new_xv_test)
    return print("\n\nLR Prediction: {} \nDT Prediction: {} \nGBC Prediction: {} \nRFC Prediction: {}".format(news_label(predict_lr[0]),news_label(predict_dtc[0]),news_label(predict_gbc[0]),news_label(predict_rfc[0])))

In [91]:
news = str(input())
testing(news)

BRUSSELS (Reuters) - NATO allies on Tuesday welcomed President Donald Trump s decision to commit more forces to Afghanistan, as part of a new U.S. strategy he said would require more troops and funding from America s partners. Having run for the White House last year on a pledge to withdraw swiftly from Afghanistan, Trump reversed course on Monday and promised a stepped-up military campaign against  Taliban insurgents, saying:  Our troops will fight to win .  U.S. officials said he had signed off on plans to send about 4,000 more U.S. troops to add to the roughly 8,400 now deployed in Afghanistan. But his speech did not define benchmarks for successfully ending the war that began with the U.S.-led invasion of Afghanistan in 2001, and which he acknowledged had required an   extraordinary sacrifice of blood and treasure .  We will ask our NATO allies and global partners to support our new strategy, with additional troops and funding increases in line with our own. We are confident they w

In [93]:
news = str(input())
testing(news)

SAO PAULO (Reuters) - Cesar Mata Pires, the owner and co-founder of Brazilian engineering conglomerate OAS SA, one of the largest companies involved in Brazil s corruption scandal, died on Tuesday. He was 68. Mata Pires died of a heart attack while taking a morning walk in an upscale district of S o Paulo, where OAS is based, a person with direct knowledge of the matter said. Efforts to contact his family were unsuccessful. OAS declined to comment. The son of a wealthy cattle rancher in the northeastern state of Bahia, Mata Pires  links to politicians were central to the expansion of OAS, which became Brazil s No. 4 builder earlier this decade, people familiar with his career told Reuters last year. His big break came when he befriended Antonio Carlos Magalh es, a popular politician who was Bahia governor several times, and eventually married his daughter Tereza. Brazilians joked that OAS stood for  Obras Arranjadas pelo Sogro  - or  Work Arranged by the Father-In-Law.   After years of