In [1]:
import pandas as pd
import numpy as np

In [2]:
df_fake=pd.read_csv("Fake.csv")
df_true=pd.read_csv("True.csv")

In [3]:
df_fake.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23481 entries, 0 to 23480
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    23481 non-null  object
 1   text     23481 non-null  object
 2   subject  23481 non-null  object
 3   date     23481 non-null  object
dtypes: object(4)
memory usage: 733.9+ KB


In [4]:
df_true.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21417 entries, 0 to 21416
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    21417 non-null  object
 1   text     21417 non-null  object
 2   subject  21417 non-null  object
 3   date     21417 non-null  object
dtypes: object(4)
memory usage: 669.4+ KB


In [5]:
df_fake['label']='fake'
df_true['label']='real'

In [6]:
df=pd.concat([df_fake,df_true],axis=0)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 44898 entries, 0 to 21416
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    44898 non-null  object
 1   text     44898 non-null  object
 2   subject  44898 non-null  object
 3   date     44898 non-null  object
 4   label    44898 non-null  object
dtypes: object(5)
memory usage: 2.1+ MB


In [8]:
df

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",fake
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",fake
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",fake
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",fake
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",fake
...,...,...,...,...,...
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017",real
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017",real
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",real
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",real


In [9]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(df['text'],df['label'],test_size=0.2,random_state=42)

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vectorizer=TfidfVectorizer(stop_words='english',max_df=0.75)
train_tfidf=tfidf_vectorizer.fit_transform(x_train)
test_tfidf=tfidf_vectorizer.transform(x_test)

In [11]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

classifiers=[('Logistic Regression',LogisticRegression()),
            ('MultiNomial Naives Bayes',MultinomialNB()),
            ('Decision Tree Classifier',DecisionTreeClassifier()),
            ('Random Forest Clasifier',RandomForestClassifier(max_depth=2,random_state=42))]

pipelines=[]

for classifiername,classifier in classifiers:
  pipeline=Pipeline([('classifier:',classifier)])
  pipelines.append((classifiername,pipeline))

In [12]:
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix

for classifier_name,pipeline in pipelines:
  print(f'Training and Testing:{classifier_name}')
  pipeline.fit(train_tfidf,y_train)
  y_prediction=pipeline.predict(test_tfidf)
  score=accuracy_score(y_test,y_prediction)
  confuse_matrix=confusion_matrix(y_test,y_prediction,labels=['fake','real'])
  f1score=f1_score(y_test,y_prediction,pos_label="real")
  print(f'Accuracy score:{round(score*100,2)}%')
  print(f'confusion Matrix:{confuse_matrix}')
  print(f'F1_score:{round(f1score*100,2)}%')

Training and Testing:Logistic Regression
Accuracy score:98.65%
confusion Matrix:[[4667   66]
 [  55 4192]]
F1_score:98.58%
Training and Testing:MultiNomial Naives Bayes
Accuracy score:93.44%
confusion Matrix:[[4461  272]
 [ 317 3930]]
F1_score:93.03%
Training and Testing:Decision Tree Classifier
Accuracy score:99.52%
confusion Matrix:[[4709   24]
 [  19 4228]]
F1_score:99.49%
Training and Testing:Random Forest Clasifier
Accuracy score:78.44%
confusion Matrix:[[4689   44]
 [1892 2355]]
F1_score:70.87%


In [14]:
News=input("Enter the News:")
for classifier_name,pipeline in pipelines:
  pipeline.fit(train_tfidf,y_train)
  tests_tfidf=tfidf_vectorizer.transform([News])
  print(f'{classifier_name:} prediction')
  prediction=pipeline.predict(tests_tfidf)
  print(prediction)

Enter the News: Python programming language that returns a copy of the string where all occurrences of a substring are replaced with another substring.
Logistic Regression prediction
['fake']
MultiNomial Naives Bayes prediction
['fake']
Decision Tree Classifier prediction
['fake']
Random Forest Clasifier prediction
['fake']
