<a href="https://colab.research.google.com/github/trisha-shiny/Fake-news-detection/blob/main/4_Fake_news_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
!pip install scikit-learn pandas numpy



In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.svm import LinearSVC
from sklearn import metrics

In [8]:
# Load the two source files from the current working directory:
# one CSV per class (fake articles, real articles).
fake, true = (pd.read_csv(csv_name) for csv_name in ("Fake.csv", "True.csv"))

In [9]:
# Tag every row with the class of the file it came from; this constant
# column becomes the prediction target once the frames are combined.
fake["label"], true["label"] = "FAKE", "REAL"

In [10]:
# Stack the fake and real articles row-wise into one frame, discarding the
# per-file indices in favour of a fresh 0..n-1 index.
df = pd.concat([fake, true], axis=0, ignore_index=True)

# Sanity check: overall size and a peek at the first few rows.
print("Dataset Shape:", df.shape)
print(df.head())

Dataset Shape: (44898, 5)
                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date label  
0  December 31, 2017  FAKE  
1  December 31, 2017  FAKE  
2  December 30, 2017  FAKE  
3  December 29, 2017  FAKE  
4  December 25, 2017  FAKE  


In [11]:
# Features are the article body text; the target is the FAKE/REAL label.
X, y = df["text"], df["label"]

In [12]:
# Hold out 20% of the articles for evaluation. The fixed seed makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# TF-IDF features: drop English stop words and ignore terms that appear in
# more than 70% of the documents.
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)

# Learn the vocabulary/IDF weights on the training split only, then reuse
# them to encode the test split (no fitting on held-out data).
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

In [14]:
# Train a Passive-Aggressive linear classifier on the TF-IDF features.
# The estimator shuffles the training data between epochs, so without a
# fixed `random_state` the learned weights (and the reported metrics) vary
# from run to run. Seed it with the same value used for the train/test split
# so that Restart & Run All reproduces the numbers.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=42)
pac.fit(tfidf_train, y_train)

# Predicted labels for the held-out articles.
y_pred_pac = pac.predict(tfidf_test)

In [15]:
# Score the Passive-Aggressive predictions on the held-out set:
# plain accuracy plus the support-weighted average of per-class F1.
accuracy_pac, f1_pac = (
    metrics.accuracy_score(y_test, y_pred_pac),
    metrics.f1_score(y_test, y_pred_pac, average="weighted"),
)

In [16]:
# Report the Passive-Aggressive scores, one item per line (accuracy as a
# percentage); the trailing "\n" leaves a blank line after the report.
print(
    "Passive Aggressive Classifier",
    f"Accuracy: {accuracy_pac*100:.2f}%",
    f"F1 Score: {f1_pac:.2f}\n",
    sep="\n",
)

Passive Aggressive Classifier
Accuracy: 99.43%
F1 Score: 0.99



In [17]:
# Train a linear SVM on the same TF-IDF features for comparison.
# LinearSVC's coordinate-descent solver can draw on a random number generator
# (when the dual formulation is used), so pin `random_state` to keep the
# fitted model and the metrics below reproducible across runs.
svm = LinearSVC(random_state=42)
svm.fit(tfidf_train, y_train)

# Predicted labels for the held-out articles.
y_pred_svm = svm.predict(tfidf_test)

In [18]:
# Score the LinearSVC predictions on the held-out set:
# plain accuracy plus the support-weighted average of per-class F1.
accuracy_svm, f1_svm = (
    metrics.accuracy_score(y_test, y_pred_svm),
    metrics.f1_score(y_test, y_pred_svm, average="weighted"),
)

In [19]:
# Report the LinearSVC scores, one item per line (accuracy as a percentage).
print(
    "Support Vector Machine (LinearSVC)",
    f"Accuracy: {accuracy_svm*100:.2f}%",
    f"F1 Score: {f1_svm:.2f}",
    sep="\n",
)

Support Vector Machine (LinearSVC)
Accuracy: 99.44%
F1 Score: 0.99


In [20]:
# Confusion matrix for the LinearSVC predictions: rows are true labels,
# columns are predicted labels. No explicit `labels=` is passed, so classes
# appear in sorted order — presumably FAKE first, then REAL (verify if the
# label set changes).
cm = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred_svm)
print("Confusion Matrix (SVM):\n", cm)

Confusion Matrix (SVM):
 [[4707   26]
 [  24 4223]]
