In [25]:
import numpy as np
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [26]:
# Load the news dataset from a CSV file in the working directory.
df = pd.read_csv("news.csv")
# Print the (rows, columns) shape explicitly: Jupyter only echoes the LAST
# expression of a cell, so a bare `df.shape` in the middle is silently dropped.
print(df.shape)
# Show the full text of the first article as the cell's output.
df["text"][0]



In [27]:
# Pull out the target column (the FAKE/REAL tag of each article) and
# preview its first five entries.
labels = df["label"]
labels.head(5)

0    FAKE
1    FAKE
2    REAL
3    FAKE
4    REAL
Name: label, dtype: object

In [28]:
# Hold out 20% of the articles for evaluation; the fixed seed keeps the
# partition identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    df["text"],
    labels,
    random_state=7,
    test_size=0.2,
)

In [29]:
# Turn raw article text into TF-IDF features, ignoring English stop words
# and any term that occurs in more than 70% of the documents.
tfidf_vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words="english",
)
# Learn the vocabulary/IDF weights on the training split only ...
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
# ... and reuse that fitted vocabulary for the held-out split.
tfidf_test = tfidf_vectorizer.transform(x_test)

In [30]:
# Train a passive-aggressive linear classifier on the TF-IDF features.
# The estimator shuffles the training data between epochs by default, so a
# fixed random_state is needed for the reported accuracy to be reproducible
# on a fresh kernel (same seed as the train/test split above in this cell's
# inputs: 7).
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tfidf_train, y_train)

# Score the model on the held-out split and report accuracy as a percentage.
y_pred = pac.predict(tfidf_test)
score = accuracy_score(y_test, y_pred)
print(f"Accuracy: {round(score*100,2)}%")

Accuracy: 92.82%


In [31]:
# Confusion matrix on the held-out split, with classes ordered FAKE, REAL
# (scikit-learn convention: rows are true classes, columns are predictions).
confusion_matrix(y_true=y_test, y_pred=y_pred, labels=["FAKE", "REAL"])

array([[591,  47],
       [ 44, 585]])

In [35]:
# Classify a single, hand-written headline with the trained model.
# NOTE(review): "SNASA’s" looks like a typo for "NASA’s"; left unchanged here,
# but as written that token is probably not in the fitted vocabulary — confirm.
random_sentence = [
    "SNASA’s James Webb Space Telescope Detects Traces of Water Vapor on Exoplanet 110 Light-Years Away, Providing Clues About Potential Habitability"
]
# Reuse the vectorizer fitted on the training data so features line up.
tfidf_random = tfidf_vectorizer.transform(random_sentence)
prediction = pac.predict(tfidf_random)
# The model was trained on string labels ("FAKE"/"REAL"), so the prediction is
# a string. The previous comparison against the integer 1 was always False and
# therefore printed "Real" for every input.
print("Fake" if prediction[0] == "FAKE" else "Real")

Real
