In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [4]:
# Read the news dataset from the CSV file in the working directory
# and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='news.csv')
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [5]:
# Features come from the 'text' column; the target is the 'label' column.
X, y = data['text'], data['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Configure the TF-IDF vectorizer: English stop words are removed and
# max_df=0.7 is passed as the document-frequency cutoff.
vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

In [7]:
# Fit the vectorizer on the training texts only and convert those same
# texts to TF-IDF vectors in one step.
X_train_tfidf = vectorizer.fit_transform(raw_documents=X_train)

In [8]:
# Convert the held-out texts to TF-IDF vectors with the already-fitted
# vectorizer (transform only, no re-fitting on the test split).
X_test_tfidf = vectorizer.transform(raw_documents=X_test)

In [9]:
# Create the PassiveAggressiveClassifier with at most 50 passes over the
# training data. random_state is pinned (same seed as the train/test split)
# because the estimator shuffles the training samples by default, so without
# a seed the reported metrics change from run to run.
classifier = PassiveAggressiveClassifier(max_iter=50, random_state=42)

In [10]:
# Train the classifier on the TF-IDF training vectors and their labels
classifier.fit(X=X_train_tfidf, y=y_train)

In [11]:
# Run the trained classifier on the TF-IDF test vectors
y_pred = classifier.predict(X=X_test_tfidf)

In [12]:
# Share of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.9368587213891081


In [13]:
# Confusion matrix of true labels (rows) against predicted labels (columns)
confusion_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(f'Confusion Matrix: {confusion_mat}')

Confusion Matrix: [[588  40]
 [ 40 599]]


In [14]:
# A single headline to classify.
# NOTE(review): the model was trained on the 'text' column, so a short headline
# may be a weaker input than a full article body — confirm this is intended.
news_text = " Pope Francis Just Called Out Donald Trump During His Christmas Speech"

# Vectorize the news text with the same fitted TfidfVectorizer used for training
news_text_vectorized = vectorizer.transform([news_text])

# predict() returns one label per input document; only one was passed in
prediction = classifier.predict(news_text_vectorized)

# The classifier was trained on the string labels in data['label']
# ('FAKE' / 'REAL'), so compare against the string. Comparing with the
# integer 0 is never true and would always report REAL.
if prediction[0] == 'FAKE':
    print("The news is predicted to be FAKE.")
else:
    print("The news is predicted to be REAL.")

The news is predicted to be REAL.
