In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Load the dataset
data = pd.read_csv("fake_or_real_news.csv")
print(data.head())


   Unnamed: 0                                              title  \
0        8476                       You Can Smell Hillary’s Fear   
1       10294  Watch The Exact Moment Paul Ryan Committed Pol...   
2        3608        Kerry to go to Paris in gesture of sympathy   
3       10142  Bernie supporters on Twitter erupt in anger ag...   
4         875   The Battle of New York: Why This Primary Matters   

                                                text label  
0  Daniel Greenfield, a Shillman Journalism Fello...  FAKE  
1  Google Pinterest Digg Linkedin Reddit Stumbleu...  FAKE  
2  U.S. Secretary of State John F. Kerry said Mon...  REAL  
3  — Kaydee King (@KaydeeKing) November 9, 2016 T...  FAKE  
4  It's primary day in New York and front-runners...  REAL  


In [2]:
# Drop rows with missing values
data = data.dropna(subset=['title', 'label'])


In [3]:
# Text preprocessing
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
data['title'] = data['title'].apply(lambda x: " ".join(lemmatizer.lemmatize(word) for word in x.split() if word.isalpha() and word not in stop_words))


In [4]:
# Features and labels
x = np.array(data["title"])
y = np.array(data["label"])


In [5]:
# Train-test split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)


In [6]:
cv = CountVectorizer()
x = cv.fit_transform(x)


xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

# Create and train the model
model = MultinomialNB()
model.fit(xtrain, ytrain)


In [7]:
# Evaluate the model
y_pred = model.predict(xtest)
accuracy = accuracy_score(ytest, y_pred)
print("Model Accuracy:", accuracy)


Model Accuracy: 0.7900552486187845


In [8]:
# Make predictions on new headlines
news_headline_1 = "Plants release oxygen and take back carbon dioxide"
data_1 = cv.transform([news_headline_1]).toarray()
prediction_1 = model.predict(data_1)
print("Prediction for News Headline 1:", prediction_1)


Prediction for News Headline 1: ['REAL']


In [9]:
news_headline_2 = " A massive AI-integrated social network spy tool was planned by Google for the Hillary campaign. "
data_2 = cv.transform([news_headline_2]).toarray()
prediction_2 = model.predict(data_2)
print("Prediction for News Headline 2:", prediction_2)


Prediction for News Headline 2: ['FAKE']
