In [1]:
!pip install pandas scikit-learn nltk


Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 24.1.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import os

import pandas as pd

# Dataset locations.
# Set the FAKE_NEWS_DATA_DIR environment variable to a folder that contains
# Fake.csv and True.csv to run this notebook on another machine. When it is
# unset, the original extraction paths below are used unchanged.
_data_dir = os.environ.get("FAKE_NEWS_DATA_DIR")
if _data_dir:
    FAKE_CSV_PATH = os.path.join(_data_dir, "Fake.csv")
    TRUE_CSV_PATH = os.path.join(_data_dir, "True.csv")
else:
    FAKE_CSV_PATH = r"C:\Users\HP\AppData\Local\Temp\78c65583-c983-4d1f-897e-aed3ac65a087_archive (1).zip.087\Fake.csv"
    TRUE_CSV_PATH = r"C:\Users\HP\AppData\Local\Temp\620f1269-7936-4ec6-b036-11180d4d3485_archive (1).zip.485\True.csv"

# Load both datasets and tag them: 0 = fake article, 1 = true article.
fake_df = pd.read_csv(FAKE_CSV_PATH).assign(label=0)
true_df = pd.read_csv(TRUE_CSV_PATH).assign(label=1)

# Combine datasets, keep only the columns used below, and drop rows with
# missing values. Built as one chain (no inplace=True) so nothing is mutated
# in place and re-running the cell always produces the same frame.
data = (
    pd.concat([fake_df, true_df], ignore_index=True)
    .loc[:, ['title', 'text', 'label']]
    .dropna()
)

print("Dataset size:", data.shape)
data.head()


Dataset size: (44898, 3)


Unnamed: 0,title,text,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,0


In [4]:
import nltk
import re
from sklearn.model_selection import train_test_split
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Make sure the NLTK stop-word corpus is available locally.
nltk.download('stopwords')

# Shared helpers used by preprocess(): a Porter stemmer, and the English
# stop-word list held in a set for membership tests.
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))

def preprocess(text):
    """Normalise raw text into a space-joined string of stemmed tokens.

    Steps: lowercase the text, strip every character that is neither a word
    character nor whitespace, split on whitespace, drop tokens found in
    ``stop_words`` and stem the remaining ones with ``stemmer``.

    Args:
        text: Raw text to clean. Non-string values (e.g. ``None`` or a NaN
            coming from a missing cell) are treated as empty input.

    Returns:
        str: The cleaned tokens joined by single spaces; ``''`` when nothing
        remains.
    """
    if not isinstance(text, str):
        # A missing value would otherwise fail on .lower() with AttributeError.
        return ''
    cleaned = re.sub(r'[^\w\s]', '', text.lower())  # Remove punctuation
    # NOTE(review): punctuation is stripped before the stop-word check, so a
    # contraction such as "don't" is compared as "dont" -- confirm this is the
    # intended behaviour for the stop-word list in use.
    return ' '.join(
        stemmer.stem(word) for word in cleaned.split() if word not in stop_words
    )

# Clean every article body once and store the result next to the raw text.
data['processed_text'] = data['text'].map(preprocess)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [7]:
# Features are the cleaned article bodies; targets are the 0/1 labels.
X, y = data['processed_text'], data['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score

# TF-IDF features (vocabulary capped at 5000 terms) feeding a
# logistic-regression classifier, wrapped in one pipeline so the vectorizer
# is fitted on the training split only.
tfidf_step = ('tfidf', TfidfVectorizer(max_features=5000))
clf_step = ('clf', LogisticRegression())
model = Pipeline(steps=[tfidf_step, clf_step])

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# FOR ACCURACY
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("✅ Accuracy:", accuracy)
print("\n📊 Classification Report:\n", report)


✅ Accuracy: 0.9866369710467706

📊 Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      4733
           1       0.98      0.99      0.99      4247

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [14]:
def predict_news(news_text):
    """Classify a piece of news text with the trained ``model``.

    The text is first passed through ``preprocess`` (the same cleaning used
    to build the training column) and then through the fitted pipeline.

    Args:
        news_text: Raw news text to classify.

    Returns:
        str: ``"Real News"`` when the predicted label is 1, otherwise
        ``"Fake News"``.
    """
    processed = preprocess(news_text)
    # The pipeline takes a collection of documents, hence the one-element list.
    prediction = model.predict([processed])
    # The "Real News" label previously carried a stray leading space, which
    # made the two labels inconsistent when printed or compared.
    return "Real News" if prediction[0] == 1 else "Fake News"

# Example inputs for a quick manual check of the classifier, printed in order.
sample_texts = [
    "The prime minister announced a new plan for education reform.",
    "NASA says aliens helped build the pyramids in Egypt!",
    "The Reserve Bank of India has decided to keep the repo rate unchanged at 6.5% during its monetary policy meeting held this week. The decision aims to maintain inflation",
    "Prime Minister Narendra Modi on Monday inaugurated the new Parliament building in New Delhi. The event was attended by several cabinet ministers, dignitaries, and spiritual leaders. The Prime Minister emphasized that the new Parliament will become a symbol of democratic strength and India's growing global presence. The building is equipped with modern facilities and aims to support the legislative needs of the country for the next several decades.",
]
for sample_text in sample_texts:
    print(predict_news(sample_text))

Fake News
Fake News
Fake News
 Real News
