In [1]:
import os
API_KEY = os.getenv("NEWS_API_KEY")

In [2]:
import requests

def get_live_news(query):
    url = f"https://newsapi.org/v2/everything?q={query}&apiKey={API_KEY}"
    response = requests.get(url)
    data = response.json()

    articles = data["articles"]
    news_list = []

    for article in articles[:5]:   # sirf 5 news
        news_list.append(article["title"])

    return news_list

In [3]:
def check_live_news(topic):
    news_list = get_live_news(topic)

    for news in news_list:
        result = predict_news(news)
        print("\nNews:", news)
        print("Prediction:", result)

In [4]:
import pandas as pd
#load datasets
true = pd.read_csv("True.csv")
fake = pd.read_csv("Fake.csv")

#add labels
true['label'] = 1 #real news
fake['label'] = 0 #fake news

#merge datasets
data = pd.concat([true, fake])
# keep required columns
data = data[['text', 'label']]

# show first 5 rows
data.head()

Unnamed: 0,text,label
0,WASHINGTON (Reuters) - The head of a conservat...,1
1,WASHINGTON (Reuters) - Transgender people will...,1
2,WASHINGTON (Reuters) - The special counsel inv...,1
3,WASHINGTON (Reuters) - Trump campaign adviser ...,1
4,SEATTLE/WASHINGTON (Reuters) - President Donal...,1


In [5]:
import re
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z]', ' ', text)   # âœ… correct regex
    text = re.sub(r'\s+', ' ', text)
    words = text.split()
    words = [w for w in words if w not in stop_words]
    return ' '.join(words)

# APPLY CLEANING (ONLY ONCE)
data['clean_text'] = data['text'].apply(clean_text)

data.head()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\kirti\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,text,label,clean_text
0,WASHINGTON (Reuters) - The head of a conservat...,1,washington reuters head conservative republica...
1,WASHINGTON (Reuters) - Transgender people will...,1,washington reuters transgender people allowed ...
2,WASHINGTON (Reuters) - The special counsel inv...,1,washington reuters special counsel investigati...
3,WASHINGTON (Reuters) - Trump campaign adviser ...,1,washington reuters trump campaign adviser geor...
4,SEATTLE/WASHINGTON (Reuters) - President Donal...,1,seattle washington reuters president donald tr...


In [6]:
# remove empty & null rows
data = data[data['clean_text'].str.strip() != ""]
data = data.dropna(subset=['clean_text'])
data = data.reset_index(drop=True)

# verify
data['clean_text'].str.strip().eq("").sum()

np.int64(0)

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

X = data['clean_text']
y = data['label']

vectorizer = TfidfVectorizer(max_features=5000)
X_tfidf = vectorizer.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_tfidf, y, test_size=0.2, random_state=42
)

print("Training samples:", X_train.shape)
print("Testing samples:", X_test.shape)

Training samples: (35412, 5000)
Testing samples: (8854, 5000)


In [8]:
print("Rows:", data.shape)
print("Columns:", data.columns)

Rows: (44266, 3)
Columns: Index(['text', 'label', 'clean_text'], dtype='object')


In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9888186130562457
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4568
           1       0.98      0.99      0.99      4286

    accuracy                           0.99      8854
   macro avg       0.99      0.99      0.99      8854
weighted avg       0.99      0.99      0.99      8854



In [10]:
def predict_news(news_text):
    cleaned = clean_text(news_text)
    vector = vectorizer.transform([cleaned])
    prediction = model.predict(vector)[0]

    if prediction == 1:
        return "REAL NEWS ðŸŸ¢"
    else:
        return "FAKE NEWS ðŸ”´"


# Example test
news = "India successfully launches new communication satellite"
print(predict_news(news))

FAKE NEWS ðŸ”´


In [11]:
print(predict_news("Aliens landed in Delhi last night"))

FAKE NEWS ðŸ”´


In [12]:
check_live_news("India")


News: India express train kills seven elephants crossing tracks
Prediction: FAKE NEWS ðŸ”´

News: India's solar boom faces a hidden waste problem
Prediction: FAKE NEWS ðŸ”´

News: The Dollar Is Facing an End to Its Dominance
Prediction: FAKE NEWS ðŸ”´

News: After a breakup, I traveled for a year to learn how to be alone again. It led me to move to a country I'd never imagined calling home.
Prediction: FAKE NEWS ðŸ”´

News: Amazon is letting visa workers stranded in India work remotely â€” as long as they don't code or talk to customers
Prediction: FAKE NEWS ðŸ”´


In [13]:
# Final user input test
user_news = input("Enter a news headline: ")
print(predict_news(user_news))

Enter a news headline:  Fake News Detection System using Machine Learning and NLP is arrived


FAKE NEWS ðŸ”´
