In [1]:
#live fake News Detection from NDTV Feed
#Part 1:Import
import pandas as pd
import numpy as np
import re
import string
import joblib
import feedparser
import requests
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\niles\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\niles\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [2]:
#Part 2: Preprocessing Function
def clean_text(text):
    text = text.lower()
    text = re.sub(r"https?://\S+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    words = text.split()
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    cleaned = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return ' '.join(cleaned)

In [3]:
#Part 3: Loading Fake News Dataset
def load_local_dataset():
    path = "fake_news_dataset.csv"
    df = pd.read_csv(path)
    df = df[['text', 'label']]
    df.dropna(inplace = True)
    df['label'] = df['label'].str.upper()
    return df

In [4]:
#Part 4: Fetch Live News from NDTV RSS Feed 
def fetch_ndtv_news():
    feed_url = 'http://feeds.feedburner.com/ndtvnews-top-stories'
    feed = feedparser.parse(feed_url)
    news_items = []
    for entry in feed.entries:
        title = entry.title
        summary = entry.summary if 'summary' in entry else title
        combined = f"{title}.{summary}"
        news_items.append({'text' : combined, 'label' : 'REAL'})
    return pd.DataFrame(news_items)

In [5]:
#Part 5: Combine and Clean
def prepare_data():
    local_df = load_local_dataset()
    ndtv_df = fetch_ndtv_news()
    combined = pd.concat([local_df, ndtv_df], ignore_index = True)
    combined['clean_text'] = combined['text'].apply(clean_text)
    return combined.dropna()

In [6]:
#Train and Save Model
def train_and_save_model(df):
    vectorizer = TfidfVectorizer(max_features=10000,ngram_range=(1,2))
    X = vectorizer.fit_transform(df['clean_text'])
    y = df['label']
    X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=y,test_size=0.2,random_state=42)
    model = LogisticRegression(max_iter=1000,class_weight='balanced')
    model.fit(X_train,y_train)
    joblib.dump(model,"updated_model.pkl")
    joblib.dump(vectorizer,"updated_vectorizer.pkl")
    print("Model and vectorizer saved as 'updated_model.pkl' and 'updated_vectorizer.pkl'")
    print("Evaluation:")
    from sklearn.metrics import classification_report, accuracy_score
    y_pred = model.predict(X_test)
    print("Accuracy:",accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))

In [7]:
#Calling Main
if __name__ =="__main__":
    print("Preparing data...")
    combined_df = prepare_data()
    print("Training model...")
    train_and_save_model(combined_df)

Preparing data...
Training model...
Model and vectorizer saved as 'updated_model.pkl' and 'updated_vectorizer.pkl'
Evaluation:
Accuracy: 1.0
              precision    recall  f1-score   support

        FAKE       1.00      1.00      1.00       120
        REAL       1.00      1.00      1.00       124

    accuracy                           1.00       244
   macro avg       1.00      1.00      1.00       244
weighted avg       1.00      1.00      1.00       244



In [8]:
#data model
model=joblib.load("updated_model.pkl")
vectorizer=joblib.load("updated_vectorizer.pkl")

def clean_text(text):
    text = text.lower()
    text = re.sub(r"https?://\S+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    words = text.split()
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    cleaned = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return ' '.join(cleaned)

In [9]:
#Fetch NDTV news
def fetch_ndtv_news():
    feed_url = 'http://feeds.feedburner.com/ndtvnews-top-stories'
    feed = feedparser.parse(feed_url)
    news_items = []
    for entry in feed.entries:
        title = entry.title
        summary = entry.summary if 'summary' in entry else title
        link = entry.link
        text = f"{title}.{summary}"
        news_items.append({'title':title,'summary':summary,'link':link,'full_text':text})
    return news_items

In [19]:
# Analyze news items
def analyze_news(news_items):
    result =[]
    for item in news_items:
        cleaned = clean_text(item['full_text'])
        vect = vectorizer.transform([cleaned])
        prediction = model.predict(vect)[0]
        proba = model.predict_proba(vect)[0]
        score = int(proba.max()*100)
        label = prediction if score >=70 else "uncertain"
        result.append({
        'title':item['title'],
        'link':item['link'],
        'label':label,
        'confidence':score,
        'immunization_score':100 - score
        })
    return result

In [23]:
# Main execution
if __name__ =='__main__':
    live_news = fetch_ndtv_news()
    analyzed = analyze_news(live_news)
    for item in analyzed:
        print(f"Title:{item['title']}")
        print(f"Link:{item['link']}")
        print(f"Prediction:{item['label']}")
        print(f"Confidence:{item['confidence']}%")
        print(f"Immunization Score:{item['immunization_score']}")
        print("-"*80)

Title:Why US Health Secretary RFK Jr Fired Entire Vaccine Advisory Panel
Link:https://www.ndtv.com/world-news/why-us-health-secretary-rfk-jr-fired-entire-vaccine-advisory-panel-8630105#publisher=newsstand
Prediction:REAL
Confidence:71%
Immunization Score:29
--------------------------------------------------------------------------------
Title:Goa Minister Must Apologise Publicly Else We Will Go On Strike, Warns Doctor
Link:https://www.ndtv.com/india-news/goa-doctor-rudresh-kuttikar-says-vishwajit-rane-must-issue-a-public-apology-immediately-8629795#publisher=newsstand
Prediction:uncertain
Confidence:65%
Immunization Score:35
--------------------------------------------------------------------------------
Title:Trump Sends Thousands Of More Troops To LA As Immigration Protests Intensify
Link:https://www.ndtv.com/world-news/los-angeles-news-trump-sends-thousands-of-more-troops-to-la-as-immigration-protests-intensify-8629672#publisher=newsstand
Prediction:REAL
Confidence:71%
Immunization 