# Fake News Detector - Realtime Inference
This notebook covers the realtime inference workflow for the model built on the ISOT Fake News Detection dataset, available on Kaggle.

Kaggle link: https://www.kaggle.com/datasets/clmentbisaillon/fake-and-real-news-dataset

In [7]:
# Step 1: Install required packages
# Only pandas is installed here; nltk and joblib (imported in the next cell) are
# assumed to already exist in the runtime — NOTE(review): confirm, or add them here.
!pip install pandas --quiet
# Download the NLTK resources named on the command line (punkt_tab, wordnet, stopwords).
# stdout is discarded, yet the downloader's progress lines still appear in the cell
# output — presumably they are written to stderr.
!python -m nltk.downloader punkt_tab wordnet stopwords > /dev/null

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [8]:
import re
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import joblib

## Process individual text entries

In [9]:
# Lazily-built NLTK resources shared by every preprocess_text() call.
_STOP_WORDS = None
_LEMMATIZER = None


def _text_resources():
    """Return (stop-word set, lemmatizer), creating each one on first use only."""
    global _STOP_WORDS, _LEMMATIZER
    if _STOP_WORDS is None:
        # stopwords.words() is only called once instead of once per request.
        _STOP_WORDS = frozenset(stopwords.words('english'))
    if _LEMMATIZER is None:
        _LEMMATIZER = WordNetLemmatizer()
    return _STOP_WORDS, _LEMMATIZER


def preprocess_text(text):
    """Normalise one raw text entry into a space-joined string of lemmas.

    Steps, in order: lowercase, drop every character that is not a letter,
    whitespace or one of ``. , ! ?``, strip URL-looking runs, tokenize,
    drop English stop words, lemmatize, and drop tokens of length 1.

    Args:
        text: The raw text. Anything that is not a ``str`` is treated as
            missing input.

    Returns:
        The cleaned tokens joined by single spaces, or ``""`` when ``text``
        is not a string or nothing survives the filtering.
    """
    if not isinstance(text, str):
        return ""

    # Lowercase
    text = text.lower()

    # Remove special characters/numbers except basic punctuation
    text = re.sub(r'[^a-zA-Z\s.,!?]', '', text)

    # Remove URLs
    # NOTE(review): ':' and '/' were already stripped by the substitution above,
    # so the `https?://` alternative can never match here; only `www.` runs are
    # removed. The order is kept on purpose: changing it would change the text
    # the loaded model receives. Confirm against the training-time
    # preprocessing before reordering.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)

    # Tokenize
    tokens = word_tokenize(text)

    # Remove stopwords, lemmatize, then drop short lemmas (length < 2)
    stop_words, lemmatizer = _text_resources()
    lemmas = (lemmatizer.lemmatize(word) for word in tokens if word not in stop_words)
    return ' '.join(lemma for lemma in lemmas if len(lemma) > 1)

## Load the model

In [10]:
model = joblib.load("fake_news_model.pkl")

## Prediction workflow

In [13]:
def _invalid_result(error):
    """Build the response returned when the input cannot be scored."""
    return {
        "label": "Invalid",
        "confidence": "0%",
        "error": error
    }


def predict(title, text):
    """Classify one article as "Fake" or "Real" with the loaded model.

    The title and body are stripped, joined with a space, cleaned with
    ``preprocess_text`` and passed to ``model`` as a single-document batch.

    Args:
        title: Article headline; must be a ``str``.
        text: Article body; must be a ``str``.

    Returns:
        On success, a dict with ``"label"`` ("Fake" when the predicted class
        equals 1, otherwise "Real"), ``"confidence"`` (largest class
        probability as a percentage string with two decimals) and
        ``"probabilities"`` (built-in floats rounded to 4 places).
        On bad input, a dict with ``"label": "Invalid"``,
        ``"confidence": "0%"`` and an ``"error"`` message.
    """
    if not isinstance(title, str) or not isinstance(text, str):
        return _invalid_result("Both title and text must be strings.")

    combined = f"{title.strip()} {text.strip()}".strip()
    if not combined:
        return _invalid_result("Combined input is empty after stripping.")

    processed = preprocess_text(combined)
    if not processed:
        return _invalid_result("Text could not be processed meaningfully.")

    batch = [processed]
    pred = model.predict(batch)[0]
    proba = model.predict_proba(batch)[0]

    # Index 0 is reported as "Real" and index 1 as "Fake".
    # NOTE(review): this assumes the model's class order is [0, 1] with 1 == fake;
    # verify against the training notebook.
    # Cast to built-in floats so the result does not carry NumPy scalar objects
    # (they print as `np.float64(...)`).
    return {
        "label": "Fake" if pred == 1 else "Real",
        "confidence": f"{float(max(proba)) * 100:.2f}%",
        "probabilities": {
            "Real": round(float(proba[0]), 4),
            "Fake": round(float(proba[1]), 4)
        }
    }

## Test run

In [16]:
if __name__ == "__main__":
    # Smoke test: score one hand-written headline/body pair and print the raw result dict.
    sample_title, sample_text = "NASA confirms", "moon base construction starts in 2026."
    result = predict(title=sample_title, text=sample_text)
    print(result)

{'label': 'Fake', 'confidence': '58.23%', 'probabilities': {'Real': np.float64(0.4177), 'Fake': np.float64(0.5823)}}
