In [None]:
# ✅ STEP 1: Install Required Libraries
!pip install nltk gradio joblib


Collecting gradio
  Downloading gradio-5.26.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [None]:
# ✅ STEP 2: Import Libraries
import pandas as pd
import numpy as np
import nltk
import re
import string
import gradio as gr
import joblib

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# ✅ STEP 3: Upload and Load Dataset
from google.colab import files
uploaded = files.upload()

# Instead of 'error_bad_lines' and 'warn_bad_lines',
# Use 'on_bad_lines' to handle problematic lines in newer pandas versions.
# This will skip lines that cause parsing errors, potentially losing some data but allowing the code to continue.
fake_df = pd.read_csv('Fake.csv', on_bad_lines='skip', encoding='utf-8') # Changed from error_bad_lines to on_bad_lines
real_df = pd.read_csv('True.csv', on_bad_lines='skip', encoding='utf-8') # Changed from error_bad_lines to on_bad_lines

Saving Fake.csv to Fake.csv
Saving True.csv to True.csv


In [None]:
# Add labels
fake_df['label'] = 0  # Fake
real_df['label'] = 1  # Real

# Combine and shuffle
data = pd.concat([fake_df, real_df], ignore_index=True)
data = data.sample(frac=1).reset_index(drop=True)

# ✅ STEP 4: Preprocess Text
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess(text):
    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    words = text.split()
    words = [stemmer.stem(word) for word in words if word not in stop_words]
    return ' '.join(words)


In [None]:
# Apply preprocessing
data['content'] = data['title'] + " " + data['text']
data['content'] = data['content'].apply(preprocess)

# ✅ STEP 5: Split Data
X = data['content']
y = data['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# ✅ STEP 6: Vectorize
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vect = vectorizer.fit_transform(X_train)
X_test_vect = vectorizer.transform(X_test)

# ✅ STEP 7: Train Classifier
model = MultinomialNB()
model.fit(X_train_vect, y_train)

# ✅ STEP 8: Evaluate
y_pred = model.predict(X_test_vect)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ✅ STEP 9: Save Model and Vectorizer
joblib.dump(model, "fake_news_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")





Accuracy: 0.9327394209354121
              precision    recall  f1-score   support

           0       0.93      0.94      0.94      4737
           1       0.93      0.93      0.93      4243

    accuracy                           0.93      8980
   macro avg       0.93      0.93      0.93      8980
weighted avg       0.93      0.93      0.93      8980



['vectorizer.pkl']

In [None]:
# ✅ STEP 10: Gradio Web App
import gradio as gr
import joblib  # Import joblib for loading the model and vectorizer

# Load the saved model and vectorizer
model = joblib.load("fake_news_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Define the predict_news function
def predict_news(news_text):
    # Preprocess the input text
    processed_text = preprocess(news_text)
    # Vectorize the processed text
    vectorized_text = vectorizer.transform([processed_text])
    # Make the prediction

In [None]:
import joblib  # Use joblib instead of pickle

# Load trained model and vectorizer
with open("fake_news_model.pkl", "rb") as f:
    model = joblib.load(f)  # Use joblib.load()

with open("vectorizer.pkl", "rb") as f:
    vectorizer = joblib.load(f)  # Use joblib.load()

# Example news
news_text = "China announces the successful launch of its new space station module, expanding its space program."

# Preprocess and vectorize
news_vector = vectorizer.transform([news_text])

# Predict
prediction = model.predict(news_vector)

# Output
if prediction[0] == 1:
    print("Real News ✅")
else:
    print("Fake News ❌")

Real News ✅


In [None]:
def predict_news(news):
    input_vec = vectorizer.transform([news])
    prediction = model.predict(input_vec)[0]
    return "🟢 Real News" if prediction == 1 else "🔴 Fake News"

# ✅ Gradio Web Interface
with gr.Blocks() as interface:
    gr.Markdown("<h1 style='text-align: center;'>📰 Fake News Detector By naive_bayes</h1>")
    gr.Markdown("<p style='text-align: center;'>Enter a news article and let AI tell you if it's Real or Fake!</p>")

    with gr.Row():
        news_input = gr.Textbox(label="✏️ Enter News Article", placeholder="Paste news article here...", lines=10)

    with gr.Row():
        submit_btn = gr.Button("🔍 Detect")
        result_output = gr.Textbox(label="📢 Prediction", interactive=False)

    submit_btn.click(fn=predict_news, inputs=news_input, outputs=result_output)

interface.launch(share=True, debug=False)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e44a8519acfa804f05.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:

# ✅ STEP 11: Download Buttons in Colab
from google.colab import files
files.download('fake_news_model.pkl')
files.download('vectorizer.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!pip install huggingface_hub



In [None]:
from huggingface_hub import notebook_login
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from huggingface_hub import HfApi, HfFolder, Repository
from pathlib import Path
import shutil

# Define repo name
repo_name = "fake-news-detector-model"  # or your custom name
model_path = Path(repo_name)
model_path.mkdir(exist_ok=True)

# ✅ Copy your model and vectorizer to the folder
shutil.copy("fake_news_model.pkl", model_path / "fake_news_model.pkl")
shutil.copy("vectorizer.pkl", model_path / "vectorizer.pkl")

PosixPath('fake-news-detector-model/vectorizer.pkl')

In [None]:
from huggingface_hub import create_repo, upload_folder

# Replace 'your-username' with your actual Hugging Face username
create_repo(repo_name, exist_ok=True)

upload_folder(
    folder_path=model_path,
    repo_id="AAFREEN-KAZMI/fake-news-detector-model",  # ✅ Replace with your actual username
    path_in_repo=".",
    repo_type="model"
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

fake_news_model.pkl:   0%|          | 0.00/161k [00:00<?, ?B/s]

vectorizer.pkl:   0%|          | 0.00/180k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/AAFREEN-KAZMI/fake-news-detector-model/commit/2147f228315cf3cfbb5ef704fcd07da780e0e759', commit_message='Upload folder using huggingface_hub', commit_description='', oid='2147f228315cf3cfbb5ef704fcd07da780e0e759', pr_url=None, repo_url=RepoUrl('https://huggingface.co/AAFREEN-KAZMI/fake-news-detector-model', endpoint='https://huggingface.co', repo_type='model', repo_id='AAFREEN-KAZMI/fake-news-detector-model'), pr_revision=None, pr_num=None)

In [1]:
!pip install --upgrade jupyter


Collecting jupyter
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting jupyterlab (from jupyter)
  Downloading jupyterlab-4.4.1-py3-none-any.whl.metadata (16 kB)
Collecting async-lru>=1.0.0 (from jupyterlab->jupyter)
  Downloading async_lru-2.0.5-py3-none-any.whl.metadata (4.5 kB)
Collecting jupyter-lsp>=2.0.0 (from jupyterlab->jupyter)
  Downloading jupyter_lsp-2.2.5-py3-none-any.whl.metadata (1.8 kB)
Collecting jupyter-server<3,>=2.4.0 (from jupyterlab->jupyter)
  Downloading jupyter_server-2.15.0-py3-none-any.whl.metadata (8.4 kB)
Collecting jupyterlab-server<3,>=2.27.1 (from jupyterlab->jupyter)
  Downloading jupyterlab_server-2.27.3-py3-none-any.whl.metadata (5.9 kB)
Collecting jedi>=0.16 (from ipython>=7.23.1->ipykernel->jupyter)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting jupyter-client>=6.1.12 (from ipykernel->jupyter)
  Downloading jupyter_client-7.4.9-py3-none-any.whl.metadata (8.5 kB)
Collecting jupyter-events>=0.11.0 

In [2]:
!pip install --upgrade nbconvert


