In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.8.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.5.1 (from gradio)
  Downloading gradio_client-1.5.1-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [None]:
import joblib
import gradio as gr
import re
import requests
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import nltk
import matplotlib.pyplot as plt

# Download the NLTK stop-word corpus; stemming() reads it through
# stopwords.words('english').
nltk.download('stopwords')

# Load the pre-trained model and vectorizer from the current working directory.
# Both files must already exist when this cell runs. In this notebook they are
# provided by the google.colab files.upload() cell further down, so on a fresh
# runtime that cell has to be executed first.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
model = joblib.load('fake_news_model.pkl')  # classifier; used below via predict() and predict_proba()
vectorizer = joblib.load('tfidf_vectorizer.pkl')  # used below via transform(); presumably a fitted TfidfVectorizer

# Stemming function
ps = PorterStemmer()
def stemming(content):
    """Normalise raw text into a space-separated string of stemmed tokens.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lower-cased and split on whitespace, English stop words are
    dropped, and each remaining token is reduced with the Porter stemmer.

    Args:
        content: Raw text to normalise (str).

    Returns:
        str: The stemmed tokens joined by single spaces; an empty string when
        no token survives the filtering.
    """
    # Build the stop-word set once per call. The previous version re-evaluated
    # stopwords.words('english') (a list) for every single token, which made
    # the cost grow with tokens x stop-word-list size.
    stop_words = set(stopwords.words('english'))

    tokens = re.sub('[^a-zA-Z]', ' ', content).lower().split()
    return ' '.join(ps.stem(word) for word in tokens if word not in stop_words)

# Download a news page and pull out the text of its paragraphs
def fetch_news_content(url):
    """Fetch ``url`` and return the text of all of its ``<p>`` elements.

    Args:
        url: Address of the page to download.

    Returns:
        str: On success, the paragraph texts joined by single spaces with
        leading/trailing whitespace removed (may be empty if the page has no
        ``<p>`` text). On any failure, the string
        ``"Error fetching content: <exception>"``; no exception propagates.
    """
    try:
        page = requests.get(url, timeout=10)
        # Turn HTTP error statuses into an exception so they reach the handler below.
        page.raise_for_status()

        document = BeautifulSoup(page.text, 'html.parser')

        # Article bodies are taken to be whatever sits inside <p> tags.
        paragraph_texts = []
        for tag in document.find_all('p'):
            paragraph_texts.append(tag.get_text())
        article_text = ' '.join(paragraph_texts)
    except Exception as e:
        return f"Error fetching content: {e}"
    else:
        return article_text.strip()

# Prediction function that integrates scraping and creates a pie chart
def classify_news_from_url(news_url):
    """Scrape the article at ``news_url``, classify it, and chart the probabilities.

    Args:
        news_url: URL of the news article to classify.

    Returns:
        tuple: ``(message, figure)``.
        On success ``message`` is ``"Fake News"`` or ``"Real News"`` and
        ``figure`` is a matplotlib ``Figure`` holding a two-slice pie chart of
        the predicted probabilities. When the page cannot be fetched or yields
        no text, ``message`` is a failure notice and ``figure`` is ``None``.
    """
    news_content = fetch_news_content(news_url)

    # fetch_news_content reports failures as a string that starts with this
    # exact prefix. Match the prefix instead of searching for "Error" anywhere
    # in the text, otherwise every article that merely mentions the word
    # "Error" would be rejected as a fetch failure.
    if not news_content or news_content.startswith("Error fetching content:"):
        return "Failed to fetch or process the content. Check the URL.", None

    # Apply the stemming step before vectorising the text.
    processed_text = stemming(news_content)

    input_data = vectorizer.transform([processed_text])
    prediction = model.predict(input_data)[0]
    probabilities = model.predict_proba(input_data)[0]

    result = "Fake News" if prediction == 1 else "Real News"

    # NOTE(review): assumes predict_proba columns are ordered [real (0), fake (1)];
    # confirm against model.classes_.
    labels = ['Real News', 'Fake News']
    sizes = [probabilities[0], probabilities[1]]
    colors = ['green', 'red']
    explode = (0.1, 0)  # Pull out the first slice (Real News)

    # Draw on an explicit figure/axes pair rather than pyplot's implicit
    # "current figure", so overlapping requests cannot draw onto each other.
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
    ax.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle.

    # Release the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object stays usable for rendering.
    plt.close(fig)

    return result, fig

# Gradio Interface
# Layout: a narrow left column with the URL box, the two buttons and the textual
# decision; a wider right column with the probability pie chart.
with gr.Blocks() as iface:
    gr.Markdown(
        """
        # Fake News Detector with Real-Time Web Scraping
        Enter a URL to fetch and classify news as **real** or **fake**. This tool scrapes the article content and uses a pre-trained machine learning model to classify the news.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):

            url_input = gr.Textbox(
                lines=1,
                placeholder="Enter the news article URL here...",
                label="News URL",
                elem_id="news-url"
            )

            # elem_id values are the hooks used by the CSS rules assigned below.
            submit_button = gr.Button("Submit", elem_id="submit-btn")
            clear_button = gr.Button("Clear", elem_id="clear-btn")

            result_output = gr.Textbox(
                label="Decision",
                placeholder="Result will be displayed here...",
                elem_id="result"
            )

        with gr.Column(scale=2):

            pie_chart = gr.Plot(label="Prediction Breakdown (Pie Chart)")

    # Submit: classify the URL and fill in both the decision text and the chart.
    submit_button.click(fn=classify_news_from_url, inputs=url_input, outputs=[result_output, pie_chart])

    # Clear: reset the URL box as well as the decision and the chart, so no
    # stale URL is left behind next to an empty result.
    clear_button.click(
        fn=lambda: ("", "", None),
        inputs=[],
        outputs=[url_input, result_output, pie_chart],
    )

    # Button styling, keyed on the elem_id values set above.
    # NOTE(review): this assigns the css attribute after construction; confirm the
    # installed Gradio version honours it (the documented route is a css argument).
    iface.css = """
        #submit-btn, #clear-btn {
            background-color: green;
            color: white;
        }
        #submit-btn:hover, #clear-btn:hover {
            background-color: darkgreen;
        }
    """

# Start the app server.
iface.launch()








[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://8d1b8e9d56934846ad.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:

# Colab-only module used here to upload files into the runtime.
from google.colab import files

# Upload files
# The recorded output of this cell shows fake_news_model.pkl and
# tfidf_vectorizer.pkl being saved into the working directory. These are the
# two files the app cell loads with joblib.load, so on a fresh runtime this
# cell has to run before that one.
# `uploaded` holds the return value of files.upload(); it is not read anywhere
# in the visible notebook.
uploaded = files.upload()

Saving fake_news_model.pkl to fake_news_model.pkl
Saving tfidf_vectorizer.pkl to tfidf_vectorizer.pkl
