In [None]:
# Importing necessary libraries for data handling, machine learning, and Gradio interface


# import pandas as pd
# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score
# import gradio as gr


In [1]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
#  We create our data for this model. we'll create a simple dataset of fake and real news articles.


data = {'news': [
        "The government passed a new healthcare bill.",
        "Aliens have landed on Earth and taken over the White House.",
        "Stock market crashes due to tech sector slump.",
        "Celebrities are secretly immortal and control the world.",
        "Scientists discover a cure for common cold.",
        "New species of fish found in the Pacific Ocean.",
        "NASA plans to send humans to Mars by 2030.",
        "Eating chocolate makes you live longer, studies show.",
        "Government announces tax cuts for small businesses.",
        "Dinosaurs were never extinct, they live underground.",
        "New technology helps blind people see using AI-powered glasses.",
        "Moon landing was faked and filmed in a studio.",
        "Climate change is accelerating faster than expected.",
        "Cows can talk but humans can't understand them.",
        "Breakthrough in quantum computing changes the future of tech."
    ],
    'label': [1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1]} 
     # 1 = Real, 0 = Fake

df = pd.DataFrame(data)
df.head() 
 # Display the first few rows


Unnamed: 0,news,label
0,The government passed a new healthcare bill.,1
1,Aliens have landed on Earth and taken over the...,0
2,Stock market crashes due to tech sector slump.,1
3,Celebrities are secretly immortal and control ...,0
4,Scientists discover a cure for common cold.,1


In [7]:
#  Preprocessing the data. We'll convert the text data into a format that the machine learning model can understand using TF-IDF Vectorization.


# Importing the TF-IDF Vectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Converting the news text into numerical features using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
x = vectorizer.fit_transform(df['news'])

# Labels (Real or Fake)
y = df['label']


In [6]:
y


0     1
1     0
2     1
3     0
4     1
5     1
6     1
7     0
8     1
9     0
10    1
11    0
12    1
13    0
14    1
Name: label, dtype: int64

In [8]:

# Splitting the data into training and testing sets. This helps us evaluate how well our model performs on unseen data

# Importing the train_test_split function
from sklearn.model_selection import train_test_split

# Splitting the dataset into training and testing sets (80% training, 20% testing)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)


In [9]:

#  Building and training the model. We'll use a simple Logistic Regression model for this task.

# Importing the Logistic Regression model
from sklearn.linear_model import LogisticRegression

# Creating an instance of the model
model = LogisticRegression()

# Training the model on the training data
model.fit(x_train, y_train)


In [11]:
#  evaluate the model to see how well it performs on the test set. We will predict the labels for the test data and calculate the accuracy.


# Importing the accuracy_score function
from sklearn.metrics import accuracy_score

# Making predictions on the test set
y_pred = model.predict(x_test)

# Calculating the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 33.33%


In [12]:
#  To improve accuracy we use different model lets see what accuracy we get

# Importing the SVC (Support Vector Classifier) model
from sklearn.svm import SVC

# Creating an instance of the SVM model
svm_model = SVC(kernel='linear')

# Training the SVM model on the training data
svm_model.fit(x_train, y_train)

# Making predictions on the test set
y_pred_svm = svm_model.predict(x_test)

# Calculating the accuracy of the SVM model
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Model Accuracy: {accuracy_svm * 100:.2f}%")


SVM Model Accuracy: 33.33%


In [13]:
# pip install gradio


Collecting gradio
  Downloading gradio-4.44.1-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.19.3 (from gradio)
  Downloading huggingface_hub-0.25.1-py3-none-any.whl.metadata (13 kB)
Collecting importlib-resources<7.0,>=1.3 (from gradio)
  Downloading importlib_resources-6.4.5-py3-none-any.whl.metadata (4.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp312-none-win_amd64.whl.metadata (51 kB)
     --------------------------------------


[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [14]:

#  Building the Gradio interface. This will allow users to input text and receive predictions about whether the news is real or fake.


import gradio as gr

# Function to predict if the news is real or fake
def predict_news(news):
    news_vectorized = vectorizer.transform([news])
    prediction = model.predict(news_vectorized)
    return "Real News" if prediction[0] == 1 else "Fake News"

# Setting up the Gradio interface
interface = gr.Interface(fn=predict_news, 
                         inputs=gr.inputs.Textbox(lines=2, placeholder="Enter news article..."), 
                         outputs="text", 
                         title="Fake News Detection",
                         description="Enter a news article to check if it's Real or Fake.")

# Launching the Gradio interface
interface.launch()



AttributeError: module 'gradio' has no attribute 'inputs'

In [15]:
# Function to predict if the news is real or fake
def predict_news(news):
    news_vectorized = vectorizer.transform([news])
    prediction = model.predict(news_vectorized)
    return "Real News" if prediction[0] == 1 else "Fake News"

# Setting up the Gradio interface
interface = gr.Interface(fn=predict_news, 
                         inputs=gr.Textbox(lines=2, placeholder="Enter news article..."), 
                         outputs=gr.Text(), 
                         title="Fake News Detection",
                         description="Enter a news article to check if it's Real or Fake.")

# Launching the Gradio interface
interface.launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


