In [1]:
pip install pandas scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix


In [3]:
# Read the two source files; each file contributes exactly one class
fake_df = pd.read_csv('Fake.csv')
real_df = pd.read_csv('True.csv')

# Tag every row with the class implied by the file it came from
fake_df["label"] = "FAKE"
real_df["label"] = "REAL"

# Stack both frames into a single dataset
df = pd.concat([fake_df, real_df], axis=0)

# Shuffle so FAKE and REAL rows are interleaved, then renumber the index.
# The fixed random_state keeps the row order identical on every run; with an
# unseeded shuffle the seeded train/test split further down would still select
# different rows each time the notebook is re-executed.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Preview the first rows of the combined data
print(df.head())


                                               title  \
0   Vehemently Anti-Gay Pastor Arrested On 70 Cou...   
1  FLORIDA GOVERNOR GOES OFF ON OBAMA: “The Secon...   
2  Ramadan Abdullah Set Free On Bail After Police...   
3  With Obamacare vote, House Republicans free to...   
4  Iowa governor urges caucus-goers to say 'No' t...   

                                                text       subject  \
0  On May 9, Pastor David Reynolds, formerly of C...          News   
1  Florida Governor Rick Scott is not holding any...     left-news   
2  How is a man with ties to a US based terror or...      politics   
3  WASHINGTON (Reuters) - The Republican-controll...  politicsNews   
4  ALTOONA, Iowa (Reuters) - Iowa’s governor said...  politicsNews   

                date label  
0       May 15, 2016  FAKE  
1       Jun 17, 2016  FAKE  
2       Sep 13, 2017  FAKE  
3       May 5, 2017   REAL  
4  January 19, 2016   REAL  


In [4]:
# Features are the article body text; the target is the FAKE/REAL label column.
# NOTE(review): the REAL rows visible in the df.head() preview open with a
# "<CITY> (Reuters) - " dateline while the FAKE rows do not. If that holds across
# the whole of True.csv, the classifier may be keying on the source marker
# rather than on the content — confirm before trusting the scores.
X, y = df['text'], df['label']


In [5]:
# Build the TF-IDF vectorizer: English stop words are dropped and max_df=0.7
# discards terms that appear in more than 70% of the documents
tfidf = TfidfVectorizer(max_df=0.7, stop_words='english')

# Learn the vocabulary and IDF weights from the whole corpus X and vectorize it
# in the same pass. This happens before the train/test split, so the rows that
# later become the test set also contribute to the fitted vocabulary.
X_tfidf = tfidf.fit_transform(raw_documents=X)

# Show (number of documents, number of vocabulary terms)
print(f"{X_tfidf.shape}")


(44898, 121689)


In [6]:
# Hold out 20% of the vectorized rows for evaluation; the fixed random_state
# seeds the shuffle that train_test_split performs before partitioning
X_train, X_test, y_train, y_test = train_test_split(
    X_tfidf,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the dimensions of both partitions
print(f"Training data shape: {X_train.shape}\nTest data shape: {X_test.shape}")


Training data shape: (35918, 121689)
Test data shape: (8980, 121689)


In [7]:
# Create the Passive-Aggressive linear classifier, capped at 50 passes over the
# training data. The estimator shuffles the training rows between passes, so
# random_state is fixed to make the fitted weights (and every score derived
# from them) reproducible across runs.
model = PassiveAggressiveClassifier(max_iter=50, random_state=42)

# Fit on the training partition only
model.fit(X_train, y_train)


In [8]:
# Classify the held-out rows
y_pred = model.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))

# Counts of true label (rows) versus predicted label (columns)
print("Confusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))


Accuracy: 0.9942093541202672
Confusion Matrix:
 [[4684   27]
 [  25 4244]]


In [9]:
# Hand-written headlines used as a quick smoke test of the trained model
test_samples = [
    "Breaking: Scientists discover water on Mars again!",
    "India's GDP grows 6.5% in the first quarter of 2025.",
    "Aliens found watching IPL final from moon base!",
    "The Finance Minister announces new tax reforms today."
]

# Vectorize and classify all samples in one batch: a single transform/predict
# call replaces one sparse transform and one predict call per headline.
# The already-fitted vectorizer is reused so the columns match the training data.
sample_predictions = model.predict(tfidf.transform(test_samples))

# Print each headline next to its predicted label
for text, result in zip(test_samples, sample_predictions):
    print(f"Text: {text}\n ➤ Prediction: {result}\n")


Text: Breaking: Scientists discover water on Mars again!
 ➤ Prediction: FAKE

Text: India's GDP grows 6.5% in the first quarter of 2025.
 ➤ Prediction: FAKE

Text: Aliens found watching IPL final from moon base!
 ➤ Prediction: FAKE

Text: The Finance Minister announces new tax reforms today.
 ➤ Prediction: FAKE



In [10]:
pip install --upgrade pip
pip install streamlit --no-cache-dir


SyntaxError: invalid syntax (1013264056.py, line 1)

In [14]:
pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.


In [15]:
pip install streamlit --no-cache-dir


Note: you may need to restart the kernel to use updated packages.


In [16]:
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split

# NOTE: Streamlit widgets are only interactive when this code is saved as a
# script and started with `streamlit run <script>.py`. Executed directly in a
# notebook kernel, Streamlit only emits "run with `streamlit run`" warnings and
# the button below never fires.


@st.cache_resource
def load_model():
    """Train the fake-news classifier once and cache it for the app's lifetime.

    Streamlit re-executes the script on every widget interaction; caching keeps
    the CSV loading, TF-IDF fitting and model training from being repeated each
    time the user types or clicks.

    Returns:
        tuple: ``(tfidf, model)`` - the fitted ``TfidfVectorizer`` and the
        ``PassiveAggressiveClassifier`` trained on its output.
    """
    # Load datasets
    fake_df = pd.read_csv('Fake.csv')
    real_df = pd.read_csv('True.csv')

    # Add labels
    fake_df['label'] = 'FAKE'
    real_df['label'] = 'REAL'

    # Combine and shuffle (seeded so every app start trains on the same rows)
    df = pd.concat([fake_df, real_df])
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Prepare data
    X = df['text']
    y = df['label']

    # TF-IDF
    tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
    X_tfidf = tfidf.fit_transform(X)

    # Train on an 80% subset; the held-out 20% is not used by the app
    X_train, _, y_train, _ = train_test_split(
        X_tfidf, y, test_size=0.2, random_state=42
    )

    # Train model (seeded so the fitted weights are reproducible)
    model = PassiveAggressiveClassifier(max_iter=50, random_state=42)
    model.fit(X_train, y_train)
    return tfidf, model


tfidf, model = load_model()

# Streamlit UI
st.title("📰 Fake News Detection App")
user_input = st.text_area("Enter a news article or headline:")

if st.button("Predict"):
    if not user_input.strip():
        # An empty text box would be vectorized to an all-zero row and still
        # produce a label, so ask for input instead of showing that result
        st.warning("Please enter some text to classify.")
    else:
        input_tfidf = tfidf.transform([user_input])
        prediction = model.predict(input_tfidf)[0]
        st.success(f"🧐 The news is predicted to be: **{prediction}**")


2025-04-16 21:43:46.151 
  command:

    streamlit run C:\Users\anuna\AppData\Local\Programs\Python\Python313\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-04-16 21:43:46.168 Session state does not function when running a script without `streamlit run`


In [18]:
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split

# NOTE: Streamlit widgets are only interactive when this code is saved as a
# script and started with `streamlit run <script>.py`. Executed directly in a
# notebook kernel, Streamlit only emits "run with `streamlit run`" warnings and
# the button below never fires.


@st.cache_resource
def load_model():
    """Train the fake-news classifier once and cache it for the app's lifetime.

    Streamlit re-executes the script on every widget interaction; caching keeps
    the CSV loading, TF-IDF fitting and model training from being repeated each
    time the user types or clicks.

    Returns:
        tuple: ``(tfidf, model)`` - the fitted ``TfidfVectorizer`` and the
        ``PassiveAggressiveClassifier`` trained on its output.
    """
    # Load datasets
    fake_df = pd.read_csv('Fake.csv')
    real_df = pd.read_csv('True.csv')

    # Add labels
    fake_df['label'] = 'FAKE'
    real_df['label'] = 'REAL'

    # Combine and shuffle (seeded so every app start trains on the same rows)
    df = pd.concat([fake_df, real_df])
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Prepare data
    X = df['text']
    y = df['label']

    # TF-IDF
    tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
    X_tfidf = tfidf.fit_transform(X)

    # Train on an 80% subset; the held-out 20% is not used by the app
    X_train, _, y_train, _ = train_test_split(
        X_tfidf, y, test_size=0.2, random_state=42
    )

    # Train model (seeded so the fitted weights are reproducible)
    model = PassiveAggressiveClassifier(max_iter=50, random_state=42)
    model.fit(X_train, y_train)
    return tfidf, model


tfidf, model = load_model()

# Streamlit UI
st.title("📰 Fake News Detection App")
user_input = st.text_area("Enter a news article or headline:")

if st.button("Predict"):
    if not user_input.strip():
        # An empty text box would be vectorized to an all-zero row and still
        # produce a label, so ask for input instead of showing that result
        st.warning("Please enter some text to classify.")
    else:
        input_tfidf = tfidf.transform([user_input])
        prediction = model.predict(input_tfidf)[0]
        st.success(f"🧐 The news is predicted to be: **{prediction}**")


