In [1]:
# STEP 1: Upload ZIP or CSV
from google.colab import files
uploaded = files.upload()


Saving archive (4).zip to archive (4).zip


In [3]:
# STEP 2: Unzip if needed
import zipfile
import os

# Automatically unzip uploaded zip file
for filename in uploaded:
    if filename.endswith(".zip"):
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall()
        print(f"Unzipped: {filename}")
    else:
        print(f"No unzip needed for: {filename}")


Unzipped: archive (4).zip


In [6]:
# STEP 3: Load both True and Fake news CSVs and label them
import pandas as pd

# Load individual CSV files
true_df = pd.read_csv('True.csv')
fake_df = pd.read_csv('Fake.csv')

# Add labels
true_df['label'] = 'REAL'
fake_df['label'] = 'FAKE'

# Combine datasets
df = pd.concat([true_df, fake_df], ignore_index=True)
print("✅ Data combined successfully.")
print(df.head())


✅ Data combined successfully.
                                               title  \
0  As U.S. budget fight looms, Republicans flip t...   
1  U.S. military to accept transgender recruits o...   
2  Senior U.S. Republican senator: 'Let Mr. Muell...   
3  FBI Russia probe helped by Australian diplomat...   
4  Trump wants Postal Service to charge 'much mor...   

                                                text       subject  \
0  WASHINGTON (Reuters) - The head of a conservat...  politicsNews   
1  WASHINGTON (Reuters) - Transgender people will...  politicsNews   
2  WASHINGTON (Reuters) - The special counsel inv...  politicsNews   
3  WASHINGTON (Reuters) - Trump campaign adviser ...  politicsNews   
4  SEATTLE/WASHINGTON (Reuters) - President Donal...  politicsNews   

                 date label  
0  December 31, 2017   REAL  
1  December 29, 2017   REAL  
2  December 31, 2017   REAL  
3  December 30, 2017   REAL  
4  December 29, 2017   REAL  


In [7]:
# STEP 4: Prepare and train the model
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Features and labels
X = df['text']
y = df['label']

# TF-IDF vectorization
tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
X_tfidf = tfidf.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print("\n✅ Evaluation Results:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))



✅ Evaluation Results:
Accuracy: 0.9854120267260579

Confusion Matrix:
 [[4574   76]
 [  55 4275]]

Classification Report:
               precision    recall  f1-score   support

        FAKE       0.99      0.98      0.99      4650
        REAL       0.98      0.99      0.98      4330

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [None]:
# STEP 5: Try user input
while True:
    news = input("\nEnter news article text (or type 'exit'): ")
    if news.lower() == 'exit':
        break
    news_vec = tfidf.transform([news])
    prediction = model.predict(news_vec)
    print("Prediction:", prediction[0])  # FAKE or REAL




Enter news article text (or type 'exit'): Media Embraced Biden White House's 'Cheap Fakes' Story Until Ill-Fated Debate, New Book Reveals
Prediction: FAKE

Enter news article text (or type 'exit'):  Fake Mexico Video Being Spread as Shamsherganj Incident, Say Cops, Warn of Action
Prediction: FAKE

Enter news article text (or type 'exit'): The Federal Reserve announced a quarter-point interest rate hike on Wednesday, citing strong economic growth and low unemployment as key factors in its decision.
Prediction: REAL
