In [1]:
# Step 1: Install Required Libraries

In [2]:
!pip install pandas numpy scikit-learn



In [7]:
# Step 2: Import Required Libraries

In [8]:
import pandas as pd  # DataFrames: CSV loading, concatenation, shuffling (Step 3)
import numpy as np  # not referenced by name in the cells visible in this notebook
from sklearn.model_selection import train_test_split  # train/test partitioning (Step 4)
from sklearn.feature_extraction.text import TfidfVectorizer  # text -> TF-IDF features (Step 5)
from sklearn.linear_model import PassiveAggressiveClassifier  # classifier trained in Step 6
from sklearn.metrics import accuracy_score, confusion_matrix  # evaluation metrics (Step 7)

# Reached only if every import above succeeded
print("Libraries imported successfully")

Libraries imported successfully


In [9]:
# Step 3: Combine Fake and Real News Datasets

In [11]:
import pandas as pd

# Seed for the shuffle below. Without it, every run produces a different row
# order and therefore a different "fake-news.csv".
SHUFFLE_SEED = 7

# Load both datasets (paths are relative to the notebook's working directory)
fake = pd.read_csv("Fake.csv")
true = pd.read_csv("True.csv")

# Tag every row with its class before the two frames are mixed together
fake["label"] = "FAKE"
true["label"] = "REAL"

# Stack the two frames, then shuffle all rows so the classes are interleaved.
# reset_index(drop=True) discards the per-file row numbers left over from concat.
df = pd.concat([fake, true])
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

# Optional: save combined dataset
df.to_csv("fake-news.csv", index=False)

print("✅ Combined dataset ready")
# Last expression of the cell: rendered as the preview table
df.head()

✅ Combined dataset ready


Unnamed: 0,title,text,subject,date,label
0,Turkey says talk of ending its EU accession un...,ISTANBUL (Reuters) - Turkey s European Union A...,worldnews,"September 4, 2017",REAL
1,IRONIC: WHITE SOUTHERNER BILL CLINTON Just Tol...,OOPS! IN 2008 BILL CLINTON SAID THE SAME THING...,politics,"Sep 9, 2016",FAKE
2,Turkey summons U.S. envoy over Washington stre...,ANKARA (Reuters) - Turkey summoned the U.S amb...,politicsNews,"May 22, 2017",REAL
3,#NewOrleans: BLACK PATRIOTS Ready To Fight Ant...,May 7th is likely going to be a day of clashes...,left-news,"May 7, 2017",FAKE
4,Britain wants Zimbabwe to rejoin international...,LONDON (Reuters) - Britain wants Zimbabwe to r...,worldnews,"November 22, 2017",REAL


In [12]:
# Step 4: Preprocess Data & Split into Training & Testing Sets

In [13]:
# Keep only relevant columns
df = df[['text', 'label']]

# Split input and output
x = df['text']
y = df['label']

# Train-test split
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7)

print("✅ Train-test split done")
print("Training data size:", x_train.shape)
print("Testing data size:", x_test.shape)

✅ Train-test split done
Training data size: (35918,)
Testing data size: (8980,)


In [14]:
# Step 5: TF-IDF Vectorization

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Configure the vectorizer: English stop words are removed, and max_df=0.7
# ignores terms that appear in more than 70% of the fitted documents.
tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')

# Vocabulary and IDF weights are learned from the training texts only ...
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
# ... and that fitted state is reused, unchanged, to encode the test texts.
tfidf_test = tfidf_vectorizer.transform(x_test)

print("✅ TF-IDF vectorization complete")
for caption, matrix in (
    ("Train vector shape:", tfidf_train),
    ("Test vector shape:", tfidf_test),
):
    print(caption, matrix.shape)

✅ TF-IDF vectorization complete
Train vector shape: (35918, 111129)
Test vector shape: (8980, 111129)


In [16]:
# Step 6: Train the Model

In [17]:
from sklearn.linear_model import PassiveAggressiveClassifier

# Initialize model.
# The classifier shuffles the training samples between epochs; pinning
# random_state makes that shuffling (and so the fitted weights and every
# metric computed from them) repeatable across runs.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)

# Train the model on the TF-IDF features of the training split
pac.fit(tfidf_train, y_train)

print("✅ Model training complete")

✅ Model training complete


In [18]:
# Step 7: Check Accuracy and Confusion Matrix

In [19]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Run the trained classifier over the held-out TF-IDF matrix
y_pred = pac.predict(tfidf_test)

# Share of test articles whose predicted label equals the true label
score = accuracy_score(y_test, y_pred)
print(f"✅ Accuracy: {round(score*100, 2)}%")

# Cross-tabulate the true labels against the predicted labels
confusion = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:", confusion, sep="\n")

✅ Accuracy: 99.44%

Confusion Matrix:
[[4707   33]
 [  17 4223]]


In [20]:
# Step 8: Predict Custom News Headline

In [21]:
# Custom news text
custom_news = ["Breaking: NASA finds evidence of aliens on Mars!"]

# Transform using TF-IDF
custom_news_vector = tfidf_vectorizer.transform(custom_news)

# Predict
prediction = pac.predict(custom_news_vector)
print("📰 Prediction for custom news:", prediction[0])

📰 Prediction for custom news: FAKE


In [22]:
# Step 9: Save the Model and Vectorizer

In [23]:
import joblib

# Persist both fitted objects. The vectorizer is saved next to the model
# because new text must be encoded with it before the model can predict.
for artifact, filename in (
    (pac, "fake_news_model.pkl"),
    (tfidf_vectorizer, "tfidf_vectorizer.pkl"),
):
    joblib.dump(artifact, filename)

print("✅ Model & vectorizer saved successfully")

✅ Model & vectorizer saved successfully


## Step 10: Project Summary & Conclusion

🔹 This project successfully detects fake news using Machine Learning.

🔹 Dataset: Combined from Fake.csv and True.csv

🔹 Vectorization: TF-IDF used to convert text to numerical format

🔹 Model: PassiveAggressiveClassifier trained and tested

🔹 Accuracy: Around 99% achieved on the test set (99.44% in the run recorded above)

🔹 Custom news predictions also tested

✅ Project Completed Successfully!