In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print("✅ Libraries imported successfully!")

✅ Libraries imported successfully!


In [4]:
url = ("fake_news_datset.csv")
data = pd.read_csv(url)
print("✅ Dataset loaded successfully!")



✅ Dataset loaded successfully!


In [5]:
data = data.dropna()

# Features and labels

X = data['text']
y = data['label']

# Split into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("✅ Data prepared for training!")


✅ Data prepared for training!


In [6]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)
print("✅ Text converted to numerical vectors!")

✅ Text converted to numerical vectors!


In [7]:
model = PassiveAggressiveClassifier(max_iter=50)
model.fit(tfidf_train, y_train)
print("✅ Model trained successfully!")

✅ Model trained successfully!


In [11]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

y_pred = model.predict(tfidf_test)
acc = accuracy_score(y_test, y_pred)

print(f"✅ Accuracy: {round(acc * 100, 2)}%")
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))



✅ Accuracy: 50.0%

Confusion Matrix:
 [[0 1]
 [0 1]]

Classification Report:
               precision    recall  f1-score   support

        FAKE       0.00      0.00      0.00         1
        REAL       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



In [12]:
sample_news = [
"NASA discovers new habitable planet beyond our solar system!",
"Celebrity says earth is flat and moon landing was fake."
]

sample_features = tfidf_vectorizer.transform(sample_news)
predictions = model.predict(sample_features)

for news, label in zip(sample_news, predictions):
      result = "✅ REAL NEWS" if label == 'REAL' else "🚫 FAKE NEWS"
print(f"\n📰 '{news}' → {result}")


📰 'Celebrity says earth is flat and moon landing was fake.' → 🚫 FAKE NEWS
