In [None]:
# Step 1: Import necessary libraries
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

In [None]:
# Step 2. Load data
def load_data(path="TsFake/data"):
    """Read a CSV file, print a preview of it, and return the loaded frame.

    Parameters
    ----------
    path : str or file-like, default "TsFake/data"
        Location handed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``path``. Assign the result in the calling
        cell (e.g. ``df = load_data(...)``); nothing is stored globally.
    """
    news_df = pd.read_csv(path)
    # Preview the first rows so the reader can sanity-check the columns.
    print(news_df.head())
    return news_df

In [None]:
# Step 3. Select features/labels
# NOTE(review): `df` is not assigned in any visible cell -- load_data() is
# defined but never called. Confirm `df` exists before running this cell.
X = df['text'].fillna("")   # news content; missing text becomes an empty string
y = df['label']             # 0 = REAL, 1 = FAKE (depends on dataset)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


In [None]:
# Step 4. TF-IDF vectorization
tfidf = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# Fit on the training split only; the test split is transformed with the
# already-fitted vectorizer so no test-set statistics leak into the features.
tfidf_train = tfidf.fit_transform(X_train)
tfidf_test = tfidf.transform(X_test)


In [None]:
# Step 5. Train model
# random_state pins the classifier's shuffling of the training data so that
# Restart & Run All reproduces the same fitted model; without it every run
# can yield different weights and a different accuracy.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=42)
pac.fit(tfidf_train, y_train)

# Save model + vectorizer
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
Path("../models").mkdir(parents=True, exist_ok=True)
joblib.dump(pac, "../models/fake_news_pac.pkl")
joblib.dump(tfidf, "../models/tfidf.pkl")


In [None]:
# Step 6. Evaluate
y_pred = pac.predict(tfidf_test)

print("Accuracy:", round(accuracy_score(y_test, y_pred)*100, 2), "%")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# confusion_matrix orders rows/columns by sorted label value, so hard-coded
# tick labels can end up on the wrong axis positions (e.g. string labels
# 'FAKE' < 'REAL'). Pin the order to the classes the model was trained on and
# derive the tick labels from that same order.
class_labels = list(pac.classes_)
# Readable names for the assumed numeric encoding (0 = REAL, 1 = FAKE);
# any other label value is shown as-is.
label_names = {0: 'REAL', 1: 'FAKE'}
tick_labels = [label_names.get(label, str(label)) for label in class_labels]

cm = confusion_matrix(y_test, y_pred, labels=class_labels)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=tick_labels,
            yticklabels=tick_labels)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()


In [None]:
# Step 7. Simple prediction function (useful later for the app)
def predict_news(text, model_path="../models/fake_news_pac.pkl", vec_path="../models/tfidf.pkl"):
    """Classify a single piece of text with the persisted model.

    Parameters
    ----------
    text : str
        The text to classify.
    model_path : str
        File the fitted classifier is loaded from.
    vec_path : str
        File the fitted vectorizer is loaded from.

    Returns
    -------
    The first (and only) element of the model's prediction for ``text``.

    Notes
    -----
    Both artifacts are re-loaded from disk on every call. ``joblib.load``
    unpickles its input, so only point these paths at trusted files.
    """
    classifier = joblib.load(model_path)
    # The vectorizer expects an iterable of documents, hence the 1-item list.
    features = joblib.load(vec_path).transform([text])
    predictions = classifier.predict(features)
    return predictions[0]

# Smoke test: classify one sample headline using predict_news' default
# model/vectorizer paths and print the predicted label.
print(predict_news("Breaking: Aliens land on Earth!"))