In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

# Read the two source CSVs and tag every row with its class id on load:
# 1 = fake news, 0 = true news.
print("Loading dataset...")
df_fake = pd.read_csv('data/fake.csv').assign(label=1)
df_true = pd.read_csv('data/true.csv').assign(label=0)

# Stack the fake rows on top of the true rows under a fresh 0..n-1 index and
# keep only the two columns the model uses.
df = pd.concat([df_fake, df_true], ignore_index=True)[['text', 'label']]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF over unigrams and bigrams, capped at 10,000 features, with English
# stop words removed. The vocabulary is learned from the training split only
# and then re-applied unchanged to the test split.
vectorizer = TfidfVectorizer(
    stop_words='english',
    ngram_range=(1, 2),
    max_features=10000,
)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Persist the fitted vectorizer so new text can be transformed later with the
# same vocabulary and weights.
with open('models/fake_news_vectorizer.pkl', 'wb') as vectorizer_file:
    joblib.dump(vectorizer, vectorizer_file)

Loading dataset...


In [9]:
from sklearn.svm import SVC

# Fit a linear-kernel SVM (C=1, fixed seed) on the TF-IDF training matrix.
print("Training SVM model...")
model_svm = SVC(C=1, kernel='linear', random_state=42).fit(X_train_tfidf, y_train)

# Write the fitted classifier to disk before evaluating it.
joblib.dump(model_svm, 'models/fake_news_svm_model.pkl')
print("Model training complete.")

# Score the held-out split: overall accuracy, then the confusion matrix and
# the per-class precision/recall/F1 report.
print("Evaluating SVM model on the test dataset...")
y_pred_svm = model_svm.predict(X_test_tfidf)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"Accuracy: {accuracy_svm}")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred_svm), sep="\n")
print("Classification Report:", classification_report(y_test, y_pred_svm), sep="\n")

Training SVM model...
Model training complete.
Evaluating SVM model on the test dataset...
Accuracy: 0.994097995545657
Confusion Matrix:
[[4227   20]
 [  33 4700]]
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      4247
           1       1.00      0.99      0.99      4733

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [10]:
# Making predictions on new articles
def predict_news(article, vectorizer=None, model=None):
    """Classify a single article as "Fake News" or "Real News".

    Parameters
    ----------
    article : str
        Raw article text to classify.
    vectorizer : optional
        Fitted object exposing ``transform(list_of_str)``. When omitted, the
        vectorizer saved at 'models/fake_news_vectorizer.pkl' is loaded.
    model : optional
        Fitted object exposing ``predict(features)``. When omitted, the SVM
        saved at 'models/fake_news_svm_model.pkl' is loaded.

    Returns
    -------
    str
        "Fake News" if the predicted label equals 1, otherwise "Real News".

    Notes
    -----
    With the defaults, both artifacts are re-read from disk on every call;
    pass already-loaded objects when classifying many articles.
    ``joblib.load`` unpickles the files, so only load artifacts you trust.
    """
    if vectorizer is None:
        vectorizer = joblib.load('models/fake_news_vectorizer.pkl')
    if model is None:
        # Must match the path the SVM training cell writes to; the previous
        # 'models/fake_news_model.pkl' is not produced anywhere in the
        # notebook, so a fresh run would fail or pick up a stale model.
        model = joblib.load('models/fake_news_svm_model.pkl')

    # transform() expects an iterable of documents, hence the one-item list.
    article_tfidf = vectorizer.transform([article])
    prediction = model.predict(article_tfidf)
    return "Fake News" if prediction[0] == 1 else "Real News"

# Hand-written sample snippets used to spot-check the classifier.
articles = [
    "President Joe Biden announced a new policy aimed at reducing taxes for middle-income families, sparking widespread debate among economists.",
    "Senator Mitch McConnell claims that last year’s election was rigged and calls for an independent investigation into the results.",
    "Governor Gavin Newsom has introduced a bill to expand healthcare subsidies for lower-income groups, drawing mixed reactions from both parties.",
    "Reports suggest that Prime Minister Boris Johnson secretly negotiated a deal with alien species to secure resources for the UK.",
    "Opposition leader Alexandria Ocasio-Cortez is organizing a rally to protest recent changes to immigration laws passed by the ruling party, which is supposedly funded by foreign governments.",
    "Intelligence officials allege that foreign agents influenced the election, but a leaked memo claims the entire story is fabricated by secret government agencies."
]


print("Classifying example articles...")
# map() is lazy, so each article is still classified immediately before its
# own result is printed.
for article, label in zip(articles, map(predict_news, articles)):
    print(f"Article: {article}\nPrediction: {label}\n")

Classifying example articles...
Article: President Joe Biden announced a new policy aimed at reducing taxes for middle-income families, sparking widespread debate among economists.
Prediction: Fake News

Article: Senator Mitch McConnell claims that last year’s election was rigged and calls for an independent investigation into the results.
Prediction: Fake News

Article: Governor Gavin Newsom has introduced a bill to expand healthcare subsidies for lower-income groups, drawing mixed reactions from both parties.
Prediction: Real News

Article: Reports suggest that Prime Minister Boris Johnson secretly negotiated a deal with alien species to secure resources for the UK.
Prediction: Fake News

Article: Opposition leader Alexandria Ocasio-Cortez is organizing a rally to protest recent changes to immigration laws passed by the ruling party, which is supposedly funded by foreign governments.
Prediction: Fake News

Article: Intelligence officials allege that foreign agents influenced the elec