In [1]:
#FINAL PROJECT 1
#Fake News Detection System

#  STEP 1: Upload files
# Colab-only helper; STEP 3 expects 'Fake.csv' and 'True.csv' to be readable
# from the current working directory once the upload finishes.
from google.colab import files
uploaded = files.upload()

#  STEP 2: Import Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

#  STEP 3: Load Data
fake_df = pd.read_csv('Fake.csv')
true_df = pd.read_csv('True.csv')

#  STEP 4: Label the data
fake_df['label'] = 0  # Fake
true_df['label'] = 1  # True

#  STEP 5: Combine & Shuffle
# ignore_index gives the combined frame a clean 0..n-1 index instead of two
# overlapping per-file ranges.
df = pd.concat([fake_df, true_df], ignore_index=True)
# Seed the shuffle: the train/test split further down uses a fixed
# random_state, which only yields reproducible results if the row order fed
# into it is reproducible as well.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

#  STEP 6: Check
# Sanity check: total row/column count and how many rows carry each label.
print(df.shape)
print(df['label'].value_counts())

#  STEP 7: Split Features and Labels
# Missing article bodies become empty strings; TfidfVectorizer raises on NaN.
X = df['text'].fillna('')
y = df['label']

#  STEP 8: Train/Test Split
# Split the RAW text before any vectorizing so that no statistic derived from
# the held-out 20% (vocabulary, document frequencies, IDF weights) can leak
# into the features the model is trained on.
from sklearn.model_selection import train_test_split
X_train_text, X_test_text, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#  STEP 9: Vectorize Text
from sklearn.feature_extraction.text import TfidfVectorizer
# max_df=0.7 drops terms that appear in more than 70% of the training documents.
tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train = tfidf.fit_transform(X_train_text)  # learn vocabulary/IDF on train only
X_test = tfidf.transform(X_test_text)        # reuse the fitted vocabulary/IDF

# Shapes of the train/test feature matrices and their label vectors.
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

#  STEP 10: Model Selection (Naive Bayes for example)
from sklearn.naive_bayes import MultinomialNB

# fit() returns the fitted estimator, so construction and training are chained.
model = MultinomialNB().fit(X_train, y_train)

#  STEP 11: Predictions
# Predicted labels for the held-out test features.
y_pred = model.predict(X_test)

#  STEP 12: Evaluation
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Compute each metric once, then report them in a fixed order.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("\nAccuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)

#  STEP 13: Visualize Confusion Matrix
# Draw the test-set confusion matrix as an annotated heatmap; the integer
# counts are written inside each cell (fmt='d').
cm_counts = confusion_matrix(y_test, y_pred)
heatmap_ax = sns.heatmap(cm_counts, annot=True, fmt='d', cmap='Blues')

# Label through the Axes object that heatmap() returns.
heatmap_ax.set_title("Confusion Matrix")
heatmap_ax.set_xlabel("Predicted")
heatmap_ax.set_ylabel("Actual")
plt.show()


KeyboardInterrupt: 