In [None]:
# ========================================
# Install required libraries
# ========================================
!pip install -q sentence-transformers scikit-learn google-generativeai

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import pickle
from sentence_transformers import SentenceTransformer

# ========================================
# Load Dataset (Fake + True)
# ========================================
# Read the two source files from the working directory. Each file is
# presumed to hold articles of a single class and to expose a "text"
# column -- TODO confirm against the CSV headers.
fake = pd.read_csv("Fake.csv")
true = pd.read_csv("True.csv")

# Tag every row with its class before merging: 1 = fake, 0 = real.
fake["label"] = 1
true["label"] = 0

# Stack both frames, shuffle all rows reproducibly (seed 42), then renumber
# the index so it no longer reflects the per-file row positions.
df = pd.concat([fake, true])
df = df.sample(frac=1, random_state=42)
df = df.reset_index(drop=True)

# Features are the raw article text; targets are the 0/1 labels.
X, y = df["text"], df["label"]

# ========================================
# Train-Test Split
# ========================================
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ========================================
# Load BERT Embeddings
# ========================================
# Sentence encoder used to turn each article into a fixed-length vector.
bert = SentenceTransformer("all-MiniLM-L6-v2")

# Encode the training texts first, then the test texts, each as a plain
# Python list and with a progress bar. The generator is consumed left to
# right by the tuple unpacking, so the order of the two passes is fixed.
X_train_emb, X_test_emb = (
    bert.encode(texts.tolist(), show_progress_bar=True)
    for texts in (X_train, X_test)
)

# ========================================
# Train Logistic Regression
# ========================================
# Fit a logistic-regression classifier on the training embeddings, allowing
# the solver up to 2000 iterations. fit() returns the estimator itself, so
# construction and training are chained into one statement.
model = LogisticRegression(max_iter=2000).fit(X_train_emb, y_train)

# ========================================
# Evaluate
# ========================================
# Predict labels for the held-out embeddings.
y_pred = model.predict(X_test_emb)

# Compute the metrics first, then print them: overall accuracy followed by
# the per-class precision / recall / F1 table.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print(report)

# ========================================
# Save Model
# ========================================
# Persist the trained classifier. The context manager guarantees the file is
# flushed and closed even if pickling raises; the previous bare open() call
# left the handle to be closed whenever the garbage collector got to it.
# NOTE: only the logistic-regression model is stored. It was fitted on
# sentence embeddings, so anything loading this file must embed its input
# text with the same encoder before calling predict().
with open("bert_fake_news_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

print("✅ Model saved as bert_fake_news_model.pkl")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1123 [00:00<?, ?it/s]

Batches:   0%|          | 0/281 [00:00<?, ?it/s]

Accuracy: 0.9579064587973274
              precision    recall  f1-score   support

           0       0.95      0.96      0.96      4270
           1       0.96      0.95      0.96      4710

    accuracy                           0.96      8980
   macro avg       0.96      0.96      0.96      8980
weighted avg       0.96      0.96      0.96      8980

✅ Model saved as bert_fake_news_model.pkl
