In [None]:
+import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import pickle

# Train a TF-IDF + logistic-regression classifier on the cleaned fake/true
# news CSVs, then persist the fitted model, the fitted vectorizer and the
# held-out test split for a later evaluation step.

# Load cleaned data
fake_df_clean = pd.read_csv('../data/fake_clean.csv')
true_df_clean = pd.read_csv('../data/true_clean.csv')

# Merge and shuffle (fixed seed so the row order is reproducible)
df = pd.concat([fake_df_clean, true_df_clean], ignore_index=True)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Combine title and text for features.
# read_csv parses empty fields as NaN by default, and NaN + str is NaN, so a
# row with an empty title OR an empty text would lose its other field too and
# then make the vectorizer fail on a NaN document. Fill with '' first so the
# remaining field is kept.
X = df['title'].fillna('') + " " + df['text'].fillna('')
y = df['label']

# Split into train and test sets: 80/20, stratified on the label so both
# splits keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Vectorize text. The vectorizer is fitted on the training split only and
# merely applied to the test split, so no test-set statistics leak into the
# features. max_df=0.7 drops terms that occur in more than 70% of documents.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train model (max_iter raised to 1000 to give the solver room to converge)
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

# Save model and vectorizer. Both are needed at inference time: raw text must
# go through this exact fitted vectorizer before it reaches the model.
# NOTE(review): assumes the ../models directory already exists.
with open('../models/model.pkl', 'wb') as f:
    pickle.dump(model, f)
with open('../models/vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)

# Save test data for evaluation (raw text and labels, without the index)
X_test.to_csv('../data/X_test.csv', index=False)
y_test.to_csv('../data/y_test.csv', index=False)