# 🤖 Notebook 3: Model Training
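Train and compare three classifiers (Logistic Regression, Multinomial Naive Bayes, linear SVM) on TF-IDF features for sentiment classification, then save the fitted vectorizer and each model to `../models/`.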


In [1]:
import os

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC

In [2]:
# Load the cleaned reviews (presumably written by an earlier preprocessing
# notebook -- verify against the pipeline).
#
# A review that was cleaned down to an empty string is stored in the CSV as an
# empty field and comes back from read_csv as NaN, which TfidfVectorizer
# rejects. Rows with a missing text or a missing label are dropped in the same
# chain as the read so that re-running this cell always yields the same frame.
df = (
    pd.read_csv('../data/processed/cleaned_reviews.csv')
    .dropna(subset=['clean_review', 'sentiment'])
)

X = df['clean_review']  # input text fed to the vectorizer
y = df['sentiment']     # classification target

In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# identical on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [5]:
# Fit TF-IDF on the training split only, then apply the fitted vocabulary to
# the test split, so no statistics from the held-out data reach the features.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Save the vectorizer. The target directory is created first so the cell also
# works on a fresh checkout where ../models does not exist yet.
os.makedirs('../models', exist_ok=True)
joblib.dump(vectorizer, '../models/vectorizer.joblib')

['../models/vectorizer.joblib']

In [6]:
# Candidate classifiers, keyed by the display name used in the log lines and
# (lower-cased, spaces -> underscores) in the saved file name.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Naive Bayes': MultinomialNB(),
    # LinearSVC shuffles the data when it uses the dual solver; seeding it
    # (same seed as the train/test split) keeps the fitted model repeatable.
    'SVM': LinearSVC(random_state=42)
}

# joblib.dump does not create missing directories, so make sure the output
# location exists before the first model is written.
os.makedirs('../models', exist_ok=True)

for name, model in models.items():
    print(f'Training {name}...')
    model.fit(X_train_vec, y_train)

    # Score on the held-out split that the model never saw during fitting.
    y_pred = model.predict(X_test_vec)
    acc = accuracy_score(y_test, y_pred)
    print(f'{name} Accuracy: {acc:.4f}')

    # e.g. 'Logistic Regression' -> '../models/logistic_regression.joblib'
    model_path = f'../models/{name.replace(" ", "_").lower()}.joblib'
    joblib.dump(model, model_path)


Training Logistic Regression...
Logistic Regression Accuracy: 0.8847
Training Naive Bayes...
Naive Bayes Accuracy: 0.8493
Training SVM...
SVM Accuracy: 0.8786
