In [17]:
# Imports
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
import os
# Load preprocessed data
X_train, X_test = pd.read_csv('data/X_train.csv'), pd.read_csv('data/X_test.csv')
y_train, y_test = pd.read_csv('data/y_train.csv'), pd.read_csv('data/y_test.csv')

# Initialize models
logit = LogisticRegression()
mnb = MultinomialNB()
svm = SVC()
rf = RandomForestClassifier()

# Dictionary to store results
models = {'Logistic Regression': logit, 
          'Naive Bayes': mnb,
          'SVM': svm,
          'Random Forest': rf
         }

results = {}

# Train each model and evaluate
for name, model in models.items():
  
  # Train model
  model.fit(X_train, y_train)
  
  # Make predictions
  y_pred = model.predict(X_test)
  
  # Evaluate metrics
  acc = accuracy_score(y_test, y_pred)
  prec = precision_score(y_test, y_pred)
  rec = recall_score(y_test, y_pred)
  f1 = f1_score(y_test, y_pred)
  
  # Store results
  results[name] = {'accuracy': acc, 
                   'precision': prec, 
                   'recall': rec, 
                   'f1': f1}
                   
# Print results  
print(results)

os.makedirs('models', exist_ok=True)
# Save best model 

# Verify models folder exists
if not os.path.exists('models'):
    os.makedirs('models')

# Save best model
best_model = max(results, key=results.get('accuracy')) 
joblib.dump(models[best_model], 'models/best_model.pkl')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


{'Logistic Regression': {'accuracy': 0.979372197309417, 'precision': 0.89375, 'recall': 0.959731543624161, 'f1': 0.9255663430420711}, 'Naive Bayes': {'accuracy': 0.9668161434977578, 'precision': 0.8218390804597702, 'recall': 0.959731543624161, 'f1': 0.8854489164086687}, 'SVM': {'accuracy': 0.957847533632287, 'precision': 0.7965116279069767, 'recall': 0.9194630872483222, 'f1': 0.8535825545171339}, 'Random Forest': {'accuracy': 0.9865470852017937, 'precision': 0.971830985915493, 'recall': 0.9261744966442953, 'f1': 0.9484536082474226}}


['models/best_model.pkl']