In [None]:
# SIMPLE MODELS 

In [None]:
import numpy as np
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

# ========== STEP 1: PREPROCESSING ==========
# Load your dataset. NOTE: the code below expects X_train, X_test, y_train and
# y_test to already be defined (e.g. by splitting X and y in an earlier cell).


# ========== STEP 3: MODEL TRAINING ==========
# Registry of candidate classifiers, keyed by the display name used in the
# printed output. Entries are added one at a time; a dict iterates in
# insertion order, so the order below is the order the models are visited.
models = {}
models["Random Forest"] = RandomForestClassifier()
# NOTE(review): use_label_encoder is deprecated in newer XGBoost releases;
# confirm against the installed version before removing it.
models["XGBoost"] = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
models["Support Vector Machine"] = SVC()
models["Decision Tree"] = DecisionTreeClassifier()
models["Naive Bayes"] = GaussianNB()
models["K-Nearest Neighbors"] = KNeighborsClassifier()
models["Gradient Boosting"] = GradientBoostingClassifier()
models["AdaBoost"] = AdaBoostClassifier()

# Running record of the top-scoring model.
# best_score starts at -inf rather than 0 so the first fitted model is always
# recorded, even when its accuracy is exactly 0.0. With a start value of 0 and
# the strict ">" comparison below, best_model would stay None in that case and
# the later best_model.predict(...) call would fail on None.
best_model = None
best_score = float("-inf")
best_model_name = ""
# Per-model outcome: fitted estimator, accuracy, and text classification report.
results = {}

for name, model in models.items():
    print(f"\nTraining {name}...")

    # Fit on the training split.
    model.fit(X_train, y_train)

    # Predict on test data
    y_pred = model.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"{name} Accuracy: {accuracy:.4f}")
    print(f"Classification Report:\n{report}")

    # Save results
    results[name] = {
        "model": model,
        "accuracy": accuracy,
        "classification_report": report
    }

    # Track the best model. The strict comparison means that on a tie the
    # model seen earlier in `models` is kept.
    if accuracy > best_score:
        best_score = accuracy
        best_model = model
        best_model_name = name

# ========== STEP 4: CONFUSION MATRIX FOR BEST MODEL ==========
print(f"\nBest Model: {best_model_name} with Accuracy {best_score:.4f}")

# Predict on X_test with the selected model and tabulate true vs. predicted
# labels.
y_pred_best = best_model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred_best)

# Draw the matrix as an annotated heatmap with integer cell labels. The axis
# labels and title are set on the Axes object that seaborn returns.
plt.figure(figsize=(8, 6))
heatmap_axes = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    linewidths=1,
    linecolor='black',
)
heatmap_axes.set_xlabel('Predicted')
heatmap_axes.set_ylabel('Actual')
heatmap_axes.set_title(f'Confusion Matrix for Best Model: {best_model_name}')
plt.show()

# ========== STEP 5: SAVE BEST MODEL ==========
# Serialize the selected estimator to best_model.pkl (a relative path, so it
# is resolved against the current working directory).
with open("best_model.pkl", mode="wb") as model_file:
    pickle.dump(best_model, file=model_file)

print(f"\nBest Model Saved: {best_model_name} with Accuracy {best_score:.4f}")
