In [None]:
# 🏁 1. Install and import dependencies (run once!)
# If using Colab or not installed:
# !pip install autogluon --upgrade

import pandas as pd
from autogluon.tabular import TabularPredictor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report




In [None]:
# 📂 2. Load data
# Both paths are relative to the working directory the notebook runs from.
train_path = "data/processed/train.csv"
test_path  = "data/processed/test.csv"

df_train = pd.read_csv(train_path)
df_test  = pd.read_csv(test_path)

# Quick sanity check: (rows, columns) of each split.
print(df_train.shape, df_test.shape)
# A bare `df_train.head()` is only rendered when it is the last expression of
# a cell; more statements follow in this cell, so render the preview explicitly.
display(df_train.head())

# 🏷️ 3. Define label and feature columns
label = "grade_cat"   # or whatever your target is named

# Fail fast on a misnamed target before any training starts.
if label not in df_train.columns:
    raise ValueError(f"Column '{label}' not found! Double-check your data.")
# The evaluation step reads df_test[label] as ground truth. Checking here
# avoids a bare KeyError surfacing only after the training run has finished.
if label not in df_test.columns:
    raise ValueError(
        f"Column '{label}' not found in the test set! Double-check your data."
    )

# 🚀 4. Train AutoGluon with 3+ supervised learning methods
# Settings are spelled out as named dicts so the configuration is readable at
# a glance; the constructor and fit() receive exactly these keyword arguments.
predictor_kwargs = {
    "label": label,
    "eval_metric": "accuracy",
    "path": "models/autogluon/",   # where the predictor is told to store its files
}
fit_kwargs = {
    "presets": "best_quality",
    "time_limit": None,            # no time limit is passed to fit()
}
predictor = TabularPredictor(**predictor_kwargs).fit(df_train, **fit_kwargs)

# 💡 By default, AutoGluon tries RF, XGBoost, LightGBM, CatBoost, KNN, NN (MLP), ExtraTrees, etc.

# 5️⃣ 5. Leaderboard: See all models and scores on validation data
# silent=True keeps leaderboard() from printing; the table is rendered via display().
lb = predictor.leaderboard(silent=True)
display(lb)

# 🧪 6. Predict and evaluate on test set
# Ground truth comes from the label column of the test frame; predictions are
# produced from that same frame.
y_true = df_test[label]
y_pred = predictor.predict(df_test)

print("Accuracy: ", accuracy_score(y_true, y_pred))
# These three scores are all computed with average="weighted", so they share
# one loop; the captions are padded so the printed values line up.
for caption, scorer in (
    ("Precision:", precision_score),
    ("Recall:   ", recall_score),
    ("F1-score: ", f1_score),
):
    print(caption, scorer(y_true, y_pred, average="weighted"))

print("\nFull classification report:\n")
print(classification_report(y_true, y_pred))

# 🔍 7. (Optional) Feature importance plot
import matplotlib.pyplot as plt

# Feature importance is computed against the test frame.
fi = predictor.feature_importance(df_test)

# nlargest() selects the 20 highest scores regardless of the row order the
# frame arrives in, and returns them sorted from highest to lowest.
top_importances = fi["importance"].nlargest(20)

plt.figure(figsize=(8,5))
ax = top_importances.plot(kind="barh")
# barh draws the first row at the bottom of the axis; flip it so the most
# important feature is at the top of the chart.
ax.invert_yaxis()
plt.title("Top Feature Importances")
plt.show()