<a href="https://colab.research.google.com/github/patsoong/CS506FinalProject/blob/main/SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [36]:
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
import pandas as pd
import numpy as np

# --- Load features and build the temporal train/test split -------------------
# Feature table read from a CSV in the working directory (presumably one row
# per team-season, judging by the "season"/"team" columns used below).
features_df = pd.read_csv("team_season_features_v2_clean-2.csv")

# Predictors = every numeric column except the label ("champion") and the
# split key ("season").
num_cols = [
    name
    for name in features_df.select_dtypes(include="number").columns.tolist()
    if name not in ("champion", "season")
]

# Turn +/-inf into NaN so they are treated as ordinary missing values
# (the modelling pipeline imputes NaNs with the column median).
X = features_df[num_cols].replace([np.inf, -np.inf], np.nan)
y = features_df["champion"].astype(int)

# Temporal split: seasons up to and including 2015 train the model,
# every later season is held out for testing.
train_mask = features_df["season"].le(2015)
X_train, y_train = X.loc[train_mask], y.loc[train_mask]
X_test, y_test = X.loc[~train_mask], y.loc[~train_mask]

# Identifier columns for the held-out rows; copied so prediction columns can
# be added later without touching features_df.
id_test = features_df.loc[~train_mask, ["season", "team", "champion"]].copy()

# --- Model: impute -> scale -> sigmoid-calibrated RBF SVM --------------------
# Base classifier. The C, gamma and class_weight given here are only starting
# values: the grid search over "clf__estimator__*" sets all three for every
# candidate it evaluates.
# NOTE(review): scikit-learn documents SVC's random_state as ignored when
# probability=False, so the seed below likely has no effect — confirm.
svm_base = SVC(kernel="rbf", C=6.5, gamma="scale", class_weight="balanced",
               probability=False, random_state=42)

# The SVC itself emits no probabilities (probability=False); this wrapper
# supplies predict_proba via sigmoid calibration with 3-fold internal CV.
svm_cal = CalibratedClassifierCV(svm_base, method="sigmoid", cv=3)

# Imputation and scaling live inside the pipeline, so they are re-fitted on
# whatever data the pipeline is fitted on.
svm_model = Pipeline(steps=[
    ("impute", SimpleImputer(strategy="median")),
    ("scale", StandardScaler()),  # defaults: with_mean=True, with_std=True
    ("clf", svm_cal),
])

# --- Hyper-parameter search ---------------------------------------------------
# svm_model is not fitted directly: GridSearchCV fits it for every candidate,
# and best_estimator_ is then used for the test-set predictions.

# Keys address the SVC nested inside the pipeline:
# step "clf" (the calibrator) -> its `estimator` -> the SVC parameter.
param_grid = {
    "clf__estimator__C": [1, 3, 6, 10, 20],
    "clf__estimator__gamma": ["scale", "auto", 0.1, 0.01],
    "clf__estimator__class_weight": [None, "balanced"],
}

grid = GridSearchCV(
    estimator=svm_model,
    param_grid=param_grid,
    scoring="average_precision",  # ranking-oriented metric, matches the top-1 goal
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid.fit(X_train, y_train)
print("Best parameters:", grid.best_params_)

# Score the held-out seasons with the best pipeline found by the search.
# Column 1 of predict_proba is taken as the probability of label 1 (champion).
svm_best = grid.best_estimator_
svm_probs = svm_best.predict_proba(X_test)[:, 1]

# --- Row-level metrics on the held-out seasons -------------------------------
# Both of these score the raw probabilities, so no cut-off is involved.
for metric_label, metric_fn in (
    ("SVM ROC-AUC:", roc_auc_score),
    ("SVM Average Precision (PR-AUC):", average_precision_score),
):
    print(metric_label, metric_fn(y_test, svm_probs))

# Hard 0/1 labels at a 0.5 probability cut-off, used for the accuracy figure.
svm_pred_labels = np.greater(svm_probs, 0.5).astype(int)
print("SVM Binary Accuracy:", accuracy_score(y_test, svm_pred_labels))

# --- Season-level ("top-1") evaluation ---------------------------------------
# Attach each held-out row's predicted champion probability.
id_test["svm_proba_win"] = svm_probs

# Row label of the highest-probability team within every season.
idx = id_test.groupby("season")["svm_proba_win"].idxmax()

# One row per season: the model's pick and the probability behind it.
svm_predicted_champs = id_test.loc[idx, ["season", "team", "svm_proba_win"]]
svm_predicted_champs = svm_predicted_champs.rename(
    columns={"team": "team_pred", "svm_proba_win": "pred_prob"}
).reset_index(drop=True)

# Rows flagged champion == 1 give the actual winner of each season.
true_champs = id_test.loc[id_test["champion"].eq(1), ["season", "team"]]
true_champs = true_champs.rename(columns={"team": "team_true"}).reset_index(drop=True)

# Left join keeps every predicted season; a season without a champion row gets
# NaN in team_true and is therefore scored as incorrect.
svm_eval = pd.merge(svm_predicted_champs, true_champs, on="season", how="left")
svm_eval["correct"] = svm_eval["team_pred"].eq(svm_eval["team_true"]).astype(int)

# Share of held-out seasons where the top-ranked team was the real champion.
print("SVM Top-1 accuracy:", svm_eval["correct"].mean())
print("\nPredicted vs. True Champions by Season (SVM):")
print(svm_eval.sort_values("season"))

Fitting 5 folds for each of 40 candidates, totalling 200 fits
Best parameters: {'clf__estimator__C': 3, 'clf__estimator__class_weight': None, 'clf__estimator__gamma': 'auto'}
SVM ROC-AUC: 0.9748275862068966
SVM Average Precision (PR-AUC): 0.5162106352838619
SVM Binary Accuracy: 0.97
SVM Top-1 accuracy: 0.7

Predicted vs. True Champions by Season (SVM):
   season team_pred  pred_prob  team_true  correct
0    2016  Warriors   0.798561  Cavaliers        0
1    2017  Warriors   0.726183   Warriors        1
2    2018  Warriors   0.305195   Warriors        1
3    2019  Warriors   0.228243    Raptors        0
4    2020     Bucks   0.117201     Lakers        0
5    2021     Bucks   0.110546      Bucks        1
6    2022  Warriors   0.201904   Warriors        1
7    2023   Nuggets   0.094379    Nuggets        1
8    2024   Celtics   0.526774    Celtics        1
9    2025   Thunder   0.296330    Thunder        1
