In [1]:
from team_functions import dataPrep, getTeamRollingSeason, mapTeamID

In [2]:
import pandas as pd
import os

In [3]:
teams_df = pd.DataFrame([])
current_df = pd.DataFrame([])
for team_file in os.listdir('data/TeamsPrep'):
    team_id = team_file[:3]
    
    for year in [2020, 2021, 2022, 2023]:
        team_df = pd.read_excel(f'data/TeamsPrep/{team_file}', str(year))

        team_df['TeamID'] = mapTeamID(team_id)
        team_df['OppID'] = team_df['OppID'].apply(lambda x: mapTeamID(x))
        team_df['Season'] = year

        teams_df = pd.concat([teams_df, team_df])
        
    team_df = pd.read_excel(f'data/CurrentSeason/{team_file}')
    
    roll_df = getTeamRollingSeason(team_df)
    prep_df = dataPrep(roll_df, 'Games')
    
    prep_df['TeamID'] = mapTeamID(team_id)
    prep_df['OppID'] = prep_df['OppID'].apply(lambda x: mapTeamID(x))
    prep_df['Season'] = 2024
    
    current_df = pd.concat([current_df, prep_df])
    
teams_df = teams_df.drop(columns=['Date']).reset_index(drop=True)
current_df = current_df.drop(columns=['Date']).reset_index(drop=True)

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
X = teams_df.drop(columns=['Target'])
y = teams_df['Target']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=False)

X_test = current_df.drop(columns=['Target'])
y_test = current_df['Target']

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [8]:
from sklearn.model_selection import GridSearchCV

In [9]:
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf'],
    'gamma': ['scale', 'auto'],
    'degree': [2, 3, 4],
    'class_weight': [None, 'balanced']
}

In [16]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [12]:
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

In [13]:
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

Best Parameters: {'C': 0.1, 'class_weight': 'balanced', 'degree': 2, 'gamma': 'auto', 'kernel': 'rbf'}
Best Score: 0.589825973268239


In [14]:
best_svm_classifier = SVC(**best_params)
best_svm_classifier.fit(X_train, y_train)

In [17]:
y_val_pred = best_svm_classifier.predict(X_val)
print('Report:\n', classification_report(y_val, y_val_pred))
print('Confusion Matrix:\n', confusion_matrix(y_val, y_val_pred))

Report:
               precision    recall  f1-score   support

           0       0.61      0.60      0.60       914
           1       0.60      0.61      0.61       910

    accuracy                           0.60      1824
   macro avg       0.60      0.60      0.60      1824
weighted avg       0.60      0.60      0.60      1824

Confusion Matrix:
 [[548 366]
 [355 555]]


In [19]:
y_test_pred = best_svm_classifier.predict(X_test)
print('Report:\n', classification_report(y_test, y_test_pred))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_test_pred))

Report:
               precision    recall  f1-score   support

           0       0.68      0.35      0.46       686
           1       0.56      0.84      0.67       686

    accuracy                           0.59      1372
   macro avg       0.62      0.59      0.57      1372
weighted avg       0.62      0.59      0.57      1372

Confusion Matrix:
 [[240 446]
 [111 575]]
