In [None]:
import pandas as pd
import numpy as np
import nbimporter
from model import make_preds, get_X_and_y
from get_game_stats_data import get_game_stats_data_df

def pred_old_outcomes_pipeline(season_year, target_team_ids=None, target_game_date=None, training_and_testing=False):
    """Predict outcomes of already-played games and pair each prediction with its true label.

    The game-stats frame is loaded with ``get_game_stats_data_df``, split by
    ``get_X_and_y`` and scored by ``make_preds`` (called with ``verbose=False``).

    Args:
        season_year: Forwarded positionally to ``get_game_stats_data_df``.
        target_team_ids: Forwarded as ``target_team_ids``.
        target_game_date: Forwarded as ``target_game_date``.
        training_and_testing: Forwarded as ``training_and_testing``.

    Returns:
        A 6-tuple ``(outcomes_preds, accuracy, recall, precision, f1,
        confusion_matrix)``. ``outcomes_preds`` maps each entry of the
        ``home_ids`` sequence returned by ``get_X_and_y`` to
        ``(true_label, prediction)``. When ``make_preds`` yields ``None`` for
        its predictions, all six elements are ``None``.

    Note:
        ``outcomes_preds`` is a dict keyed by home id, so if the same id occurs
        more than once only the last pair for that id is kept.
    """
    game_stats = get_game_stats_data_df(
        season_year,
        target_team_ids=target_team_ids,
        target_game_date=target_game_date,
        training_and_testing=training_and_testing,
    )
    features, labels, home_ids = get_X_and_y(game_stats)

    preds, acc, recall, prec, f1_score, conf_matrix = make_preds(features, labels, verbose=False)

    # No predictions: signal this to the caller with an all-None result.
    if preds is None:
        return None, None, None, None, None, None

    # home id -> (true label, predicted label)
    label_pred_pairs = zip(labels, preds)
    outcomes_preds = dict(zip(home_ids, label_pred_pairs))

    return outcomes_preds, acc, recall, prec, f1_score, conf_matrix

In [None]:
# Evaluate the model on the 2023-24 games played on 2024-04-14 for a fixed
# set of team ids and print the resulting metrics.
season_year = "2023-24"
target_team_ids = [
    1610612742, 1610612760, 1610612753, 1610612749, 1610612765, 1610612759, 1610612764,
    1610612738, 1610612751, 1610612755, 1610612740, 1610612747, 1610612750, 1610612756,
]
outcomes_preds, final_acc, final_recall, final_precision, final_f1, final_cm = pred_old_outcomes_pipeline(
    season_year, target_team_ids, "2024-04-14", training_and_testing=True
)
print("-" * 100)
if outcomes_preds is None:
    # The pipeline returns six Nones when no predictions could be made;
    # formatting None with ":.4f" would raise a TypeError.
    print(f"No predictions available for the {season_year} season on 2024-04-14")
else:
    print(outcomes_preds)
    print(f"Accuracy: {final_acc:.4f}")
    print(f"Recall: {final_recall:.4f}")
    print(f"Precision: {final_precision:.4f}")
    print(f"F1 Score: {final_f1:.4f}")
    print("Confusion matrix:")
    print(final_cm)
print("-" * 100)


In [None]:
import matplotlib.pyplot as plt

def trend_line_graph(plot_data, plot_type, season_year):
    """Scatter-plot a metric series with a linear trend line, then save and show it.

    Args:
        plot_data: Sequence of numeric metric values; each value is plotted
            against its position (0, 1, 2, ...) in the sequence.
        plot_type: Metric name used in the title, the y-axis label and
            (lower-cased) the output file name.
        season_year: Season label used in the title and the output file name.

    Returns:
        None. The figure is written to
        ``ml_imgs/{season_year}_{plot_type.lower()}_trend_line.png`` (the
        directory is created if missing) and then displayed.

    Notes:
        * With no data points nothing is plotted or saved; a message is printed.
        * With a single data point the scatter is drawn without a trend line,
          because a degree-1 fit needs at least two points.
    """
    import os  # local import: only needed to make sure the output folder exists

    if len(plot_data) == 0:
        # np.polyfit raises on empty input; there is nothing to draw anyway.
        print(f"No {plot_type} data to plot for the {season_year} season")
        return

    # Real arrays make both polyfit and the `m * x + b` arithmetic well defined.
    x = np.arange(len(plot_data))
    y = np.asarray(plot_data, dtype=float)

    # A dedicated figure keeps repeated calls from drawing onto each other.
    fig, ax = plt.subplots()
    ax.scatter(x, y, label="Data Points")

    if len(x) >= 2:
        m, b = np.polyfit(x, y, 1)
        ax.plot(x, m * x + b, color="red", label="Trend Line")
    else:
        print(f"Not enough data points to fit a {plot_type} trend line")

    ax.set_title(f'{plot_type} Trend Over Time ({season_year} Season)')
    ax.set_xlabel('Game Days Since First Game')
    ax.set_ylabel(plot_type)
    ax.legend()
    ax.grid(True)

    # savefig does not create missing directories.
    os.makedirs('ml_imgs', exist_ok=True)
    fig.savefig(f'ml_imgs/{season_year}_{plot_type.lower()}_trend_line.png')
    plt.show()
    plt.close(fig)  # release the figure so long runs do not accumulate open figures

def test_model_preds_over_time(season_year):
    """Evaluate the model on every game date of a season and plot metric trends.

    Reads the ``game_stats_{season_year}`` table from the SQLite database at
    ``../database/nba_game_stats.db``, keeps the rows whose ``SEASON_ID`` is
    ``"2"`` followed by the part of ``season_year`` before the dash, and runs
    ``pred_old_outcomes_pipeline`` once per distinct ``GAME_DATE`` in ascending
    order. Accuracy, recall, precision and F1 of every date that produced
    predictions are collected and passed to ``trend_line_graph``.

    Args:
        season_year: Season label containing a dash (e.g. ``"2024-25"``).

    Returns:
        None. Progress is printed; plots are produced by ``trend_line_graph``.

    Raises:
        ValueError: If ``season_year`` contains no ``"-"``.
    """
    games = pd.read_sql_table(f"game_stats_{season_year}", "sqlite:///../database/nba_game_stats.db")
    season_id = f'2{season_year[:season_year.index("-")]}'
    # Filter and sort in one chain; sorting a filtered slice in place is a
    # SettingWithCopyWarning pattern.
    season_games = games[games['SEASON_ID'] == season_id].sort_values('GAME_DATE')
    game_dates = season_games['GAME_DATE'].unique()

    accs = []
    recalls = []
    precisions = []
    f1_scores = []
    for game_date in game_dates:
        # Every value comes from the column itself, so only a missing date
        # (NaN/NaT/None) can match zero rows. Skip it rather than running the
        # pipeline on a date that has no games.
        if pd.isna(game_date):
            print(f"Game date {game_date} is empty")
            continue

        outcomes_preds, final_acc, final_recall, final_precision, final_f1, _ = pred_old_outcomes_pipeline(
            season_year, target_game_date=game_date, training_and_testing=True
        )

        if outcomes_preds is None:
            print(f"Had no proper data from {game_date}")
            continue

        accs.append(final_acc)
        recalls.append(final_recall)
        precisions.append(final_precision)
        f1_scores.append(final_f1)
        print(game_date)

    if not accs:
        # No date produced metrics, so there is nothing to fit a trend line to.
        print(f"No game dates produced predictions for the {season_year} season; nothing to plot")
        return

    trend_line_graph(accs, "Accuracy", season_year)
    trend_line_graph(recalls, "Recall", season_year)
    trend_line_graph(precisions, "Precision", season_year)
    trend_line_graph(f1_scores, "F1", season_year)

In [None]:
# Run the per-game-date evaluation for the 2024-25 season and draw the
# accuracy / recall / precision / F1 trend plots.
season_year = "2024-25"
test_model_preds_over_time(season_year=season_year)