In [49]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import os
import joblib

In [50]:
# Load result.csv from the app directory. The relative path is resolved against
# the kernel's current working directory, so this cell only works when the
# notebook is run from its own folder.
data = pd.read_csv('../../app/result.csv')

In [51]:
def get_dir_path(file):
    """Return the path of ``file`` inside the ``../../app`` directory.

    The path is anchored at the current working directory. It is not
    normalised, so the ``..`` segments remain in the returned string.
    Used below for the pickled model and scaler, not only for CSV files.
    """
    app_dir = os.path.join(os.getcwd(), "../../app")
    return os.path.join(app_dir, file)

In [52]:
def load_model():
    """Load ``predict_model.pkl`` from the app directory with joblib.

    Best-effort: any exception raised while resolving the path or loading the
    file is printed and ``None`` is returned instead of propagating.
    """
    model = None
    try:
        model = joblib.load(get_dir_path('predict_model.pkl'))
    except Exception as e:
        print(f"Error loading the model: {e}")
    return model
        
def load_scaler():
    """Load ``model_scaler.pkl`` from the app directory with joblib.

    Best-effort: any exception raised while resolving the path or loading the
    file is printed and ``None`` is returned instead of propagating.
    """
    scaler = None
    try:
        scaler = joblib.load(get_dir_path('model_scaler.pkl'))
    except Exception as e:
        print(f"Error loading the scaler: {e}")
    return scaler

In [53]:
# Stats that are only known once a game has been played; they are unavailable
# when predicting a future matchup, so they are excluded from the features.
unknown = [
    'total', 'total_opp', 'won',
    'home_pts_scored', 'away_pts_scored',
    'home_pts_allowed', 'away_pts_allowed',
]

# Feature frame: drop 'spread' (presumably the prediction target - confirm),
# the date/season columns and the post-game stats above. The team identifier
# columns are kept because the lookup helpers below filter on them.
features = data.drop(columns=['spread', 'date', 'season', *unknown])

# Only float64/int64 columns are candidates for scaling.
numerical_columns = features.select_dtypes(include=['float64', 'int64']).columns

# Each loader prints the error and returns None if loading fails.
model = load_model()
scaler = load_scaler()

In [18]:
def get_recent_performance_stats(team, team_type, data):
    """Return the numeric stats of the last row in which ``team`` played as ``team_type``.

    "Last" means the final matching row in ``data``'s existing row order
    (assumes ``data`` is sorted oldest-to-newest - TODO confirm).

    Parameters
    ----------
    team
        Value matched against the ``home_team`` or ``away_team`` column.
    team_type : {'home', 'away'}
        Which side the team played on; selects both the column used for
        matching and the subset of stats that is returned.
    data : pandas.DataFrame
        Must contain ``home_team`` and ``away_team`` columns. Only ``float64``
        and ``int64`` columns are considered as stats.

    Returns
    -------
    pandas.Series
        For ``'home'``: the columns whose name contains neither ``'opp'`` nor
        ``'away'``. For ``'away'``: the columns whose name contains ``'opp'``
        or ``'away'``.

    Raises
    ------
    ValueError
        If ``team_type`` is not ``'home'``/``'away'``, or if no numeric stats
        exist for ``team`` on that side.
    """
    if team_type == 'home':
        team_column = 'home_team'
    elif team_type == 'away':
        team_column = 'away_team'
    else:
        raise ValueError("team_type must be 'home' or 'away'")

    team_data = data[data[team_column] == team].select_dtypes(include=['float64', 'int64'])

    # This check must come before .iloc[-1]: indexing an empty frame raises
    # IndexError, which would bypass callers that handle ValueError.
    if team_data.empty:
        raise ValueError(f"No recent performance stats found for team {team} as {team_type}.")

    recent_stats = team_data.iloc[-1]

    # Columns mentioning 'opp' or 'away' belong to the away side; everything
    # else belongs to the home side.
    want_away_side = team_type == 'away'
    selected = [
        col for col in recent_stats.index
        if ('opp' in col or 'away' in col) == want_away_side
    ]
    return recent_stats[selected]

In [43]:
def prepare_features_for_prediction(home_team, away_team, data, scaler, numerical_columns):
    """Build a one-row feature frame for a ``home_team`` vs ``away_team`` matchup.

    The home side's most recent home stats and the away side's most recent
    away stats are concatenated into a single row.

    Parameters
    ----------
    home_team, away_team
        Team identifiers as they appear in ``data``'s ``home_team`` /
        ``away_team`` columns.
    data : pandas.DataFrame
        Frame passed through to ``get_recent_performance_stats``.
    scaler, numerical_columns
        Currently unused; kept so existing callers keep working. The returned
        frame is neither scaled nor reordered, so callers must do both
        themselves before passing it to a model.

    Returns
    -------
    pandas.DataFrame or None
        A single-row frame (home-side columns first, then away-side columns),
        or ``None`` if the stats lookup raised ``ValueError``; the error is
        printed in that case.
    """
    try:
        home_stats = get_recent_performance_stats(home_team, 'home', data)
        away_stats = get_recent_performance_stats(away_team, 'away', data)
    except ValueError as e:
        # The original called `st.error`, but `st` is never imported in this
        # notebook, so the handler itself raised NameError. Report the problem
        # with print, as the model/scaler loaders do.
        print(f"Error in getting recent performance stats: {e}")
        return None

    # Series -> one-column frame -> transpose into a single row.
    return pd.concat([home_stats, away_stats]).to_frame().T

In [44]:
# Matchup used for the column check below.
home_team, away_team = 'LAL', 'GSW'

In [57]:
# Column names, in order, of the single-row frame built for the matchup.
prepared_cols = list(
    prepare_features_for_prediction(
        home_team=home_team,
        away_team=away_team,
        data=features,
        scaler=scaler,
        numerical_columns=numerical_columns,
    ).columns
)

['fg', 'fga', 'fgpct', '3p', '3pa', '3ppct', 'ft', 'fta', 'ftpct', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'tspct', 'efgpct', '3par', 'ftr', 'orbpct', 'drbpct', 'trbpct', 'astpct', 'stlpct', 'blkpct', 'tovpct', 'ortg', 'drtg', 'fg_max', 'fga_max', 'fgpct_max', '3p_max', '3pa_max', '3ppct_max', 'ft_max', 'fta_max', 'ftpct_max', 'orb_max', 'drb_max', 'trb_max', 'ast_max', 'stl_max', 'blk_max', 'tov_max', 'pf_max', 'pts_max', '+/-_max', 'tspct_max', 'efgpct_max', '3par_max', 'ftr_max', 'orbpct_max', 'drbpct_max', 'trbpct_max', 'astpct_max', 'stlpct_max', 'blkpct_max', 'tovpct_max', 'usgpct_max', 'ortg_max', 'drtg_max', 'home', 'home_avg_pts_scored', 'home_avg_pts_allowed', 'home_rolling_avg_fg', 'home_rolling_avg_fga', 'home_rolling_avg_3p', 'home_rolling_avg_3pa', 'home_rolling_avg_ft', 'home_rolling_avg_fta', 'home_rolling_avg_ast', 'home_rolling_avg_trb', 'home_rolling_avg_home_pts_scored', 'home_rolling_avg_home_pts_allowed', 'fg_opp', 'fga_opp', 'fgpct_opp', '3p

In [55]:
# Reference column list that prepared_cols is compared against below
# (presumably the column order the model was trained with - confirm).
cols = [
    # Un-suffixed columns.
    'fg', 'fga', 'fgpct', '3p', '3pa', '3ppct', 'ft', 'fta', 'ftpct',
    'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts',
    'tspct', 'efgpct', '3par', 'ftr',
    'orbpct', 'drbpct', 'trbpct', 'astpct', 'stlpct', 'blkpct', 'tovpct',
    'ortg', 'drtg',
    # '_max' columns.
    'fg_max', 'fga_max', 'fgpct_max', '3p_max', '3pa_max', '3ppct_max',
    'ft_max', 'fta_max', 'ftpct_max',
    'orb_max', 'drb_max', 'trb_max', 'ast_max', 'stl_max', 'blk_max',
    'tov_max', 'pf_max', 'pts_max', '+/-_max',
    'tspct_max', 'efgpct_max', '3par_max', 'ftr_max',
    'orbpct_max', 'drbpct_max', 'trbpct_max', 'astpct_max', 'stlpct_max',
    'blkpct_max', 'tovpct_max', 'usgpct_max',
    'ortg_max', 'drtg_max',
    'home',
    # '_opp' columns, in the same order as the un-suffixed ones.
    'fg_opp', 'fga_opp', 'fgpct_opp', '3p_opp', '3pa_opp', '3ppct_opp',
    'ft_opp', 'fta_opp', 'ftpct_opp',
    'orb_opp', 'drb_opp', 'trb_opp', 'ast_opp', 'stl_opp', 'blk_opp',
    'tov_opp', 'pf_opp', 'pts_opp',
    'tspct_opp', 'efgpct_opp', '3par_opp', 'ftr_opp',
    'orbpct_opp', 'drbpct_opp', 'trbpct_opp', 'astpct_opp', 'stlpct_opp',
    'blkpct_opp', 'tovpct_opp',
    'ortg_opp', 'drtg_opp',
    # '_max_opp' columns.
    'fg_max_opp', 'fga_max_opp', 'fgpct_max_opp', '3p_max_opp', '3pa_max_opp',
    '3ppct_max_opp', 'ft_max_opp', 'fta_max_opp', 'ftpct_max_opp',
    'orb_max_opp', 'drb_max_opp', 'trb_max_opp', 'ast_max_opp', 'stl_max_opp',
    'blk_max_opp', 'tov_max_opp', 'pf_max_opp', 'pts_max_opp', '+/-_max_opp',
    'tspct_max_opp', 'efgpct_max_opp', '3par_max_opp', 'ftr_max_opp',
    'orbpct_max_opp', 'drbpct_max_opp', 'trbpct_max_opp', 'astpct_max_opp',
    'stlpct_max_opp', 'blkpct_max_opp', 'tovpct_max_opp', 'usgpct_max_opp',
    'ortg_max_opp', 'drtg_max_opp',
    'home_opp',
    # Average points columns.
    'home_avg_pts_scored', 'home_avg_pts_allowed',
    'away_avg_pts_scored', 'away_avg_pts_allowed',
    # 'home_rolling_avg_' columns.
    'home_rolling_avg_fg', 'home_rolling_avg_fga',
    'home_rolling_avg_3p', 'home_rolling_avg_3pa',
    'home_rolling_avg_ft', 'home_rolling_avg_fta',
    'home_rolling_avg_ast', 'home_rolling_avg_trb',
    'home_rolling_avg_home_pts_scored', 'home_rolling_avg_home_pts_allowed',
    # 'away_rolling_avg_' columns.
    'away_rolling_avg_fg_opp', 'away_rolling_avg_fga_opp',
    'away_rolling_avg_3p_opp', 'away_rolling_avg_3pa_opp',
    'away_rolling_avg_ft_opp', 'away_rolling_avg_fta_opp',
    'away_rolling_avg_ast_opp', 'away_rolling_avg_trb_opp',
    'away_rolling_avg_away_pts_scored', 'away_rolling_avg_away_pts_allowed',
]

In [56]:
# Compare the two column collections by membership only (order is ignored).
set_prepared_cols = set(prepared_cols)
set_cols = set(cols)

# Names that appear on one side but not the other.
not_in_cols = set_prepared_cols.difference(set_cols)
not_in_prepared_cols = set_cols.difference(set_prepared_cols)

# Every name that appears in exactly one of the two lists.
not_shared = not_in_cols | not_in_prepared_cols

# Report all three sets; empty sets mean both lists hold the same names.
print(f"Items in prepared_cols but not in cols: {not_in_cols}")
print(f"Items in cols but not in prepared_cols: {not_in_prepared_cols}")
print(f"Items not shared between the two lists: {not_shared}")

Items in prepared_cols but not in cols: set()
Items in cols but not in prepared_cols: set()
Items not shared between the two lists: set()
