In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3
# import sklearn

# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.linear_model import LinearRegression
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# import tensorflow as tf
# from tensorflow import keras

# import statsmodels.api as sm

import warnings

# Install a catch-all "ignore" filter so no warning is shown for the rest of the session.
warnings.simplefilter("ignore")
# Show every column when a DataFrame is rendered instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

In [None]:
# Open the SQLite file that the queries in the following cells read from.
conn = sqlite3.connect(database='NBA-Boxscore-Database.sqlite')

In [None]:
# Load every row and column of the game_info table into a DataFrame, then display it.
query = 'SELECT * from game_info'
game_info = pd.read_sql_query(sql=query, con=conn)
game_info

In [None]:
# Load every row and column of the team_stats table into a DataFrame, then display it.
query = 'SELECT * from team_stats'
team_stats = pd.read_sql_query(sql=query, con=conn)
team_stats

In [None]:
# Distinct game ids present in team_stats, in order of first appearance.
game_ids = list(pd.unique(team_stats['game_id']))

In [None]:
# Copy of team_stats without the MP, PM, USGp, ORtg, DRtg and BPM columns.
stats_df = team_stats.drop(columns=['MP', 'PM', 'USGp', 'ORtg', 'DRtg', 'BPM'])
stats_df

In [None]:
def create_gid_query(game_id, date, lim, team='away'):
    """Build the SQL that lists one team's most recent games before ``date``.

    The team is looked up inside the query itself: it is whichever side
    (``away_team`` or ``home_team``) of the ``game_info`` row identified by
    ``game_id`` is selected through ``team``. The query then returns the ids
    of games in which that team appeared on either side, strictly earlier
    than ``date``, newest first, capped at ``lim`` rows.

    Parameters
    ----------
    game_id : value interpolated into ``game_id = '...'``
        Identifier of the reference game.
    date : value interpolated into ``gi.date < '...'``
        Only games dated strictly before this value are returned.
    lim : value interpolated after ``LIMIT``
        Maximum number of game ids to return.
    team : {'away', 'home'}, default 'away'
        Which side of the reference game to build the history for.

    Returns
    -------
    str
        A single SQL ``SELECT`` statement.

    Raises
    ------
    ValueError
        If ``team`` is neither ``'away'`` nor ``'home'``. (Previously the
        function silently returned ``None`` in that case.)

    Notes
    -----
    The arguments are formatted directly into the SQL text and are not
    escaped, so only pass values that come from a trusted source.
    """
    if team not in ('away', 'home'):
        raise ValueError(f"team must be 'away' or 'home', got {team!r}")

    # The away and home variants differ only in which column of the reference
    # game supplies the team name, so one template serves both.
    team_subquery = f"(SELECT {team}_team FROM game_info WHERE game_id = '{game_id}')"

    return f'''
    SELECT gi.game_id
    FROM game_info gi
    WHERE (gi.away_team = {team_subquery}
            AND gi.date < '{date}')
       OR (gi.home_team = {team_subquery}
            AND gi.date < '{date}')
    ORDER BY gi.date DESC
    LIMIT {lim};
    '''

In [None]:
def create_stats_df(query, conn, stats, team='away', team_name=None):
    """Average one team's box-score columns over the games returned by ``query``.

    The game ids produced by ``query`` are merged with ``stats`` on whatever
    columns the two frames share, the result is restricted to rows whose
    ``team`` column equals ``team_name``, and the columns from ``'FG'``
    through ``'TOVp'`` (label slice, both ends included) are averaged.

    Parameters
    ----------
    query : str
        SQL statement executed with ``pd.read_sql``.
    conn : database connection
        Connection passed to ``pd.read_sql``.
    stats : pandas.DataFrame
        Per-team, per-game statistics. Must contain a ``team`` column and the
        contiguous column range ``'FG'`` .. ``'TOVp'``.
    team : {'away', 'home'}, default 'away'
        Selects the output column prefix: ``a_`` for away, ``h_`` for home.
    team_name : optional
        Team whose rows are kept. When omitted, the notebook-level variable
        ``away_team`` or ``home_team`` (matching ``team``) is used, which is
        how the function behaved before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        A single-row frame of column means with prefixed column names.

    Raises
    ------
    ValueError
        If ``team`` is neither ``'away'`` nor ``'home'``. (Previously this
        surfaced as an ``UnboundLocalError`` on ``stat_agg``.)
    """
    prefixes = {'away': 'a_', 'home': 'h_'}
    if team not in prefixes:
        raise ValueError(f"team must be 'away' or 'home', got {team!r}")

    if team_name is None:
        # Backward compatibility: older callers rely on these names being
        # defined at notebook level before the call. Prefer passing team_name.
        team_name = away_team if team == 'away' else home_team

    recent_game_ids = pd.read_sql(query, con=conn)
    recent_stats = recent_game_ids.merge(stats)

    # Each merged game contributes one row per participating team; keep only
    # the rows belonging to the team of interest.
    team_rows = recent_stats[recent_stats['team'] == team_name].reset_index(drop=True)

    # Series of means -> one-row DataFrame so it can be concatenated column-wise.
    stat_agg = team_rows.loc[:, 'FG':'TOVp'].mean().to_frame().T
    return stat_agg.add_prefix(prefixes[team])

In [None]:
# Build one row per game: the game_info columns followed by each side's average
# box-score stats over its previous (up to) 10 games, then write the table to CSV.
stats_df = team_stats.drop(['MP', 'PM', 'USGp', 'ORtg', 'DRtg', 'BPM'], axis=1)
stat_columns = list(stats_df.loc[:, 'FG':'TOVp'].columns)
final_df_columns = (
    list(game_info.columns)
    + [f'a_{column}' for column in stat_columns]
    + [f'h_{column}' for column in stat_columns]
)

# Per-game rows are collected in a list and concatenated once after the loop.
# DataFrame.append no longer exists in pandas 2.x, and growing a frame row by
# row copies the whole table on every iteration.
game_rows = []

for game_id in game_ids:

    # Filter game_info once per game and reuse the row for every lookup below.
    gid_info = game_info[game_info['game_id'] == game_id].reset_index(drop=True)
    date = gid_info['date'].values[0]
    # away_team / home_team must remain notebook-level names here:
    # create_stats_df reads them when no explicit team name is passed.
    away_team = gid_info['away_team'].values[0]
    home_team = gid_info['home_team'].values[0]

    away_gid_query = create_gid_query(game_id=game_id, date=date, lim=10, team='away')
    away_stats = create_stats_df(query=away_gid_query, conn=conn, stats=stats_df, team='away')

    home_gid_query = create_gid_query(game_id=game_id, date=date, lim=10, team='home')
    home_stats = create_stats_df(query=home_gid_query, conn=conn, stats=stats_df, team='home')

    agg_stats = pd.concat([away_stats, home_stats], axis=1)

    stats = pd.concat([gid_info, agg_stats], axis=1)
    game_rows.append(stats)

if game_rows:
    team_stats_full_10 = pd.concat(game_rows, ignore_index=True)
else:
    # No games to process: keep the expected (empty) schema.
    team_stats_full_10 = pd.DataFrame(columns=final_df_columns)

team_stats_full_10.to_csv('team_stats_full_10.csv')