# In [None]:
import pandas as pd
import sqlite3

# Season IDs that every query below restricts itself to.
seasons = [
    '22019',
    '22020',
    '22021',
    '22022',
]

# Open the SQLite database that holds the `game` table read by the queries.
con = sqlite3.connect("data/nba.sqlite")

# Question 1: Win-Loss Ratio Difference (Home vs. Away)
# For each (team, season) pair, compare the fraction of home games won with
# the fraction of away games won. A positive `win_loss_diff` means the team
# won a larger share of its home games than of its away games.
print("Question 1: Win-Loss Ratio Difference (Home vs. Away) by Season")

# Bind the season IDs as query parameters (one "?" per ID) instead of pasting
# them into the SQL text: they are strings, and interpolating them unquoted
# turned them into bare numeric literals that only matched through SQLite's
# implicit type coercion.
season_placeholders = ', '.join('?' for _ in seasons)

wl_home = pd.read_sql_query(
    "SELECT team_name_home AS team, wl_home AS wl, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders})",
    con,
    params=seasons,
)
# Encode wins as 1 and losses as 0 so the group mean is the win ratio.
# `map` is used (rather than `replace`) to avoid the downcasting warning;
# any value other than 'W'/'L' becomes NaN and is skipped by `mean`.
wl_home['wl'] = wl_home['wl'].map({'W': 1, 'L': 0})
wl_home_grouped = wl_home.groupby(['team', 'season_id'])['wl'].mean().reset_index(name='home_win_ratio')

wl_away = pd.read_sql_query(
    "SELECT team_name_away AS team, wl_away AS wl, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders})",
    con,
    params=seasons,
)
wl_away['wl'] = wl_away['wl'].map({'W': 1, 'L': 0})
wl_away_grouped = wl_away.groupby(['team', 'season_id'])['wl'].mean().reset_index(name='away_win_ratio')

# Inner merge: only (team, season) pairs present in both the home and the
# away results are kept.
win_loss_diff = pd.merge(wl_home_grouped, wl_away_grouped, on=['team', 'season_id'])
win_loss_diff['win_loss_diff'] = win_loss_diff['home_win_ratio'] - win_loss_diff['away_win_ratio']
print(win_loss_diff.head(), "\n")  # Display the first few rows

# Question 2: Average Points Difference (Home vs. Away)
# For each (team, season) pair, compare the average points scored in home
# games with the average scored in away games. A positive `points_diff`
# means the team scored more on average at home.
print("\nQuestion 2: Average Points Difference (Home vs. Away) by Season")

# Bind the season IDs as query parameters (one "?" per ID) instead of pasting
# them into the SQL text: they are strings, and interpolating them unquoted
# turned them into bare numeric literals that only matched through SQLite's
# implicit type coercion.
season_placeholders = ', '.join('?' for _ in seasons)

pts_home = pd.read_sql_query(
    "SELECT team_name_home AS team, AVG(pts_home) AS avg_pts_home, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders}) "
    "GROUP BY team_name_home, season_id",
    con,
    params=seasons,
)
pts_away = pd.read_sql_query(
    "SELECT team_name_away AS team, AVG(pts_away) AS avg_pts_away, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders}) "
    "GROUP BY team_name_away, season_id",
    con,
    params=seasons,
)

# Inner merge: only (team, season) pairs present in both the home and the
# away results are kept.
points_diff = pd.merge(pts_home, pts_away, on=['team', 'season_id'])
points_diff['points_diff'] = points_diff['avg_pts_home'] - points_diff['avg_pts_away']
print(points_diff.head(), "\n")  # Display the first few rows

# Question 3: Rebounds Difference (Offensive and Defensive)
# For each (team, season) pair, compare average offensive and defensive
# rebounds in home games with the same averages in away games. Positive
# differences mean the team averaged more of that rebound type at home.
print("\nQuestion 3: Rebounds Difference (Offensive and Defensive) by Season")

# Bind the season IDs as query parameters (one "?" per ID) instead of pasting
# them into the SQL text: they are strings, and interpolating them unquoted
# turned them into bare numeric literals that only matched through SQLite's
# implicit type coercion.
season_placeholders = ', '.join('?' for _ in seasons)

reb_home = pd.read_sql_query(
    "SELECT team_name_home AS team, AVG(oreb_home) AS avg_oreb_home, "
    "AVG(dreb_home) AS avg_dreb_home, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders}) "
    "GROUP BY team_name_home, season_id",
    con,
    params=seasons,
)
reb_away = pd.read_sql_query(
    "SELECT team_name_away AS team, AVG(oreb_away) AS avg_oreb_away, "
    "AVG(dreb_away) AS avg_dreb_away, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders}) "
    "GROUP BY team_name_away, season_id",
    con,
    params=seasons,
)

# Inner merge: only (team, season) pairs present in both the home and the
# away results are kept.
rebounds_diff = pd.merge(reb_home, reb_away, on=['team', 'season_id'])
rebounds_diff['offensive_rebound_diff'] = rebounds_diff['avg_oreb_home'] - rebounds_diff['avg_oreb_away']
rebounds_diff['defensive_rebound_diff'] = rebounds_diff['avg_dreb_home'] - rebounds_diff['avg_dreb_away']
print(rebounds_diff.head(), "\n")  # Display the first few rows

# Question 4: Three-Point Field Goals Percentage Difference (Home vs. Away)
# For each (team, season) pair, compare the average per-game three-point
# percentage in home games with the average in away games. A positive
# `fg3_pct_diff` means the average was higher at home.
print("\nQuestion 4: Three-Point FG% Difference (Home vs. Away) by Season")

# Bind the season IDs as query parameters (one "?" per ID) instead of pasting
# them into the SQL text: they are strings, and interpolating them unquoted
# turned them into bare numeric literals that only matched through SQLite's
# implicit type coercion.
season_placeholders = ', '.join('?' for _ in seasons)

fg3_home = pd.read_sql_query(
    "SELECT team_name_home AS team, AVG(fg3_pct_home) AS avg_fg3_pct_home, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders}) "
    "GROUP BY team_name_home, season_id",
    con,
    params=seasons,
)
fg3_away = pd.read_sql_query(
    "SELECT team_name_away AS team, AVG(fg3_pct_away) AS avg_fg3_pct_away, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders}) "
    "GROUP BY team_name_away, season_id",
    con,
    params=seasons,
)

# Inner merge: only (team, season) pairs present in both the home and the
# away results are kept.
fg3_diff = pd.merge(fg3_home, fg3_away, on=['team', 'season_id'])
fg3_diff['fg3_pct_diff'] = fg3_diff['avg_fg3_pct_home'] - fg3_diff['avg_fg3_pct_away']
print(fg3_diff.head(), "\n")  # Display the first few rows

# Question 5: Free Throw Percentage Difference (Home vs. Away)
# For each (team, season) pair, compare the average per-game free-throw
# percentage in home games with the average in away games. A positive
# `ft_pct_diff` means the average was higher at home.
print("\nQuestion 5: Free Throw Percentage Difference (Home vs. Away) by Season")

# Bind the season IDs as query parameters (one "?" per ID) instead of pasting
# them into the SQL text: they are strings, and interpolating them unquoted
# turned them into bare numeric literals that only matched through SQLite's
# implicit type coercion.
season_placeholders = ', '.join('?' for _ in seasons)

ft_home = pd.read_sql_query(
    "SELECT team_name_home AS team, AVG(ft_pct_home) AS avg_ft_pct_home, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders}) "
    "GROUP BY team_name_home, season_id",
    con,
    params=seasons,
)
ft_away = pd.read_sql_query(
    "SELECT team_name_away AS team, AVG(ft_pct_away) AS avg_ft_pct_away, season_id "
    f"FROM game WHERE season_id IN ({season_placeholders}) "
    "GROUP BY team_name_away, season_id",
    con,
    params=seasons,
)

# Inner merge: only (team, season) pairs present in both the home and the
# away results are kept.
ft_diff = pd.merge(ft_home, ft_away, on=['team', 'season_id'])
ft_diff['ft_pct_diff'] = ft_diff['avg_ft_pct_home'] - ft_diff['avg_ft_pct_away']
print(ft_diff.head(), "\n")  # Display the first few rows
