**Issue 5**

Business Objective: What was the impact of the COVID-19 pandemic on the level of home court advantage?

Question 1: How has the difference in win-loss ratio between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)

Question 2: How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)

Question 3: How has the difference in offensive and defensive rebounds between home and away teams changed from the 2019-20 season to the 2022-23 season? (oreb_home, oreb_away, dreb_home, dreb_away columns in game table)

Question 4: How does the average three-point field goal percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)

Question 5: How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away in the game table)

In [1]:
import os
import sqlite3

import pandas as pd

# Location of the NBA SQLite database. Set the NBA_DB_PATH environment variable
# to run the notebook against a copy elsewhere; when it is unset, the original
# author's local path is used so existing behaviour is unchanged.
DB_PATH = os.environ.get(
    "NBA_DB_PATH",
    r"C:\Users\Ishaan\Documents\GitHub\UMD-INST627-Fall2024\data\nba.sqlite",
)

# sqlite3.connect() creates a new, empty database when the file does not exist,
# so a wrong path would only show up later as "no such table: game".
# Fail here with a clear message instead.
if not os.path.isfile(DB_PATH):
    raise FileNotFoundError(f"NBA SQLite database not found: {DB_PATH}")

con = sqlite3.connect(DB_PATH)

# First 100 rows of the game table.
games = pd.read_sql_query("SELECT * FROM game LIMIT 100", con)
# Full names of the rows in the player table flagged as active.
active_players = pd.read_sql_query("SELECT full_name FROM player WHERE is_active=TRUE", con)

# Define relevant season IDs for analysis
# NOTE(review): presumably the regular-season IDs for 2019-20 through 2022-23;
# confirm against the season_id values in the game table.
seasons = ['22019', '22020', '22021', '22022']

Question 1: How has the difference in win-loss ratio between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)

In [2]:
# Win-Loss Ratio for Home teams
# One row per matching row of the game table, labelled with the home team.
wl_home = pd.read_sql_query(
    f"SELECT team_name_home AS team, wl_home AS wl, season_id FROM game WHERE season_id IN ({', '.join(seasons)})",
    con
)
# Encode 'W' as 1 and 'L' as 0 so the mean of 'wl' is a win ratio.
# Any other value becomes NaN and is ignored by the means below.
wl_home['wl'] = wl_home['wl'].map({'W': 1, 'L': 0})

# Descriptive statistics for home win-loss ratio
# (per team, pooling all selected seasons together)
wl_home_stats = wl_home.groupby('team')['wl'].describe()
print(wl_home_stats)

# Win-Loss Ratio for Away teams
# Same rows as above, labelled with the away team and its result.
wl_away = pd.read_sql_query(
    f"SELECT team_name_away AS team, wl_away AS wl, season_id FROM game WHERE season_id IN ({', '.join(seasons)})",
    con
)
wl_away['wl'] = wl_away['wl'].map({'W': 1, 'L': 0})

# Descriptive statistics for away win-loss ratio
# (per team, pooling all selected seasons together)
wl_away_stats = wl_away.groupby('team')['wl'].describe()
print(wl_away_stats)

# Merging and calculating win-loss difference
# Win ratio per team and season, separately for home and away games.
wl_home_grouped = wl_home.groupby(['team', 'season_id'])['wl'].mean().reset_index(name='home_win_ratio')
wl_away_grouped = wl_away.groupby(['team', 'season_id'])['wl'].mean().reset_index(name='away_win_ratio')
# Inner merge: only team/season pairs present on both sides are kept.
win_loss_diff = pd.merge(wl_home_grouped, wl_away_grouped, on=['team', 'season_id'])
# Positive values mean the team won more often at home than away.
win_loss_diff['win_loss_diff'] = win_loss_diff['home_win_ratio'] - win_loss_diff['away_win_ratio']
print(win_loss_diff.head())

# Season-level summary: the question asks how the home/away gap changed across
# seasons, so average the per-team values within each season (unweighted mean
# across teams) and report the change from the previous season.
win_loss_by_season = (
    win_loss_diff
    .groupby('season_id')[['home_win_ratio', 'away_win_ratio', 'win_loss_diff']]
    .mean()
)
# groupby sorts by season_id, so diff() compares each season with the one before.
win_loss_by_season['change_vs_prev_season'] = win_loss_by_season['win_loss_diff'].diff()
print(win_loss_by_season)

                        count      mean       std  min  25%  50%  75%  max
team                                                                      
Atlanta Hawks           152.0  0.592105  0.493068  0.0  0.0  1.0  1.0  1.0
Boston Celtics          154.0  0.694805  0.461992  0.0  0.0  1.0  1.0  1.0
Brooklyn Nets           154.0  0.590909  0.493270  0.0  0.0  1.0  1.0  1.0
Charlotte Hornets       149.0  0.422819  0.495673  0.0  0.0  0.0  1.0  1.0
Chicago Bulls           152.0  0.513158  0.501479  0.0  0.0  1.0  1.0  1.0
Cleveland Cavaliers     154.0  0.519481  0.501250  0.0  0.0  1.0  1.0  1.0
Dallas Mavericks        156.0  0.596154  0.492248  0.0  0.0  1.0  1.0  1.0
Denver Nuggets          155.0  0.696774  0.461142  0.0  0.0  1.0  1.0  1.0
Detroit Pistons         151.0  0.304636  0.461785  0.0  0.0  0.0  1.0  1.0
Golden State Warriors   152.0  0.638158  0.482122  0.0  0.0  1.0  1.0  1.0
Houston Rockets         154.0  0.376623  0.486120  0.0  0.0  0.0  1.0  1.0
Indiana Pacers          1

Question 2: How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)

In [4]:
# Average Points for Home teams
pts_home = pd.read_sql_query(
    f"SELECT team_name_home AS team, AVG(pts_home) AS avg_pts_home, season_id FROM game WHERE season_id IN ({', '.join(seasons)}) GROUP BY team_name_home, season_id",
    con
)

# Descriptive statistics for home points
pts_home_stats = pts_home.groupby('team')['avg_pts_home'].describe()
print(pts_home_stats)

# Average Points for Away teams
pts_away = pd.read_sql_query(
    f"SELECT team_name_away AS team, AVG(pts_away) AS avg_pts_away, season_id FROM game WHERE season_id IN ({', '.join(seasons)}) GROUP BY team_name_away, season_id",
    con
)

# Descriptive statistics for away points
pts_away_stats = pts_away.groupby('team')['avg_pts_away'].describe()
print(pts_away_stats)

# Merging and calculating points difference
points_diff = pd.merge(pts_home, pts_away, on=['team', 'season_id'])
points_diff['points_diff'] = points_diff['avg_pts_home'] - points_diff['avg_pts_away']
print(points_diff.head())



                        count        mean       std         min         25%  \
team                                                                          
Atlanta Hawks             4.0  116.574396  2.210153  114.588235  115.209559   
Boston Celtics            4.0  115.414295  4.092806  110.756098  113.251524   
Brooklyn Nets             4.0  113.414465  3.127240  110.439024  111.901423   
Charlotte Hornets         4.0  108.830503  3.787392  104.225806  107.348118   
Chicago Bulls             4.0  110.951498  3.905192  106.117647  108.612745   
Cleveland Cavaliers       4.0  109.317920  2.606489  107.472222  107.922934   
Dallas Mavericks          4.0  113.124447  4.202402  108.365854  110.383130   
Denver Nuggets            4.0  115.772976  3.228622  111.837838  114.044825   
Detroit Pistons           4.0  107.944152  3.530619  105.073171  105.080793   
Golden State Warriors     4.0  114.016699  5.437272  106.970588  111.492647   
Houston Rockets           4.0  111.781843  4.634299 

Question 3: How has the difference in offensive and defensive rebounds between home and away teams changed from the 2019-20 season to the 2022-23 season? (oreb_home, oreb_away, dreb_home, dreb_away columns in game table)

In [5]:
# Offensive and Defensive Rebounds for Home teams
reb_home = pd.read_sql_query(
    f"SELECT team_name_home AS team, AVG(oreb_home) AS avg_oreb_home, AVG(dreb_home) AS avg_dreb_home, season_id FROM game WHERE season_id IN ({', '.join(seasons)}) GROUP BY team_name_home, season_id",
    con
)

# Descriptive statistics for home rebounds
reb_home_stats = reb_home[['avg_oreb_home', 'avg_dreb_home']].describe()
print(reb_home_stats)

# Offensive and Defensive Rebounds for Away teams
reb_away = pd.read_sql_query(
    f"SELECT team_name_away AS team, AVG(oreb_away) AS avg_oreb_away, AVG(dreb_away) AS avg_dreb_away, season_id FROM game WHERE season_id IN ({', '.join(seasons)}) GROUP BY team_name_away, season_id",
    con
)

# Descriptive statistics for away rebounds
reb_away_stats = reb_away[['avg_oreb_away', 'avg_dreb_away']].describe()
print(reb_away_stats)

# Merging and calculating rebound differences
rebounds_diff = pd.merge(reb_home, reb_away, on=['team', 'season_id'])
rebounds_diff['offensive_rebound_diff'] = rebounds_diff['avg_oreb_home'] - rebounds_diff['avg_oreb_away']
rebounds_diff['defensive_rebound_diff'] = rebounds_diff['avg_dreb_home'] - rebounds_diff['avg_dreb_away']
print(rebounds_diff.head())


       avg_oreb_home  avg_dreb_home
count     120.000000     120.000000
mean       10.251354      34.469807
std         1.172462       1.840411
min         8.219512      30.682927
25%         9.341463      33.195122
50%        10.212697      34.475610
75%        10.975610      35.497798
max        13.536585      42.228571
       avg_oreb_away  avg_dreb_away
count     120.000000     120.000000
mean       10.092371      33.678377
std         1.203415       1.783909
min         7.024390      29.560976
25%         9.388889      32.453622
50%        10.069444      33.634824
75%        10.642276      34.786036
max        15.000000      42.236842
             team  avg_oreb_home  avg_dreb_home season_id  avg_oreb_away  \
0   Atlanta Hawks      10.205882      34.323529     22019       9.515152   
1   Atlanta Hawks      10.388889      36.527778     22020      10.722222   
2   Atlanta Hawks       9.341463      34.268293     22021      10.731707   
3   Atlanta Hawks      11.536585      34.439024 

Question 4: How does the average three-point field goal percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)

In [6]:
# Three-Point Field Goal Percentage for Home teams
fg3_home = pd.read_sql_query(
    f"SELECT team_name_home AS team, AVG(fg3_pct_home) AS avg_fg3_pct_home, season_id FROM game WHERE season_id IN ({', '.join(seasons)}) GROUP BY team_name_home, season_id",
    con
)

# Descriptive statistics for home FG3 percentage
fg3_home_stats = fg3_home.groupby('team')['avg_fg3_pct_home'].describe()
print(fg3_home_stats)

# Three-Point Field Goal Percentage for Away teams
fg3_away = pd.read_sql_query(
    f"SELECT team_name_away AS team, AVG(fg3_pct_away) AS avg_fg3_pct_away, season_id FROM game WHERE season_id IN ({', '.join(seasons)}) GROUP BY team_name_away, season_id",
    con
)

# Descriptive statistics for away FG3 percentage
fg3_away_stats = fg3_away.groupby('team')['avg_fg3_pct_away'].describe()
print(fg3_away_stats)

# Merging and calculating FG3 percentage difference
fg3_diff = pd.merge(fg3_home, fg3_away, on=['team', 'season_id'])
fg3_diff['fg3_pct_diff'] = fg3_diff['avg_fg3_pct_home'] - fg3_diff['avg_fg3_pct_away']
print(fg3_diff.head())


                        count      mean       std       min       25%  \
team                                                                    
Atlanta Hawks             4.0  0.363417  0.020049  0.344735  0.346763   
Boston Celtics            4.0  0.371866  0.013642  0.351439  0.371232   
Brooklyn Nets             4.0  0.367319  0.026018  0.332171  0.355418   
Charlotte Hornets         4.0  0.352940  0.011956  0.339439  0.347451   
Chicago Bulls             4.0  0.364200  0.010320  0.349971  0.360310   
Cleveland Cavaliers       4.0  0.363784  0.003751  0.358171  0.363622   
Dallas Mavericks          4.0  0.361152  0.007601  0.354583  0.354932   
Denver Nuggets            4.0  0.365997  0.020973  0.344757  0.349970   
Detroit Pistons           4.0  0.351684  0.023413  0.320585  0.344021   
Golden State Warriors     4.0  0.370848  0.023565  0.337971  0.362048   
Houston Rockets           4.0  0.336623  0.017474  0.324694  0.325161   
Indiana Pacers            4.0  0.354619  0.008435  

Question 5: How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away columns in game table)

In [8]:
# Free Throw Percentage for Home teams
ft_home = pd.read_sql_query(
    f"SELECT team_name_home AS team, AVG(ft_pct_home) AS avg_ft_pct_home, season_id FROM game WHERE season_id IN ({', '.join(seasons)}) GROUP BY team_name_home, season_id",
    con
)

# Descriptive statistics for home FT percentage
ft_home_stats = ft_home.groupby('team')['avg_ft_pct_home'].describe()
print(ft_home_stats)

# Free Throw Percentage for Away teams
ft_away = pd.read_sql_query(
    f"SELECT team_name_away AS team, AVG(ft_pct_away) AS avg_ft_pct_away, season_id FROM game WHERE season_id IN ({', '.join(seasons)}) GROUP BY team_name_away, season_id",
    con
)

# Descriptive statistics for away FT percentage
ft_away_stats = ft_away.groupby('team')['avg_ft_pct_away'].describe()
print(ft_away_stats)

# Merging and calculating FT percentage difference
ft_diff = pd.merge(ft_home, ft_away, on=['team', 'season_id'])
ft_diff['ft_pct_diff'] = ft_diff['avg_ft_pct_home'] - ft_diff['avg_ft_pct_away']
print(ft_diff.head())


                        count      mean       std       min       25%  \
team                                                                    
Atlanta Hawks             4.0  0.811137  0.017775  0.785000  0.807417   
Boston Celtics            4.0  0.808423  0.008791  0.797333  0.805224   
Brooklyn Nets             4.0  0.783232  0.039055  0.724778  0.780591   
Charlotte Hornets         4.0  0.740712  0.013621  0.720659  0.738358   
Chicago Bulls             4.0  0.796119  0.036019  0.746882  0.784221   
Cleveland Cavaliers       4.0  0.762622  0.013922  0.750610  0.751090   
Dallas Mavericks          4.0  0.761709  0.015700  0.743268  0.751921   
Denver Nuggets            4.0  0.771523  0.032550  0.735780  0.748006   
Detroit Pistons           4.0  0.760269  0.024983  0.737455  0.744989   
Golden State Warriors     4.0  0.787342  0.022392  0.764098  0.772384   
Houston Rockets           4.0  0.746879  0.024246  0.721610  0.733507   
Indiana Pacers            4.0  0.784079  0.009067  