#### Business Objective: What was the impact of the COVID-19 pandemic on the level of home court advantage?
- Question 1: How has the difference in win-loss ratio between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)
- Question 2: How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)
- Question 3: How has the difference in offensive and defensive rebounds between home and away teams changed from 2019-20 to 2022-23?(oreb_home, oreb_away, dreb_home, dreb_away in the game table)
- Question 4: How does the average number of three-point field goals made by home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)
- Question 5: How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away in the game table)

In [1]:
import pandas as pd
import sqlite3

con = sqlite3.connect("data/nba.sqlite")
games = pd.read_sql_query("SELECT * FROM game LIMIT 100", con)
active_players = pd.read_sql_query("SELECT full_name FROM player WHERE is_active=TRUE", con)

In [61]:
#Question 1: How has the difference in win-loss ratio between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)

win_loss_query= """ SELECT season_id, team_name_home AS team, wl_home, team_name_away AS away_team, wl_away FROM game WHERE season_id IN (22019, 22020, 22021, 22022) GROUP BY team, season_id """
win_loss_data=pd.read_sql_query(win_loss_query, con)

win_loss_data['wl_home']= win_loss_data['wl_home'].map({'W': 1, 'L':0})
win_loss_data['wl_away']= win_loss_data['wl_away'].map({'W': 1, 'L':0})
win_loss_stats= win_loss_data.groupby('season_id').agg(home_win_ratio=('wl_home','mean'), away_win_ratio=('wl_home','mean')). reset_index()

win_loss_stats['win_loss_diff']= win_loss_stats['home_win_ratio'] - win_loss_stats['away_win_ratio']

win_loss_stats
win_loss_data.head()

Unnamed: 0,season_id,team,wl_home,away_team,wl_away
0,22019,Atlanta Hawks,1,Orlando Magic,0
1,22020,Atlanta Hawks,1,Detroit Pistons,0
2,22021,Atlanta Hawks,1,Dallas Mavericks,0
3,22022,Atlanta Hawks,1,Houston Rockets,0
4,22019,Boston Celtics,1,Toronto Raptors,0


In [55]:
#Question 2: How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)

avg_home_away_query= """SELECT season_id, team_name_home AS team, AVG(pts_home) AS avg_home, AVG(pts_away) AS avg_away FROM game WHERE season_id ='22019' OR season_id ='22020' OR season_id='22022' OR season_id='22023' GROUP BY team, season_id"""
avg_home_away_data=pd.read_sql_query(avg_home_away_query,con)


avg_home_away_data



Unnamed: 0,season_id,team,avg_home,avg_away
0,22019,Atlanta Hawks,114.588235,117.352941
1,22020,Atlanta Hawks,115.416667,109.083333
2,22022,Atlanta Hawks,119.634146,117.390244
3,22019,Boston Celtics,116.305556,108.055556
4,22020,Boston Celtics,114.083333,111.083333
...,...,...,...,...
85,22020,Utah Jazz,117.444444,104.222222
86,22022,Utah Jazz,117.951220,116.878049
87,22019,Washington Wizards,114.000000,115.861111
88,22020,Washington Wizards,119.000000,119.083333


In [83]:
#Question 3: How has the difference in offensive and defensive rebounds between home and away teams changed from 2019-20 to 2022-23?(oreb_home, oreb_away, dreb_home, dreb_away in the game table)
rebound_query= """SELECT season_id, team_name_home AS team, AVG(oreb_home) AS avg_oreb_home, AVG(oreb_away) AS avg_oreb_away, AVG(dreb_away) AS avg_dreb_away, AVG(dreb_home) AS avg_dreb_home FROM game WHERE season_id ='22019' OR season_id ='22020' OR season_id='22022' OR season_id='22023' GROUP BY team, season_id"""
rebound_data=pd.read_sql_query(rebound_query,con)

rebound_data['oreb_diff'] = rebound_data['avg_oreb_home'] - rebound_data['avg_oreb_away']
rebound_data['dreb_diff'] = rebound_data['avg_dreb_home'] - rebound_data['avg_dreb_away']



rebound_data


Unnamed: 0,season_id,team,avg_oreb_home,avg_oreb_away,avg_dreb_away,avg_dreb_home,oreb_diff,dreb_diff
0,22019,Atlanta Hawks,10.205882,11.117647,34.764706,34.323529,-0.911765,-0.441176
1,22020,Atlanta Hawks,10.388889,10.333333,31.861111,36.527778,0.055556,4.666667
2,22022,Atlanta Hawks,11.536585,10.292683,34.121951,34.439024,1.243902,0.317073
3,22019,Boston Celtics,10.138889,9.888889,33.055556,35.194444,0.250000,2.138889
4,22020,Boston Celtics,10.694444,8.777778,31.777778,33.250000,1.916667,1.472222
...,...,...,...,...,...,...,...,...
85,22020,Utah Jazz,10.750000,9.694444,31.472222,38.666667,1.055556,7.194444
86,22022,Utah Jazz,12.000000,11.365854,31.951220,35.121951,0.634146,3.170732
87,22019,Washington Wizards,10.277778,10.194444,35.611111,32.861111,0.083333,-2.750000
88,22020,Washington Wizards,9.611111,10.194444,36.055556,35.944444,-0.583333,-0.111111


In [92]:
#Question 4: How does the average number of three-point field goals made by home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)

fg3_diff_19= """ SELECT season_id, AVG(fg3_pct_home)- AVG(fg3_pct_away) AS fg3_diff FROM game WHERE season_id='22019' """
fg3_diff_19_data=pd.read_sql_query(fg3_diff_19, con)

fg3_diff_22= """ SELECT season_id, AVG(fg3_pct_home)- AVG(fg3_pct_away) AS fg3_diff FROM game WHERE season_id='22022' """
fg3_diff_22_data=pd.read_sql_query(fg3_diff_22, con)


fg3_diff_19_data


Unnamed: 0,season_id,fg3_diff
0,22019,0.007389


In [94]:
fg3_diff_22_data

Unnamed: 0,season_id,fg3_diff
0,22022,0.01151


In [102]:
#Question 5: How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away in the game table)

ft_pct_query= """ SELECT season_id, MIN(ft_pct_home) AS min_ft_pct_home, MIN(ft_pct_away) AS min_ft_pct_away, MAX(ft_pct_home) AS max_ft_pct_home, MAX(ft_pct_away) AS max_ft_pct_away, COUNT(*) AS games_played FROM game WHERE season_id= '22019'""" 
ft_pct_data_19= pd.read_sql_query(ft_pct_query, con)

ft_pct_data_19

Unnamed: 0,season_id,min_ft_pct_home,min_ft_pct_away,max_ft_pct_home,max_ft_pct_away,games_played
0,22019,0.4,0.375,1.0,1.0,1059


In [103]:
ft_pct_query= """ SELECT season_id, MIN(ft_pct_home) AS min_ft_pct_home, MIN(ft_pct_away) AS min_ft_pct_away, MAX(ft_pct_home) AS max_ft_pct_home, MAX(ft_pct_away) AS max_ft_pct_away, COUNT(*) AS games_played FROM game WHERE season_id= '22022'""" 
ft_pct_data_22= pd.read_sql_query(ft_pct_query, con)

ft_pct_data_22

Unnamed: 0,season_id,min_ft_pct_home,min_ft_pct_away,max_ft_pct_home,max_ft_pct_away,games_played
0,22022,0.4,0.3,1.0,1.0,1230
