#### Business Objective: What was the impact of the COVID-19 pandemic on the level of home court advantage?
- Question 1: How has the difference in win-loss ratio between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)
- Question 2: How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)
- Question 3: How has the difference in offensive and defensive rebounds between home and away teams changed from 2019-20 to 2022-23?(oreb_home, oreb_away, dreb_home, dreb_away in the game table)
- Question 4: How does the average number of three-point field goals made by home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)
- Question 5: How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away in the game table)

In [11]:
import pandas as pd
import sqlite3

con = sqlite3.connect("data/nba.sqlite")
games = pd.read_sql_query("SELECT * FROM game LIMIT 100", con)
active_players = pd.read_sql_query("SELECT full_name FROM player WHERE is_active=TRUE", con)

In [60]:
#Query1 : Simple Differnece in Win-loss ratio avg 
wl_query= """ SELECT season_id, AVG(CASE wl_home WHEN 'W' THEN 1 ELSE 0 END) AS avg_home_win_ratio, AVG(CASE wl_away WHEN 'W' THEN 1 ELSE 0 END) AS avg_away_win_ratio, AVG(CASE wl_home WHEN 'W' THEN 1 ELSE 0 END) - AVG(CASE wl_away WHEN 'W' THEN 1 ELSE 0 END) AS avg_win_loss_diff FROM game WHERE season_id IN ('22019','22020','22022','22023') GROUP BY season_id ORDER BY  season_id """
wl_data= pd.read_sql_query(wl_query, con)
print(wl_data)

  season_id  avg_home_win_ratio  avg_away_win_ratio  avg_win_loss_diff
0     22019            0.551464            0.448536           0.102927
1     22020            0.543519            0.456481           0.087037
2     22022            0.580488            0.419512           0.160976


In [35]:
#Query 2 : Finding out the difference in points for the home and away teams
seasons= ['22019','22020','22022','22023']
seasons_join = ','.join([f"'{s}'" for s in seasons])
hpt_query = f""" SELECT team_name_home AS team, AVG(pts_home) AS avg_pts_home, season_id FROM game WHERE season_id IN ({seasons_join}) GROUP BY team_name_home, season_id"""
hpt_data= pd.read_sql_query(hpt_query, con)
apt_query = f""" SELECT team_name_away AS team, AVG(pts_away) AS avg_pts_away, season_id FROM game WHERE season_id IN ({seasons_join}) GROUP BY team_name_away, season_id"""
apt_data= pd.read_sql_query(apt_query, con)
diff_pts= pd.merge(hpt_data, apt_data, on =['team', 'season_id'])
diff_pts['diff_pts']= diff_pts['avg_pts_home'] - diff_pts['avg_pts_away']
print(diff_pts)


                  team  avg_pts_home season_id  avg_pts_away  diff_pts
0        Atlanta Hawks    114.588235     22019    108.848485  5.739750
1        Atlanta Hawks    115.416667     22020    111.972222  3.444444
2        Atlanta Hawks    119.634146     22022    117.219512  2.414634
3       Boston Celtics    116.305556     22019    111.000000  5.305556
4       Boston Celtics    114.083333     22020    111.166667  2.916667
..                 ...           ...       ...           ...       ...
85           Utah Jazz    117.444444     22020    115.388889  2.055556
86           Utah Jazz    117.951220     22022    116.195122  1.756098
87  Washington Wizards    114.000000     22019    114.833333 -0.833333
88  Washington Wizards    119.000000     22020    114.277778  4.722222
89  Washington Wizards    114.243902     22022    112.073171  2.170732

[90 rows x 5 columns]


In [44]:
#Query 3 : Checking the difference in avg of offensive and Defensive rebounds 
seasons= ['22019','22020','22022','22023']
seasons_join = ','.join([f"'{s}'" for s in seasons])
rebounds_query = f"""SELECT season_id, AVG(oreb_home-oreb_away) AS diff_oreb, AVG(dreb_home-dreb_away) AS diff_dreb FROM game WHERE season_id IN ({seasons_join}) GROUP BY team_name_home, season_id """
rebounds_data = pd.read_sql_query(rebounds_query,con)
print(rebounds_data[['season_id', 'diff_oreb','diff_dreb']])

   season_id  diff_oreb  diff_dreb
0      22019  -0.911765  -0.441176
1      22020   0.055556   4.666667
2      22022   1.243902   0.317073
3      22019   0.250000   2.138889
4      22020   1.916667   1.472222
..       ...        ...        ...
85     22020   1.055556   7.194444
86     22022   0.634146   3.170732
87     22019   0.083333  -2.750000
88     22020  -0.583333  -0.111111
89     22022  -0.975610   2.195122

[90 rows x 3 columns]


In [51]:
#Query 4: Checking the influence by finding out the number of games home team has over away team
query_diff= f"""SELECT oreb_home, oreb_away, season_id FROM game WHERE oreb_home - oreb_away >0 AND season_id IN ({seasons_join})"""
data_diff=pd.read_sql_query(query_diff,con)
data_diff['oreb_diff']= data_diff['oreb_home'] - data_diff['oreb_away']
games_diff = data_diff[data_diff['oreb_diff'] > 0]
games_count= games_diff.shape[0]
print(games_count)

1593


In [59]:
#Query 5 : Checking the correlation between free throw percentage and the total number of free throws attempted for home vs away teams
query_ctt = f""" SELECT season_id, team_name_home AS team, ft_pct_home,ftm_home,ft_pct_away,ftm_away FROM game WHERE season_id IN({seasons_join})"""
data_ctt = pd.read_sql_query(query_ctt, con)
home_data= data_ctt[['season_id','team','ft_pct_home','ftm_home']]
away_data= data_ctt[['season_id','team','ft_pct_away','ftm_away']]
combined_data = pd.concat([home_data, away_data])
corr= combined_data[['ft_pct_home','ftm_home','ft_pct_away','ftm_away']].corr().iloc[0,1]
print(f"correlation : {corr}")

correlation : 0.36380888668843236
