#### Business Objective: What was the impact of the COVID-19 pandemic on the level of home court advantage?
- Question 1: How has the difference in win-loss ratio between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)
- Question 2: How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)
- Question 3: How has the difference in offensive and defensive rebounds between home and away teams changed from 2019-20 to 2022-23?(oreb_home, oreb_away, dreb_home, dreb_away in the game table)
- Question 4: How does the average number of three-point field goals made by home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)
- Question 5: How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away in the game table)

In [85]:
import pandas as pd
import sqlite3

con = sqlite3.connect("data/nba.sqlite")

seasons = ['2019', '2020', '2021', '2022']

# Question 1

## intro
#### set up dataframe and make it so win-loss ratio can be calculated

In [97]:
covid_wl_home = pd.read_sql_query("SELECT team_name_home, wl_home, season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) ORDER BY season_id DESC", con)
covid_wl_home = covid_wl_home.replace('W', 1)
covid_wl_home = covid_wl_home.replace('L', 0)

covid_wl_away = pd.read_sql_query("SELECT team_name_away, wl_away, season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) ORDER BY season_id DESC", con)
covid_wl_away = covid_wl_away.replace('W', 1)
covid_wl_away = covid_wl_away.replace('L', 0)

  covid_wl_home = covid_wl_home.replace('L', 0)
  covid_wl_away = covid_wl_away.replace('L', 0)


## rising action
#### get the win-loss ratios for each season

In [101]:
covid_wl_home_group = covid_wl_home.groupby(['season_id']).agg({'wl_home': ['mean']})
covid_wl_home_group

Unnamed: 0_level_0,wl_home
Unnamed: 0_level_1,mean
season_id,Unnamed: 1_level_2
22019,0.551464
22020,0.543519
22021,0.543902
22022,0.580488


In [99]:
covid_wl_away_group = covid_wl_away.groupby(['season_id']).agg({'wl_away': ['mean']})
covid_wl_away_group

Unnamed: 0_level_0,wl_away
Unnamed: 0_level_1,mean
season_id,Unnamed: 1_level_2
22019,0.448536
22020,0.456481
22021,0.456098
22022,0.419512


## climax
#### get the win-loss ratios per season for each team

In [105]:
covid_wl_home_group = covid_wl_home.groupby(['team_name_home', 'season_id']).agg({'wl_home': ['mean']})
covid_wl_home_group

Unnamed: 0_level_0,Unnamed: 1_level_0,wl_home
Unnamed: 0_level_1,Unnamed: 1_level_1,mean
team_name_home,season_id,Unnamed: 2_level_2
Atlanta Hawks,22019,0.411765
Atlanta Hawks,22020,0.694444
Atlanta Hawks,22021,0.658537
Atlanta Hawks,22022,0.585366
Boston Celtics,22019,0.722222
...,...,...
Utah Jazz,22022,0.560976
Washington Wizards,22019,0.444444
Washington Wizards,22020,0.527778
Washington Wizards,22021,0.512195


In [107]:
covid_wl_away_group = covid_wl_away.groupby(['team_name_away', 'season_id']).agg({'wl_away': ['mean']})
covid_wl_away_group

Unnamed: 0_level_0,Unnamed: 1_level_0,wl_away
Unnamed: 0_level_1,Unnamed: 1_level_1,mean
team_name_away,season_id,Unnamed: 2_level_2
Atlanta Hawks,22019,0.181818
Atlanta Hawks,22020,0.444444
Atlanta Hawks,22021,0.390244
Atlanta Hawks,22022,0.414634
Boston Celtics,22019,0.611111
...,...,...
Utah Jazz,22022,0.341463
Washington Wizards,22019,0.250000
Washington Wizards,22020,0.416667
Washington Wizards,22021,0.341463


## falling action
#### find the difference in win-loss ratios between home and away overall

In [116]:
covid_wl_home_group = covid_wl_home.groupby('season_id').agg({'wl_home': 'mean'}).reset_index()
covid_wl_away_group = covid_wl_away.groupby('season_id').agg({'wl_away': 'mean'}).reset_index()

win_ratio_df = covid_wl_home_group.merge(covid_wl_away_group, on='season_id')

win_ratio_df['home_away_diff'] = win_ratio_df['wl_home'] - win_ratio_df['wl_away']

print(win_ratio_df)


  season_id   wl_home   wl_away  home_away_diff
0     22019  0.551464  0.448536        0.102927
1     22020  0.543519  0.456481        0.087037
2     22021  0.543902  0.456098        0.087805
3     22022  0.580488  0.419512        0.160976


## conclusion
#### find the difference in win-loss ratios between home and away for each team

In [118]:
covid_wl_home_group = covid_wl_home.groupby(['team_name_home', 'season_id']).agg({'wl_home': 'mean'}).reset_index()
covid_wl_away_group = covid_wl_away.groupby(['team_name_away', 'season_id']).agg({'wl_away': 'mean'}).reset_index()

covid_wl_home_group = covid_wl_home_group.rename(columns={'team_name_home': 'team_name'})
covid_wl_away_group = covid_wl_away_group.rename(columns={'team_name_away': 'team_name'})

team_win_ratio_df = covid_wl_home_group.merge(covid_wl_away_group, on=['team_name', 'season_id'])

team_win_ratio_df['home_away_diff'] = team_win_ratio_df['wl_home'] - team_win_ratio_df['wl_away']

print(team_win_ratio_df)


              team_name season_id   wl_home   wl_away  home_away_diff
0         Atlanta Hawks     22019  0.411765  0.181818        0.229947
1         Atlanta Hawks     22020  0.694444  0.444444        0.250000
2         Atlanta Hawks     22021  0.658537  0.390244        0.268293
3         Atlanta Hawks     22022  0.585366  0.414634        0.170732
4        Boston Celtics     22019  0.722222  0.611111        0.111111
..                  ...       ...       ...       ...             ...
115           Utah Jazz     22022  0.560976  0.341463        0.219512
116  Washington Wizards     22019  0.444444  0.250000        0.194444
117  Washington Wizards     22020  0.527778  0.416667        0.111111
118  Washington Wizards     22021  0.512195  0.341463        0.170732
119  Washington Wizards     22022  0.463415  0.390244        0.073171

[120 rows x 5 columns]


# Question 2

## intro
#### set up columns and averages for dataframe

In [264]:
result = []
for year in seasons:
    points_query = f"""
    SELECT 
        AVG(pts_home) AS avg_pts_home,
        AVG(pts_away) AS avg_pts_away
    FROM game
    WHERE SUBSTR(season_id, -4) = '{year}'
    """
    
    points_data = pd.read_sql_query(points_query, con)
    
    if not points_data.empty:
        avg_pts_home = points_data['avg_pts_home'][0]
        avg_pts_away = points_data['avg_pts_away'][0]
        
        points_diff = avg_pts_home - avg_pts_away
        
        result.append({
            'season_year': year,
            'avg_pts_home': avg_pts_home,
            'avg_pts_away': avg_pts_away,
            'points_diff': points_diff
        })

result_df = pd.DataFrame(result)
print(result_df)

  season_year  avg_pts_home  avg_pts_away  points_diff
0        2019    112.696413    110.651794     2.044619
1        2020    112.466283    111.369243     1.097039
2        2021    111.232659    109.271676     1.960983
3        2022    115.539683    112.931457     2.608225


## rising action
#### get average points home and away for each team per season

In [14]:
covid_pts_home = pd.read_sql_query("SELECT team_name_home, AVG(pts_home), season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_home, season_id", con)
covid_pts_home

Unnamed: 0,team_name_home,AVG(pts_home),season_id
0,Atlanta Hawks,114.588235,22019
1,Atlanta Hawks,115.416667,22020
2,Atlanta Hawks,116.658537,22021
3,Atlanta Hawks,119.634146,22022
4,Boston Celtics,116.305556,22019
...,...,...,...
115,Utah Jazz,117.951220,22022
116,Washington Wizards,114.000000,22019
117,Washington Wizards,119.000000,22020
118,Washington Wizards,111.902439,22021


In [140]:
covid_pts_away = pd.read_sql_query("SELECT team_name_away, AVG(pts_away), season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_away, season_id", con)
covid_pts_away

Unnamed: 0,team_name_away,AVG(pts_away),season_id
0,Atlanta Hawks,108.848485,22019
1,Atlanta Hawks,111.972222,22020
2,Atlanta Hawks,111.219512,22021
3,Atlanta Hawks,117.219512,22022
4,Boston Celtics,111.000000,22019
...,...,...,...
115,Utah Jazz,116.195122,22022
116,Washington Wizards,114.833333,22019
117,Washington Wizards,114.277778,22020
118,Washington Wizards,105.341463,22021


## climax
### get individual stats for home and away for each season

In [16]:
covid_pts_home_19 = pd.read_sql_query("SELECT team_name_home, pts_home, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_home, season_id", con)
covid_pts_home_19['pts_home'].describe()

count     30.000000
mean     111.066667
std       14.971083
min       84.000000
25%      100.500000
50%      110.500000
75%      120.000000
max      158.000000
Name: pts_home, dtype: float64

In [18]:
covid_pts_home_21 = pd.read_sql_query("SELECT team_name_home, pts_home, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_home, season_id", con)
covid_pts_home_21['pts_home'].describe()

count     30.000000
mean     111.066667
std       12.940003
min       73.000000
25%      104.500000
50%      111.000000
75%      120.750000
max      138.000000
Name: pts_home, dtype: float64

In [22]:
covid_pts_away_19 = pd.read_sql_query("SELECT team_name_away, pts_away, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_away, season_id", con)
covid_pts_away_19['pts_away'].describe()

count     30.00000
mean     111.00000
std       17.15246
min       85.00000
25%       99.00000
50%      107.50000
75%      122.00000
max      159.00000
Name: pts_away, dtype: float64

In [24]:
covid_pts_away_21 = pd.read_sql_query("SELECT team_name_away, pts_away, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_away, season_id", con)
covid_pts_away_21['pts_away'].describe()

count     30.000000
mean     115.266667
std        8.893753
min       99.000000
25%      107.500000
50%      115.000000
75%      122.500000
max      131.000000
Name: pts_away, dtype: float64

## falling action
#### calculate the points difference between home and away for each team

In [153]:
covid_pts_home = pd.read_sql_query(
    "SELECT team_name_home AS team_name, AVG(pts_home) AS avg_pts_home, season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_home, season_id", con
)
covid_pts_away = pd.read_sql_query(
    "SELECT team_name_away AS team_name, AVG(pts_away) AS avg_pts_away, season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_away, season_id", con
)

covid_pts_combined = pd.merge(covid_pts_home, covid_pts_away, on=['team_name', 'season_id'])
covid_pts_combined['points_diff'] = covid_pts_combined['avg_pts_home'] - covid_pts_combined['avg_pts_away']
covid_pts_combined = covid_pts_combined[['season_id', 'team_name', 'avg_pts_home', 'avg_pts_away', 'points_diff']]

covid_pts_combined

Unnamed: 0,season_id,team_name,avg_pts_home,avg_pts_away,points_diff
0,22019,Atlanta Hawks,114.588235,108.848485,5.739750
1,22020,Atlanta Hawks,115.416667,111.972222,3.444444
2,22021,Atlanta Hawks,116.658537,111.219512,5.439024
3,22022,Atlanta Hawks,119.634146,117.219512,2.414634
4,22019,Boston Celtics,116.305556,111.000000,5.305556
...,...,...,...,...,...
115,22022,Utah Jazz,117.951220,116.195122,1.756098
116,22019,Washington Wizards,114.000000,114.833333,-0.833333
117,22020,Washington Wizards,119.000000,114.277778,4.722222
118,22021,Washington Wizards,111.902439,105.341463,6.560976


## conclusion
#### compare the overall average points away to home between seasons

In [165]:
covid_pts_home_19_avg = covid_pts_home_19['pts_home'].mean()
covid_pts_home_21_avg = covid_pts_home_21['pts_home'].mean()

covid_pts_away_19_avg = covid_pts_away_19['pts_away'].mean()
covid_pts_away_21_avg = covid_pts_away_21['pts_away'].mean()

final_results = {
    'season_id': ['2019', '2020'],
    'avg_pts_home': [covid_pts_home_19_avg, covid_pts_home_21_avg],
    'avg_pts_away': [covid_pts_away_19_avg, covid_pts_away_21_avg],
    'points_diff': [covid_pts_home_19_avg - covid_pts_away_19_avg, covid_pts_home_21_avg - covid_pts_away_21_avg]
}
final_results_df = pd.DataFrame(final_results)
print(final_results_df)


  season_id  avg_pts_home  avg_pts_away  points_diff
0      2019    111.066667    111.000000     0.066667
1      2020    111.066667    115.266667    -4.200000


Question 3

## introduction
#### set up columns and averages for dataframe

In [27]:
result = []
for year in seasons:
    rebounds_query = f"""
    SELECT 
        AVG(oreb_home) AS avg_oreb_home,
        AVG(oreb_away) AS avg_oreb_away,
        AVG(dreb_home) AS avg_dreb_home,
        AVG(dreb_away) AS avg_dreb_away
    FROM game
    WHERE SUBSTR(season_id, -4) = '{year}'
    """
    
    rebounds_data = pd.read_sql_query(rebounds_query, con)
    
    if not rebounds_data.empty:
        avg_oreb_home = rebounds_data['avg_oreb_home'][0]
        avg_oreb_away = rebounds_data['avg_oreb_away'][0]
        avg_dreb_home = rebounds_data['avg_dreb_home'][0]
        avg_dreb_away = rebounds_data['avg_dreb_away'][0]
        
        oreb_diff = avg_oreb_home - avg_oreb_away
        dreb_diff = avg_dreb_home - avg_dreb_away
        
        result.append({
            'season_year': year,
            'avg_oreb_home': avg_oreb_home,
            'avg_oreb_away': avg_oreb_away,
            'oreb_diff': oreb_diff,
            'avg_dreb_home': avg_dreb_home,
            'avg_dreb_away': avg_dreb_away,
            'dreb_diff': dreb_diff
        })

result_df = pd.DataFrame(result)
print(result_df)

  season_year  avg_oreb_home  avg_oreb_away  oreb_diff  avg_dreb_home  \
0        2019      10.179353       9.794401   0.384952      35.209974   
1        2020       9.896382       9.742599   0.153783      34.866776   
2        2021      10.309249      10.236994   0.072254      34.416185   
3        2022      10.485570      10.419913   0.065657      33.575758   

   avg_dreb_away  dreb_diff  
0      34.256343   0.953631  
1      34.178454   0.688322  
2      33.875000   0.541185  
3      32.632756   0.943001  


## rising action
#### calculate and view average rebounds home and away for each season and team

In [29]:
covid_reb_home = pd.read_sql_query("SELECT team_name_home, AVG(oreb_home), AVG(dreb_home), season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_home, season_id", con)
covid_reb_home

Unnamed: 0,team_name_home,AVG(oreb_home),AVG(dreb_home),season_id
0,Atlanta Hawks,10.205882,34.323529,22019
1,Atlanta Hawks,10.388889,36.527778,22020
2,Atlanta Hawks,9.341463,34.268293,22021
3,Atlanta Hawks,11.536585,34.439024,22022
4,Boston Celtics,10.138889,35.194444,22019
...,...,...,...,...
115,Utah Jazz,12.000000,35.121951,22022
116,Washington Wizards,10.277778,32.861111,22019
117,Washington Wizards,9.611111,35.944444,22020
118,Washington Wizards,9.024390,34.512195,22021


In [174]:
covid_reb_away = pd.read_sql_query("SELECT team_name_away, AVG(oreb_away), AVG(dreb_away), season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_away, season_id", con)
covid_reb_away

Unnamed: 0,team_name_away,AVG(oreb_away),AVG(dreb_away),season_id
0,Atlanta Hawks,9.515152,32.424242,22019
1,Atlanta Hawks,10.722222,33.611111,22020
2,Atlanta Hawks,10.731707,33.609756,22021
3,Atlanta Hawks,10.902439,31.878049,22022
4,Boston Celtics,11.194444,35.611111,22019
...,...,...,...,...
115,Utah Jazz,11.609756,33.024390,22022
116,Washington Wizards,10.055556,30.888889,22019
117,Washington Wizards,9.750000,35.083333,22020
118,Washington Wizards,8.951220,33.731707,22021


## climax
#### calculate overall rebound stats

In [31]:
covid_reb_home = pd.read_sql_query("SELECT team_name_home, oreb_home, dreb_home, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_home, season_id", con)
covid_reb_home['oreb_home'].describe()

count    30.000000
mean     11.133333
std       3.401149
min       5.000000
25%       9.000000
50%      11.000000
75%      12.750000
max      21.000000
Name: oreb_home, dtype: float64

In [33]:
covid_reb_home = pd.read_sql_query("SELECT team_name_home, oreb_home, dreb_home, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_home, season_id", con)
covid_reb_home['oreb_home'].describe()

count    30.000000
mean      9.700000
std       3.602202
min       3.000000
25%       7.000000
50%      10.000000
75%      12.750000
max      19.000000
Name: oreb_home, dtype: float64

In [35]:
covid_reb_home = pd.read_sql_query("SELECT team_name_home, oreb_home, dreb_home, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_home, season_id", con)
covid_reb_home['dreb_home'].describe()

count    30.000000
mean     36.533333
std       5.763939
min      26.000000
25%      32.000000
50%      36.500000
75%      41.000000
max      50.000000
Name: dreb_home, dtype: float64

In [37]:
covid_reb_home = pd.read_sql_query("SELECT team_name_home, oreb_home, dreb_home, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_home, season_id", con)
covid_reb_home['dreb_home'].describe()

count    30.000000
mean     36.166667
std       5.866817
min      26.000000
25%      31.000000
50%      36.500000
75%      40.000000
max      49.000000
Name: dreb_home, dtype: float64

In [41]:
covid_reb_away = pd.read_sql_query("SELECT team_name_away, oreb_away, dreb_away, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_away, season_id", con)
covid_reb_away['oreb_away'].describe()

count    30.000000
mean      9.833333
std       4.291799
min       3.000000
25%       6.250000
50%      10.000000
75%      12.000000
max      20.000000
Name: oreb_away, dtype: float64

In [43]:
covid_reb_away = pd.read_sql_query("SELECT team_name_away, oreb_away, dreb_away, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_away, season_id", con)
covid_reb_away['oreb_away'].describe()

count    30.000000
mean      9.133333
std       3.115405
min       5.000000
25%       7.000000
50%       8.500000
75%      11.000000
max      18.000000
Name: oreb_away, dtype: float64

In [45]:
covid_reb_away = pd.read_sql_query("SELECT team_name_away, oreb_away, dreb_away, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_away, season_id", con)
covid_reb_away['dreb_away'].describe()

count    30.000000
mean     35.766667
std       5.624352
min      25.000000
25%      32.000000
50%      35.000000
75%      40.000000
max      47.000000
Name: dreb_away, dtype: float64

In [47]:
covid_reb_away = pd.read_sql_query("SELECT team_name_away, oreb_away, dreb_away, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_away, season_id", con)
covid_reb_away['dreb_away'].describe()

count    30.000000
mean     36.166667
std       5.180090
min      24.000000
25%      33.250000
50%      36.000000
75%      40.000000
max      47.000000
Name: dreb_away, dtype: float64

## falling action
#### find the difference in rebounds between home and away for each team and season (pre-covid)

In [184]:
covid_reb_home_19_20 = pd.read_sql_query("""
    SELECT team_name_home AS team_name, AVG(oreb_home) AS avg_oreb_home, AVG(dreb_home) AS avg_dreb_home, season_id 
    FROM game 
    WHERE season_id IN (22019, 22020) 
    GROUP BY team_name_home, season_id
""", con)

covid_reb_away_19_20 = pd.read_sql_query("""
    SELECT team_name_away AS team_name, AVG(oreb_away) AS avg_oreb_away, AVG(dreb_away) AS avg_dreb_away, season_id 
    FROM game 
    WHERE season_id IN (22019, 22020) 
    GROUP BY team_name_away, season_id
""", con)

covid_reb_combined_19_20 = pd.merge(covid_reb_home_19_20, covid_reb_away_19_20, on=['team_name', 'season_id'])
covid_reb_combined_19_20['oreb_diff'] = covid_reb_combined_19_20['avg_oreb_home'] - covid_reb_combined_19_20['avg_oreb_away']
covid_reb_combined_19_20['dreb_diff'] = covid_reb_combined_19_20['avg_dreb_home'] - covid_reb_combined_19_20['avg_dreb_away']

covid_reb_combined_19_20 = covid_reb_combined_19_20[['season_id', 'team_name', 'avg_oreb_home', 'avg_oreb_away', 'oreb_diff', 'avg_dreb_home', 'avg_dreb_away', 'dreb_diff']]
print(covid_reb_combined_19_20.head())

  season_id       team_name  avg_oreb_home  avg_oreb_away  oreb_diff  \
0     22019   Atlanta Hawks      10.205882       9.515152   0.690731   
1     22020   Atlanta Hawks      10.388889      10.722222  -0.333333   
2     22019  Boston Celtics      10.138889      11.194444  -1.055556   
3     22020  Boston Celtics      10.694444      10.555556   0.138889   
4     22019   Brooklyn Nets      10.722222      10.555556   0.166667   

   avg_dreb_home  avg_dreb_away  dreb_diff  
0      34.323529      32.424242   1.899287  
1      36.527778      33.611111   2.916667  
2      35.194444      35.611111  -0.416667  
3      33.250000      34.000000  -0.750000  
4      37.527778      37.000000   0.527778  


## conclusion
#### find the difference in rebounds between home and away for each team and season (post-covid)

In [187]:
covid_reb_home_21_22 = pd.read_sql_query("""
    SELECT team_name_home AS team_name, AVG(oreb_home) AS avg_oreb_home, AVG(dreb_home) AS avg_dreb_home, season_id 
    FROM game 
    WHERE season_id IN (22021, 22022) 
    GROUP BY team_name_home, season_id
""", con)

covid_reb_away_21_22 = pd.read_sql_query("""
    SELECT team_name_away AS team_name, AVG(oreb_away) AS avg_oreb_away, AVG(dreb_away) AS avg_dreb_away, season_id 
    FROM game 
    WHERE season_id IN (22021, 22022) 
    GROUP BY team_name_away, season_id
""", con)

covid_reb_combined_21_22 = pd.merge(covid_reb_home_21_22, covid_reb_away_21_22, on=['team_name', 'season_id'])

covid_reb_combined_21_22['oreb_diff'] = covid_reb_combined_21_22['avg_oreb_home'] - covid_reb_combined_21_22['avg_oreb_away']
covid_reb_combined_21_22['dreb_diff'] = covid_reb_combined_21_22['avg_dreb_home'] - covid_reb_combined_21_22['avg_dreb_away']
covid_reb_combined_21_22 = covid_reb_combined_21_22[['season_id', 'team_name', 'avg_oreb_home', 'avg_oreb_away', 'oreb_diff', 'avg_dreb_home', 'avg_dreb_away', 'dreb_diff']]
print(covid_reb_combined_21_22.head())

  season_id       team_name  avg_oreb_home  avg_oreb_away  oreb_diff  \
0     22021   Atlanta Hawks       9.341463      10.731707  -1.390244   
1     22022   Atlanta Hawks      11.536585      10.902439   0.634146   
2     22021  Boston Celtics      10.902439      10.146341   0.756098   
3     22022  Boston Celtics       9.682927       9.731707  -0.048780   
4     22021   Brooklyn Nets      10.975610       9.609756   1.365854   

   avg_dreb_home  avg_dreb_away  dreb_diff  
0      34.268293      33.609756   0.658537  
1      34.439024      31.878049   2.560976  
2      34.707317      36.390244  -1.682927  
3      36.926829      34.317073   2.609756  
4      34.536585      33.658537   0.878049  


# Question 4

## introduction
#### set up columns and averages for dataframe

In [50]:
result = []
for year in seasons:
    fg3_query = f"""
    SELECT 
        AVG(fg3_pct_home) AS avg_fg3_pct_home,
        AVG(fg3_pct_away) AS avg_fg3_pct_away
    FROM game
    WHERE SUBSTR(season_id, -4) = '{year}'
    """
    
    fg3_data = pd.read_sql_query(fg3_query, con)
    
    if not fg3_data.empty:
        avg_fg3_pct_home = fg3_data['avg_fg3_pct_home'][0]
        avg_fg3_pct_away = fg3_data['avg_fg3_pct_away'][0]
        
        result.append({
            'season_year': year,
            'avg_fg3_pct_home': avg_fg3_pct_home,
            'avg_fg3_pct_away': avg_fg3_pct_away
        })

result_df = pd.DataFrame(result)
print(result_df)

  season_year  avg_fg3_pct_home  avg_fg3_pct_away
0        2019          0.361411          0.353990
1        2020          0.367235          0.362348
2        2021          0.354915          0.350280
3        2022          0.362962          0.351737


## rising action
#### find avg 3pt field goal percentages for each team per season  (home and away)

In [52]:
covid_fg3_home = pd.read_sql_query("SELECT team_name_home, AVG(fg3_pct_home), season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_home, season_id", con)
covid_fg3_home

Unnamed: 0,team_name_home,AVG(fg3_pct_home),season_id
0,Atlanta Hawks,0.344735,22019
1,Atlanta Hawks,0.381444,22020
2,Atlanta Hawks,0.380049,22021
3,Atlanta Hawks,0.347439,22022
4,Boston Celtics,0.378417,22019
...,...,...,...
115,Utah Jazz,0.363171,22022
116,Washington Wizards,0.371778,22019
117,Washington Wizards,0.346611,22020
118,Washington Wizards,0.341512,22021


In [221]:
covid_fg3_away = pd.read_sql_query("SELECT team_name_away, AVG(fg3_pct_away), season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_away, season_id", con)
covid_fg3_away

Unnamed: 0,team_name_away,AVG(fg3_pct_away),season_id
0,Atlanta Hawks,0.319788,22019
1,Atlanta Hawks,0.358944,22020
2,Atlanta Hawks,0.363439,22021
3,Atlanta Hawks,0.359341,22022
4,Boston Celtics,0.353278,22019
...,...,...,...
115,Utah Jazz,0.340805,22022
116,Washington Wizards,0.365111,22019
117,Washington Wizards,0.359667,22020
118,Washington Wizards,0.344366,22021


## climax 
#### find overall 3pt field goal stats for teams home and away

In [54]:
covid_fg3_home = pd.read_sql_query("SELECT team_name_home, fg3_pct_home, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_home, season_id", con)
covid_fg3_home['fg3_pct_home'].describe()

count    30.000000
mean      0.332833
std       0.077337
min       0.217000
25%       0.274750
50%       0.333000
75%       0.356500
max       0.556000
Name: fg3_pct_home, dtype: float64

In [56]:
covid_fg3_home = pd.read_sql_query("SELECT team_name_home, fg3_pct_home, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_home, season_id", con)
covid_fg3_home['fg3_pct_home'].describe()

count    30.00000
mean      0.33530
std       0.09684
min       0.12100
25%       0.27600
50%       0.35350
75%       0.40325
max       0.55600
Name: fg3_pct_home, dtype: float64

In [60]:
covid_fg3_away = pd.read_sql_query("SELECT team_name_away, fg3_pct_away, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_away, season_id", con)
covid_fg3_away['fg3_pct_away'].describe()

count    30.000000
mean      0.344267
std       0.099422
min       0.152000
25%       0.289500
50%       0.346500
75%       0.379000
max       0.563000
Name: fg3_pct_away, dtype: float64

In [62]:
covid_fg3_away = pd.read_sql_query("SELECT team_name_away, fg3_pct_away, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_away, season_id", con)
covid_fg3_away['fg3_pct_away'].describe()

count    30.000000
mean      0.373267
std       0.074224
min       0.229000
25%       0.345250
50%       0.365000
75%       0.412750
max       0.538000
Name: fg3_pct_away, dtype: float64

## falling action
#### find the average percent differences between home and away for 3 pt field goals for each team per season

In [228]:
covid_fg3_home = pd.read_sql_query(
    "SELECT team_name_home AS team_name, AVG(fg3_pct_home) AS avg_fg3_pct_home, season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_home, season_id", con
)
covid_fg3_away = pd.read_sql_query(
    "SELECT team_name_away AS team_name, AVG(fg3_pct_away) AS avg_fg3_pct_away, season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_away, season_id", con
)

covid_fg3_combined = pd.merge(covid_fg3_home, covid_fg3_away, on=['team_name', 'season_id'])
covid_fg3_combined['fg3_pct_diff'] = covid_fg3_combined['avg_fg3_pct_home'] - covid_fg3_combined['avg_fg3_pct_away']
covid_fg3_combined


Unnamed: 0,team_name,avg_fg3_pct_home,season_id,avg_fg3_pct_away,fg3_pct_diff
0,Atlanta Hawks,0.344735,22019,0.319788,0.024947
1,Atlanta Hawks,0.381444,22020,0.358944,0.022500
2,Atlanta Hawks,0.380049,22021,0.363439,0.016610
3,Atlanta Hawks,0.347439,22022,0.359341,-0.011902
4,Boston Celtics,0.378417,22019,0.353278,0.025139
...,...,...,...,...,...
115,Utah Jazz,0.363171,22022,0.340805,0.022366
116,Washington Wizards,0.371778,22019,0.365111,0.006667
117,Washington Wizards,0.346611,22020,0.359667,-0.013056
118,Washington Wizards,0.341512,22021,0.344366,-0.002854


## conclusion
#### calculated summary stats to look at the range in 3 pt field goal differences

In [233]:
overall_avg_diff = covid_fg3_combined['fg3_pct_diff'].mean()
std_dev_diff = covid_fg3_combined['fg3_pct_diff'].std()

print(f"Overall Average Difference in 3-Point FG% (Home vs Away): {overall_avg_diff:.4f}")
print(f"Standard Deviation of 3-Point FG% Difference: {std_dev_diff:.4f}")

avg_diff_by_season = covid_fg3_combined.groupby('season_id')['fg3_pct_diff'].mean()
print(avg_diff_by_season)


Overall Average Difference in 3-Point FG% (Home vs Away): 0.0071
Standard Deviation of 3-Point FG% Difference: 0.0188
season_id
22019    0.007632
22020    0.006223
22021    0.003032
22022    0.011510
Name: fg3_pct_diff, dtype: float64


# Question 5

## introduction
#### set up columns and averages for dataframe

In [65]:
result = []
for year in seasons:
    ft_query = f"""
    SELECT 
        AVG(ft_pct_home) AS avg_ft_pct_home,
        AVG(ft_pct_away) AS avg_ft_pct_away
    FROM game
    WHERE SUBSTR(season_id, -4) = '{year}'
    """
    
    ft_data = pd.read_sql_query(ft_query, con)
    
    if not ft_data.empty:
        avg_ft_pct_home = ft_data['avg_ft_pct_home'][0]
        avg_ft_pct_away = ft_data['avg_ft_pct_away'][0]
        
        result.append({
            'season_year': year,
            'avg_ft_pct_home': avg_ft_pct_home,
            'avg_ft_pct_away': avg_ft_pct_away
        })

result_df = pd.DataFrame(result)
print(result_df)

  season_year  avg_ft_pct_home  avg_ft_pct_away
0        2019         0.771852         0.772411
1        2020         0.779016         0.775290
2        2021         0.775135         0.775920
3        2022         0.782598         0.780615


## rising action
#### calculate avg field goal percent between home and away for each team and season

In [67]:
covid_fg_home = pd.read_sql_query("SELECT team_name_home, AVG(fg_pct_home), season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_home, season_id", con)
covid_fg_home

Unnamed: 0,team_name_home,AVG(fg_pct_home),season_id
0,Atlanta Hawks,0.459618,22019
1,Atlanta Hawks,0.477889,22020
2,Atlanta Hawks,0.482902,22021
3,Atlanta Hawks,0.482561,22022
4,Boston Celtics,0.469833,22019
...,...,...,...
115,Utah Jazz,0.471341,22022
116,Washington Wizards,0.457583,22019
117,Washington Wizards,0.481972,22020
118,Washington Wizards,0.479000,22021


In [239]:
covid_fg_away = pd.read_sql_query("SELECT team_name_away, AVG(fg_pct_away), season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_away, season_id", con)
covid_fg_away

Unnamed: 0,team_name_away,AVG(fg_pct_away),season_id
0,Atlanta Hawks,0.440697,22019
1,Atlanta Hawks,0.460389,22020
2,Atlanta Hawks,0.459366,22021
3,Atlanta Hawks,0.486098,22022
4,Boston Celtics,0.454000,22019
...,...,...,...
115,Utah Jazz,0.476951,22022
116,Washington Wizards,0.458611,22019
117,Washington Wizards,0.467056,22020
118,Washington Wizards,0.465390,22021


## climax
#### calculate overall stats for field goal percentages

In [69]:
covid_fg_home = pd.read_sql_query("SELECT team_name_home, fg_pct_home, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_home, season_id", con)
covid_fg_home['fg_pct_home'].describe()

count    30.000000
mean      0.441867
std       0.061571
min       0.299000
25%       0.400500
50%       0.432500
75%       0.474000
max       0.626000
Name: fg_pct_home, dtype: float64

In [71]:
covid_fg_home = pd.read_sql_query("SELECT team_name_home, fg_pct_home, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_home, season_id", con)
covid_fg_home['fg_pct_home'].describe()

count    30.000000
mean      0.451600
std       0.052411
min       0.342000
25%       0.403500
50%       0.467000
75%       0.483250
max       0.556000
Name: fg_pct_home, dtype: float64

In [75]:
covid_fg_away = pd.read_sql_query("SELECT team_name_away, fg_pct_away, season_id FROM game WHERE season_id IN (22019) GROUP BY team_name_away, season_id", con)
covid_fg_away['fg_pct_away'].describe()

count    30.000000
mean      0.442967
std       0.063498
min       0.326000
25%       0.395750
50%       0.435500
75%       0.472500
max       0.625000
Name: fg_pct_away, dtype: float64

In [77]:
covid_fg_away = pd.read_sql_query("SELECT team_name_away, fg_pct_away, season_id FROM game WHERE season_id IN (22020) GROUP BY team_name_away, season_id", con)
covid_fg_away['fg_pct_away'].describe()

count    30.000000
mean      0.481567
std       0.045188
min       0.374000
25%       0.455500
50%       0.478500
75%       0.509750
max       0.561000
Name: fg_pct_away, dtype: float64

## rising action
#### calculate the difference in field goal percentage for home and away games

In [251]:
covid_fg_home = pd.read_sql_query(
    "SELECT team_name_home AS team_name, AVG(fg_pct_home) AS avg_fg_pct_home, season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_home, season_id", con
)
covid_fg_away = pd.read_sql_query(
    "SELECT team_name_away AS team_name, AVG(fg_pct_away) AS avg_fg_pct_away, season_id FROM game WHERE season_id IN (22022, 22021, 22020, 22019) GROUP BY team_name_away, season_id", con
)

covid_fg_combined = pd.merge(covid_fg_home, covid_fg_away, on=['team_name', 'season_id'])
covid_fg_combined['fg_pct_diff'] = covid_fg_combined['avg_fg_pct_home'] - covid_fg_combined['avg_fg_pct_away']
print(covid_fg_combined[['team_name', 'season_id', 'avg_fg_pct_home', 'avg_fg_pct_away', 'fg_pct_diff']].head())


        team_name season_id  avg_fg_pct_home  avg_fg_pct_away  fg_pct_diff
0   Atlanta Hawks     22019         0.459618         0.440697     0.018921
1   Atlanta Hawks     22020         0.477889         0.460389     0.017500
2   Atlanta Hawks     22021         0.482902         0.459366     0.023537
3   Atlanta Hawks     22022         0.482561         0.486098    -0.003537
4  Boston Celtics     22019         0.469833         0.454000     0.015833


## conclusion
#### calculating difference in overall average field goal percentages between home and away

In [262]:
overall_fg_home = covid_fg_home['avg_fg_pct_home'].mean()
overall_fg_away = covid_fg_away['avg_fg_pct_away'].mean()
overall_fg_diff = overall_fg_home - overall_fg_away
print(f"Overall Average FG% at Home: {overall_fg_home:.2f}")
print(f"Overall Average FG% Away: {overall_fg_away:.2f}")
print(f"Overall FG% Difference (Home - Away): {overall_fg_diff:.2f}")

Overall Average FG% at Home: 0.47
Overall Average FG% Away: 0.46
Overall FG% Difference (Home - Away): 0.01


In [267]:
con.close()