#### Business Objective: What was the impact of the COVID-19 pandemic on the level of home court advantage?
- Question 1: How has the difference in win-loss ratio between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)
- Question 2: How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)
- Question 3: How has the difference in offensive and defensive rebounds between home and away teams changed from 2019-20 to 2022-23?(oreb_home, oreb_away, dreb_home, dreb_away in the game table)
- Question 4: How does the average number of three-point field goals made by home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)
- Question 5: How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away in the game table)

In [38]:
import pandas as pd
import sqlite3

con = sqlite3.connect("data/nba.sqlite")

seasons = ['2019', '2020', '2021', '2022']
result = []

In [40]:
# Question 1
result = []
for year in seasons:
    ft_query = f"""
    SELECT 
        AVG(ft_pct_home) AS avg_ft_pct_home,
        AVG(ft_pct_away) AS avg_ft_pct_away
    FROM game
    WHERE SUBSTR(season_id, -4) = '{year}'
    """
    
    ft_data = pd.read_sql_query(ft_query, con)
    
    if not ft_data.empty:
        avg_ft_pct_home = ft_data['avg_ft_pct_home'][0]
        avg_ft_pct_away = ft_data['avg_ft_pct_away'][0]
        
        result.append({
            'season_year': year,
            'avg_ft_pct_home': avg_ft_pct_home,
            'avg_ft_pct_away': avg_ft_pct_away
        })

result_df = pd.DataFrame(result)
print(result_df)

  season_year  avg_ft_pct_home  avg_ft_pct_away
0        2019         0.771852         0.772411
1        2020         0.779016         0.775290
2        2021         0.775135         0.775920
3        2022         0.782598         0.780615


In [42]:
# Question 2
result = []
for year in seasons:
    points_query = f"""
    SELECT 
        AVG(pts_home) AS avg_pts_home,
        AVG(pts_away) AS avg_pts_away
    FROM game
    WHERE SUBSTR(season_id, -4) = '{year}'
    """
    
    points_data = pd.read_sql_query(points_query, con)
    
    if not points_data.empty:
        avg_pts_home = points_data['avg_pts_home'][0]
        avg_pts_away = points_data['avg_pts_away'][0]
        
        points_diff = avg_pts_home - avg_pts_away
        
        result.append({
            'season_year': year,
            'avg_pts_home': avg_pts_home,
            'avg_pts_away': avg_pts_away,
            'points_diff': points_diff
        })

result_df = pd.DataFrame(result)
print(result_df)

  season_year  avg_pts_home  avg_pts_away  points_diff
0        2019    112.696413    110.651794     2.044619
1        2020    112.466283    111.369243     1.097039
2        2021    111.232659    109.271676     1.960983
3        2022    115.539683    112.931457     2.608225


In [44]:
# Question 3
result = []
for year in seasons:
    rebounds_query = f"""
    SELECT 
        AVG(oreb_home) AS avg_oreb_home,
        AVG(oreb_away) AS avg_oreb_away,
        AVG(dreb_home) AS avg_dreb_home,
        AVG(dreb_away) AS avg_dreb_away
    FROM game
    WHERE SUBSTR(season_id, -4) = '{year}'
    """
    
    rebounds_data = pd.read_sql_query(rebounds_query, con)
    
    if not rebounds_data.empty:
        avg_oreb_home = rebounds_data['avg_oreb_home'][0]
        avg_oreb_away = rebounds_data['avg_oreb_away'][0]
        avg_dreb_home = rebounds_data['avg_dreb_home'][0]
        avg_dreb_away = rebounds_data['avg_dreb_away'][0]
        
        oreb_diff = avg_oreb_home - avg_oreb_away
        dreb_diff = avg_dreb_home - avg_dreb_away
        
        result.append({
            'season_year': year,
            'avg_oreb_home': avg_oreb_home,
            'avg_oreb_away': avg_oreb_away,
            'oreb_diff': oreb_diff,
            'avg_dreb_home': avg_dreb_home,
            'avg_dreb_away': avg_dreb_away,
            'dreb_diff': dreb_diff
        })

result_df = pd.DataFrame(result)
print(result_df)

  season_year  avg_oreb_home  avg_oreb_away  oreb_diff  avg_dreb_home  \
0        2019      10.179353       9.794401   0.384952      35.209974   
1        2020       9.896382       9.742599   0.153783      34.866776   
2        2021      10.309249      10.236994   0.072254      34.416185   
3        2022      10.485570      10.419913   0.065657      33.575758   

   avg_dreb_away  dreb_diff  
0      34.256343   0.953631  
1      34.178454   0.688322  
2      33.875000   0.541185  
3      32.632756   0.943001  


In [46]:
# Question 4
result = []
for year in seasons:
    fg3_query = f"""
    SELECT 
        AVG(fg3_pct_home) AS avg_fg3_pct_home,
        AVG(fg3_pct_away) AS avg_fg3_pct_away
    FROM game
    WHERE SUBSTR(season_id, -4) = '{year}'
    """
    
    fg3_data = pd.read_sql_query(fg3_query, con)
    
    if not fg3_data.empty:
        avg_fg3_pct_home = fg3_data['avg_fg3_pct_home'][0]
        avg_fg3_pct_away = fg3_data['avg_fg3_pct_away'][0]
        
        result.append({
            'season_year': year,
            'avg_fg3_pct_home': avg_fg3_pct_home,
            'avg_fg3_pct_away': avg_fg3_pct_away
        })

result_df = pd.DataFrame(result)
print(result_df)

  season_year  avg_fg3_pct_home  avg_fg3_pct_away
0        2019          0.361411          0.353990
1        2020          0.367235          0.362348
2        2021          0.354915          0.350280
3        2022          0.362962          0.351737


In [48]:
# Question 5
result = []
for year in seasons:
    ft_query = f"""
    SELECT 
        AVG(ft_pct_home) AS avg_ft_pct_home,
        AVG(ft_pct_away) AS avg_ft_pct_away
    FROM game
    WHERE SUBSTR(season_id, -4) = '{year}'
    """
    
    ft_data = pd.read_sql_query(ft_query, con)
    
    if not ft_data.empty:
        avg_ft_pct_home = ft_data['avg_ft_pct_home'][0]
        avg_ft_pct_away = ft_data['avg_ft_pct_away'][0]
        
        result.append({
            'season_year': year,
            'avg_ft_pct_home': avg_ft_pct_home,
            'avg_ft_pct_away': avg_ft_pct_away
        })

result_df = pd.DataFrame(result)
print(result_df)

  season_year  avg_ft_pct_home  avg_ft_pct_away
0        2019         0.771852         0.772411
1        2020         0.779016         0.775290
2        2021         0.775135         0.775920
3        2022         0.782598         0.780615


In [50]:
con.close()