## Business Objective: What was the impact of the COVID-19 pandemic on the level of home court advantage?
- Question 1: How has the difference in win-loss ratio between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)
- Question 2: How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)
- Question 3: How has the difference in offensive and defensive rebounds between home and away teams changed from 2019-20 to 2022-23?(oreb_home, oreb_away, dreb_home, dreb_away in the game table)
- Question 4: How does the average number of three-point field goals made by home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)
- Question 5: How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away in the game table)

In [355]:
import pandas as pd
import sqlite3
import seaborn as sns
import matplotlib.pyplot as plt

con = sqlite3.connect("data/nba.sqlite")
con_reading = pd.read_sql_query('SELECT * FROM game', con)

#### Question 1: How has the difference in win-loss percentage between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)

In [348]:
covid_wl = pd.read_sql_query("SELECT wl_home, wl_away, season_id FROM game WHERE season_id", con)
covid_wl = covid_wl.replace('W', 1)
covid_wl = covid_wl.replace('L', 0)

count_win = 1

# calculating the winning percentage of home and away game
def winning_percentage(game_column):
    game_list = covid_wl[game_column].to_list()
    total = len(game_list)
    win = game_list.count(count_win)
    return win / total

# home winning percentage
home_game = winning_percentage('wl_home')
# away winning percentage
away_game = winning_percentage('wl_away')

home_away_winning_percentage = {'Home Winning Percentage': [home_game],
                            'Away Winning Percentage': [away_game]}
home_away_winning_percentage = pd.DataFrame(home_away_winning_percentage)
home_away_winning_percentage

  covid_wl = covid_wl.replace('L', 0)


Unnamed: 0,Home Winning Percentage,Away Winning Percentage
0,0.618725,0.381244


#### Question 2: How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)

In [349]:
covid_wl = pd.read_sql_query("SELECT pts_home, pts_away, season_id FROM game WHERE season_id IN ('22019', '22020','22021', '22022')", con)
covid_wl = covid_wl.replace('W', 1)
covid_wl = covid_wl.replace('L', 0)


mean_pts_home = covid_wl.groupby('season_id').agg({
    'pts_home': 'mean'
})
mean_pts_home

Unnamed: 0_level_0,pts_home
season_id,Unnamed: 1_level_1
22019,112.863078
22020,112.562963
22021,111.477236
22022,115.935772


In [350]:
covid_wl = pd.read_sql_query("SELECT pts_home, pts_away, season_id FROM game WHERE season_id IN ('22019', '22020','22021', '22022')", con)
covid_wl = covid_wl.replace('W', 1)
covid_wl = covid_wl.replace('L', 0)


mean_pts_away = covid_wl.groupby('season_id').agg({
    'pts_away': 'mean'
})
mean_pts_away

Unnamed: 0_level_0,pts_away
season_id,Unnamed: 1_level_1
22019,110.732767
22020,111.619444
22021,109.754472
22022,113.435772


#### Question 3: How has the difference in offensive and defensive rebounds between home and away teams changed from 2019-20 to 2022-23?(oreb_home, oreb_away, dreb_home, dreb_away in the game table)

In [351]:
covid_wl = pd.read_sql_query("SELECT oreb_home, oreb_away, dreb_home, dreb_away, season_id FROM game WHERE season_id IN ('22019', '22020','22021', '22022')", con)
covid_wl = covid_wl.replace('W', 1)
covid_wl = covid_wl.replace('L', 0)

home_od = covid_wl.groupby('season_id').agg({
    'oreb_home': 'mean',
    'dreb_home': 'mean'
})
away_od = covid_wl.groupby('season_id').agg({
    'oreb_away': 'mean',
    'dreb_away': 'mean'
})



# Combine both DataFrames into one
df = pd.concat([home_od, away_od], axis=0)
df

Unnamed: 0_level_0,oreb_home,dreb_home,oreb_away,dreb_away
season_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
22019,10.253069,35.281398,,
22020,9.888889,34.830556,,
22021,10.378049,34.377236,,
22022,10.469106,33.421951,,
22019,,,9.898017,34.234183
22020,,,9.77037,34.108333
22021,,,10.290244,33.854472
22022,,,10.4,32.585366


#### Question 4: How does the average number of three-point field goals made by home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)

In [352]:
covid_wl = pd.read_sql_query("SELECT fg3_pct_home, fg3_pct_away, season_id FROM game WHERE season_id IN ('22019', '22020','22021', '22022')", con)
covid_wl = covid_wl.replace('W', 1)
covid_wl = covid_wl.replace('L', 0)

covid_wl["season_fg3_home_med"] = covid_wl.groupby("season_id")["fg3_pct_home"].transform(lambda x: x.median())
print(covid_wl["season_fg3_home_med"])

0       0.361
1       0.361
2       0.361
3       0.361
4       0.361
        ...  
4594    0.364
4595    0.364
4596    0.364
4597    0.364
4598    0.364
Name: season_fg3_home_med, Length: 4599, dtype: float64


In [353]:
covid_wl = pd.read_sql_query("SELECT fg3_pct_home, fg3_pct_away, season_id FROM game WHERE season_id IN ('22019', '22020','22021', '22022')", con)
covid_wl = covid_wl.replace('W', 1)
covid_wl = covid_wl.replace('L', 0)

covid_wl["season_fg3_away_med"] = covid_wl.groupby("season_id")["fg3_pct_away"].transform(lambda x: x.median())
print(covid_wl["season_fg3_away_med"])

0       0.3540
1       0.3540
2       0.3540
3       0.3540
4       0.3540
         ...  
4594    0.3505
4595    0.3505
4596    0.3505
4597    0.3505
4598    0.3505
Name: season_fg3_away_med, Length: 4599, dtype: float64


#### Question 5: How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away in the game table)

In [354]:
covid_wl = pd.read_sql_query("SELECT fg3_pct_home, fg3_pct_away, season_id FROM game WHERE season_id IN ('22019', '22020','22021', '22022')", con)
covid_wl = covid_wl.replace('W', 1)
covid_wl = covid_wl.replace('L', 0)

home_fg3 = covid_wl.groupby('season_id').agg({
    'fg3_pct_home': 'mean'
})
away_fg3 = covid_wl.groupby('season_id').agg({
    'fg3_pct_away': 'mean'
})

# Combine both DataFrames into one
df = pd.concat([home_fg3, away_fg3], axis=0)
df

Unnamed: 0_level_0,fg3_pct_home,fg3_pct_away
season_id,Unnamed: 1_level_1,Unnamed: 2_level_1
22019,0.361288,
22020,0.368503,
22021,0.354627,
22022,0.365037,
22019,,0.353899
22020,,0.36228
22021,,0.351595
22022,,0.353528
