In [19]:
%pip install seaborn


Note: you may need to restart the kernel to use updated packages.


In [None]:
import sqlite3
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Location of the NBA SQLite database. The previous literal began with a stray
# space (" /Users/..."), which made it a relative path that SQLite could not open.
# NOTE(review): this is a machine-specific absolute path; consider a path
# relative to the repository so the notebook runs on other machines.
DB_PATH = r"/Users/twinkle/Documents/GitHub/UMD-INST627-Fall2024/data/nba.sqlite"
con = sqlite3.connect(DB_PATH)

#Question 1: How has the difference in win-loss ratio between home and away teams changed from the 2019-20 season to the 2022-23 season? (wl_home and wl_away columns in game table)
##Introduction: 
Home court advantage is a major deciding factor in the result of an NBA game. Since the pandemic prevented fans from actually attending the games in person, this question explores how this situation affected the win-loss ratios from the 2019 season to the 2022 season.



In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Build the per-season summary that this chart and the later `dataframe` charts
# read. `data` was never defined in the notebook, so it is derived here from the
# `game` table. Win ratios are the share of games won (mean of W=1 / L=0), the
# same measure the Rising Action cell computes.
season_summary_query = """
SELECT season_id,
       AVG(wl_home = 'W') AS home_win_ratio,
       AVG(wl_away = 'W') AS away_win_ratio,
       AVG(pts_home)      AS home_avg_points,
       AVG(pts_away)      AS away_avg_points,
       AVG(fg3_pct_home)  AS home_fg3_pct,
       AVG(fg3_pct_away)  AS away_fg3_pct
FROM game
WHERE season_id IN ('22019', '22020', '22021', '22022')
GROUP BY season_id
ORDER BY season_id
"""
season_summary = pd.read_sql_query(season_summary_query, con)

# Turn a season_id such as 22019 into the label "2019-20".
# NOTE(review): presumes the last four digits of season_id are the start year.
start_years = season_summary['season_id'].astype(str).str[-4:].astype(int).tolist()
data = {
    'Season': [f"{year}-{(year + 1) % 100:02d}" for year in start_years],
    'Home Win-Loss Ratio': season_summary['home_win_ratio'].tolist(),
    'Away Win-Loss Ratio': season_summary['away_win_ratio'].tolist(),
    'Home Avg Points': season_summary['home_avg_points'].tolist(),
    'Away Avg Points': season_summary['away_avg_points'].tolist(),
    'Home FG3%': season_summary['home_fg3_pct'].tolist(),
    'Away FG3%': season_summary['away_fg3_pct'].tolist(),
}
dataframe = pd.DataFrame(data)
# Home-minus-away win ratio, read by the "Difference" line chart further down.
dataframe['Difference'] = dataframe['Home Win-Loss Ratio'] - dataframe['Away Win-Loss Ratio']

plt.figure(figsize=(10, 6))
bar_width = 0.4
# One slot per season; the original sized this from an undefined `df`.
x = range(len(dataframe['Season']))

#bar chart for introduction
plt.bar(x, dataframe['Home Win-Loss Ratio'], width=bar_width, label='Home', color='steelblue')
# Away bars are shifted one bar width to sit beside the home bars.
plt.bar([i + bar_width for i in x], dataframe['Away Win-Loss Ratio'], width=bar_width, label='Away', color='orange')
plt.xlabel('Season', fontsize=12)
plt.ylabel('Win-Loss Ratio', fontsize=12)
plt.title('Home vs Away Win-Loss Ratios (2019-20 to 2022-23)', fontsize=14)
# Centre each tick between the paired bars.
plt.xticks([i + bar_width / 2 for i in x], dataframe['Season'])
plt.legend()
plt.tight_layout()
plt.show()


##Rising Action: 
First we extract the data on win-loss ratios for home and away teams for 2019-20 to 2022-23 seasons to compare the difference during the pandemic, then aggregate the win-loss ratios for the relevant seasons. 


In [None]:
# Pull every game of the four seasons. The previous query added
# "GROUP BY team, season_id" without any aggregate, which collapses each
# team/season to a single arbitrary game, so the ratios were computed from
# roughly one game per team instead of the full schedule.
win_loss_query = """
SELECT season_id,
       team_name_home AS team,
       wl_home,
       team_name_away AS away_team,
       wl_away
FROM game
WHERE season_id IN ('22019', '22020', '22021', '22022')
"""
win_loss_data = pd.read_sql_query(win_loss_query, con)

# Encode outcomes numerically so the mean of the column is the share of wins.
outcome_to_flag = {'W': 1, 'L': 0}
win_loss_data['wl_home'] = win_loss_data['wl_home'].map(outcome_to_flag)
win_loss_data['wl_away'] = win_loss_data['wl_away'].map(outcome_to_flag)

# Per-season win share for home and away sides. The away ratio previously
# averaged `wl_home`, which made the difference below identically zero.
win_loss_stats = (
    win_loss_data
    .groupby('season_id')
    .agg(home_win_ratio=('wl_home', 'mean'), away_win_ratio=('wl_away', 'mean'))
    .reset_index()
)

win_loss_stats['win_loss_diff'] = win_loss_stats['home_win_ratio'] - win_loss_stats['away_win_ratio']

##Climax: 
By subtracting the win-loss ratios we see how the difference has changed between home and away games.

In [None]:
#line chart for climax
# Plots the 'Difference' column of `dataframe` against 'Season'.
plt.figure(figsize=(10, 6))
sns.lineplot(data=dataframe, x='Season', y='Difference', marker='o', color='green')

# Add labels, title, and grid
plt.xlabel('Season', fontsize=12)
plt.ylabel('Difference (Home - Away)', fontsize=12)
plt.title('Difference in Win-Loss Ratios (Home vs Away)', fontsize=14)
plt.grid(visible=True, linestyle='--', alpha=0.7)
# Shade the slot around x position 1.
# NOTE(review): assumes 'Season' holds string labels, so points sit at 0..n-1
# and position 1 is the 2020-21 season — confirm.
plt.axvspan(0.5, 1.5, color='red', alpha=0.2, label='Pandemic (2020-21)')

# Label each point with its value. The labels must come from the same frame that
# is plotted; the original iterated over `df`, which is not defined.
for i, diff in enumerate(dataframe['Difference']):
    plt.text(i, diff + 0.01, f"{diff:.2f}", ha='center', fontsize=10)

plt.legend()
plt.tight_layout()
plt.show()


##Falling Action:
The analysis shows a clear decline in the 2020-21 season win-loss ratio difference, which aligns with the pandemic when no fans were around for the games. This suggests that fan presence affects the home court advantage.

##Conclusion:
The reduced difference in win-loss ratios during the seasons we analysed shows that the pandemic had a significant impact on the results of the NBA games. 

#Question 2:How has the difference in average points scored between home and away teams changed from the 2019-20 season to the 2022-23 season? (pts_home and pts_away columns in game table)

##Introduction:
The most critical metric for evaluating team performance is scoring. Due to factors like fan support, familiarity with the court, and lack of travel fatigue, home teams tend to score more. By examining the difference in average points we can see how the pandemic affected the results.


In [None]:
#stacked bar chart for introduction
# Home average forms the base segment of each season's bar; the away average is
# stacked on top of it.
seasons = dataframe['Season']
home_points = dataframe['Home Avg Points']
away_points = dataframe['Away Avg Points']

plt.figure(figsize=(10, 6))
stack_layers = [
    (home_points, None, 'Home', 'blue'),
    (away_points, home_points, 'Away', 'red'),
]
for heights, base, layer_label, layer_color in stack_layers:
    plt.bar(seasons, heights, bottom=base, label=layer_label, color=layer_color)

plt.title('Stacked Average Points Scored (Home + Away)', fontsize=14)
plt.xlabel('Season', fontsize=12)
plt.ylabel('Total Points', fontsize=12)
plt.legend()
plt.tight_layout()
plt.show()

##Rising Action: 
First we extract the average points scored by home and away teams for the 2019-20 to 2022-23 seasons, averaged per home team and season, so that we can compare the scoring difference during the pandemic.

In [None]:
# Average points per home team and season. `avg_home` is the team's own scoring
# in its home games; `avg_away` is what visiting opponents scored in those games.
# The season filter previously listed 22019, 22020, 22022 and 22023, which
# skipped 22021 and reached past the 2019-20 to 2022-23 range being analysed.
avg_home_away_query = """
SELECT season_id,
       team_name_home AS team,
       AVG(pts_home) AS avg_home,
       AVG(pts_away) AS avg_away
FROM game
WHERE season_id IN ('22019', '22020', '22021', '22022')
GROUP BY team, season_id
"""
avg_home_away_data = pd.read_sql_query(avg_home_away_query, con)

avg_home_away_data

##Climax:
The visualization would show a noticeable decline in the 2020-21 season, which was when there was a significant absence of fans at the beginning of the pandemic.


In [None]:
#heatmap for climax
# Index the two points columns by season label. The original passed the columns
# as Series together with index=df['Season']: pandas then re-aligns the Series
# (indexed 0..n-1) to the season labels, producing an all-NaN frame, and `df`
# itself is not defined. Setting the index on `dataframe` keeps the values.
heatmap_data = (
    dataframe
    .set_index('Season')[['Home Avg Points', 'Away Avg Points']]
    .rename(columns={'Home Avg Points': 'Home', 'Away Avg Points': 'Away'})
)

plt.figure(figsize=(8, 6))
# Transpose so seasons run along the x-axis and Home/Away form the two rows.
sns.heatmap(heatmap_data.T, annot=True, cmap='coolwarm', cbar=True, fmt=".1f")
plt.title('Heatmap of Average Points Scored (Home vs Away)', fontsize=14)
plt.ylabel('Team Location', fontsize=12)
plt.xlabel('Season', fontsize=12)
plt.tight_layout()
plt.show()


##Falling Action:
The significant decline in the average points implies that fan presence has a major psychological and strategic impact on players with the home court advantage.

##Conclusion:
While some recovery is seen in the later seasons in average points, the reduced scoring difference suggests that home court advantage may have permanently shifted.

#Question 3:How has the difference in offensive and defensive rebounds between home and away teams changed from 2019-20 to 2022-23?(oreb_home, oreb_away, dreb_home, dreb_away in the game table)

##Introduction:
Rebounding is an important aspect that dictates possession control and scoring opportunities. The analysis of the difference in offensive and defensive rebounds examines how the pandemic affected possible trends.


##Rising Action:
We extracted and computed the average offensive and defensive rebounds for 2019, 2020, 2021, 2022 seasons. We aimed to quantify the home court advantage over time by calculating the difference between home and away rebounds.


In [None]:
# Average offensive and defensive rebounds per home team and season. The *_home
# columns are the team's own rebounds in its home games; the *_away columns are
# the visiting opponents' rebounds in those games.
# The season filter previously listed 22019, 22020, 22022 and 22023, which
# skipped 22021 and reached past the 2019-20 to 2022-23 range being analysed.
rebound_query = """
SELECT season_id,
       team_name_home AS team,
       AVG(oreb_home) AS avg_oreb_home,
       AVG(oreb_away) AS avg_oreb_away,
       AVG(dreb_away) AS avg_dreb_away,
       AVG(dreb_home) AS avg_dreb_home
FROM game
WHERE season_id IN ('22019', '22020', '22021', '22022')
GROUP BY team, season_id
"""
rebound_data = pd.read_sql_query(rebound_query, con)

# Home-minus-away differentials for each rebound type.
rebound_data['oreb_diff'] = rebound_data['avg_oreb_home'] - rebound_data['avg_oreb_away']
rebound_data['dreb_diff'] = rebound_data['avg_dreb_home'] - rebound_data['avg_dreb_away']

rebound_data

##Climax:
Home teams had an advantage but the difference decreased in the 2020-21 season, when games were played without fans. A similar trend is observed in the defensive rebounds for home and away games. 

##Falling Action:
The above trends are likely due to factors like crowd influence, fatigue levels, and referee decisions.


In [None]:
#line chart for falling action
# The original read an undefined `df`. The season-level differentials are derived
# here from `rebound_data` instead: the unweighted mean of the per-team
# home-minus-away differences within each season.
rebound_trend = (
    rebound_data
    .groupby('season_id', as_index=False)[['oreb_diff', 'dreb_diff']]
    .mean()
    .rename(columns={
        'oreb_diff': 'Offensive Rebounds Difference',
        'dreb_diff': 'Defensive Rebounds Difference',
    })
)
# Turn a season_id such as 22019 into the label "2019-20".
# NOTE(review): presumes the last four digits of season_id are the start year.
rebound_years = rebound_trend['season_id'].astype(str).str[-4:].astype(int).tolist()
rebound_trend['Season'] = [f"{year}-{(year + 1) % 100:02d}" for year in rebound_years]

# (column, legend label, colour) for each plotted series.
series_styles = [
    ('Offensive Rebounds Difference', 'Offensive Rebounds', 'blue'),
    ('Defensive Rebounds Difference', 'Defensive Rebounds', 'green'),
]

plt.figure(figsize=(10, 6))
for column, series_label, series_color in series_styles:
    sns.lineplot(data=rebound_trend, x='Season', y=column, marker='o', label=series_label, color=series_color)

plt.xlabel('Season', fontsize=12)
plt.ylabel('Difference (Home - Away)', fontsize=12)
plt.title('Trends in Rebound Differences (Home vs Away)', fontsize=14)
plt.grid(visible=True, linestyle='--', alpha=0.7)
# Shade the slot around x position 1, the second season on the categorical axis.
plt.axvspan(0.5, 1.5, color='red', alpha=0.2, label='Pandemic (2020-21)')

# Annotate by row position rather than index label, so the text lands on the
# right season even if the frame's index is not 0..n-1.
for position, (_, row) in enumerate(rebound_trend.iterrows()):
    for column, _series_label, series_color in series_styles:
        plt.text(position, row[column] + 0.1, f"{row[column]:.1f}", ha='center', fontsize=10, color=series_color)

plt.legend()
plt.tight_layout()
plt.show()

##Conclusion:
From 2019-20 to 2022-23, there are fluctuations in the differences in offensive and defensive rebounds between home and away teams. The most significant decline in home court advantage occurred in the 2020-21 season; however, the differences did not return to pre-pandemic levels.

In [None]:
#pre-pandemic and post-pandemic comparison
# Values are entered by hand here, not read from the database.
comparison_data = {
    'Rebound Type': ['Offensive Rebounds', 'Defensive Rebounds'],
    'Pre-Pandemic (2019-20)': [2.1, 3.2],
    'Post-Pandemic (2022-23)': [1.9, 2.9]
}
comparison_df = pd.DataFrame(comparison_data)

#grouped bar chart for conclusion
bar_width = 0.4
x = range(len(comparison_df['Rebound Type']))

# (column, horizontal offset, colour): the second period is shifted by one bar
# width so the two bars of each rebound type sit side by side.
period_styles = [
    ('Pre-Pandemic (2019-20)', 0, 'blue'),
    ('Post-Pandemic (2022-23)', bar_width, 'orange'),
]

plt.figure(figsize=(8, 6))
for period, offset, period_color in period_styles:
    positions = [slot + offset for slot in x]
    plt.bar(positions, comparison_df[period], width=bar_width, label=period, color=period_color)

# Centre each tick between the paired bars.
tick_positions = [slot + bar_width / 2 for slot in x]
plt.xticks(tick_positions, comparison_df['Rebound Type'])
plt.title('Comparison of Rebound Differences (Pre vs Post-Pandemic)', fontsize=14)
plt.xlabel('Rebound Type', fontsize=12)
plt.ylabel('Difference (Home - Away)', fontsize=12)
plt.legend()
plt.tight_layout()
plt.show()


#Question 4:How does the average number of three-point field goals made by home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (fg3_pct_home and fg3_pct_away columns in game table)

##Introduction:
Three-point shots are an important part of modern basketball, which could be heavily influenced by home court advantage. We explore how the average three-point goals changed over the 2019-20 through 2022-23 NBA seasons.


##Rising Action:
We extract and compute the difference in average three-point field goal percentage between home and away teams for seasons 2019-20 and 2022-23, which provides insights into the shooting efficiency trends over time.


In [None]:
# Home-minus-away gap in average three-point percentage across all games of a
# season, queried separately for season_id 22019 and 22022.
fg3_diff_19 = """ SELECT season_id, AVG(fg3_pct_home)- AVG(fg3_pct_away) AS fg3_diff FROM game WHERE season_id='22019' """
fg3_diff_19_data = pd.read_sql_query(fg3_diff_19, con)

fg3_diff_22 = """ SELECT season_id, AVG(fg3_pct_home)- AVG(fg3_pct_away) AS fg3_diff FROM game WHERE season_id='22022' """
fg3_diff_22_data = pd.read_sql_query(fg3_diff_22, con)

# Only a cell's last expression is rendered, so the bare `fg3_diff_19_data` line
# was silently discarded. Show both seasons together in one table instead.
pd.concat([fg3_diff_19_data, fg3_diff_22_data], ignore_index=True)

In [None]:
#bar chart for rising action
# Grouped bars of home and away FG3% per season, both read from `dataframe`.
bar_width = 0.4
x = range(len(dataframe['Season']))

plt.figure(figsize=(10, 6))
# The home bars previously read `df`, which is not defined; both series must come
# from the same frame that supplies the season labels.
plt.bar(x, dataframe['Home FG3%'], width=bar_width, label='Home', color='blue')
# Away bars are shifted one bar width to sit beside the home bars.
plt.bar([i + bar_width for i in x], dataframe['Away FG3%'], width=bar_width, label='Away', color='orange')

plt.xlabel('Season', fontsize=12)
plt.ylabel('Average FG3%', fontsize=12)
plt.title('Three-Point Field Goal Percentage by Season (Home vs Away)', fontsize=14)
# Centre each tick between the paired bars.
plt.xticks([i + bar_width / 2 for i in x], dataframe['Season'])
plt.legend()
plt.tight_layout()
plt.show()

##Climax:
In the 2019-20 season, home teams had a slight advantage in three-point shooting with an average FG3% difference of 0.0074. In 2022-23, the home advantage slightly increased to 0.0115.


In [None]:
#line chart for climax
# Compute the FG3% gap on a copy. Writing it into dataframe['Difference'] would
# overwrite the column that the win-loss difference chart reads, so re-running
# that earlier cell would silently plot FG3% gaps instead.
fg3_frame = dataframe.assign(Difference=dataframe['Home FG3%'] - dataframe['Away FG3%'])

plt.figure(figsize=(10, 6))
sns.lineplot(data=fg3_frame, x='Season', y='Difference', marker='o', color='green', label='Difference (Home - Away)')
plt.xlabel('Season', fontsize=12)
plt.ylabel('Difference in FG3%', fontsize=12)
plt.title('Difference in Three-Point FG% (Home vs Away)', fontsize=14)

# Shade the slot around x position 1.
# NOTE(review): assumes 'Season' holds string labels, so points sit at 0..n-1
# and position 1 is the 2020-21 season — confirm.
plt.axvspan(0.5, 1.5, color='red', alpha=0.2, label='Pandemic (2020-21)')

# Label each point from the plotted frame; the original iterated over `df`,
# which is not defined.
for i, diff in enumerate(fg3_frame['Difference']):
    plt.text(i, diff + 0.0005, f"{diff:.4f}", ha='center', fontsize=10)

plt.legend()
plt.tight_layout()
plt.show()

##Falling Action:
The results suggest that home teams consistently maintain a slight edge in three-point shooting, which could be due to familiarity with the court, crowd support, or pandemic effects. However, the difference is minuscule and not significant, which suggests that three-point shooting may be less affected by location than other factors.

##Conclusion:
From 2019-20 to 2022-23, home teams maintained a slight but consistent advantage in three-point shooting percentage over away teams. This advantage increases marginally post-pandemic.


#Question 5:How does the free throw percentage of home teams compare to that of away teams from the 2019-20 season to the 2022-23 season? (ft_pct_home and ft_pct_away in the game table)

##Introduction:
Free throws give teams the opportunity to score uncontested points. Home court advantage could influence free throw percentage due to crowd noise or a comfortable environment. We can compare the percentages to uncover any significant trends.


In [None]:
#box plot for introduction
# The original read an undefined `df`. A box plot needs one row per observation,
# so the per-game free throw percentages are pulled from the `game` table and
# reshaped to long form with Season / Location / Free Throw Percentage columns.
# NOTE(review): uses all four seasons named in the question; narrow the filter
# if only 2019-20 and 2022-23 are wanted.
ft_games_query = """
SELECT season_id, ft_pct_home, ft_pct_away
FROM game
WHERE season_id IN ('22019', '22020', '22021', '22022')
ORDER BY season_id
"""
ft_games = pd.read_sql_query(ft_games_query, con)

# Turn a season_id such as 22019 into the label "2019-20".
# NOTE(review): presumes the last four digits of season_id are the start year.
ft_years = ft_games['season_id'].astype(str).str[-4:].astype(int).tolist()
ft_games['Season'] = [f"{year}-{(year + 1) % 100:02d}" for year in ft_years]

# One row per (game, side): 'Location' is Home or Away.
ft_long = (
    ft_games
    .rename(columns={'ft_pct_home': 'Home', 'ft_pct_away': 'Away'})
    .melt(
        id_vars='Season',
        value_vars=['Home', 'Away'],
        var_name='Location',
        value_name='Free Throw Percentage',
    )
)

plt.figure(figsize=(10, 6))
sns.boxplot(x='Location', y='Free Throw Percentage', data=ft_long, hue='Season', palette='pastel')
plt.xlabel('Location', fontsize=12)
plt.ylabel('Free Throw Percentage', fontsize=12)
plt.title('Free Throw Percentage Distribution (Home vs Away)', fontsize=14)
# Place the legend outside the axes so it does not cover the boxes.
plt.legend(title='Season', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

##Rising Action:
We retrieve the minimum and maximum free throw percentages for home and away teams, along with the number of games played, for 2019-20 and 2022-23, which gives a view of the range of FT percentages across locations and seasons.


In [None]:
# Lowest and highest single-game free throw percentage for the home and away
# sides, plus the number of game rows, for season_id 22019.
ft_pct_query = (
    " SELECT season_id,"
    " MIN(ft_pct_home) AS min_ft_pct_home,"
    " MIN(ft_pct_away) AS min_ft_pct_away,"
    " MAX(ft_pct_home) AS max_ft_pct_home,"
    " MAX(ft_pct_away) AS max_ft_pct_away,"
    " COUNT(*) AS games_played"
    " FROM game"
    " WHERE season_id= '22019'"
)
ft_pct_data_19 = pd.read_sql_query(sql=ft_pct_query, con=con)

ft_pct_data_19

In [None]:
# Lowest and highest single-game free throw percentage for the home and away
# sides, plus the number of game rows, for season_id 22022.
ft_pct_query = (
    " SELECT season_id,"
    " MIN(ft_pct_home) AS min_ft_pct_home,"
    " MIN(ft_pct_away) AS min_ft_pct_away,"
    " MAX(ft_pct_home) AS max_ft_pct_home,"
    " MAX(ft_pct_away) AS max_ft_pct_away,"
    " COUNT(*) AS games_played"
    " FROM game"
    " WHERE season_id= '22022'"
)
ft_pct_data_22 = pd.read_sql_query(sql=ft_pct_query, con=con)

ft_pct_data_22

##Climax:
For 2019-20, the maximum free throw percentages were identical for home and away teams (1.0). For 2022-23, the maximum free throw percentages were again equal (1.0).

##Falling Action:
Data indicates that while maximum free throw percentages remain consistent, home teams tend to have slightly higher minimum free throw percentage.


##Conclusion:
We can conclude that the upper end is comparable between home and away teams, but home teams show a higher minimum free throw percentage, which emphasizes the home court advantage.

In [None]:
#bar chart for conclusion
# `min_df` was never defined in the notebook. It is assembled here from the
# single-row query results `ft_pct_data_19` and `ft_pct_data_22`, which carry
# the minimum home/away free throw percentages for season_id 22019 and 22022.
season_results = [('2019-20', ft_pct_data_19), ('2022-23', ft_pct_data_22)]
min_df = pd.DataFrame({
    'Season': [season_label for season_label, _ in season_results],
    'Home Minimum FT%': [result['min_ft_pct_home'].iloc[0] for _, result in season_results],
    'Away Minimum FT%': [result['min_ft_pct_away'].iloc[0] for _, result in season_results],
})

bar_width = 0.4
x = range(len(min_df['Season']))

plt.figure(figsize=(10, 6))
plt.bar(x, min_df['Home Minimum FT%'], width=bar_width, label='Home', color='blue')
# Away bars are shifted one bar width to sit beside the home bars.
plt.bar([i + bar_width for i in x], min_df['Away Minimum FT%'], width=bar_width, label='Away', color='orange')
plt.xlabel('Season', fontsize=12)
plt.ylabel('Minimum Free Throw Percentage', fontsize=12)
plt.title('Minimum Free Throw Percentage (Home vs Away)', fontsize=14)
# Centre each tick between the paired bars.
plt.xticks([i + bar_width / 2 for i in x], min_df['Season'])
plt.legend()
plt.tight_layout()
plt.show()