In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from scipy.stats import linregress

# Location of the games dataset, relative to this notebook
file_one = "../resources/games.csv"

# Load the CSV into a DataFrame; the file is decoded as ISO-8859-1 instead of the UTF-8 default
games_df = pd.read_csv(filepath_or_buffer=file_one, encoding="ISO-8859-1")

# Preview 25 randomly chosen rows as a quick check of the load
games_df.sample(n=25)

In [None]:
# Count the null entries in every column of the games data
missing_values_count = games_df.isna().sum()

# Show the counts for (at most) the first 50 columns
missing_values_count.iloc[:50]

In [None]:
# Share of the dataframe that is missing, expressed as a percentage of all cells.
# np.prod is used instead of np.product: the latter was deprecated in NumPy 1.25
# and removed in NumPy 2.0, where it raises AttributeError.
total_cells = np.prod(games_df.shape)  # rows * columns
# missing_values_count holds the per-column null counts, so its sum is the total number of nulls
total_missing = missing_values_count.sum()

# Present the percentage of cells that are null
(total_missing/total_cells) * 100

In [None]:
# Group the games by season once; every per-season statistic below is taken from this grouping
games_by_season = games_df.groupby('SEASON')

# Per-season mean Free Throw (FT) percentage, home and away
FTPCTHOME_shooting_mean_by_season = games_by_season['FT_PCT_home'].mean()
FTPCTAWAY_shooting_mean_by_season = games_by_season['FT_PCT_away'].mean()

# Per-season mean points scored, away and home
AVGPTS_away_by_season = games_by_season['PTS_away'].mean()
AVGPTS_home_by_season = games_by_season['PTS_home'].mean()

# Per-season mean 3PT shooting percentage, home and away
FG3HOME_shooting_mean_by_season = games_by_season['FG3_PCT_home'].mean()
FG3AWAY_shooting_mean_by_season = games_by_season['FG3_PCT_away'].mean()

# Per-season sum and non-null count of HOME_TEAM_WINS
# (assumes HOME_TEAM_WINS is a 1/0 flag per game, so the sum is the number of home wins - TODO confirm)
home_wins_by_season = games_by_season['HOME_TEAM_WINS']
WINHOME_by_season = home_wins_by_season.sum()
WINTOTAL_by_season = home_wins_by_season.count()

# Home winning percentage = summed HOME_TEAM_WINS / number of entries, per season
WINPCTHOME_by_season = WINHOME_by_season.div(WINTOTAL_by_season)

# Assemble the single summary dataframe, indexed by season
summary_columns = {
    "WIN% HOME": WINPCTHOME_by_season,
    "AVG PTS HOME": AVGPTS_home_by_season,
    "AVG PTS AWAY": AVGPTS_away_by_season,
    "FT% AWAY AVG": FTPCTAWAY_shooting_mean_by_season,
    "FT% HOME AVG": FTPCTHOME_shooting_mean_by_season,
    "FG3%HOME AVG": FG3HOME_shooting_mean_by_season,
    "FG3%AWAY AVG": FG3AWAY_shooting_mean_by_season,
}
Summary_df = pd.DataFrame(summary_columns)

# Present the summary table
Summary_df

In [None]:
# Bar chart of the home-team winning percentage for each season in Summary_df
plt.title('Home Team PCT% by from 2003 - 2019')

# Take the tick positions from the data instead of a hard-coded list of years:
# plt.xticks is given Summary_df.index as labels, so positions and labels must have the
# same length, which a fixed 2003-2019 list only guarantees for one particular dataset.
x_series = list(Summary_df.index)
ticks = [value for value in x_series]

plt.xlabel('Season')
plt.ylabel('Home Team Wins (PCT%)')
plt.bar(x_series, Summary_df['WIN% HOME'], label='Home Win PCT%', align='center', alpha=0.5)

# Zoom the y-axis to the 54%-65% band so season-to-season differences are visible
plt.ylim(.54, .65)
plt.xticks(ticks, x_series, rotation="vertical")

# Save before plt.show(), then render in the notebook
plt.savefig("../output/Home Team PCT.png")
plt.show()
print(f"The Average Home Team Winning Percent for all Seasons 2003 - 2019 is {Summary_df['WIN% HOME'].mean()}")

In [None]:
# Line chart comparing the per-season mean FG3% of home and away teams
for fg3_series, fg3_label in (
    (FG3HOME_shooting_mean_by_season, 'Home'),
    (FG3AWAY_shooting_mean_by_season, 'Away'),
):
    plt.plot(fg3_series, label=fg3_label)

# Title, axis labels and legend
plt.title('Average FG3% Shooting Comparison')
plt.xlabel('Season')
plt.ylabel('Percent FG3 Average')
plt.legend()

# Save the figure, then render it in the notebook
plt.savefig("../output/Average FG3 compared.png")
plt.show()

# Mean of the per-season averages, home then away
home_fg3_overall = Summary_df['FG3%HOME AVG'].mean()
away_fg3_overall = Summary_df['FG3%AWAY AVG'].mean()
print(f"The Average Home FG3 Shooting Percent for all Seasons 2003 - 2019 is {home_fg3_overall}")
print(f"The Average Away FG3 Shooting Percent for all Seasons 2003 - 2019 is {away_fg3_overall}")

In [None]:
# Line chart comparing the per-season mean FT% of home and away teams
for ft_series, ft_label in (
    (FTPCTHOME_shooting_mean_by_season, 'Home'),
    (FTPCTAWAY_shooting_mean_by_season, 'Away'),
):
    plt.plot(ft_series, label=ft_label)

# Title, axis labels and legend
plt.title('Average FT% Shooting Comparison')
plt.xlabel('Season')
plt.ylabel('Percent FT Average')
plt.legend()

# Save the figure, then render it in the notebook
plt.savefig("../output/Average FT comparison.png")
plt.show()

# Mean of the per-season averages, home then away
home_ft_overall = Summary_df['FT% HOME AVG'].mean()
away_ft_overall = Summary_df['FT% AWAY AVG'].mean()
print(f"The Average Home FT Shooting Percent for all Seasons 2003 - 2019 is {home_ft_overall}")
print(f"The Average Away FT Shooting Percent for all Seasons 2003 - 2019 is {away_ft_overall}")

In [None]:
# Scatter plot of per-season home FG3% (x) against home-team win percentage (y)
plt.title('Home Team FG3% vs Home Team Win Percent')
plt.scatter(FG3HOME_shooting_mean_by_season, Summary_df['WIN% HOME'])
plt.ylabel('Home Team Wins Percent')
plt.xlabel('FG3 Percent Average')

# Pearson correlation and least-squares line for the same x/y pair
Correlation_Coeff = st.pearsonr(FG3HOME_shooting_mean_by_season, Summary_df['WIN% HOME'])[0]
Linear_Regress = st.linregress(FG3HOME_shooting_mean_by_season, Summary_df['WIN% HOME'])
# Fitted y value for every observed x: slope * x + intercept
LinValues = FG3HOME_shooting_mean_by_season * Linear_Regress.slope + Linear_Regress.intercept

# Overlay the fitted line in red, save the figure, then render it
plt.plot(FG3HOME_shooting_mean_by_season, LinValues, "r-")
plt.savefig("../output/Home FG3 Shooting to Win Percent.png")
plt.show()

print(f"The correlation between FG3%HOME and HOME_TEAM_WINS {Correlation_Coeff}")
# The variable pair is named once here; the original message repeated it mid-sentence
print(f"The slope of the liner regression model between FG3%HOME and HOME_TEAM_WINS is {Linear_Regress.slope} and the y intercept is {Linear_Regress.intercept}")

In [None]:
# Scatter plot of per-season home FT% (x) against home-team win percentage (y)
home_ft_pct = FTPCTHOME_shooting_mean_by_season
home_win_pct = Summary_df['WIN% HOME']

plt.scatter(home_ft_pct, home_win_pct)
plt.title('Home Team FT% vs Home Team Win Percent')
plt.xlabel('FT% Percent Average')
plt.ylabel('Home Team Wins Percent')

# Pearson correlation and least-squares fit for the same x/y pair
Correlation_Coeff = st.pearsonr(home_ft_pct, home_win_pct)[0]
Linear_Regress = st.linregress(home_ft_pct, home_win_pct)
ft_slope = Linear_Regress[0]
ft_intercept = Linear_Regress[1]
# Fitted y value for every observed x
LinValues = home_ft_pct * ft_slope + ft_intercept

# Overlay the fitted line in red, save the figure, then render it
plt.plot(home_ft_pct, LinValues, "r-")
plt.savefig("../output/Home FT Shooting to Win Percent.png")
plt.show()

print(f"The correlation between FT% HOME and HOME_TEAM_WINS {Correlation_Coeff}")
print(f"The slope of the liner regression model between FT% HOME and HOME_TEAM_WINS is {ft_slope} and the y intercept is {ft_intercept}")


In [None]:
# Scatter plot of per-season away FG3% (x) against home-team win percentage (y)
plt.title('Away Team FG3% vs Home Team Win Percent')
plt.scatter(FG3AWAY_shooting_mean_by_season, Summary_df['WIN% HOME'])
plt.ylabel('Home Team Wins Percent')
plt.xlabel('Away Team FG3% Percent Average')


# Pearson correlation and least-squares fit on the AWAY FG3% series, i.e. the same x data
# that is scattered above and named in the printed messages. The previous version fed the
# HOME FG3% series into these calls, so the statistics and the red line did not describe
# the plotted points.
Correlation_Coeff = st.pearsonr(FG3AWAY_shooting_mean_by_season, Summary_df['WIN% HOME'])[0]
Linear_Regress = st.linregress(FG3AWAY_shooting_mean_by_season, Summary_df['WIN% HOME'])
# Fitted y value for every observed x: slope * x + intercept
LinValues = FG3AWAY_shooting_mean_by_season * Linear_Regress[0] + Linear_Regress[1]

# Overlay the fitted line in red and render the figure
# NOTE(review): this cell does not call plt.savefig, so the figure is only displayed - confirm that is intended
plt.plot(FG3AWAY_shooting_mean_by_season, LinValues, "r-")
plt.show()

print(f"The correlation between FG3% AWAY and HOME_TEAM_WINS {Correlation_Coeff}")
print(f"The slope of the liner regression model between FG3% AWAY and HOME_TEAM_WINS is {Linear_Regress[0]} and the y intercept is {Linear_Regress[1]}")

In [None]:
# Scatter plot of per-season away FT% (x) against home-team win percentage (y)
away_ft_pct = FTPCTAWAY_shooting_mean_by_season
home_win_share = Summary_df['WIN% HOME']

plt.scatter(away_ft_pct, home_win_share)
plt.title('Away Team FT% vs Home Team Win Percent')
plt.xlabel('Away Team FT% Percent Average')
plt.ylabel('Home Team Wins Percent')

# Pearson correlation and least-squares fit for the same x/y pair
Correlation_Coeff = st.pearsonr(away_ft_pct, home_win_share)[0]
Linear_Regress = st.linregress(away_ft_pct, home_win_share)
away_ft_slope = Linear_Regress[0]
away_ft_intercept = Linear_Regress[1]
# Fitted y value for every observed x
LinValues = away_ft_pct * away_ft_slope + away_ft_intercept

# Overlay the fitted line in red, save the figure, then render it
plt.plot(away_ft_pct, LinValues, "r-")
plt.savefig("../output/Away FT Shooting to Win Percent.png")
plt.show()

print(f"The correlation between FT% AWAY and HOME_TEAM_WINS {Correlation_Coeff}")
print(f"The slope of the liner regression model between FT% AWAY and HOME_TEAM_WINS is {away_ft_slope} and the y intercept is {away_ft_intercept}")

In [None]:
# Scatter plot of per-season away FG3% (x) against average home-team points (y)
away_fg3_pct = FG3AWAY_shooting_mean_by_season
home_pts_avg = AVGPTS_home_by_season

plt.scatter(away_fg3_pct, home_pts_avg)
plt.title('Away Team FG3% vs Average Home Team Points')
plt.xlabel('FG3% Percent Average')
plt.ylabel('Average Home Team Points')

# Pearson correlation and least-squares fit for the same x/y pair
Correlation_Coeff = st.pearsonr(away_fg3_pct, home_pts_avg)[0]
Linear_Regress = st.linregress(away_fg3_pct, home_pts_avg)
fg3_pts_slope = Linear_Regress[0]
fg3_pts_intercept = Linear_Regress[1]
# Fitted y value for every observed x
LinValues = away_fg3_pct * fg3_pts_slope + fg3_pts_intercept

# Overlay the fitted line in red, save the figure, then render it
plt.plot(away_fg3_pct, LinValues, "r-")
# NOTE(review): the file name mentions "Win Percent" although this figure plots home-team points - confirm the intended name
plt.savefig("../output/Away Team FG3 Shooting to Win Percent.png")
plt.show()

print(f"The correlation between FG3% AWAY and HOME TEAM POINTS {Correlation_Coeff}")
print(f"The slope of the liner regression model between FG3% AWAY and HOME TEAM POINTS is {fg3_pts_slope} and the y intercept is {fg3_pts_intercept}")


In [None]:
# Scatter plot of per-season away FT% (x) against average home-team points (y)
plt.title('Away Team FT% vs Average Home Team Points')
plt.scatter(FTPCTAWAY_shooting_mean_by_season, AVGPTS_home_by_season)
# The y data is AVGPTS_home_by_season, so the axis is labelled as home points
# (it previously read 'Average Away Team Points', contradicting the title and the data).
plt.ylabel('Average Home Team Points')
plt.xlabel('FT% Percent Average')


# Pearson correlation and least-squares fit for the same x/y pair
Correlation_Coeff = st.pearsonr(FTPCTAWAY_shooting_mean_by_season, AVGPTS_home_by_season)[0]
Linear_Regress = st.linregress(FTPCTAWAY_shooting_mean_by_season, AVGPTS_home_by_season)
# Fitted y value for every observed x: slope * x + intercept
LinValues = FTPCTAWAY_shooting_mean_by_season * Linear_Regress[0] + Linear_Regress[1]

# Overlay the fitted line in red and render the figure
plt.plot(FTPCTAWAY_shooting_mean_by_season, LinValues, "r-")
plt.show()

print(f"The correlation between FT% AWAY and HOME TEAM POINTS {Correlation_Coeff}")
print(f"The slope of the liner regression model between FT% AWAY and HOME TEAM POINTS is {Linear_Regress[0]} and the y intercept is {Linear_Regress[1]}")
