# In [None]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, chi2_contingency
import matplotlib.pyplot as plt
import seaborn as sns

import codecademylib3
# Show NumPy floats in fixed-point notation with two decimal places.
np.set_printoptions(precision=2, suppress=True)

# Load the full game table from disk.
nba = pd.read_csv('./nba_games.csv')

# Keep one frame per season of interest: 2010 and 2014.
nba_2010 = nba.loc[nba['year_id'] == 2010]
nba_2014 = nba.loc[nba['year_id'] == 2014]

# Preview the first rows of each season subset.
print(nba_2010.head())
print(nba_2014.head())

# Points per game for each franchise in the 2010 season. The boolean masks
# are built from nba_2010 itself so they share an index with the Series
# being filtered, rather than relying on alignment against the full table.
knicks_pts = nba_2010.pts[nba_2010.fran_id == 'Knicks']
nets_pts = nba_2010.pts[nba_2010.fran_id == 'Nets']

# Difference between the two teams' average points in 2010
knicks_pts_mean = knicks_pts.mean()
nets_pts_mean = nets_pts.mean()
diff_means_2010 = knicks_pts_mean - nets_pts_mean
print(diff_means_2010)

# Overlaid, semi-transparent histograms of the two point distributions.
# density=True replaces the `normed` keyword (removed in Matplotlib 3.1) and
# scales each histogram to unit area, so the shapes stay comparable even if
# the two teams have different numbers of games.
plt.hist(knicks_pts, color="blue", label="Knicks", density=True, alpha=0.5)
plt.hist(nets_pts, color="red", label="Nets", density=True, alpha=0.5)
plt.legend()
plt.show()

# Points per game for each franchise in the 2014 season. The boolean masks
# are built from nba_2014 itself so they share an index with the Series
# being filtered, rather than relying on alignment against the full table.
knicks_pts_2014 = nba_2014.pts[nba_2014.fran_id == 'Knicks']
nets_pts_2014 = nba_2014.pts[nba_2014.fran_id == 'Nets']

# Difference between the two teams' average points in 2014
knicks_pts_mean_2014 = knicks_pts_2014.mean()
nets_pts_mean_2014 = nets_pts_2014.mean()
diff_means_2014 = knicks_pts_mean_2014 - nets_pts_mean_2014
print(diff_means_2014)

# One box per franchise showing the spread of points scored in 2010
sns.boxplot(x='fran_id', y='pts', data=nba_2010)
plt.show()

# Contingency table for 2010: game_result values as rows,
# game_location values as columns, cell = number of matching rows.
location_result_freq = pd.crosstab(nba_2010.game_result,
                                   nba_2010.game_location)
print(location_result_freq)

# Same table with each count divided by the total number of rows in nba_2010
location_result_proportions = location_result_freq / len(nba_2010)
print(location_result_proportions)

# Chi-square test of independence on the contingency table.
# chi2_contingency is already imported at the top of the file, so it is not
# re-imported here. The expected frequencies and the test statistic are
# printed; pval and dof are kept as module-level names for later use.
chi2, pval, dof, expected = chi2_contingency(location_result_freq)
print(expected)
print(chi2)

# Covariance matrix of forecast and point_diff over the 2010 rows
nba_2010_cov = np.cov(nba_2010['forecast'], nba_2010['point_diff'])
print(nba_2010_cov)

# Pearson correlation between the same two columns; only the coefficient is
# printed, the accompanying p-value is kept in `p`.
forecast_point_diff_corr, p = pearsonr(
    nba_2010['forecast'],
    nba_2010['point_diff'],
)
print(forecast_point_diff_corr)

# Scatter plot of forecast (x) against point_diff (y).
# The current figure is cleared first so earlier plots do not carry over.
plt.clf()
plt.scatter(x=nba_2010['forecast'], y=nba_2010['point_diff'])
plt.xlabel('Forecast')
plt.ylabel('Point Diff')
plt.show()