In [1]:

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import seaborn as sns
# Current seaborn colour palette; not referenced again in the cells visible here.
color = sns.color_palette()
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Load the 2017 attendance CSV, then preview its first rows as the cell output.
attendance_df = pd.read_csv("../input/nba_2017_attendance.csv")
attendance_df.head()

In [None]:
# Load the 2017 endorsements CSV, then preview its first rows as the cell output.
# This frame is not used by any later cell visible here.
endorsement_df = pd.read_csv("../input/nba_2017_endorsements.csv")
endorsement_df.head()


In [None]:
# Load the 2017 team valuations CSV, then preview its first rows as the cell output.
valuations_df = pd.read_csv("../input/nba_2017_team_valuations.csv")
valuations_df.head()


In [None]:
# Load the 2017 salary CSV, then preview its first rows as the cell output.
# This frame is not used by any later cell visible here.
salary_df = pd.read_csv("../input/nba_2017_salary.csv")
salary_df.head()

In [None]:
# Load the 2017 "pie" CSV, then preview its first rows as the cell output.
# This frame is not used by any later cell visible here.
pie_df = pd.read_csv("../input/nba_2017_pie.csv")
pie_df.head()

In [None]:
# Load the 2017 real plus-minus CSV, then preview its first rows as the cell output.
# This frame is not used by any later cell visible here.
plus_minus_df = pd.read_csv("../input/nba_2017_real_plus_minus.csv")
plus_minus_df.head()


In [None]:
# Load the 2017 "br" stats CSV, then preview its first rows as the cell output.
# This frame is not used by any later cell visible here.
br_stats_df = pd.read_csv("../input/nba_2017_br.csv")
br_stats_df.head()


In [None]:
# Load the 2017 ELO CSV, then preview its first rows as the cell output.
elo_df = pd.read_csv("../input/nba_2017_elo.csv")
elo_df.head()


In [None]:
# Inner-join attendance (left) with valuations (right) on the shared TEAM key;
# teams missing from either frame are dropped.
attendance_valuation_df = pd.merge(
    attendance_df,
    valuations_df,
    how="inner",
    on="TEAM",
)

In [None]:
# Preview the first five rows of the merged attendance/valuation frame.
attendance_valuation_df.head(n=5)


In [None]:
# `display` and `HTML` are part of the public IPython.display module; importing
# them from IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML

# Inject CSS that stretches the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Pairwise relationship grid over the merged frame, coloured per TEAM value.
sns.pairplot(attendance_valuation_df, hue="TEAM")

In [None]:
# Correlate numeric columns only: the frame also carries the string TEAM key,
# and DataFrame.corr() raises on non-numeric columns in pandas >= 2.0
# (older versions silently dropped them, so results there are unchanged).
corr = attendance_valuation_df.select_dtypes(include="number").corr()

# Heatmap of the correlation matrix, labelled with the column names on both axes.
sns.heatmap(corr,
            xticklabels=corr.columns.values,
            yticklabels=corr.columns.values)

In [None]:
# Reshape to a TEAM x AVG grid whose cells hold VALUE_MILLIONS.
# The arguments are passed by keyword because DataFrame.pivot no longer
# accepts them positionally (keyword-only since pandas 2.0).
valuations = attendance_valuation_df.pivot(
    index="TEAM",
    columns="AVG",
    values="VALUE_MILLIONS",
)



In [None]:
# Take the axes straight from plt.subplots(): a separate plt.axes() call would
# add a second, overlapping axes to the same figure on current matplotlib.
fig, ax = plt.subplots(figsize=(20,15))
ax.set_title("NBA Team AVG Attendance vs Valuation in Millions:  2016-2017 Season")
# Draw on that axes explicitly instead of relying on pyplot's "current axes".
sns.heatmap(valuations, linewidths=.5, annot=True, fmt='g', ax=ax)

In [None]:
# Ordinary least squares: regress VALUE_MILLIONS on AVG using the merged frame.
valuation_model = smf.ols(formula='VALUE_MILLIONS ~AVG', data=attendance_valuation_df)
results = valuation_model.fit()


In [None]:
# Print the text summary of the fitted regression held in `results`.
valuation_summary = results.summary()
print(valuation_summary)


In [None]:
# Residual plot for a regression of VALUE_MILLIONS (y) on AVG (x).
sns.residplot(
    data=attendance_valuation_df,
    x="AVG",
    y="VALUE_MILLIONS",
)


In [None]:
# Inner-join the attendance/valuation frame (left) with the ELO frame (right)
# on the shared TEAM key; teams missing from either side are dropped.
attendance_valuation_elo_df = pd.merge(
    attendance_valuation_df,
    elo_df,
    how="inner",
    on="TEAM",
)


In [None]:
# Preview the first five rows of the attendance/valuation/ELO frame.
attendance_valuation_elo_df.head(n=5)


In [None]:
# Correlate numeric columns only: string columns such as TEAM and CONF make
# DataFrame.corr() raise in pandas >= 2.0 (older versions silently dropped
# them, so results there are unchanged).
corr_elo = attendance_valuation_elo_df.select_dtypes(include="number").corr()

# Take the axes straight from plt.subplots(): a separate plt.axes() call would
# add a second, overlapping axes to the same figure on current matplotlib.
fig, ax = plt.subplots(figsize=(20,15))
ax.set_title("NBA Team Correlation Heatmap:  2016-2017 Season (ELO, AVG Attendance, VALUATION IN MILLIONS)")
# Draw on that axes explicitly instead of relying on pyplot's "current axes".
sns.heatmap(corr_elo,
            xticklabels=corr_elo.columns.values,
            yticklabels=corr_elo.columns.values,
            ax=ax)

In [None]:
# Show the correlation matrix itself as the cell output (the numbers behind the heatmap).
corr_elo


In [None]:

# Scatter plus per-conference regression fit of AVG against ELO.
# `height` replaces the old `size` keyword, which seaborn renamed in 0.9 and
# later stopped accepting.
# Note: lmplot returns a seaborn FacetGrid, not a matplotlib Axes, despite the name `ax`.
ax = sns.lmplot(x="ELO", y="AVG", data=attendance_valuation_elo_df, hue="CONF", height=12)
ax.set(xlabel='ELO Score', ylabel='Average Attendence Per Game', title="NBA Team AVG Attendance vs ELO Ranking:  2016-2017 Season")


In [None]:
# Median ELO within each CONF group.
elo_by_conf = attendance_valuation_elo_df.groupby("CONF")["ELO"]
elo_by_conf.median()


In [None]:
# Median AVG within each CONF group.
avg_by_conf = attendance_valuation_elo_df.groupby("CONF")["AVG"]
avg_by_conf.median()


In [None]:
# Ordinary least squares: regress AVG on ELO.
# This rebinds `results`, replacing the earlier VALUE_MILLIONS ~ AVG fit.
attendance_model = smf.ols(formula='AVG ~ELO', data=attendance_valuation_elo_df)
results = attendance_model.fit()


In [None]:
# Print the text summary of whichever regression `results` currently holds.
attendance_summary = results.summary()
print(attendance_summary)


In [None]:
from sklearn.cluster import KMeans


In [None]:
# Three-cluster k-means estimator.
# random_state pins the centroid initialisation so cluster labels are the same
# on every run; a later cell selects rows by the literal label 1, which is
# only meaningful when the labelling is reproducible.
# n_init is set explicitly because its default differs across scikit-learn versions.
k_means = KMeans(n_clusters=3, n_init=10, random_state=42)


In [None]:
# Feature matrix for clustering: the AVG, ELO and VALUE_MILLIONS columns.
# NOTE(review): no scaling is applied in the visible cells, so these go into
# KMeans on their raw scales — confirm that is intended.
cluster_features = ["AVG", "ELO", "VALUE_MILLIONS"]
cluster_source = attendance_valuation_elo_df[cluster_features]


In [None]:
# Fit the estimator on the selected features and keep the value returned by fit().
kmeans = k_means.fit(X=cluster_source)


In [None]:
# Attach each row's fitted cluster label as a new `cluster` column.
# This modifies attendance_valuation_elo_df in place.
cluster_labels = kmeans.labels_
attendance_valuation_elo_df['cluster'] = cluster_labels


In [None]:
# Scatter of AVG against ELO coloured by k-means cluster; fit_reg=False
# suppresses the regression lines.
# `height` replaces the old `size` keyword, which seaborn renamed in 0.9 and
# later stopped accepting.
# Note: lmplot returns a seaborn FacetGrid, not a matplotlib Axes, despite the name `ax`.
ax = sns.lmplot(x="ELO", y="AVG", data=attendance_valuation_elo_df, hue="cluster", height=12, fit_reg=False)
ax.set(xlabel='ELO Score', ylabel='Average Attendence Per Game', title="NBA Team AVG Attendance vs ELO Ranking Clustered on ELO, AVG, VALUE_MILLIONS:  2016-2017 Season")

In [None]:
# Dump the fitted estimator's instance attributes for inspection.
vars(kmeans)


In [None]:
# Show the fitted cluster centres; the model was fit on the columns
# AVG, ELO, VALUE_MILLIONS, in that order.
kmeans.cluster_centers_


In [None]:
# Boolean mask marking the rows whose assigned cluster label equals 1.
cluster_1 = attendance_valuation_elo_df["cluster"].eq(1)


In [None]:
# Display only the rows selected by the cluster_1 mask.
attendance_valuation_elo_df.loc[cluster_1]
