-
Notifications
You must be signed in to change notification settings - Fork 1
/
NBA.py
293 lines (236 loc) · 9.02 KB
/
NBA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
#!/usr/bin/env python
# coding: utf-8
import plotly.express as px
import streamlit as st
from nba_api.stats.static import players, teams
from pandas import DataFrame, concat
# from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.preprocessing import OrdinalEncoder
import datetime
from interfaces.nba_stats import fetch_data_with_delays, fetch_team_data_with_delays
from NBA_helpers import clean_df
# dummy data below
#region
# Example team and player setup
# team_ids = {"Lakers": "1610612747", "Nuggets": "1610612743"}
# player_names = [
# "Jamal Murray",
# "Nikola Jokic",
# "Kentavious Caldwell",
# "Paul George",
# "Mason Plumlee",
# ]
#endregion
# Define stats options
# Column names offered in the player view's "Select Stat" dropdown.
player_stats = [
    "MIN", "FGM", "FGA", "FG_PCT",
    "FG3M", "FG3A", "FG3_PCT",
    "FTM", "FTA", "FT_PCT",
    "OREB", "DREB", "REB",
    "AST", "STL", "BLK", "TOV", "PF",
    "PTS", "PLUS_MINUS",
]
# Column names offered in the team view's "Select Team Stat" dropdown.
team_stats = [
    "W", "L", "W_PCT",
    "PTS", "REB", "AST",
    "FG_PCT", "FG3_PCT", "FT_PCT",
    "STL", "BLK", "TOV", "PF",
]
# Lookup tables built from the nba_api.stats.static team and player lists.
team_data = teams.get_teams()
# team full name -> team id
Teams_IDs = {entry["full_name"]: entry["id"] for entry in team_data}
# Per-team data store, filled on demand by get_team_data.
Team_Dict = {}

player_data = players.get_players()
# Only players flagged "is_active" are offered in the UI.
active_players = [entry for entry in player_data if entry["is_active"]]
# player full name -> player id (active players only)
Player_IDs = {entry["full_name"]: entry["id"] for entry in active_players}
# Per-player data store, filled on demand by get_player_data.
Player_Dict = {}
# Dirty Session State Caching
# region
# Mirror the module-level tables into Streamlit's session state. The writes
# are unconditional, so each script run replaces whatever was stored before.
st.session_state.update(
    {
        "Team_Dict": Team_Dict,
        "Player_Dict": Player_Dict,
        "Teams_IDs": Teams_IDs,
        "Player_IDs": Player_IDs,
        "player_stats": player_stats,
        "active_players": active_players,
    }
)
# endregion
# streamlit helper functions
#region
# check if key is in session state, if not add it
# useful for updating session state with persistence without needless updates
def update_session_state(key, variable):
    """Write ``variable`` to ``st.session_state[key]`` only when it is needed.

    The value is stored when ``key`` is not yet present, or when the value
    already stored under ``key`` compares unequal to ``variable``. Otherwise
    the session state is left untouched.
    """
    state = st.session_state
    is_missing = key not in state
    if is_missing or variable != state[key]:
        state[key] = variable
#endregion
# Data fetching functions
#region
@st.cache_data
def get_team_data(team_name):
    """Fetch and clean one team's data and record it in ``Team_Dict``.

    The team id is looked up in ``Teams_IDs``, the data is fetched with
    ``fetch_team_data_with_delays`` and passed through ``clean_df`` before
    being stored under ``team_name`` in the module-level ``Team_Dict``.

    Returns:
        The module-level ``Team_Dict`` after the new entry was added.

    Raises:
        KeyError: if ``team_name`` is not a key of ``Teams_IDs`` or is
            missing from the fetched result.
    """
    # NOTE(review): st.cache_data is documented to skip the function body on
    # a cache hit, so the Team_Dict mutation below only happens on a miss --
    # confirm callers rely on the return value rather than the side effect.
    team_id = Teams_IDs[team_name]
    fetched = fetch_team_data_with_delays({team_name: team_id})
    Team_Dict[team_name] = clean_df(fetched[team_name])
    return Team_Dict
@st.cache_data
def get_player_data(player_name):
    """Fetch and clean one player's data and record it in ``Player_Dict``.

    The player id is looked up in ``Player_IDs``, the data is fetched with
    ``fetch_data_with_delays`` and passed through ``clean_df`` before being
    stored under ``player_name`` in the module-level ``Player_Dict``.

    Returns:
        The module-level ``Player_Dict`` after the new entry was added.

    Raises:
        KeyError: if ``player_name`` is not a key of ``Player_IDs`` or is
            missing from the fetched result.
    """
    # NOTE(review): st.cache_data is documented to skip the function body on
    # a cache hit, so the Player_Dict mutation below only happens on a miss --
    # confirm callers rely on the return value rather than the side effect.
    player_id = Player_IDs[player_name]
    fetched = fetch_data_with_delays({player_name: player_id})
    Player_Dict[player_name] = clean_df(fetched[player_name])
    return Player_Dict
#endregion
def date_slider(dates):
    """Render the "Select Date Range" slider, preset to the span of ``dates``.

    Args:
        dates: object whose ``min()`` and ``max()`` results provide
            ``to_pydatetime()`` (presumably a pandas datetime Series --
            confirm against ``clean_df``).

    Returns:
        Whatever ``st.slider`` returns for the (start, end) range selection.
    """
    earliest = dates.min().to_pydatetime()
    latest = dates.max().to_pydatetime()
    # NOTE(review): min_value/max_value are not passed, so the slider's outer
    # bounds come from Streamlit's defaults rather than from ``dates`` --
    # confirm that is intended.
    return st.slider(
        "Select Date Range",
        value=(earliest, latest),
        step=datetime.timedelta(days=1),
        format='MMM DD, YYYY',
    )
# Fetch game log data for teams with delays
@st.cache_data
def _fetch_all_team_data(team_ids):
    """Fetch the data for every team in ``team_ids`` through Streamlit's cache.

    Streamlit re-executes this script on every widget interaction; without
    caching, the delayed fetch for all teams would be repeated on each rerun.

    Args:
        team_ids: mapping of team full name -> team id (``Teams_IDs``).

    Returns:
        The result of ``fetch_team_data_with_delays(team_ids)``.
    """
    return fetch_team_data_with_delays(team_ids)


team_data = _fetch_all_team_data(Teams_IDs)

st.title("NBA Visualization")
page_select = st.selectbox(
    "Select Page",
    ["Simple Graphs", "ML Models", "Simulations"],
    index=0,
)
if page_select == "Simple Graphs":
    st.write("Simple Graphs Page")
    # Toggle between Player and Team Stats
    view_mode = st.radio("View Mode", ["Player Stats", "Team Stats"])
    # ML methods should be made available inside of an ML Specific view
    # region
    # """
    # knn = st.checkbox("K-Nearest Neighbors Clustering")
    # svc = st.checkbox("Support Vector Machine")
    # kmn = st.checkbox("K-Means Clustering") """
    # Let's not fetch data dynamically, the only advantage that will provide is for
    # streaming; that should be a premium feature of our end product
    # Also, I think this current view_mode branching makes the code a bit long unnecessarily
    # endregion

    def view_team_stats(Team_Dict=Team_Dict):
        """Render the team selectors and return what is needed to plot.

        Args:
            Team_Dict: mapping of team name -> data frame; defaults to the
                module-level store and is replaced by the result of
                ``get_team_data`` when the selected team is missing.

        Returns:
            ``(selected_team, selected_stat, plot_team_data)`` where
            ``plot_team_data(team, stat)`` returns a Plotly figure, or
            ``None`` after reporting a problem with ``st.error``.
        """
        selected_stat = st.selectbox("Select Team Stat", team_stats)
        selected_team = st.selectbox("Select Team", list(Teams_IDs))
        if selected_team not in Team_Dict:
            Team_Dict = get_team_data(selected_team)

        def plot_team_data(team, stat):
            """Return a line chart of ``stat`` per game for ``team``, or None."""
            df = Team_Dict[team]
            if df.empty:
                st.error(f"No data available for {team}. Please try another team.")
                return None
            if stat not in df.columns:
                st.error(f"Stat {stat} not available. Please choose another stat.")
                return None
            # The player view reads its dates from "GameDate" on frames that
            # went through the same clean_df, so accept either spelling here
            # instead of failing inside Plotly on a missing column.
            date_col = next(
                (col for col in ("GAME_DATE", "GameDate") if col in df.columns),
                None,
            )
            if date_col is None:
                st.error(f"No game date column available for {team}.")
                return None
            return px.line(
                df,
                x=date_col,
                y=stat,
                title=f"{team} {stat} by Game",
            )

        return (selected_team, selected_stat, plot_team_data)

    def view_player_stats(Player_Dict=Player_Dict):
        """Render the player selectors and return what is needed to plot.

        Args:
            Player_Dict: mapping of player name -> data frame; defaults to
                the module-level store and is replaced by the result of
                ``get_player_data`` when the selected player is missing.

        Returns:
            ``(selected_player, selected_stat, plot_data)`` where
            ``plot_data(player, stat)`` returns a Plotly figure, or ``None``
            after reporting a problem with ``st.error``.
        """
        selected_stat = st.selectbox("Select Stat", player_stats)
        selected_player = st.selectbox("Select Player", list(Player_IDs))
        if selected_player not in Player_Dict:
            Player_Dict = get_player_data(selected_player)
        Player_df = Player_Dict[selected_player]
        # An empty frame has no dates to build a slider from; skip the slider
        # and let plot_data report the missing data instead of crashing here.
        slider = None if Player_df.empty else date_slider(Player_df["GameDate"])

        def plot_data(player, stat):
            """Return a W/L-coloured scatter of ``stat`` per game, or None."""
            df = Player_Dict[player]
            if slider is not None:
                in_range = (df["GameDate"] >= slider[0]) & (df["GameDate"] <= slider[1])
                df = df[in_range]
            if df.empty:
                st.error(f"No data available for {player}. Please try another player.")
                return None
            # Same guard as the team view: a missing column would otherwise
            # surface as an exception from Plotly.
            if stat not in df.columns:
                st.error(f"Stat {stat} not available. Please choose another stat.")
                return None
            return px.scatter(
                df,
                x="GameDate",
                y=stat,
                color="WL",
                # Pin the colours to the values; a bare sequence is handed out
                # in order of first appearance, which could paint wins red.
                color_discrete_map={"W": "green", "L": "red"},
                # Fallback colours for any WL value other than "W"/"L".
                color_discrete_sequence=["red", "green"],
                title=f"{player} {stat} by Game",
            )

        return (selected_player, selected_stat, plot_data)

    if view_mode == "Player Stats":
        selection, selected_stat, plot_data = view_player_stats()
    elif view_mode == "Team Stats":
        selection, selected_stat, plot_data = view_team_stats()
    fig = plot_data(selection, selected_stat)
    # The plot functions return None once they have shown an st.error message;
    # there is nothing to draw in that case.
    if fig is not None:
        st.plotly_chart(fig)
elif page_select == "ML Models":
    # Imported lazily so the ML page module is only loaded when selected.
    import ML_Model_Page

    if ML_Model_Page.setup():
        ML_Model_Page.is_Player()
    else:
        st.write("Team Model Prediction Needed")
    # #region
    # i = 0
    # select_stat = st.selectbox("Select Input", player_stats)
    # if not select_obj in Player_Dict.keys():
    # Player_Dict = get_player_data(select_obj)
    # df = Player_Dict[select_obj]
    # X = concat([X, df[select_stat]], axis=1)
    # with col1:
    # txt = st.write(select_stat, key=f"stat_{i}")
    # with col2:
    # bt = st.button("Dead Code-Remove Stat")
    # st.write(X.columns)
    # return (txt, bt)
    # inputs.append(select_X())
    #endregion
elif page_select == "Simulations":
    st.write("Simulations Page")
# region
# else:
# Only fetch team data if the Team Stats view is selected
# fig = plot_team_data(selected_team, selected_stat)
# st.plotly_chart(fig)
# Below is dead code for KNN and SVC, I think we should have a separate view for ML methods
# """
# if knn:
# # Fit a KNN model to the data
# e = OrdinalEncoder()
# dt = e.fit_transform(df['GameDate'].to_numpy().reshape(1, -1))
# m = KNeighborsClassifier(n_neighbors=2)
# tar = df["WL"].apply(lambda x: 1 if "W" else 0).to_numpy().reshape(-1, 1)
# m.fit(concat([dt, df[stat]], axis=1), tar)
# y_pred = m.predict(tar)
# df["Predicted"] = y_pred
# DecisionBoundaryDisplay.from_estimator(m,
# df[[stat, dt]],
# response_method="predict",
# ax=fig
# )
# # SVC is going to work better as its own view, so you can add >2 params to fit.
# # Would multiclassification add value?
# elif svc:
# # Fit a Support Vector Machine model to the data
# m = SVC(kernel="linear")
# tar = df["WL"].apply(lambda x: 1 if "W" else 0).to_numpy().reshape(-1, 1)
# m.fit(df[[stat]], tar)
# y_pred = m.predict(tar)
# df["Predicted"] = y_pred
# DecisionBoundaryDisplay.from_estimator(m,
# df[["GameDate", stat]],
# response_method="predict",
# ax=fig)
# """
# endregion