In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo, cumestatsplayer
from nba_api.stats.endpoints import playergamelog

In [27]:
# read player names by command line
lst = ["Jamal Murray", "Nikola Jokic", "Kentavious Caldwell", "Paul George", "Mason Plumlee"]
lg = "00"

In [6]:
plyrs = players.get_players()

In [22]:
plyr_names = map(lambda y: y["full_name"], filter(lambda x: x['is_active'] == True, plyrs))

In [23]:
plyr_names = list(plyr_names)

# Player Search
* Can find `player_id` by Full Name
* DataBase of Player names can be quirky, maybe a regex?
* TODO:: Review Docstring for method to determine unexpected behavior (0 matches/multiple matches)

In [28]:
r = {nm : players.find_players_by_full_name(nm)[0]["id"] for nm in lst}
r

{'Jamal Murray': 1627750,
 'Nikola Jokic': 203999,
 'Kentavious Caldwell': 203484,
 'Paul George': 202331,
 'Mason Plumlee': 203486}

In [29]:
dat = {nm : playergamelog.PlayerGameLog(player_id=r[nm], season="2023").get_data_frames()[0] for nm in r.keys()}

Each item in this dictionary is a `{name : pandas DataFrame}` pair. The DataFrame `info()` output is below

In [5]:
pg = dat["Paul George"]
pg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61 entries, 0 to 60
Data columns (total 27 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   SEASON_ID        61 non-null     object 
 1   Player_ID        61 non-null     int64  
 2   Game_ID          61 non-null     object 
 3   GAME_DATE        61 non-null     object 
 4   MATCHUP          61 non-null     object 
 5   WL               61 non-null     object 
 6   MIN              61 non-null     int64  
 7   FGM              61 non-null     int64  
 8   FGA              61 non-null     int64  
 9   FG_PCT           61 non-null     float64
 10  FG3M             61 non-null     int64  
 11  FG3A             61 non-null     int64  
 12  FG3_PCT          61 non-null     float64
 13  FTM              61 non-null     int64  
 14  FTA              61 non-null     int64  
 15  FT_PCT           61 non-null     float64
 16  OREB             61 non-null     int64  
 17  DREB             6

# DataFrame Housekeeping
* Current
    * `WL` column is "L" or "W", object dtype. Map to 1:W, 0:L
    * `Matchup` contains opponent detail as well as home/away. Should attempt two RegEx matches in order to determine Home/Away status and then strip the opponent data. Return both pieces of info as a tuple, add each item in tuple to DataFrame. Once a stable method is in place, `git commit` then see how ChatGPT does
    * `GAME_DATE` might convert right into a DateTime object
    * `SEASON_ID` and `Game_ID` are nominal-numeric, unsure of what they could be used for... consult docs? TODO:: find docs


* Future
    * Video Available seems cool, will have to call `help` on library function that retrieves DataFrame

In [30]:
import re

def map_matchup(x):
    homeMatch = re.compile("(\w+) vs. (\w+)")
    awayMatch = re.compile("(\w+) @ (\w+)")
    if homeMatch.match(x):
        return ("Home", homeMatch.match(x).groups()[-1])
    elif awayMatch.match(x):
        return ("Away", awayMatch.match(x).groups()[-1])
    else:
        return None

In [31]:
def map_wl(x):
    if x == "W":
        return 1
    elif x == "L":
        return 0
    else:
        return None

In [32]:
def map_gamedate(x):
    return pd.to_datetime(x)

In [33]:
def clean_df(df):
    if "MATCHUP" in df.columns:
        res = df.MATCHUP.map(map_matchup)
        df["HomeAway"] = res.map(lambda x: x[0])
        df["Opponent"] = res.map(lambda x: x[1])
        df.drop(columns=["MATCHUP"], inplace=True, axis=1)
    elif "HomeAway" in df.columns and "Opponent" in df.columns:
        if "MATCHUP" in df.columns:
            df.drop(columns=["MATCHUP"], inplace=True, axis=1)
    else:
        print("Issue with DataFrame:\nMissing MATCHUP column\nNeed to re-query API")
    df["WL"] = df["WL"].map(map_wl)
    df["GameDate"] = df["GAME_DATE"].map(map_gamedate)
    return df

In [34]:
d = dict(
            zip(dat.keys(), list( map(clean_df, dat.values()) ))
        )

In [28]:
d.keys()

dict_keys(['Jamal Murray', 'Nikola Jokic', 'Kentavious Caldwell', 'Paul George', 'Mason Plumlee'])

In [None]:
fig, axes = plt.subplots(5)
plt.subplots_adjust(hspace=1.5)
for i in range(5):
    axes[i].hist(dat[i].PTS)
    axes[i].set_title(lst[i])

In [55]:
from dash import Dash, html, dcc, callback, Output, Input, State
import plotly.express as px

In [38]:
stats = ['MIN', 'FGM',
       'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
       'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
       'PLUS_MINUS']

In [68]:
df = d["Paul George"]

app = Dash("Player Viz")

app.layout = html.Div([
    html.Div([dcc.Dropdown(id="stat", options=stats, value="PTS")], id="stat_controls"),
    html.Div([dcc.Dropdown(id="player", options=[{"label": nm, "value": nm} for nm in lst], value=lst[0])], id="plyr_controls"),
    html.Button("Add Stat", id="stat_add"),
    html.Button("Remove Stat", id="stat_remove"),
    html.Button("Add Player", id="player_add"),
    html.Button("Remove Player", id="player_remove"),
    dcc.Graph(figure = {}, id="controls-and-graph")
])

@callback(
        Output("stat_controls", "children"),
        Input("stat_add", "n_clicks"),
        Input("stat_remove", "n_clicks"),
        State("stat_controls", "children"),
        allow_duplicate=True
)
def add_stats(y_clicks, n_clicks, curr_children):
    if y_clicks:
        return dcc.Dropdown(id="stat", options=stats, value=stats[0])
    elif n_clicks:
        if curr_children:
            curr_children.pop()
            return curr_children
        else:
            return []

    
@callback(
    Output(component_id="controls-and-graph", component_property="figure"),
    Input(component_id="player", component_property="value"),
    Input(component_id="stat", component_property="value")
)
def update_graph(player, stat):
    df = d[player]
    fig = px.line(df, x="GameDate", y=stat, title=f"{player} {stat} by Game")
    return fig

In [69]:
app.run(debug=True)

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[68], line 39, in update_graph(player=None, stat=None)
     33 @callback(
     34     Output(component_id="controls-and-graph", component_property="figure"),
     35     Input(component_id="player", component_property="value"),
     36     Input(component_id="stat", component_property="value")
     37 )
     38 def update_graph(player, stat):
---> 39     df = d[player]
        df =    SEASON_ID  Player_ID     Game_ID     GAME_DATE  WL  MIN  FGM  FGA  FG_PCT  \
0      22023     202331  0022300982  MAR 17, 2024   0   35    9   20   0.450   
1      22023     202331  0022300964  MAR 15, 2024   0   41   10   21   0.476   
2      22023     202331  0022300956  MAR 14, 2024   1   35   11   12   0.917   
3      22023     202331  0022300944  MAR 12, 2024   0   35    5   11   0.455   
4      22023     202331  0022300917  MAR 09, 2024   1   

# Some ungodly shit ig

In [None]:
from statsmodels.distributions.empirical_distribution import ECDF

In [None]:
denCDF = [ECDF(dat[i].PTS) for i in range (3)]

In [None]:
from statsmodels.distributions.copula.api import CopulaDistribution, GumbelCopula

In [None]:
jtden = CopulaDistribution(copula=GumbelCopula(), marginals=denCDF)

In [None]:
jtden.pdf([25,25,25])