## NBA Players
1. **Title** - Visualizing NBA Players on Latent Space

2. **Features**
 - Player_Info
     - DISPLAY_FIRST_LAST
     - BIRTHDATE
     - HEIGHT
     - WEIGHT
     - SCHOOL
     - POSITION
     - JERSEY
 - Career
     - Teams Played
     - Points Per Game
     - Assists Per Game
     - 3 Points Per Game
     - Steals Per Game
     - NBA Titles
     - MVP Titles
 
3. **Ideas**
 - Categorical Embeddings
 - Graphs
 - UMAP for dim reduction
 - Altair interactive for visualization

In [1]:
#!pip install -U pandas

In [2]:
from tqdm import tqdm_notebook
from nba_api.stats.endpoints import commonplayerinfo, playercareerstats
from nba_api.stats.static import players
from nba_api.stats.static import teams
import pandas as pd
import numpy as np
from datetime import datetime
pd.__version__

'1.0.0'

In [3]:
# Find players by full name.
players.find_players_by_full_name('michael jordan')

[{'first_name': 'Michael',
  'full_name': 'Michael Jordan',
  'id': 893,
  'is_active': False,
  'last_name': 'Jordan'}]

In [4]:
# Find players by full name.
players.find_players_by_full_name('shaq')

[{'first_name': 'Shaquille',
  'full_name': 'Shaquille Harrison',
  'id': 1627885,
  'is_active': True,
  'last_name': 'Harrison'},
 {'first_name': 'Shaquille',
  'full_name': "Shaquille O'Neal",
  'id': 406,
  'is_active': False,
  'last_name': "O'Neal"}]

In [5]:
# Find teams by full name.
teams.find_teams_by_full_name('mav')

[{'abbreviation': 'DAL',
  'city': 'Dallas',
  'full_name': 'Dallas Mavericks',
  'id': 1610612742,
  'nickname': 'Mavericks',
  'state': 'Texas',
  'year_founded': 1980}]

In [50]:
## Proxy Config
proxy_config = 'http://localhost:3128'

In [51]:
from nba_api.stats.endpoints import commonplayerinfo
jordan_id = 893
player_info = commonplayerinfo.CommonPlayerInfo(player_id = jordan_id, proxy=proxy_config, timeout=200)

In [52]:
player_info.available_seasons.get_json()

'{"headers": ["SEASON_ID"], "data": [["21984"], ["31984"], ["41984"], ["21985"], ["41985"], ["21986"], ["31986"], ["41986"], ["21987"], ["31987"], ["41987"], ["21988"], ["31988"], ["41988"], ["21989"], ["31989"], ["41989"], ["21990"], ["31990"], ["41990"], ["21991"], ["31991"], ["41991"], ["21992"], ["31992"], ["41992"], ["21994"], ["41994"], ["21995"], ["31995"], ["41995"], ["21996"], ["31996"], ["41996"], ["11997"], ["21997"], ["31997"], ["41997"], ["12001"], ["22001"], ["32001"], ["12002"], ["22002"], ["32002"]]}'

In [53]:
player_info.common_player_info.get_data_frame()

Unnamed: 0,PERSON_ID,FIRST_NAME,LAST_NAME,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FI_LAST,BIRTHDATE,SCHOOL,COUNTRY,LAST_AFFILIATION,...,TEAM_CITY,PLAYERCODE,FROM_YEAR,TO_YEAR,DLEAGUE_FLAG,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER
0,893,Michael,Jordan,Michael Jordan,"Jordan, Michael",M. Jordan,1963-02-17T00:00:00,North Carolina,USA,North Carolina/USA,...,Chicago,michael_jordan,1984,2002,N,Y,Y,1984,1,3


In [54]:
id_num = 893
player_info = commonplayerinfo.CommonPlayerInfo(player_id = id_num,  proxy=proxy_config, timeout=50)
player_info_df = player_info.common_player_info.get_data_frame()
player_info_df = player_info_df.replace(r'^\s*$', pd.NA ,regex=True).replace('',pd.NA)

In [55]:
type(player_info_df['HEIGHT'].notna().item())

bool

In [56]:
def return_player_info(id):
    player_info = commonplayerinfo.CommonPlayerInfo(player_id = id, proxy=proxy_config, timeout=50)
    player_info_df = player_info.common_player_info.get_data_frame()
    
    ## Replace Empty String w/ None
    player_info_df = player_info_df.replace(r'^\s*$', pd.NA ,regex=True).replace('',pd.NA)

    if player_info_df['HEIGHT'].notna().item():
        player_info_df['HEIGHT'] = player_info_df['HEIGHT'].apply(lambda x: x.replace("-",".") )
        
    if player_info_df['WEIGHT'].notna().item():
        player_info_df['WEIGHT'] = player_info_df['WEIGHT'].apply(lambda x: int(x))
    
    data = {
        "DISPLAY_FIRST_LAST": player_info_df['DISPLAY_FIRST_LAST'].item(),
        "PERSON_ID": player_info_df['PERSON_ID'].item(),
        "BIRTHDATE": datetime.strptime(player_info_df['BIRTHDATE'].item().split('T')[0],"%Y-%m-%d"),
        "HEIGHT": player_info_df['HEIGHT'].item(),
        "WEIGHT": player_info_df['WEIGHT'].item(),
        "SCHOOL": player_info_df['SCHOOL'].item(),
        "POSITION": player_info_df['POSITION'].item(),
        "JERSEY": player_info_df['JERSEY'].item() 
    }
    return data

In [57]:
return_player_info(893)

{'BIRTHDATE': datetime.datetime(1963, 2, 17, 0, 0),
 'DISPLAY_FIRST_LAST': 'Michael Jordan',
 'HEIGHT': '6.6',
 'JERSEY': '23',
 'PERSON_ID': 893,
 'POSITION': 'Guard',
 'SCHOOL': 'North Carolina',
 'WEIGHT': 216}

In [58]:
player_name = "Michael Jordan"
return_player_info(players.find_players_by_full_name(player_name)[0]['id'])

{'BIRTHDATE': datetime.datetime(1963, 2, 17, 0, 0),
 'DISPLAY_FIRST_LAST': 'Michael Jordan',
 'HEIGHT': '6.6',
 'JERSEY': '23',
 'PERSON_ID': 893,
 'POSITION': 'Guard',
 'SCHOOL': 'North Carolina',
 'WEIGHT': 216}

In [59]:
player_name = "Shaquille O'Neal"
return_player_info(players.find_players_by_full_name(player_name)[0]['id'])

{'BIRTHDATE': datetime.datetime(1972, 3, 6, 0, 0),
 'DISPLAY_FIRST_LAST': "Shaquille O'Neal",
 'HEIGHT': '7.1',
 'JERSEY': '34',
 'PERSON_ID': 406,
 'POSITION': 'Center',
 'SCHOOL': 'Louisiana State',
 'WEIGHT': 325}

In [60]:
player_name = "Larry Bird"
return_player_info(players.find_players_by_full_name(player_name)[0]['id'])

{'BIRTHDATE': datetime.datetime(1956, 12, 7, 0, 0),
 'DISPLAY_FIRST_LAST': 'Larry Bird',
 'HEIGHT': '6.9',
 'JERSEY': '33',
 'PERSON_ID': 1449,
 'POSITION': 'Forward',
 'SCHOOL': 'Indiana State',
 'WEIGHT': 220}

In [61]:
player_name = "Magic Johnson"
return_player_info(players.find_players_by_full_name(player_name)[0]['id'])

{'BIRTHDATE': datetime.datetime(1959, 8, 14, 0, 0),
 'DISPLAY_FIRST_LAST': 'Magic Johnson',
 'HEIGHT': '6.9',
 'JERSEY': '32',
 'PERSON_ID': 77142,
 'POSITION': 'Forward-Guard',
 'SCHOOL': 'Michigan State',
 'WEIGHT': 220}

In [62]:
return_player_info(1626147)

  mask = arr == x


{'BIRTHDATE': datetime.datetime(1993, 11, 19, 0, 0),
 'DISPLAY_FIRST_LAST': 'Justin Anderson',
 'HEIGHT': <NA>,
 'JERSEY': <NA>,
 'PERSON_ID': 1626147,
 'POSITION': <NA>,
 'SCHOOL': 'Virginia',
 'WEIGHT': <NA>}

## List Players

In [63]:
from nba_api.stats.static import players

In [71]:
active = players.get_active_players()
inactive = players.get_inactive_players()
active_ids = [x['id'] for x in active]
inactive_ids = [x['id'] for x in inactive]
len(active_ids), len(inactive_ids)

(519, 3982)

## Get Carrier info

In [72]:
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.endpoints import playoffpicture
from nba_api.stats.endpoints import  playerawards
from nba_api.stats.endpoints import  playerprofilev2

In [73]:
def return_career_info(id):
    
    career = playercareerstats.PlayerCareerStats(player_id = id, proxy=proxy_config, get_request=True, timeout=200)
    career_df_detailed = career.get_data_frames()[0]
    career_df_summary = career.get_data_frames()[1]
    awards = playerawards.PlayerAwards(id, proxy=proxy_config)
    
    data = {
        "GP":  career_df_summary['GP'].item(),
        "PTS":  career_df_summary['PTS'].item(),
        "FG3M": career_df_summary['FG3M'].item(),
        "REB": career_df_summary['REB'].item(),
        "OREB": career_df_summary['OREB'].item(),
        "DREB": career_df_summary['DREB'].item(),
        "AST": career_df_summary['AST'].item(),
        "STL": career_df_summary['STL'].item(),
        "BLK": career_df_summary['BLK'].item(),
        "TOV": career_df_summary['TOV'].item(),
        "PF": career_df_summary['PF'].item(),      

        "TEAMS_PLAYED": list(career_df_detailed['TEAM_ABBREVIATION'].unique()),
        "AWARDS":  list(awards.get_data_frames()[0]['DESCRIPTION'].unique())
    } 
    return data

In [74]:
### Michael Jordan 893
# Magic Johnson 77142
# Larry Bird 1449
return_career_info(1449)

{'AST': 5695,
 'AWARDS': ['NBA Player of the Week',
  'NBA Sporting News Rookie of the Year',
  'Hall of Fame Inductee',
  'All-Defensive Team',
  'Olympic Gold Medal',
  'All-NBA',
  'NBA Sporting News Most Valuable Player of the Year',
  'All-Rookie Team',
  'NBA Player of the Month',
  'NBA Finals Most Valuable Player',
  'NBA Rookie of the Year',
  'NBA Most Valuable Player',
  'NBA All-Star Most Valuable Player'],
 'BLK': 755,
 'DREB': 7217,
 'FG3M': 649,
 'GP': 897,
 'OREB': 1757,
 'PF': 2279,
 'PTS': 21791,
 'REB': 8974,
 'STL': 1556,
 'TEAMS_PLAYED': ['BOS'],
 'TOV': 2816}

In [75]:
return_career_info(1626147)

{'AST': 143,
 'AWARDS': [],
 'BLK': 69,
 'DREB': 420,
 'FG3M': 138,
 'GP': 219,
 'OREB': 136,
 'PF': 258,
 'PTS': 1157,
 'REB': 556,
 'STL': 97,
 'TEAMS_PLAYED': ['DAL', 'PHI', 'TOT', 'ATL', 'BKN'],
 'TOV': 129}

## DataFrame - Get all Players

In [76]:
id = 893
mj_info = return_player_info(id)
mj_career = return_career_info(id)
all_cols = list(mj_career.keys()) + list(mj_info.keys())
all_cols

['GP',
 'PTS',
 'FG3M',
 'REB',
 'OREB',
 'DREB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'TEAMS_PLAYED',
 'AWARDS',
 'DISPLAY_FIRST_LAST',
 'PERSON_ID',
 'BIRTHDATE',
 'HEIGHT',
 'WEIGHT',
 'SCHOOL',
 'POSITION',
 'JERSEY']

In [77]:
all_player_df = pd.DataFrame(columns= all_cols)
all_player_df

Unnamed: 0,GP,PTS,FG3M,REB,OREB,DREB,AST,STL,BLK,TOV,...,TEAMS_PLAYED,AWARDS,DISPLAY_FIRST_LAST,PERSON_ID,BIRTHDATE,HEIGHT,WEIGHT,SCHOOL,POSITION,JERSEY


In [78]:
all_list = active_ids + inactive_ids

In [79]:
# player_info = return_player_info(id)
# player_career = return_career_info(id)
# entry = dict(player_info, **player_career)
# entry.update(player_career)
# all_player_df = all_player_df.append(entry, ignore_index = True)
# all_player_df

In [80]:
## Adding Active Ids
for i in tqdm_notebook(all_list):
    try:
        player_info = return_player_info(i)
        player_career = return_career_info(i)
        entry = dict(player_info, **player_career)
        entry.update(player_career)
        all_player_df = all_player_df.append(entry, ignore_index = True)
        
    except:
        pass

  mask = arr == x





In [81]:
all_player_df

Unnamed: 0,GP,PTS,FG3M,REB,OREB,DREB,AST,STL,BLK,TOV,...,TEAMS_PLAYED,AWARDS,DISPLAY_FIRST_LAST,PERSON_ID,BIRTHDATE,HEIGHT,WEIGHT,SCHOOL,POSITION,JERSEY
0,515,5020,0,3890,1776,2114,593,452,517,737,...,[OKC],[All-Rookie Team],Steven Adams,203500,1993-07-20,6.11,265,Pittsburgh,Center,12
1,204,2052,4,1529,421,1108,546,166,168,331,...,[MIA],[NBA Player of the Week],Bam Adebayo,1628389,1997-07-18,6.9,255,Kentucky,Center-Forward,13
2,1000,19552,180,8347,2679,5668,1977,729,1106,1569,...,"[POR, SAS]","[NBA Player of the Week, All-NBA, All-Rookie T...",LaMarcus Aldridge,200746,1985-07-19,6.11,250,Texas,Center-Forward,12
3,41,211,40,80,8,72,74,11,7,40,...,[NOP],[],Nickeil Alexander-Walker,1629638,1998-09-02,6.5,205,Virginia Tech,Guard,0
4,68,432,65,89,8,81,68,12,7,56,...,"[UTA, MEM]",[],Grayson Allen,1628960,1995-10-08,6.4,198,Duke,Guard,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4471,19,23,0,35,11,24,4,2,5,3,...,[ORL],[],Stephen Zimmerman,1627757,1996-09-09,7.0,240,UNLV,Center,33
4472,98,458,70,256,28,228,82,35,31,83,...,[CHI],[],Paul Zipser,1627835,1994-02-18,6.8,226,,Forward,16
4473,7,2,0,8,3,5,1,1,3,4,...,[DET],[],Jim Zoet,78647,1953-12-30,7.1,240,Kent State,Center,34
4474,53,118,,46,,,73,,,,...,[MIL],[],Bill Zopf,78648,1948-06-07,6.1,170,Duquesne,Guard,6


In [86]:
!ls

1.text_classifier_roberta.ipynb
1.text_classifier_roberta_NEW.ipynb
1.text_classifier_roberta_datsaset_resampler.ipynb
2.uncertainty_swag.ipynb
[1m[36m2017-06-custom-intent-engines[m[m
3.causality_review.ipynb
4.dpp_diversity_phrases.ipynb
4.dpp_image.ipynb
4.nips2019_papers.ipynb
4.nips2019_papers_simple_altair.ipynb
4.sample_dpp.ipynb
5.causal_inference_resumés.ipynb
5.dowhy_tutorial.ipynb
5.mediation_analysis.ipynb
6.gpytorch_uncertainty.ipynb
6.gpytorch_uncertainty_multiclass_cifar.ipynb
6.gpytorch_uncertainty_text_classifier.ipynb
6.pymc3_gp_classification_text.ipynb
6.pyro_text_classification.ipynb
7.VOGN_toy_example.ipynb
7.uncertainty_vogn.ipynb
7.uncertainty_vogn_initial.ipynb
8.nba_entity_embeddings.ipynb
8.propensity_score_initial.ipynb
all_player_df.pkl
causal_model.png
intent_186.csv
intents_phrases_183.pkl
intents_phrases_186.pkl
model_elmo_swag_uncertainty.pth
nips_2018.pkl
nips_2018_bert.pkl
nips_2018_elmo.pkl
[1m[36mtmp[m[m


In [88]:
with open("all_player_df.pkl", 'wb') as f:
    pickle.dump((all_player_df), f)

In [90]:
!ls *.pkl

all_player_df.pkl       intents_phrases_186.pkl nips_2018_bert.pkl
intents_phrases_183.pkl nips_2018.pkl           nips_2018_elmo.pkl


In [91]:
import pickle

with open("all_player_df.pkl",'rb') as f:
    testin = pickle.load(f)

In [93]:
testin[testin['DISPLAY_FIRST_LAST']=='Michael Jordan']

Unnamed: 0,GP,PTS,FG3M,REB,OREB,DREB,AST,STL,BLK,TOV,...,TEAMS_PLAYED,AWARDS,DISPLAY_FIRST_LAST,PERSON_ID,BIRTHDATE,HEIGHT,WEIGHT,SCHOOL,POSITION,JERSEY
2355,1072,32292,581,6672,1668,5004,5633,2514,893,2924,...,"[CHI, WAS]","[NBA Player of the Month, All-NBA, All-Defensi...",Michael Jordan,893,1963-02-17,6.6,216,North Carolina,Guard,23


## Sources
1. https://towardsdatascience.com/categorical-embedding-and-transfer-learning-dd3c4af6345d
2. https://towardsdatascience.com/link-prediction-with-neo4j-part-2-predicting-co-authors-using-scikit-learn-78b42356b44c  
3. https://medium.com/@davidheffernan_99410/an-introduction-to-using-categorical-embeddings-ee686ed7e7f9  
4. https://medium.com/@george.drakos62/decoded-entity-embeddings-of-categorical-variables-in-neural-networks-1d2468311635  

## Implementations
1. https://www.kaggle.com/keremt/pytorch-entity-embeddings
2. https://pypi.org/project/entity-embeddings-categorical/

## NBA API
1. https://pypi.org/project/nba-api/

## UMAP
1. https://umap-learn.readthedocs.io/