## Install the packages

In [1]:
# %pip install nba_api
# %pip install pandas

## Import the libraries

In [1]:
import pandas as pd
import time

from nba_api.stats.static import players
from nba_api.stats.static import teams
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import playernextngames
from nba_api.stats.endpoints import commonplayerinfo

## Basic search parameters

In [2]:
# season = '2020-21'
# season = '2019-20'
season = '2021-22'
season_type = 'Regular Season'

## Get active players IDs

In [3]:
active_players = players.get_active_players()
active_players[0:2]

[{'id': 1630173,
  'full_name': 'Precious Achiuwa',
  'first_name': 'Precious',
  'last_name': 'Achiuwa',
  'is_active': True},
 {'id': 203500,
  'full_name': 'Steven Adams',
  'first_name': 'Steven',
  'last_name': 'Adams',
  'is_active': True}]

In [4]:
players_ids = [str(player['id']) for player in active_players]
players_ids[0:2]

['1630173', '203500']

In [5]:
len(players_ids)

587

## Get teams IDs

In [6]:
nba_teams_full_info = teams.get_teams()
nba_teams_full_info[:2]

[{'id': 1610612737,
  'full_name': 'Atlanta Hawks',
  'abbreviation': 'ATL',
  'nickname': 'Hawks',
  'city': 'Atlanta',
  'state': 'Atlanta',
  'year_founded': 1949},
 {'id': 1610612738,
  'full_name': 'Boston Celtics',
  'abbreviation': 'BOS',
  'nickname': 'Celtics',
  'city': 'Boston',
  'state': 'Massachusetts',
  'year_founded': 1946}]

In [7]:
nba_teams_short_info = [{'id': team['id'], 'full_name': team['full_name'], 'abbreviation': team['abbreviation']} for team in nba_teams_full_info]
nba_teams_short_info[0:2]

[{'id': 1610612737, 'full_name': 'Atlanta Hawks', 'abbreviation': 'ATL'},
 {'id': 1610612738, 'full_name': 'Boston Celtics', 'abbreviation': 'BOS'}]

In [8]:
nba_teams_ids = [team['id'] for team in nba_teams_full_info]
nba_teams_ids[0:2]

[1610612737, 1610612738]

## Get stats by game (player total)

In [9]:
columns = ['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE']

df = pd.DataFrame(columns=columns)

In [10]:
for idx, id in enumerate(players_ids):
    try:
        data = playergamelog.PlayerGameLog(
            player_id=id,
            season=season,
            season_type_all_star=season_type,
            league_id_nullable='00',
            timeout=10)
    
        df = pd.concat([df, data.get_data_frames()[0]])
        
        # Preventing timeout exceptions
        time.sleep(.600)
    except:
        print(f'There was some problem while gathering data.\nPlayer id: {id}\nIteration #{idx}\n')

df.head(3)

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22021,1630173,22101095,"MAR 24, 2022",TOR vs. CLE,W,22,3,10,0.3,...,4,4,1,0,0,0,2,10,8,1
1,22021,1630173,22101076,"MAR 21, 2022",TOR @ CHI,L,28,3,9,0.333,...,8,9,1,0,0,1,1,6,-11,1
2,22021,1630173,22101069,"MAR 20, 2022",TOR @ PHI,W,34,9,15,0.6,...,4,9,1,0,0,0,4,21,7,1


In [11]:
df.shape

(22803, 27)

In [12]:
print(f"The final dataset contais stats about {df['Player_ID'].nunique()} players")

The final dataset contais stats about 535 players


In [13]:
df_2021_22 = df.copy()

This difference in relation to the length of players_ids is acceptable, due various reasons, like the players who dont play any game during the season, or players who got hurt in the pre-season.

## Save the DataFrame

### 2020-21

In [16]:
# Pickle
df.to_pickle('df_2021_22.pkl')

# Unpickle
# df_2020_21 = pd.read_pickle('Datasets/df_all_players_raw.pkl')

The original name of df_2020_21 was df_all_players_raw (and the pickle file was named df_all_player_raw.pkl). This name was replaced because it was confusing.

### 2020-21

In [17]:
import pickle
import pandas as pd

# Renaming the dataframe from 2020-21
# Get the data from the previous file
# df_2020_21 = pd.read_pickle('Datasets/df_all_players_raw.pkl')

# Saving with the new name
# with open('Datasets/df_2020_21.pkl', 'wb') as f:
    # pickle.dump(df_2020_21, f)

# Unpickle
with open('Datasets/df_2020_21.pkl', 'rb') as f:
    df_2020_21 = pickle.load(f)


### 2019-20

In [18]:
import pickle
import pandas as pd

# Pickle
# with open('Datasets/df_2019_20.pkl', 'wb') as f:
    # pickle.dump(df_2019_20, f)

# Unpickle
with open('Datasets/df_2019_20.pkl', 'rb') as f:
    df_2019_20 = pickle.load(f)

### 2018-19

In [19]:
import pickle
import pandas as pd

# Pickle
# with open('Datasets/df_2018_19.pkl', 'wb') as f:
    # pickle.dump(df_2018_19, f)

# Unpickle
with open('Datasets/df_2018_19.pkl', 'rb') as f:
    df_2018_19 = pickle.load(f)

### Join the datasets

In [21]:
print(f'Shape of 2018-19 dataset: {df_2018_19.shape}')
print(f'Shape of 2019-20 dataset: {df_2019_20.shape}')
print(f'Shape of 2020-21 dataset: {df_2020_21.shape}')
print(f'Shape of 2021-22 dataset: {df_2021_22.shape}')

Shape of 2018-19 dataset: (18388, 27)
Shape of 2019-20 dataset: (18416, 27)
Shape of 2020-21 dataset: (21349, 27)
Shape of 2021-22 dataset: (22803, 27)


In [22]:
df = pd.concat([df_2018_19, df_2019_20, df_2020_21, df_2021_22], axis=0, ignore_index=True)
df.head(3)

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22018,203500,21801226,"APR 10, 2019",OKC @ MIL,W,12,1,5,0.2,...,0,2,0,0,1,0,1,4,-1,1
1,22018,203500,21801218,"APR 09, 2019",OKC vs. HOU,W,37,3,11,0.273,...,5,13,1,0,0,0,1,8,2,1
2,22018,203500,21801197,"APR 07, 2019",OKC @ MIN,W,27,4,5,0.8,...,4,6,1,1,0,3,6,8,31,1


In [23]:
df.shape

(80956, 27)

In [24]:
df_2018_19.shape[0] + df_2019_20.shape[0] + df_2020_21.shape[0] + df_2021_22.shape[0]

80956

In [25]:
df.isna().sum().sum()

62

In [26]:
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
df.isna().sum().sum()

0

In [28]:
df.head(2)

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22018,203500,21801226,"APR 10, 2019",OKC @ MIL,W,12,1,5,0.2,...,0,2,0,0,1,0,1,4,-1,1
1,22018,203500,21801218,"APR 09, 2019",OKC vs. HOU,W,37,3,11,0.273,...,5,13,1,0,0,0,1,8,2,1


### Save the 'final' dataset

In [29]:
import pickle
import pandas as pd

# Pickle
with open('Datasets/df_2018_to_2022_raw.pkl', 'wb') as f:
    pickle.dump(df, f)

# Unpickle
# with open('Datasets/df_2018_to_2021_raw.pkl', 'rb') as f:
    # df = pickle.load(f)

## Adjust the columns names

In [30]:
df.columns

Index(['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE'],
      dtype='object')

In [31]:
new_column_names = ['Season ID', 'Player ID', 'Game ID', 'Game Date', 'Matchup', 'Won or Lost',
       'Minutes played', 'Field Goals Made', 'Field Goals Attempted', 'Field Goals %', 
       'Field Goals 3 Points Made', 'Field Goal 3 Points Attempt', 'Field Goal 3 Points %', 
       'Free Throws Made', 'Free Throws Attempt', 'Free Throw %', 'Offensive Rebounds', 'Defensive Rebounds', 'Rebounds', 
       'Assists', 'Steals', 'Blocks', 'Turnovers', 'Personal Fouls',
       'Points', 'Plus Minus', 'Video Available']
    
df.columns = new_column_names
df.columns

Index(['Season ID', 'Player ID', 'Game ID', 'Game Date', 'Matchup',
       'Won or Lost', 'Minutes played', 'Field Goals Made',
       'Field Goals Attempted', 'Field Goals %', 'Field Goals 3 Points Made',
       'Field Goal 3 Points Attempt', 'Field Goal 3 Points %',
       'Free Throws Made', 'Free Throws Attempt', 'Free Throw %',
       'Offensive Rebounds', 'Defensive Rebounds', 'Rebounds', 'Assists',
       'Steals', 'Blocks', 'Turnovers', 'Personal Fouls', 'Points',
       'Plus Minus', 'Video Available'],
      dtype='object')

## Drop the irrelevant features

In [32]:
columns_to_drop = ['Field Goals %', 'Field Goal 3 Points %', 
'Free Throw %', 'Rebounds', 'Plus Minus', 'Video Available']

df.drop(columns=columns_to_drop, inplace=True)
df.columns

Index(['Season ID', 'Player ID', 'Game ID', 'Game Date', 'Matchup',
       'Won or Lost', 'Minutes played', 'Field Goals Made',
       'Field Goals Attempted', 'Field Goals 3 Points Made',
       'Field Goal 3 Points Attempt', 'Free Throws Made',
       'Free Throws Attempt', 'Offensive Rebounds', 'Defensive Rebounds',
       'Assists', 'Steals', 'Blocks', 'Turnovers', 'Personal Fouls', 'Points'],
      dtype='object')

In [33]:
df.head(2)

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won or Lost,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Free Throws Made,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points
0,22018,203500,21801226,"APR 10, 2019",OKC @ MIL,W,12,1,5,0,...,2,4,2,0,0,0,1,0,1,4
1,22018,203500,21801218,"APR 09, 2019",OKC vs. HOU,W,37,3,11,0,...,2,4,8,5,1,0,0,0,1,8


## Checking the missing values

In [34]:
df.isna().sum().sum()

0

## Check for duplicated rows

In [35]:
df.duplicated().sum()

0

## Reset the index

In [36]:
df.reset_index(drop=True, inplace=True)

## Adjust the data types

### Adjust Game Date

In [37]:
df['Game Date'] =  pd.to_datetime(df['Game Date'])
df.head(2)

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won or Lost,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Free Throws Made,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points
0,22018,203500,21801226,2019-04-10,OKC @ MIL,W,12,1,5,0,...,2,4,2,0,0,0,1,0,1,4
1,22018,203500,21801218,2019-04-09,OKC vs. HOU,W,37,3,11,0,...,2,4,8,5,1,0,0,0,1,8


### Adjust Won or Lost

In [38]:
df['Won or Lost'].replace({'W': 1, 'L': 0}, inplace=True)
df.rename(columns={'Won or Lost': 'Won'}, inplace=True)
df.head(2)

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Free Throws Made,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points
0,22018,203500,21801226,2019-04-10,OKC @ MIL,1,12,1,5,0,...,2,4,2,0,0,0,1,0,1,4
1,22018,203500,21801218,2019-04-09,OKC vs. HOU,1,37,3,11,0,...,2,4,8,5,1,0,0,0,1,8


### Adjust the numeric features types

In [39]:
df.dtypes

Season ID                              object
Player ID                              object
Game ID                                object
Game Date                      datetime64[ns]
Matchup                                object
Won                                     int64
Minutes played                         object
Field Goals Made                       object
Field Goals Attempted                  object
Field Goals 3 Points Made              object
Field Goal 3 Points Attempt            object
Free Throws Made                       object
Free Throws Attempt                    object
Offensive Rebounds                     object
Defensive Rebounds                     object
Assists                                object
Steals                                 object
Blocks                                 object
Turnovers                              object
Personal Fouls                         object
Points                                 object
dtype: object

In [40]:
columns_to_int = ['Minutes played', 'Field Goals Made', 'Field Goals Attempted',
       'Field Goals 3 Points Made', 'Field Goal 3 Points Attempt',
       'Free Throws Made', 'Free Throws Attempt', 'Offensive Rebounds',
       'Defensive Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers',
       'Personal Fouls', 'Points']

columns_to_str = ['Season ID', 'Player ID', 'Game ID', 'Matchup']

In [41]:
df[columns_to_int] = df[columns_to_int].apply(pd.to_numeric)
df[columns_to_str] = df[columns_to_str].apply(lambda x: x.astype(str))
df.dtypes

Season ID                              object
Player ID                              object
Game ID                                object
Game Date                      datetime64[ns]
Matchup                                object
Won                                     int64
Minutes played                          int64
Field Goals Made                        int64
Field Goals Attempted                   int64
Field Goals 3 Points Made               int64
Field Goal 3 Points Attempt             int64
Free Throws Made                        int64
Free Throws Attempt                     int64
Offensive Rebounds                      int64
Defensive Rebounds                      int64
Assists                                 int64
Steals                                  int64
Blocks                                  int64
Turnovers                               int64
Personal Fouls                          int64
Points                                  int64
dtype: object

In [43]:
df.head(2)

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Free Throws Made,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points
0,22018,203500,21801226,2019-04-10,OKC @ MIL,1,12,1,5,0,...,2,4,2,0,0,0,1,0,1,4
1,22018,203500,21801218,2019-04-09,OKC vs. HOU,1,37,3,11,0,...,2,4,8,5,1,0,0,0,1,8


## Include aditional features

### Include player name

In [46]:
players_ids[0:3]

['1630173', '203500', '1628389']

In [47]:
active_players[0:2]

[{'id': 1630173,
  'full_name': 'Precious Achiuwa',
  'first_name': 'Precious',
  'last_name': 'Achiuwa',
  'is_active': True},
 {'id': 203500,
  'full_name': 'Steven Adams',
  'first_name': 'Steven',
  'last_name': 'Adams',
  'is_active': True}]

In [48]:
players_ids.index('203500')

1

In [49]:
active_players[players_ids.index('203500')]['full_name']

'Steven Adams'

In [50]:
df['Player Name'] = df['Player ID'].apply(lambda x: str(active_players[players_ids.index(str(x))]['full_name']))
df.head(2)

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Name
0,22018,203500,21801226,2019-04-10,OKC @ MIL,1,12,1,5,0,...,4,2,0,0,0,1,0,1,4,Steven Adams
1,22018,203500,21801218,2019-04-09,OKC vs. HOU,1,37,3,11,0,...,4,8,5,1,0,0,0,1,8,Steven Adams


In [51]:
df.tail(2)

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Name
80892,22021,1627826,22100034,2021-10-23,LAC vs. MEM,0,19,3,7,0,...,0,4,4,3,0,0,0,2,6,Ivica Zubac
80893,22021,1627826,22100016,2021-10-21,LAC @ GSW,0,17,4,7,0,...,10,1,2,2,0,0,0,2,14,Ivica Zubac


### Include the players team name

#### Games

##### 2018-19

In [52]:
season = '2018-19'
game_finder = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable=season_type)
games_2018_19 = game_finder.get_data_frames()[0]
games_2018_19.head(2)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22018,1610612746,LAC,LA Clippers,21801229,2019-04-10,LAC vs. UTA,W,264,143,...,0.767,12,40,52,34,7,5,12,27,6.0
1,22018,1610612759,SAS,San Antonio Spurs,21801227,2019-04-10,SAS vs. DAL,W,242,105,...,0.833,8,45,53,22,6,2,10,14,11.0


##### 2019-20

In [53]:
season = '2019-20'
game_finder = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable=season_type)
games_2019_20 = game_finder.get_data_frames()[0]
games_2019_20.head(2)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22019,1610612748,MIA,Miami Heat,21901316,2020-08-14,MIA @ IND,L,240,92,...,0.741,14,34,48,14,6,5,19,15,-17.0
1,22019,1610612743,DEN,Denver Nuggets,21901318,2020-08-14,DEN @ TOR,L,240,109,...,0.913,9,32,41,27,9,11,15,15,-8.0


##### 2020-21

In [54]:
season = '2020-21'
game_finder = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable=season_type)
games_2020_21 = game_finder.get_data_frames()[0]
games_2020_21.head(2)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22020,1610612750,MIN,Minnesota Timberwolves,22001071,2021-05-16,MIN vs. DAL,W,241,136,...,0.84,11,27,38,35,11,4,9,22,15.0
1,22020,1610612744,GSW,Golden State Warriors,22001070,2021-05-16,GSW vs. MEM,W,240,113,...,0.889,12,34,46,26,9,4,19,19,12.0


##### 2021-22

In [55]:
season = '2021-22'
game_finder = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable=season_type)
games_2021_22 = game_finder.get_data_frames()[0]
games_2021_22.head(2)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22021,1610612765,DET,Detroit Pistons,22101101,2022-03-25,DET vs. WAS,,145,64,...,0.917,7,24,31,12,4,3,11,14,-16.8
1,22021,1612709910,FWN,Fort Wayne Mad Ants,2022100448,2022-03-25,FWN vs. MCC,,179,97,...,0.667,8,28,36,19,8,5,16,13,-2.0


##### Join the datasets

In [56]:
games = pd.concat([games_2018_19, games_2019_20, games_2020_21, games_2021_22], ignore_index=True)
games = games[['SEASON_ID', 'TEAM_ABBREVIATION', 'GAME_ID', 'WL']]
games.head(2)

Unnamed: 0,SEASON_ID,TEAM_ABBREVIATION,GAME_ID,WL
0,22018,LAC,21801229,W
1,22018,SAS,21801227,W


In [57]:
def get_team_name(player_id, game_id):
    if df[(df['Player ID'] == player_id) & (df['Game ID'] == game_id)]['Won'].values[0]:
        return games[(games['GAME_ID'] == game_id) & (games['WL'] == 'W')]['TEAM_ABBREVIATION'].values[0]
    else:
        return games[(games['GAME_ID'] == game_id) & (games['WL'] == 'L')]['TEAM_ABBREVIATION'].values[0]

In [58]:
# Player ID = 1627826: Ivica Zubac
# Expected return: LAC
get_team_name(player_id='1627826', game_id='0022000002')

'LAC'

In [59]:
player_team = []

for idx, row in df.iterrows():
    player_team.append(get_team_name(row['Player ID'], row['Game ID']))

df['Player Team'] = player_team

In [60]:
df[(df['Player ID'] == '1627826') & (df['Won'] == 0)].head(1)

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Name,Player Team
18330,22018,1627826,21801205,2019-04-07,LAC @ GSW,0,21,2,8,0,...,3,5,1,0,0,0,0,7,Ivica Zubac,LAC


In [61]:
df[df['Player Name'] == 'LeBron James'].head(1)

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Name,Player Team
9511,22018,2544,21801135,2019-03-29,LAL vs. CHA,1,32,11,19,4,...,0,3,9,0,0,6,1,27,LeBron James,LAL


### Include Home x Away

In [62]:
df[df['Game ID'] == '0022001072']

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Name,Player Team
37013,22020,1629638,22001072,2021-05-16,NOP vs. LAL,0,37,5,15,2,...,1,3,5,1,0,4,0,14,Nickeil Alexander-Walker,NOP
39035,22020,202339,22001072,2021-05-16,NOP vs. LAL,0,22,5,9,2,...,0,4,2,0,2,0,0,13,Eric Bledsoe,NOP
40412,22020,203484,22001072,2021-05-16,LAL @ NOP,1,27,4,8,2,...,0,1,6,1,0,0,3,10,Kentavious Caldwell-Pope,LAL
40812,22020,1627936,22001072,2021-05-16,LAL @ NOP,1,19,4,8,1,...,1,2,5,0,1,1,2,9,Alex Caruso,LAL
41722,22020,203076,22001072,2021-05-16,LAL @ NOP,1,30,5,12,0,...,0,5,1,1,0,1,0,14,Anthony Davis,LAL
42364,22020,203083,22001072,2021-05-16,LAL @ NOP,1,21,6,11,0,...,5,8,0,2,2,2,5,13,Andre Drummond,LAL
43146,22020,1629117,22001072,2021-05-16,NOP vs. LAL,0,20,1,4,0,...,0,4,0,0,1,0,6,4,Wenyen Gabriel,NOP
45011,22020,1629637,22001072,2021-05-16,NOP vs. LAL,0,20,4,7,0,...,2,5,3,1,0,0,2,10,Jaxson Hayes,NOP
45193,22020,1626195,22001072,2021-05-16,NOP vs. LAL,0,30,8,11,0,...,4,9,1,0,2,2,4,19,Willy Hernangomez,NOP
45828,22020,1629659,22001072,2021-05-16,LAL @ NOP,1,14,2,6,1,...,0,1,1,2,0,1,1,6,Talen Horton-Tucker,LAL


#### The '@' in the 'Matchup' feature indicates that the game was in the second location

In [63]:
df['Home'] = df['Matchup'].apply(lambda x: 0 if x[4] == '@' else 1)

In [64]:
df[df['Game ID'] == '0022001072'][['Game ID', 'Matchup', 'Player Name', 'Player Team', 'Home']]

Unnamed: 0,Game ID,Matchup,Player Name,Player Team,Home
37013,22001072,NOP vs. LAL,Nickeil Alexander-Walker,NOP,1
39035,22001072,NOP vs. LAL,Eric Bledsoe,NOP,1
40412,22001072,LAL @ NOP,Kentavious Caldwell-Pope,LAL,0
40812,22001072,LAL @ NOP,Alex Caruso,LAL,0
41722,22001072,LAL @ NOP,Anthony Davis,LAL,0
42364,22001072,LAL @ NOP,Andre Drummond,LAL,0
43146,22001072,NOP vs. LAL,Wenyen Gabriel,NOP,1
45011,22001072,NOP vs. LAL,Jaxson Hayes,NOP,1
45193,22001072,NOP vs. LAL,Willy Hernangomez,NOP,1
45828,22001072,LAL @ NOP,Talen Horton-Tucker,LAL,0


### Include opponent

In [65]:
df['Opponent'] = df['Matchup'].apply(lambda x: x[-3:])
df.head()

Unnamed: 0,Season ID,Player ID,Game ID,Game Date,Matchup,Won,Minutes played,Field Goals Made,Field Goals Attempted,Field Goals 3 Points Made,...,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Name,Player Team,Home,Opponent
0,22018,203500,21801226,2019-04-10,OKC @ MIL,1,12,1,5,0,...,0,0,1,0,1,4,Steven Adams,OKC,0,MIL
1,22018,203500,21801218,2019-04-09,OKC vs. HOU,1,37,3,11,0,...,1,0,0,0,1,8,Steven Adams,OKC,1,HOU
2,22018,203500,21801197,2019-04-07,OKC @ MIN,1,27,4,5,0,...,1,1,0,3,6,8,Steven Adams,OKC,0,MIN
3,22018,203500,21801186,2019-04-05,OKC vs. DET,1,24,7,9,0,...,0,0,2,2,4,14,Steven Adams,OKC,1,DET
4,22018,203500,21801161,2019-04-02,OKC vs. LAL,1,33,6,11,0,...,3,1,5,2,2,13,Steven Adams,OKC,1,LAL


### Rearrange columns order

In [66]:
df.columns

Index(['Season ID', 'Player ID', 'Game ID', 'Game Date', 'Matchup', 'Won',
       'Minutes played', 'Field Goals Made', 'Field Goals Attempted',
       'Field Goals 3 Points Made', 'Field Goal 3 Points Attempt',
       'Free Throws Made', 'Free Throws Attempt', 'Offensive Rebounds',
       'Defensive Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers',
       'Personal Fouls', 'Points', 'Player Name', 'Player Team', 'Home',
       'Opponent'],
      dtype='object')

In [67]:
new_columns_order = ['Season ID', 'Player ID', 'Player Name',
                    'Game ID', 'Game Date', 'Matchup', 'Player Team', 'Opponent', 'Home', 'Won',
                    'Minutes played', 'Field Goals Made', 'Field Goals Attempted',
                    'Field Goals 3 Points Made', 'Field Goal 3 Points Attempt',
                    'Free Throws Made', 'Free Throws Attempt', 'Offensive Rebounds',
                    'Defensive Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers',
                    'Personal Fouls', 'Points']

In [68]:
df = df[new_columns_order]
df.head(2)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Free Throws Made,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points
0,22018,203500,Steven Adams,21801226,2019-04-10,OKC @ MIL,OKC,MIL,0,1,...,2,4,2,0,0,0,1,0,1,4
1,22018,203500,Steven Adams,21801218,2019-04-09,OKC vs. HOU,OKC,HOU,1,1,...,2,4,8,5,1,0,0,0,1,8


### Include PER (Player Efficiency Ratings)

PER = (FGM x 85.910 + Steals x 53.897 + 3PTM x 51.757 + FTM x 46.845 + Blocks x 39.190 + Offensive_Reb x 39.190 + Assists x 34.677 + Defensive_Reb x 14.707 — Foul x 17.174 — FT_Miss x 20.091 — FG_Miss x 39.190 — TO x 53.897) x (1 / Minutes)

Reference: https://towardsdatascience.com/predicting-the-outcome-of-nba-games-with-machine-learning-a810bb768f20

In [69]:
df.columns

Index(['Season ID', 'Player ID', 'Player Name', 'Game ID', 'Game Date',
       'Matchup', 'Player Team', 'Opponent', 'Home', 'Won', 'Minutes played',
       'Field Goals Made', 'Field Goals Attempted',
       'Field Goals 3 Points Made', 'Field Goal 3 Points Attempt',
       'Free Throws Made', 'Free Throws Attempt', 'Offensive Rebounds',
       'Defensive Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers',
       'Personal Fouls', 'Points'],
      dtype='object')

In [70]:
pers = []
for idx, row in df.iterrows():
    if row['Minutes played'] == 0:
        per = 0
    else:
        per = round((row['Field Goals Made'] * 85.910 + row['Steals'] * 53.897 + row['Field Goals 3 Points Made'] * 51.757 + row['Free Throws Made'] * 46.845 + row['Blocks'] * 39.190 + row['Offensive Rebounds'] * 39.190 + row['Assists'] * 34.677 + row['Defensive Rebounds'] * 14.707 - row['Personal Fouls'] * 17.174 - (row['Free Throws Attempt'] - row['Free Throws Made']) * 20.091 - (row['Free Throws Attempt'] - row['Free Throws Made']) * 39.190 - row['Turnovers'] * 53.897) * (1 / row['Minutes played']), 3)
    pers.append(per)

df['Player Efficiency Ratings'] = pers

In [71]:
df.head(2)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings
0,22018,203500,Steven Adams,21801226,2019-04-10,OKC @ MIL,OKC,MIL,0,1,...,4,2,0,0,0,1,0,1,4,13.453
1,22018,203500,Steven Adams,21801218,2019-04-09,OKC vs. HOU,OKC,HOU,1,1,...,4,8,5,1,0,0,0,1,8,17.227


## Save the DataFrame

In [72]:
import pickle
import pandas as pd

# Pickle
# with open('Datasets/df_2018_to_2022_raw_with_extra_features.pkl', 'wb') as f:
    # pickle.dump(df, f)

# Unpickle
# with open('Datasets/df_2018_to_2021_raw_with_extra_features.pkl', 'rb') as f:
    # df = pickle.load(f)

## Get the next games

In [35]:
playernextngames.PlayerNextNGames(
    number_of_games=3,
    player_id=players_ids[0], 
    season_all='2021-22', 
    season_type_all_star=season_type).get_data_frames()[0]

Unnamed: 0,GAME_ID,GAME_DATE,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_NAME,VISITOR_TEAM_NAME,HOME_TEAM_ABBREVIATION,VISITOR_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_NICKNAME,GAME_TIME,HOME_WL,VISITOR_WL
0,22100790,"FEB 04, 2022",1610612761,1610612737,Toronto,Atlanta,TOR,ATL,Raptors,Hawks,07:30 PM,27-23,25-26
1,22100810,"FEB 07, 2022",1610612766,1610612761,Charlotte,Toronto,CHA,TOR,Hornets,Raptors,07:00 PM,28-24,27-23
2,22100827,"FEB 09, 2022",1610612760,1610612761,Oklahoma City,Toronto,OKC,TOR,Thunder,Raptors,08:00 PM,16-34,27-23


## Function to retrieve the mean value of players stats

In [3]:
def get_mean_player_stats(df, player_id, ref_date, n_days, opponent='Any'):
    """
    df: Pandas DataFrame with the data
    ref_date: String with the reference date (YYYY-MM-DD)
    n_days: Number of records to be used in the mean
    opponent: String of the opponenet name, with 3 leters
    """
    columns_to_get_mean = ['Minutes played',
       'Field Goals Made', 'Field Goals Attempted',
       'Field Goals 3 Points Made', 'Field Goal 3 Points Attempt',
       'Free Throws Made', 'Free Throws Attempt', 'Offensive Rebounds',
       'Defensive Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers',
       'Points', 'Player Efficiency Ratings']
    opponent = opponent.upper()
    ref_date = pd.to_datetime(ref_date)
    
    if opponent == 'ANY':
        result = df[(df['Player ID'] == player_id) & (df['Game Date'] < ref_date)].sort_values('Game Date', ascending=False).loc[:][0:n_days]
        if result.shape[0] == 0:
            print("The search did not retrieve any result")
            return None
    else:
        result = df[(df['Player ID'] == player_id) & (df['Game Date'] < ref_date) & (df['Opponent'] == opponent)].sort_values('Game Date', ascending=False).loc[:][0:n_days]
        if result.shape[0] == 0:
            print("The search did not retrieve any result")
            return None
    return result[columns_to_get_mean].mean()

In [4]:
# Function test
player_id = '2544' # LeBron James
date = '2021-12-28'
opponent = 'GSW'
n_days = 4

get_mean_player_stats(df, player_id, date, n_days, opponent)

Minutes played                 30.666667
Field Goals Made                7.000000
Field Goals Attempted          13.666667
Field Goals 3 Points Made       2.333333
Field Goal 3 Points Attempt     5.333333
Free Throws Made                3.666667
Free Throws Attempt             6.000000
Offensive Rebounds              1.000000
Defensive Rebounds              6.000000
Assists                         6.666667
Steals                          1.000000
Blocks                          0.666667
Turnovers                       4.333333
Points                         20.000000
Player Efficiency Ratings      32.185667
dtype: float64

## Get Player Position

In [120]:
test = commonplayerinfo.CommonPlayerInfo(player_id='2544').get_data_frames()[0]
test.head()

Unnamed: 0,PERSON_ID,FIRST_NAME,LAST_NAME,DISPLAY_FIRST_LAST,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FI_LAST,PLAYER_SLUG,BIRTHDATE,SCHOOL,COUNTRY,...,PLAYERCODE,FROM_YEAR,TO_YEAR,DLEAGUE_FLAG,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,GREATEST_75_FLAG
0,2544,LeBron,James,LeBron James,"James, LeBron",L. James,lebron-james,1984-12-30T00:00:00,St. Vincent-St. Mary HS (OH),USA,...,lebron_james,2003,2021,N,Y,Y,2003,1,1,Y


In [121]:
test.columns

Index(['PERSON_ID', 'FIRST_NAME', 'LAST_NAME', 'DISPLAY_FIRST_LAST',
       'DISPLAY_LAST_COMMA_FIRST', 'DISPLAY_FI_LAST', 'PLAYER_SLUG',
       'BIRTHDATE', 'SCHOOL', 'COUNTRY', 'LAST_AFFILIATION', 'HEIGHT',
       'WEIGHT', 'SEASON_EXP', 'JERSEY', 'POSITION', 'ROSTERSTATUS',
       'GAMES_PLAYED_CURRENT_SEASON_FLAG', 'TEAM_ID', 'TEAM_NAME',
       'TEAM_ABBREVIATION', 'TEAM_CODE', 'TEAM_CITY', 'PLAYERCODE',
       'FROM_YEAR', 'TO_YEAR', 'DLEAGUE_FLAG', 'NBA_FLAG', 'GAMES_PLAYED_FLAG',
       'DRAFT_YEAR', 'DRAFT_ROUND', 'DRAFT_NUMBER', 'GREATEST_75_FLAG'],
      dtype='object')

In [122]:
test['POSITION'].values[0]

'Forward'

In [123]:
df.columns

Index(['Season ID', 'Player ID', 'Player Name', 'Game ID', 'Game Date',
       'Matchup', 'Player Team', 'Opponent', 'Home', 'Won', 'Minutes played',
       'Field Goals Made', 'Field Goals Attempted',
       'Field Goals 3 Points Made', 'Field Goal 3 Points Attempt',
       'Free Throws Made', 'Free Throws Attempt', 'Offensive Rebounds',
       'Defensive Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers',
       'Personal Fouls', 'Points', 'Player Efficiency Ratings'],
      dtype='object')

In [70]:
def get_player_position(id_list) -> dict:
    player_id_position = {}
    for idx, id in enumerate(id_list):
        info = commonplayerinfo.CommonPlayerInfo(player_id=id).get_data_frames()[0]
        player_id_position[id] = info['POSITION'].values[0]
        # Preventing timeout exceptions
        time.sleep(.600)
    return player_id_position

In [None]:
player_positions = get_player_position(players_ids)

In [52]:
# Player ID: '2544'
# Player Name: LeBron James
# Expected Value: Forward

player_positions['2544']

'Forward'

In [56]:
# Count players with missing possition
missing = []
for key, value in player_positions.items():
    if value == '':
        missing.append(key)

len(missing)

17

In [67]:
for player in active_players:
    if str(player['id']) in missing:
        print(player['full_name'])

Derrick Alston Jr.
Mitch Ballock
D.J. Carton
Matt Coleman III
Johnny Hamilton
Daulton Hommes
Feron Hunt
AJ Lawson
Matt Lewis
Isaiah Miller
Matt Ryan
Aamir Simms
Dru Smith
DJ Steward
DJ Stewart
MaCio Teague
Ethan Thompson


Only irrelevant players. Possible solutions:
- Remove them from the dataset
- Define a "default" position for them
- Insert manually

To be decided.

In [68]:
import pickle

# Pickle
# with open('player_positions.pkl', 'wb') as f:
#     pickle.dump(player_positions, f)

# Unpickle
# with open('player_positions.pkl', 'rb') as f:
    # player_positions = pickle.load(f)


## Get players' salary and position from DraftKings

Reference: https://swishanalytics.com/optimus/nba/daily-fantasy-salary-changes

In [None]:
# %pip install requests beautifulsoup4
# %pip install lxml

In [124]:
from bs4 import BeautifulSoup
import requests

In [125]:
url = "https://swishanalytics.com/optimus/nba/daily-fantasy-salary-changes"

html_content = requests.get(url).text

# Parse the html content
soup = BeautifulSoup(html_content, "lxml")
print(soup.prettify()[:200]) # print the parsed data of html

<!DOCTYPE html>
<html class="no-js" lang="en">
 <head>
  <style>
  </style>
  <script type="text/javascript">
   (function(e,b){if(!b.__SV){var a,f,i,g;window.mixpanel=b;b._i=[];b.init=function(a,e,d)


In [126]:
scripts = soup.findAll('script')

In [127]:
scripts[21]

<script>
      function Model(){
        this.players_dk = [{"player_id":"1121277","player_name":"Luka Doncic","nickname":"Mavericks","pos_main":"PG","fantasy_pts":"62.69","avg_pts":"56.41","fpts_diff":"+6.28","date":"2022-02-28","salary":"21,900","salary_diff":"21899","salary_diff_percentage":"2189900.0","salary_change_html":"<td class=\"width-15 green\" id=\"salary-col\">+$21899 (2189900.0%)<\/td>","salary_change":"2189900.0"},{"player_id":"214152","player_name":"LeBron James","nickname":"Lakers","pos_main":"SF","fantasy_pts":"51.93","avg_pts":"54.07","fpts_diff":"-2.14","date":"2022-02-28","salary":"20,700","salary_diff":"20695","salary_diff_percentage":"413900.0","salary_change_html":"<td class=\"width-15 green\" id=\"salary-col\">+$20695 (413900.0%)<\/td>","salary_change":"413900.0"},{"player_id":"338365","player_name":"Stephen Curry","nickname":"Warriors","pos_main":"PG","fantasy_pts":"48.84","avg_pts":"46.64","fpts_diff":"+2.20","date":"2022-02-28","salary":"10,800","salary_diff

In [128]:
table = scripts[21].contents[0]

In [129]:
table = table.split('{')
table

['\n      function Model()',
 '\n        this.players_dk = [',
 '"player_id":"1121277","player_name":"Luka Doncic","nickname":"Mavericks","pos_main":"PG","fantasy_pts":"62.69","avg_pts":"56.41","fpts_diff":"+6.28","date":"2022-02-28","salary":"21,900","salary_diff":"21899","salary_diff_percentage":"2189900.0","salary_change_html":"<td class=\\"width-15 green\\" id=\\"salary-col\\">+$21899 (2189900.0%)<\\/td>","salary_change":"2189900.0"},',
 '"player_id":"214152","player_name":"LeBron James","nickname":"Lakers","pos_main":"SF","fantasy_pts":"51.93","avg_pts":"54.07","fpts_diff":"-2.14","date":"2022-02-28","salary":"20,700","salary_diff":"20695","salary_diff_percentage":"413900.0","salary_change_html":"<td class=\\"width-15 green\\" id=\\"salary-col\\">+$20695 (413900.0%)<\\/td>","salary_change":"413900.0"},',
 '"player_id":"338365","player_name":"Stephen Curry","nickname":"Warriors","pos_main":"PG","fantasy_pts":"48.84","avg_pts":"46.64","fpts_diff":"+2.20","date":"2022-02-28","salary"

In [130]:
table[-36]

'"player_id":"1271713","player_name":"Isaiah Todd","nickname":"Wizards","pos_main":"PF","fantasy_pts":"1.21","avg_pts":"4.35","fpts_diff":"-3.14","date":"2022-02-26","salary":"3,500","salary_diff":"0","salary_diff_percentage":"0.0","salary_change_html":"<td class=\\"width-15 green\\" id=\\"salary-col\\">+$0 (0.0%)<\\/td>","salary_change":"0.0"}];\n        this.players_ya = [];\n        this.current_site = \'dk\';\n      }\n      Model.prototype = '

In [131]:
table[-35]

'\n        determineSortFunction: function(type)'

In [132]:
table[2]

'"player_id":"1121277","player_name":"Luka Doncic","nickname":"Mavericks","pos_main":"PG","fantasy_pts":"62.69","avg_pts":"56.41","fpts_diff":"+6.28","date":"2022-02-28","salary":"21,900","salary_diff":"21899","salary_diff_percentage":"2189900.0","salary_change_html":"<td class=\\"width-15 green\\" id=\\"salary-col\\">+$21899 (2189900.0%)<\\/td>","salary_change":"2189900.0"},'

In [133]:
table[1]

'\n        this.players_dk = ['

In [134]:
table = table[2:-35]

In [135]:
table[0]

'"player_id":"1121277","player_name":"Luka Doncic","nickname":"Mavericks","pos_main":"PG","fantasy_pts":"62.69","avg_pts":"56.41","fpts_diff":"+6.28","date":"2022-02-28","salary":"21,900","salary_diff":"21899","salary_diff_percentage":"2189900.0","salary_change_html":"<td class=\\"width-15 green\\" id=\\"salary-col\\">+$21899 (2189900.0%)<\\/td>","salary_change":"2189900.0"},'

In [136]:
table[-1]

'"player_id":"1271713","player_name":"Isaiah Todd","nickname":"Wizards","pos_main":"PF","fantasy_pts":"1.21","avg_pts":"4.35","fpts_diff":"-3.14","date":"2022-02-26","salary":"3,500","salary_diff":"0","salary_diff_percentage":"0.0","salary_change_html":"<td class=\\"width-15 green\\" id=\\"salary-col\\">+$0 (0.0%)<\\/td>","salary_change":"0.0"}];\n        this.players_ya = [];\n        this.current_site = \'dk\';\n      }\n      Model.prototype = '

In [137]:
import re
def get_name_position_and_salary(player_info: list) -> dict:
    def convert_salary_float(salary):
        if "," in salary:
            salary = salary.replace(',', '')
        return float(salary)

    try:
        player_name = re.search('"player_name":"[\w].+[\w][",]', player_info)[0].split(':')[1].split(',')[0]
        position = re.search('"pos_main":"[\w]*"', player_info)[0].split(':')[1][1:-1]
        salary = convert_salary_float(re.search('"salary":"[\w]*[,]*[\w]*"', player_info)[0].split(':')[1][1:-1])
    except:
        print(f"It was not possible to get player's info. \nInput: \n{player_info}")

    return({'player_name': player_name, 'position': position, 'salary': salary})
    
get_name_position_and_salary(table[-1])


{'player_name': '"Isaiah Todd"', 'position': 'PF', 'salary': 3500.0}

In [138]:
position_and_salary = [get_name_position_and_salary(entry) for entry in table]
position_and_salary

[{'player_name': '"Luka Doncic"', 'position': 'PG', 'salary': 21900.0},
 {'player_name': '"LeBron James"', 'position': 'SF', 'salary': 20700.0},
 {'player_name': '"Stephen Curry"', 'position': 'PG', 'salary': 10800.0},
 {'player_name': '"Russell Westbrook"', 'position': 'PG', 'salary': 15600.0},
 {'player_name': '"Karl-Anthony Towns"', 'position': 'C', 'salary': 10100.0},
 {'player_name': '"Pascal Siakam"', 'position': 'PF', 'salary': 8900.0},
 {'player_name': '"Fred VanVleet"', 'position': 'PG', 'salary': 8500.0},
 {'player_name': '"Andre Drummond"', 'position': 'C', 'salary': 6900.0},
 {'player_name': '"Spencer Dinwiddie"', 'position': 'PG', 'salary': 11100.0},
 {'player_name': '"D\'Angelo Russell"', 'position': 'PG', 'salary': 7600.0},
 {'player_name': '"Christian Wood"', 'position': 'C', 'salary': 8100.0},
 {'player_name': '"Cade Cunningham"', 'position': 'SG', 'salary': 14400.0},
 {'player_name': '"Jerami Grant"', 'position': 'PF', 'salary': 12300.0},
 {'player_name': '"Anthony Ed

In [139]:
import pickle

# Pickle
# with open('positions_and_salary.pkl', 'wb') as f:
#     pickle.dump(position_and_salary, f)

# Unpickle
# with open('positions_and_salary.pkl', 'rb') as f:
    # position_and_salary = pickle.load(f)
