In [16]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.endpoints import playercareerstats

# Extracting the Data

In [17]:
# creating a function to obtain a certain player's game log given their name and the desired season
def get_player_log(player_name, season):
    """Fetch the regular-season game log of an active NBA player.

    Parameters
    ----------
    player_name : str
        Full name of the player. It is compared for exact equality with the
        'full_name' field of the entries from players.get_active_players().
    season : str
        Season identifier forwarded unchanged to PlayerGameLog
        (for example '2022-23').

    Returns
    -------
    The first frame of PlayerGameLog(...).get_data_frames().

    Raises
    ------
    IndexError
        If no active player has exactly this full name.
    """
    # stop at the first exact match instead of materialising every candidate
    desired_player = next(
        (player for player in players.get_active_players() if player['full_name'] == player_name),
        None,
    )
    if desired_player is None:
        # indexing an empty match list would surface as an opaque
        # "list index out of range"; the exception type is kept, the message is not
        raise IndexError(f"No active NBA player named {player_name!r} was found")

    gamelog = playergamelog.PlayerGameLog(
        player_id=desired_player['id'],
        season=season,
        season_type_all_star='Regular Season',
    )
    return gamelog.get_data_frames()[0]

In [18]:
# fetching the 2022-23 game log and saving a local csv copy of it
df = get_player_log(player_name='Luka Doncic', season='2022-23')
df.to_csv(path_or_buf='Luka_Doncic_test.csv')

# Wrangling the Data

In [19]:
# showing every column when a frame is displayed, then loading the saved csv
pd.options.display.max_columns = None
ld = pd.read_csv(filepath_or_buffer='Luka_Doncic_test.csv')
ld.head(n=5)

Unnamed: 0.1,Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,0,22022,1629029,22201139,"MAR 29, 2023",DAL @ PHI,L,37,9,20,0.45,4,9,0.444,2,2,1.0,1,9,10,8,0,1,4,3,24,-7,1
1,1,22022,1629029,22201123,"MAR 27, 2023",DAL @ IND,W,28,8,17,0.471,4,10,0.4,5,5,1.0,2,5,7,6,1,0,0,0,25,9,1
2,2,22022,1629029,22201113,"MAR 26, 2023",DAL @ CHA,L,40,12,29,0.414,7,16,0.438,9,10,0.9,1,11,12,8,2,2,4,2,40,-11,1
3,3,22022,1629029,22201101,"MAR 24, 2023",DAL vs. CHA,L,37,12,23,0.522,2,9,0.222,8,10,0.8,0,10,10,8,0,1,5,0,34,-1,1
4,4,22022,1629029,22201086,"MAR 22, 2023",DAL vs. GSW,L,41,11,27,0.407,2,10,0.2,6,10,0.6,1,6,7,17,1,0,5,3,30,2,1


In [20]:
# listing the column names of the loaded frame
ld.columns

Index(['Unnamed: 0', 'SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE',
       'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE'],
      dtype='object')

In [21]:
# dropping unnecessary columns
# (reassigning the result instead of using inplace=True keeps the step chainable
#  and avoids mutating the frame that earlier cells displayed)
ld = ld.drop(columns=['Unnamed: 0', 'SEASON_ID', 'Player_ID', 'Game_ID', 'VIDEO_AVAILABLE'])

In [22]:
# previewing the first rows after the column drop
ld.head()

Unnamed: 0,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS
0,"MAR 29, 2023",DAL @ PHI,L,37,9,20,0.45,4,9,0.444,2,2,1.0,1,9,10,8,0,1,4,3,24,-7
1,"MAR 27, 2023",DAL @ IND,W,28,8,17,0.471,4,10,0.4,5,5,1.0,2,5,7,6,1,0,0,0,25,9
2,"MAR 26, 2023",DAL @ CHA,L,40,12,29,0.414,7,16,0.438,9,10,0.9,1,11,12,8,2,2,4,2,40,-11
3,"MAR 24, 2023",DAL vs. CHA,L,37,12,23,0.522,2,9,0.222,8,10,0.8,0,10,10,8,0,1,5,0,34,-1
4,"MAR 22, 2023",DAL vs. GSW,L,41,11,27,0.407,2,10,0.2,6,10,0.6,1,6,7,17,1,0,5,3,30,2


In [23]:
# I want to change the datatype of the column 'GAME_DATE' to a datetime
# checking the current dtype of every column first
ld.dtypes

GAME_DATE      object
MATCHUP        object
WL             object
MIN             int64
FGM             int64
FGA             int64
FG_PCT        float64
FG3M            int64
FG3A            int64
FG3_PCT       float64
FTM             int64
FTA             int64
FT_PCT        float64
OREB            int64
DREB            int64
REB             int64
AST             int64
STL             int64
BLK             int64
TOV             int64
PF              int64
PTS             int64
PLUS_MINUS      int64
dtype: object

In [24]:
# converting the 'GAME_DATE' column to datetime64[ns]
game_dates = ld['GAME_DATE']
ld['GAME_DATE'] = game_dates.astype('datetime64[ns]')

In [25]:
# I want to change how the information is displayed in the 'MATCHUP' column
# Goal: make 2 columns: Opponent and Game Location
# This will help display the matchup information in a much cleaner/clearer way
ld.head()

Unnamed: 0,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS
0,2023-03-29,DAL @ PHI,L,37,9,20,0.45,4,9,0.444,2,2,1.0,1,9,10,8,0,1,4,3,24,-7
1,2023-03-27,DAL @ IND,W,28,8,17,0.471,4,10,0.4,5,5,1.0,2,5,7,6,1,0,0,0,25,9
2,2023-03-26,DAL @ CHA,L,40,12,29,0.414,7,16,0.438,9,10,0.9,1,11,12,8,2,2,4,2,40,-11
3,2023-03-24,DAL vs. CHA,L,37,12,23,0.522,2,9,0.222,8,10,0.8,0,10,10,8,0,1,5,0,34,-1
4,2023-03-22,DAL vs. GSW,L,41,11,27,0.407,2,10,0.2,6,10,0.6,1,6,7,17,1,0,5,3,30,2


In [26]:
# creating a function to determine whether a game is home or away
def game_location(matchup):
    """Return 'Away' when the matchup string contains '@', otherwise 'Home'."""
    return 'Away' if '@' in matchup else 'Home'

In [27]:
# creating a new column for the game location
# every 'MATCHUP' value is classified by game_location
ld['Game Location'] = ld['MATCHUP'].apply(game_location)

In [28]:
# creating a new column for the opponent
# the opponent is the third whitespace-separated token of 'MATCHUP'
# (e.g. 'DAL @ PHI' -> 'PHI'); the vectorized .str accessor replaces the
# per-row lambda and yields a missing value instead of raising when a
# matchup is missing or has fewer than three tokens
ld['Opponent'] = ld['MATCHUP'].str.split().str[2]

In [29]:
# arranging the columns in the desired order, then sorting the games from earliest to latest
cols = [
    'GAME_DATE', 'Game Location', 'Opponent', 'WL', 'MIN',
    'FGM', 'FGA', 'FG_PCT',
    'FG3M', 'FG3A', 'FG3_PCT',
    'FTM', 'FTA', 'FT_PCT',
    'OREB', 'DREB', 'REB',
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS',
]

# selecting by label keeps exactly these columns; sort_values is ascending by default
ld = ld.loc[:, cols].sort_values('GAME_DATE')

# Exporting the Data

In [30]:
# exporting cleaned dataframe as an excel workbook
# index=False: after the sort the index only holds the old row positions,
# which are not game data and would otherwise become an unnamed first column
ld.to_excel('Luka_Doncic_Stats.xlsx', index=False)