## Import the libraries

In [1]:
import pickle
import pandas as pd

## Import the final dataset

In [36]:
import pickle

# Pickle
# with open('positions_and_salary.pkl', 'wb') as f:
#     pickle.dump(position_and_salary, f)

# Unpickle
with open('final_df.pkl', 'rb') as f:
    df = pickle.load(f)


In [5]:
df.head(2)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings
0,22020,1630173,Precious Achiuwa,22001069,2021-05-16,MIA @ DET,MIA,DET,0,1,...,7,3,7,0,2,1,2,3,23,23.112
1,22020,1630173,Precious Achiuwa,22001062,2021-05-15,MIA @ MIL,MIA,MIL,0,0,...,0,0,0,1,0,0,0,0,2,60.293


## Include double-double and triple-double features

DraftKings criteria:
- Double-double (max 1 per player): 
    - Points, Rebounds, Assists, Blocks, Steals
- Triple-double (max 1 per player): 
    - Points, Rebounds, Assists, Blocks, Steals

In [7]:
def get_double_and_triple_doubles(player_info):
    points = player_info['Points']
    rebounds = player_info['Offensive Rebounds'] + player_info['Defensive Rebounds']
    assists = player_info['Assists']
    blocks = player_info['Blocks']
    steals = player_info['Steals']

    stats = [points, rebounds, assists, blocks, steals]

    cnt = 0
    for stat in stats:
        if int(stat) >= 10:
            cnt += 1

    if cnt >= 3:
        return 'Triple-double'
    elif cnt == 2:
        return 'Double-double'
    else: 
        return None

### Testing the function

In [32]:
# This player, in this game, should get a double-double (assists and points)
df[(df['Assists'] > 10) & (df['Points'] > 10)].head(1)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings
124,22020,1628389,Bam Adebayo,22000986,2021-05-04,MIA vs. DAL,MIA,DAL,1,0,...,6,3,6,11,2,0,3,1,11,27.417


In [20]:
test = df[(df['Assists'] > 10) & (df['Points'] > 10)].head(1)
get_double_and_triple_doubles(test)

'Double-double'

In [14]:
# This player, in this game, should get a triple-double (assists, points and rebounds)
df[(df['Assists'] > 10) & (df['Points'] > 10) & (df['Offensive Rebounds'] + df['Defensive Rebounds']> 10)].head(1)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Free Throws Attempt,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings
484,22020,203507,Giannis Antetokounmpo,22000589,2021-03-13,MIL @ WAS,MIL,WAS,0,1,...,8,2,9,11,1,1,4,3,33,50.59


In [21]:
test = df[(df['Assists'] > 10) & (df['Points'] > 10) & (df['Offensive Rebounds'] + df['Defensive Rebounds']> 10)].head(1)
get_double_and_triple_doubles(test)

'Triple-double'

In [22]:
# This player, in this game, should get None
df.iloc[2]

Season ID                                    22020
Player ID                                  1630173
Player Name                       Precious Achiuwa
Game ID                                 0022001050
Game Date                      2021-05-13 00:00:00
Matchup                                MIA vs. PHI
Player Team                                    MIA
Opponent                                       PHI
Home                                             1
Won                                              1
Minutes played                                   2
Field Goals Made                                 0
Field Goals Attempted                            0
Field Goals 3 Points Made                        0
Field Goal 3 Points Attempt                      0
Free Throws Made                                 0
Free Throws Attempt                              0
Offensive Rebounds                               0
Defensive Rebounds                               0
Assists                        

In [24]:
test = df.iloc[2]
print(get_double_and_triple_doubles(test))

None


### Apply the function

In [37]:
df = df.reset_index(drop=True)

results = []

for index, row in df.iterrows():
    results.append(get_double_and_triple_doubles(row))

df['Double/Triple double'] = results

### Check the results

In [39]:
# This player, in this game, should get a double-double (assists and points)
df[(df['Assists'] > 10) & (df['Points'] > 10)].head(1)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings,Double/Triple double
124,22020,1628389,Bam Adebayo,22000986,2021-05-04,MIA vs. DAL,MIA,DAL,1,0,...,3,6,11,2,0,3,1,11,27.417,Double-double


In [40]:
# This player, in this game, should get a triple-double (assists, points and rebounds)
df[(df['Assists'] > 10) & (df['Points'] > 10) & (df['Offensive Rebounds'] + df['Defensive Rebounds']> 10)].head(1)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Offensive Rebounds,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings,Double/Triple double
484,22020,203507,Giannis Antetokounmpo,22000589,2021-03-13,MIL @ WAS,MIL,WAS,0,1,...,2,9,11,1,1,4,3,33,50.59,Triple-double


In [41]:
# This player, in this game, should get None
df.iloc[2]

Season ID                                    22020
Player ID                                  1630173
Player Name                       Precious Achiuwa
Game ID                                 0022001050
Game Date                      2021-05-13 00:00:00
Matchup                                MIA vs. PHI
Player Team                                    MIA
Opponent                                       PHI
Home                                             1
Won                                              1
Minutes played                                   2
Field Goals Made                                 0
Field Goals Attempted                            0
Field Goals 3 Points Made                        0
Field Goal 3 Points Attempt                      0
Free Throws Made                                 0
Free Throws Attempt                              0
Offensive Rebounds                               0
Defensive Rebounds                               0
Assists                        

### Create the double-double and triple-double features

In [46]:
df['Double-double'] = df['Double/Triple double'].apply(lambda x: 1 if x == 'Double-double' else 0)
df['Triple-double'] = df['Double/Triple double'].apply(lambda x: 1 if x == 'Triple-double' else 0)

df.head(3)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings,Double/Triple double,Double-double,Triple-double
0,22020,1630173,Precious Achiuwa,22001069,2021-05-16,MIA @ DET,MIA,DET,0,1,...,0,2,1,2,3,23,23.112,Double-double,1,0
1,22020,1630173,Precious Achiuwa,22001062,2021-05-15,MIA @ MIL,MIA,MIL,0,0,...,1,0,0,0,0,2,60.293,,0,0
2,22020,1630173,Precious Achiuwa,22001050,2021-05-13,MIA vs. PHI,MIA,PHI,1,1,...,0,0,0,0,0,0,0.0,,0,0


In [48]:
# This player, in this game, should get a double-double (assists and points)
df[(df['Assists'] > 10) & (df['Points'] > 10)].head(1)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings,Double/Triple double,Double-double,Triple-double
124,22020,1628389,Bam Adebayo,22000986,2021-05-04,MIA vs. DAL,MIA,DAL,1,0,...,11,2,0,3,1,11,27.417,Double-double,1,0


In [49]:
# This player, in this game, should get a triple-double (assists, points and rebounds)
df[(df['Assists'] > 10) & (df['Points'] > 10) & (df['Offensive Rebounds'] + df['Defensive Rebounds']> 10)].head(1)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings,Double/Triple double,Double-double,Triple-double
484,22020,203507,Giannis Antetokounmpo,22000589,2021-03-13,MIL @ WAS,MIL,WAS,0,1,...,11,1,1,4,3,33,50.59,Triple-double,0,1


In [50]:
# This player, in this game, should get None
df.iloc[2]

Season ID                                    22020
Player ID                                  1630173
Player Name                       Precious Achiuwa
Game ID                                 0022001050
Game Date                      2021-05-13 00:00:00
Matchup                                MIA vs. PHI
Player Team                                    MIA
Opponent                                       PHI
Home                                             1
Won                                              1
Minutes played                                   2
Field Goals Made                                 0
Field Goals Attempted                            0
Field Goals 3 Points Made                        0
Field Goal 3 Points Attempt                      0
Free Throws Made                                 0
Free Throws Attempt                              0
Offensive Rebounds                               0
Defensive Rebounds                               0
Assists                        

In [53]:
df.drop(['Double/Triple double'], axis=1, inplace=True)
df.head(2)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Defensive Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings,Double-double,Triple-double
0,22020,1630173,Precious Achiuwa,22001069,2021-05-16,MIA @ DET,MIA,DET,0,1,...,7,0,2,1,2,3,23,23.112,1,0
1,22020,1630173,Precious Achiuwa,22001062,2021-05-15,MIA @ MIL,MIA,MIL,0,0,...,0,1,0,0,0,0,2,60.293,0,0


## Include DraftKings score (model's output)

In [57]:
def get_draftkings_score(player_info):
    points = player_info['Points']
    three_points = player_info['Field Goals 3 Points Made']
    rebounds = player_info['Offensive Rebounds'] + player_info['Defensive Rebounds']
    assists = player_info['Assists']
    steals = player_info['Steals']
    blocks = player_info['Blocks']
    turnovers = player_info['Turnovers']
    double_double = player_info['Double-double']
    triple_double = player_info['Triple-double']

    return points*1 + three_points*0.5 + rebounds*1.25 + assists*1.5 * steals*2 + blocks*2 + turnovers*(-0.5) + double_double*1.5 + triple_double*3

In [59]:
df = df.reset_index(drop=True)

results = []

for index, row in df.iterrows():
    results.append(get_draftkings_score(row))

df['DraftKings score'] = results
df.head(3)

Unnamed: 0,Season ID,Player ID,Player Name,Game ID,Game Date,Matchup,Player Team,Opponent,Home,Won,...,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Player Efficiency Ratings,Double-double,Triple-double,DraftKings score
0,22020,1630173,Precious Achiuwa,22001069,2021-05-16,MIA @ DET,MIA,DET,0,1,...,0,2,1,2,3,23,23.112,1,0,38.0
1,22020,1630173,Precious Achiuwa,22001062,2021-05-15,MIA @ MIL,MIA,MIL,0,0,...,1,0,0,0,0,2,60.293,0,0,2.0
2,22020,1630173,Precious Achiuwa,22001050,2021-05-13,MIA vs. PHI,MIA,PHI,1,1,...,0,0,0,0,0,0,0.0,0,0,0.0


## Prepare the 'mean' dataset

In [61]:
df.columns

Index(['Season ID', 'Player ID', 'Player Name', 'Game ID', 'Game Date',
       'Matchup', 'Player Team', 'Opponent', 'Home', 'Won', 'Minutes played',
       'Field Goals Made', 'Field Goals Attempted',
       'Field Goals 3 Points Made', 'Field Goal 3 Points Attempt',
       'Free Throws Made', 'Free Throws Attempt', 'Offensive Rebounds',
       'Defensive Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers',
       'Personal Fouls', 'Points', 'Player Efficiency Ratings',
       'Double-double', 'Triple-double', 'DraftKings score'],
      dtype='object')

In [60]:
def get_mean_player_stats(df, player_id, ref_date, n_days, opponent='Any'):
    """
    df: Pandas DataFrame with the data
    ref_date: String with the reference date (YYYY-MM-DD)
    n_days: Number of records to be used in the mean
    opponent: String of the opponenet name, with 3 leters
    """
    columns_to_get_mean = ['Minutes played',
       'Field Goals Made', 'Field Goals Attempted',
       'Field Goals 3 Points Made', 'Field Goal 3 Points Attempt',
       'Free Throws Made', 'Free Throws Attempt', 'Offensive Rebounds',
       'Defensive Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers',
       'Personal Fouls', 'Points', 'Player Efficiency Ratings',
       'Double-double', 'Triple-double']
    
    opponent = opponent.upper()
    ref_date = pd.to_datetime(ref_date)
    
    if opponent == 'ANY':
        result = df[(df['Player ID'] == player_id) & (df['Game Date'] < ref_date)].sort_values('Game Date', ascending=False).loc[:][0:n_days]
        if result.shape[0] == 0:
            print("The search did not retrieve any result")
            return None
    else:
        result = df[(df['Player ID'] == player_id) & (df['Game Date'] < ref_date) & (df['Opponent'] == opponent)].sort_values('Game Date', ascending=False).loc[:][0:n_days]
        if result.shape[0] == 0:
            print("The search did not retrieve any result")
            return None
    return result[columns_to_get_mean].mean()

## Prepare PCA dataset

### PCA from single values

### PCA from mean values