In [249]:
# Importing the necessary packages
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

In [250]:
# Read the two raw per-game CSV files; row 0 of each file supplies the column names
home = pd.read_csv(filepath_or_buffer='csv_files/home_teams.csv', header=0)
away = pd.read_csv(filepath_or_buffer='csv_files/away_teams.csv', header=0)

In [251]:
# Remove the 'Unnamed: 0' column from the home frame
home = home.drop(columns=['Unnamed: 0'])

In [252]:
home.columns

Index(['TEAM', 'DATE', 'MATCHUP', 'W/L', 'MIN', 'PTS', 'FGM', 'FGA', 'FGPerc',
       '3PM', '3PA', 'ThreePPerc', 'FTM', 'FTA', 'FTPerc', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', '+/', 'EFGPerc', 'FTR',
       'TOVPerc'],
      dtype='object')

In [253]:
# Prefix the listed home columns with 'H_'; columns not listed here keep their names
home.rename(
    columns={
        col: 'H_' + col
        for col in ['TEAM', 'PTS', 'FGM', 'FGA', 'FGPerc', '3PM', '3PA', 'ThreePPerc',
                    'FTM', 'FTA', 'FTPerc', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK',
                    'TOV', 'PF', '+/', 'EFGPerc', 'FTR', 'TOVPerc']
    },
    inplace=True,
)

In [254]:
# Remove the 'Unnamed: 0' column from the away frame
away = away.drop(columns=['Unnamed: 0'])

In [255]:
# Prefix the listed away columns with 'A_'; columns not listed here keep their names
away.rename(
    columns={
        col: 'A_' + col
        for col in ['TEAM', 'W/L', 'MIN', 'PTS', 'FGM', 'FGA', 'FGPerc', '3PM', '3PA',
                    'ThreePPerc', 'FTM', 'FTA', 'FTPerc', 'OREB', 'DREB', 'REB', 'AST',
                    'STL', 'BLK', 'TOV', 'PF', '+/', 'EFGPerc', 'FTR', 'TOVPerc']
    },
    inplace=True,
)

In [256]:
away.columns

Index(['A_TEAM', 'DATE', 'MATCHUP', 'A_W/L', 'A_MIN', 'A_PTS', 'A_FGM',
       'A_FGA', 'A_FGPerc', 'A_3PM', 'A_3PA', 'A_ThreePPerc', 'A_FTM', 'A_FTA',
       'A_FTPerc', 'A_OREB', 'A_DREB', 'A_REB', 'A_AST', 'A_STL', 'A_BLK',
       'A_TOV', 'A_PF', 'A_+/', 'A_EFGPerc', 'A_FTR', 'A_TOVPerc'],
      dtype='object')

In [257]:
home.count()

H_TEAM          11348
DATE            11348
MATCHUP         11348
W/L             11348
MIN             11348
H_PTS           11348
H_FGM           11348
H_FGA           11348
H_FGPerc        11348
H_3PM           11348
H_3PA           11348
H_ThreePPerc    11348
H_FTM           11348
H_FTA           11348
H_FTPerc        11348
H_OREB          11348
H_DREB          11348
H_REB           11348
H_AST           11348
H_STL           11348
H_BLK           11348
H_TOV           11348
H_PF            11348
H_+/            11348
H_EFGPerc       11348
H_FTR           11348
H_TOVPerc       11348
dtype: int64

In [258]:
# Join key for the home frame: the last three characters of MATCHUP
# (e.g. 'GSW vs. TOR' -> 'TOR')
home['Away_Team'] = home['MATCHUP'].str.slice(start=-3)

In [259]:
# Matching join key for the away frame: the first three characters of MATCHUP
away['Away_Team'] = away['MATCHUP'].str.slice(stop=3)

In [260]:
away.count()

A_TEAM          11347
DATE            11347
MATCHUP         11347
A_W/L           11347
A_MIN           11347
A_PTS           11347
A_FGM           11347
A_FGA           11347
A_FGPerc        11347
A_3PM           11347
A_3PA           11347
A_ThreePPerc    11347
A_FTM           11347
A_FTA           11347
A_FTPerc        11347
A_OREB          11347
A_DREB          11347
A_REB           11347
A_AST           11347
A_STL           11347
A_BLK           11347
A_TOV           11347
A_PF            11347
A_+/            11347
A_EFGPerc       11347
A_FTR           11347
A_TOVPerc       11347
Away_Team       11347
dtype: int64

In [261]:
home.count()

H_TEAM          11348
DATE            11348
MATCHUP         11348
W/L             11348
MIN             11348
H_PTS           11348
H_FGM           11348
H_FGA           11348
H_FGPerc        11348
H_3PM           11348
H_3PA           11348
H_ThreePPerc    11348
H_FTM           11348
H_FTA           11348
H_FTPerc        11348
H_OREB          11348
H_DREB          11348
H_REB           11348
H_AST           11348
H_STL           11348
H_BLK           11348
H_TOV           11348
H_PF            11348
H_+/            11348
H_EFGPerc       11348
H_FTR           11348
H_TOVPerc       11348
Away_Team       11348
dtype: int64

In [262]:
# Inner join: pair each home row with the away row that has the same DATE and Away_Team.
# NOTE(review): the merged frame has 14209 rows although the inputs have 11348 / 11347,
# so some (DATE, Away_Team) keys match more than once - confirm whether the inputs
# contain repeated rows.
new_df = home.merge(
    away,
    how='inner',
    left_on=[home['DATE'], home['Away_Team']],
    right_on=[away['DATE'], away['Away_Team']],
)

In [263]:
new_df.columns

Index(['key_0', 'key_1', 'H_TEAM', 'DATE_x', 'MATCHUP_x', 'W/L', 'MIN',
       'H_PTS', 'H_FGM', 'H_FGA', 'H_FGPerc', 'H_3PM', 'H_3PA', 'H_ThreePPerc',
       'H_FTM', 'H_FTA', 'H_FTPerc', 'H_OREB', 'H_DREB', 'H_REB', 'H_AST',
       'H_STL', 'H_BLK', 'H_TOV', 'H_PF', 'H_+/', 'H_EFGPerc', 'H_FTR',
       'H_TOVPerc', 'Away_Team_x', 'A_TEAM', 'DATE_y', 'MATCHUP_y', 'A_W/L',
       'A_MIN', 'A_PTS', 'A_FGM', 'A_FGA', 'A_FGPerc', 'A_3PM', 'A_3PA',
       'A_ThreePPerc', 'A_FTM', 'A_FTA', 'A_FTPerc', 'A_OREB', 'A_DREB',
       'A_REB', 'A_AST', 'A_STL', 'A_BLK', 'A_TOV', 'A_PF', 'A_+/',
       'A_EFGPerc', 'A_FTR', 'A_TOVPerc', 'Away_Team_y'],
      dtype='object')

In [264]:
new_df

Unnamed: 0,key_0,key_1,H_TEAM,DATE_x,MATCHUP_x,W/L,MIN,H_PTS,H_FGM,H_FGA,...,A_AST,A_STL,A_BLK,A_TOV,A_PF,A_+/,A_EFGPerc,A_FTR,A_TOVPerc,Away_Team_y
0,6/5/19,TOR,GSW,6/5/19,GSW vs. TOR,L,241,109,36,91,...,30,9,10,14,22,14.0,0.628049,0.256098,13.302927,TOR
1,6/2/19,GSW,TOR,6/2/19,TOR vs. GSW,L,240,104,35,94,...,34,7,5,15,26,5.0,0.542683,0.280488,14.002987,GSW
2,5/30/19,GSW,TOR,5/30/19,TOR vs. GSW,W,241,118,39,77,...,29,6,2,16,27,-9.0,0.512821,0.397436,14.864363,GSW
3,5/25/19,MIL,TOR,5/25/19,TOR vs. MIL,W,239,100,36,74,...,19,7,7,11,22,-6.0,0.480000,0.413333,11.039743,MIL
4,5/23/19,TOR,MIL,5/23/19,MIL vs. TOR,L,240,99,38,84,...,19,6,4,6,16,6.0,0.476190,0.369048,5.789271,TOR
5,5/21/19,MIL,TOR,5/21/19,TOR vs. MIL,W,239,120,41,87,...,30,8,7,12,22,-18.0,0.531250,0.325000,11.600928,MIL
6,5/20/19,GSW,POR,5/20/19,POR vs. GSW,L,265,117,47,98,...,30,10,5,15,17,2.0,0.525253,0.191919,12.258908,GSW
7,5/19/19,MIL,TOR,5/19/19,TOR vs. MIL,W,290,118,40,102,...,22,14,5,20,30,-6.0,0.441176,0.323529,14.649868,MIL
8,5/18/19,GSW,POR,5/18/19,POR vs. GSW,L,239,99,34,85,...,27,9,5,13,24,11.0,0.535714,0.273810,12.135922,GSW
9,5/17/19,TOR,MIL,5/17/19,MIL vs. TOR,W,240,125,43,92,...,19,3,6,13,30,-22.0,0.482759,0.287356,11.711712,TOR


In [265]:
new_df.count()

key_0           14209
key_1           14209
H_TEAM          14209
DATE_x          14209
MATCHUP_x       14209
W/L             14209
MIN             14209
H_PTS           14209
H_FGM           14209
H_FGA           14209
H_FGPerc        14209
H_3PM           14209
H_3PA           14209
H_ThreePPerc    14209
H_FTM           14209
H_FTA           14209
H_FTPerc        14209
H_OREB          14209
H_DREB          14209
H_REB           14209
H_AST           14209
H_STL           14209
H_BLK           14209
H_TOV           14209
H_PF            14209
H_+/            14209
H_EFGPerc       14209
H_FTR           14209
H_TOVPerc       14209
Away_Team_x     14209
A_TEAM          14209
DATE_y          14209
MATCHUP_y       14209
A_W/L           14209
A_MIN           14209
A_PTS           14209
A_FGM           14209
A_FGA           14209
A_FGPerc        14209
A_3PM           14209
A_3PA           14209
A_ThreePPerc    14209
A_FTM           14209
A_FTA           14209
A_FTPerc        14209
A_OREB    

In [266]:
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 14209 entries, 0 to 14208
Data columns (total 58 columns):
key_0           14209 non-null object
key_1           14209 non-null object
H_TEAM          14209 non-null object
DATE_x          14209 non-null object
MATCHUP_x       14209 non-null object
W/L             14209 non-null object
MIN             14209 non-null int64
H_PTS           14209 non-null int64
H_FGM           14209 non-null int64
H_FGA           14209 non-null int64
H_FGPerc        14209 non-null float64
H_3PM           14209 non-null int64
H_3PA           14209 non-null int64
H_ThreePPerc    14209 non-null float64
H_FTM           14209 non-null int64
H_FTA           14209 non-null int64
H_FTPerc        14209 non-null float64
H_OREB          14209 non-null int64
H_DREB          14209 non-null int64
H_REB           14209 non-null int64
H_AST           14209 non-null int64
H_STL           14209 non-null int64
H_BLK           14209 non-null int64
H_TOV           14209 non-nu

In [267]:
# Discard rows that are identical in every column, keeping the first occurrence of each
new_df = new_df.drop_duplicates(keep='first')

In [268]:
new_df

Unnamed: 0,key_0,key_1,H_TEAM,DATE_x,MATCHUP_x,W/L,MIN,H_PTS,H_FGM,H_FGA,...,A_AST,A_STL,A_BLK,A_TOV,A_PF,A_+/,A_EFGPerc,A_FTR,A_TOVPerc,Away_Team_y
0,6/5/19,TOR,GSW,6/5/19,GSW vs. TOR,L,241,109,36,91,...,30,9,10,14,22,14.0,0.628049,0.256098,13.302927,TOR
1,6/2/19,GSW,TOR,6/2/19,TOR vs. GSW,L,240,104,35,94,...,34,7,5,15,26,5.0,0.542683,0.280488,14.002987,GSW
2,5/30/19,GSW,TOR,5/30/19,TOR vs. GSW,W,241,118,39,77,...,29,6,2,16,27,-9.0,0.512821,0.397436,14.864363,GSW
3,5/25/19,MIL,TOR,5/25/19,TOR vs. MIL,W,239,100,36,74,...,19,7,7,11,22,-6.0,0.480000,0.413333,11.039743,MIL
4,5/23/19,TOR,MIL,5/23/19,MIL vs. TOR,L,240,99,38,84,...,19,6,4,6,16,6.0,0.476190,0.369048,5.789271,TOR
5,5/21/19,MIL,TOR,5/21/19,TOR vs. MIL,W,239,120,41,87,...,30,8,7,12,22,-18.0,0.531250,0.325000,11.600928,MIL
6,5/20/19,GSW,POR,5/20/19,POR vs. GSW,L,265,117,47,98,...,30,10,5,15,17,2.0,0.525253,0.191919,12.258908,GSW
7,5/19/19,MIL,TOR,5/19/19,TOR vs. MIL,W,290,118,40,102,...,22,14,5,20,30,-6.0,0.441176,0.323529,14.649868,MIL
8,5/18/19,GSW,POR,5/18/19,POR vs. GSW,L,239,99,34,85,...,27,9,5,13,24,11.0,0.535714,0.273810,12.135922,GSW
9,5/17/19,TOR,MIL,5/17/19,MIL vs. TOR,W,240,125,43,92,...,19,3,6,13,30,-22.0,0.482759,0.287356,11.711712,TOR


In [269]:
# Save the merged home/away frame to CSV (the row index is written as well, by default)
new_df.to_csv(path_or_buf='csv_files/Home_Away_Combined.csv')

In [270]:
new_df.columns

Index(['key_0', 'key_1', 'H_TEAM', 'DATE_x', 'MATCHUP_x', 'W/L', 'MIN',
       'H_PTS', 'H_FGM', 'H_FGA', 'H_FGPerc', 'H_3PM', 'H_3PA', 'H_ThreePPerc',
       'H_FTM', 'H_FTA', 'H_FTPerc', 'H_OREB', 'H_DREB', 'H_REB', 'H_AST',
       'H_STL', 'H_BLK', 'H_TOV', 'H_PF', 'H_+/', 'H_EFGPerc', 'H_FTR',
       'H_TOVPerc', 'Away_Team_x', 'A_TEAM', 'DATE_y', 'MATCHUP_y', 'A_W/L',
       'A_MIN', 'A_PTS', 'A_FGM', 'A_FGA', 'A_FGPerc', 'A_3PM', 'A_3PA',
       'A_ThreePPerc', 'A_FTM', 'A_FTA', 'A_FTPerc', 'A_OREB', 'A_DREB',
       'A_REB', 'A_AST', 'A_STL', 'A_BLK', 'A_TOV', 'A_PF', 'A_+/',
       'A_EFGPerc', 'A_FTR', 'A_TOVPerc', 'Away_Team_y'],
      dtype='object')

In [271]:
# Remove the merge-key helper columns and the away-side DATE / MATCHUP / MIN columns
new_df = new_df.drop(
    columns=['key_0', 'key_1', 'Away_Team_x', 'DATE_y', 'MATCHUP_y', 'A_MIN', 'Away_Team_y']
)

In [272]:
new_df.columns

Index(['H_TEAM', 'DATE_x', 'MATCHUP_x', 'W/L', 'MIN', 'H_PTS', 'H_FGM',
       'H_FGA', 'H_FGPerc', 'H_3PM', 'H_3PA', 'H_ThreePPerc', 'H_FTM', 'H_FTA',
       'H_FTPerc', 'H_OREB', 'H_DREB', 'H_REB', 'H_AST', 'H_STL', 'H_BLK',
       'H_TOV', 'H_PF', 'H_+/', 'H_EFGPerc', 'H_FTR', 'H_TOVPerc', 'A_TEAM',
       'A_W/L', 'A_PTS', 'A_FGM', 'A_FGA', 'A_FGPerc', 'A_3PM', 'A_3PA',
       'A_ThreePPerc', 'A_FTM', 'A_FTA', 'A_FTPerc', 'A_OREB', 'A_DREB',
       'A_REB', 'A_AST', 'A_STL', 'A_BLK', 'A_TOV', 'A_PF', 'A_+/',
       'A_EFGPerc', 'A_FTR', 'A_TOVPerc'],
      dtype='object')

In [273]:
# Strip the merge suffix from DATE / MATCHUP and give the home result column its 'H_' prefix
new_df.rename(
    columns={
        'DATE_x': 'DATE',
        'MATCHUP_x': 'MATCHUP',
        'W/L': 'H_W/L',
    },
    inplace=True,
)

In [274]:
new_df['H_TEAM'].value_counts()

GSW    372
CLE    357
SAS    353
TOR    352
HOU    351
MIA    349
OKC    343
LAC    343
BOS    342
LAL    337
CHI    336
POR    333
MEM    332
IND    332
ATL    328
WAS    327
MIL    325
PHI    322
BKN    322
UTA    320
DAL    318
ORL    316
CHA    315
DET    314
MIN    310
DEN    310
NYK    309
PHX    308
SAC    305
NOP    267
NOH     44
WST      5
USA      4
WLD      2
FBU      2
FCB      2
EST      2
CAN      2
RMD      2
MLN      1
FLA      1
EAM      1
HME      1
UBB      1
WEB      1
CHK      1
ALB      1
STP      1
Name: H_TEAM, dtype: int64

In [275]:
# Home-team codes whose games are to be removed from the dataset
H_Team_to_drop = [
    'WST', 'USA', 'RMD', 'FBU', 'EST', 'FCB',
    'WLD', 'CAN', 'UBB', 'WEB', 'ALB', 'CHK',
    'HME', 'MLN', 'EAM', 'FLA', 'STP',
]

In [276]:
# Keep only the rows (games) whose home team is not in H_Team_to_drop
df = new_df.loc[~new_df['H_TEAM'].isin(H_Team_to_drop)]

In [277]:
df['H_TEAM'].value_counts()

GSW    372
CLE    357
SAS    353
TOR    352
HOU    351
MIA    349
LAC    343
OKC    343
BOS    342
LAL    337
CHI    336
POR    333
IND    332
MEM    332
ATL    328
WAS    327
MIL    325
BKN    322
PHI    322
UTA    320
DAL    318
ORL    316
CHA    315
DET    314
MIN    310
DEN    310
NYK    309
PHX    308
SAC    305
NOP    267
NOH     44
Name: H_TEAM, dtype: int64

In [278]:
df['A_TEAM'].value_counts()

GSW    369
SAS    350
IND    344
HOU    343
CLE    342
TOR    340
MIA    339
POR    338
ATL    336
BOS    335
WAS    334
OKC    333
MEM    331
DEN    331
LAC    329
MIL    328
UTA    328
CHI    322
BKN    321
DAL    319
CHA    319
NYK    319
PHI    318
DET    315
SAC    314
ORL    312
MIN    312
PHX    309
LAL    293
NOP    275
NOH     46
MAC     17
FLA      4
MEL      3
SDS      3
PER      2
MOS      2
SYD      2
MTA      2
FEN      2
BAU      2
GUA      2
BJD      1
BNE      1
MPS      1
RMD      1
NZB      1
SLA      1
ADL      1
Name: A_TEAM, dtype: int64

In [279]:
# Away-team codes whose games are to be removed from the dataset
A_Team_to_drop = [
    'MAC', 'FLA', 'SDS', 'MEL', 'MTA', 'MOS',
    'BAU', 'GUA', 'SYD', 'FEN', 'PER', 'SLA',
    'MPS', 'ADL', 'RMD', 'BNE', 'NZB', 'BJD',
]

In [280]:
# Keep only the rows (games) whose away team is not in A_Team_to_drop.
# .copy() makes df an independent frame, so the column assignments in later cells
# ('Target_L', 'Target') write to df itself rather than to a filtered slice, which is
# what triggers pandas' SettingWithCopyWarning.
df = df[~df['A_TEAM'].isin(A_Team_to_drop)].copy()

In [281]:
df

Unnamed: 0,H_TEAM,DATE,MATCHUP,H_W/L,MIN,H_PTS,H_FGM,H_FGA,H_FGPerc,H_3PM,...,A_REB,A_AST,A_STL,A_BLK,A_TOV,A_PF,A_+/,A_EFGPerc,A_FTR,A_TOVPerc
0,GSW,6/5/19,GSW vs. TOR,L,241,109,36,91,39.6,12,...,40,30,9,10,14,22,14.0,0.628049,0.256098,13.302927
1,TOR,6/2/19,TOR vs. GSW,L,240,104,35,94,37.2,11,...,42,34,7,5,15,26,5.0,0.542683,0.280488,14.002987
2,TOR,5/30/19,TOR vs. GSW,W,241,118,39,77,50.6,13,...,38,29,6,2,16,27,-9.0,0.512821,0.397436,14.864363
3,TOR,5/25/19,TOR vs. MIL,W,239,100,36,74,48.6,12,...,42,19,7,7,11,22,-6.0,0.480000,0.413333,11.039743
4,MIL,5/23/19,MIL vs. TOR,L,240,99,38,84,45.2,10,...,45,19,6,4,6,16,6.0,0.476190,0.369048,5.789271
5,TOR,5/21/19,TOR vs. MIL,W,239,120,41,87,47.1,14,...,40,30,8,7,12,22,-18.0,0.531250,0.325000,11.600928
6,POR,5/20/19,POR vs. GSW,L,265,117,47,98,48.0,16,...,56,30,10,5,15,17,2.0,0.525253,0.191919,12.258908
7,TOR,5/19/19,TOR vs. MIL,W,290,118,40,102,39.2,17,...,63,22,14,5,20,30,-6.0,0.441176,0.323529,14.649868
8,POR,5/18/19,POR vs. GSW,L,239,99,34,85,40.0,11,...,49,27,9,5,13,24,11.0,0.535714,0.273810,12.135922
9,MIL,5/17/19,MIL vs. TOR,W,240,125,43,92,46.7,13,...,40,19,3,6,13,30,-22.0,0.482759,0.287356,11.711712


In [282]:
df.columns

Index(['H_TEAM', 'DATE', 'MATCHUP', 'H_W/L', 'MIN', 'H_PTS', 'H_FGM', 'H_FGA',
       'H_FGPerc', 'H_3PM', 'H_3PA', 'H_ThreePPerc', 'H_FTM', 'H_FTA',
       'H_FTPerc', 'H_OREB', 'H_DREB', 'H_REB', 'H_AST', 'H_STL', 'H_BLK',
       'H_TOV', 'H_PF', 'H_+/', 'H_EFGPerc', 'H_FTR', 'H_TOVPerc', 'A_TEAM',
       'A_W/L', 'A_PTS', 'A_FGM', 'A_FGA', 'A_FGPerc', 'A_3PM', 'A_3PA',
       'A_ThreePPerc', 'A_FTM', 'A_FTA', 'A_FTPerc', 'A_OREB', 'A_DREB',
       'A_REB', 'A_AST', 'A_STL', 'A_BLK', 'A_TOV', 'A_PF', 'A_+/',
       'A_EFGPerc', 'A_FTR', 'A_TOVPerc'],
      dtype='object')

In [283]:
#df['Target'] = df['H_PTS'] - df['A_PTS']

In [284]:
# Take the first character of the home result column (cast to str) as the raw target letter
df['Target_L'] = df['H_W/L'].astype(str).str.get(0)

In [285]:
# Helper that turns a result letter into the binary target
def target_binning(Target_L):
    """Return 0 when ``Target_L`` stringifies to 'L' (loss), otherwise 1.

    Any value whose string form is not exactly 'L' is mapped to 1.
    """
    is_loss = str(Target_L) == 'L'
    return 0 if is_loss else 1


In [286]:
# Convert every result letter to 0 / 1 via target_binning
df['Target'] = df['Target_L'].map(target_binning)

In [287]:
# The intermediate letter column is no longer needed once Target exists
df = df.drop(columns=['Target_L'])

In [288]:
df.columns

Index(['H_TEAM', 'DATE', 'MATCHUP', 'H_W/L', 'MIN', 'H_PTS', 'H_FGM', 'H_FGA',
       'H_FGPerc', 'H_3PM', 'H_3PA', 'H_ThreePPerc', 'H_FTM', 'H_FTA',
       'H_FTPerc', 'H_OREB', 'H_DREB', 'H_REB', 'H_AST', 'H_STL', 'H_BLK',
       'H_TOV', 'H_PF', 'H_+/', 'H_EFGPerc', 'H_FTR', 'H_TOVPerc', 'A_TEAM',
       'A_W/L', 'A_PTS', 'A_FGM', 'A_FGA', 'A_FGPerc', 'A_3PM', 'A_3PA',
       'A_ThreePPerc', 'A_FTM', 'A_FTA', 'A_FTPerc', 'A_OREB', 'A_DREB',
       'A_REB', 'A_AST', 'A_STL', 'A_BLK', 'A_TOV', 'A_PF', 'A_+/',
       'A_EFGPerc', 'A_FTR', 'A_TOVPerc', 'Target'],
      dtype='object')

In [289]:
# Remove the win/loss, points and plus/minus columns for both sides
# (Target was derived from H_W/L in the cells above)
df = df.drop(columns=['H_W/L', 'H_PTS', 'H_+/', 'A_W/L', 'A_PTS', 'A_+/'])

In [290]:
df.columns

Index(['H_TEAM', 'DATE', 'MATCHUP', 'MIN', 'H_FGM', 'H_FGA', 'H_FGPerc',
       'H_3PM', 'H_3PA', 'H_ThreePPerc', 'H_FTM', 'H_FTA', 'H_FTPerc',
       'H_OREB', 'H_DREB', 'H_REB', 'H_AST', 'H_STL', 'H_BLK', 'H_TOV', 'H_PF',
       'H_EFGPerc', 'H_FTR', 'H_TOVPerc', 'A_TEAM', 'A_FGM', 'A_FGA',
       'A_FGPerc', 'A_3PM', 'A_3PA', 'A_ThreePPerc', 'A_FTM', 'A_FTA',
       'A_FTPerc', 'A_OREB', 'A_DREB', 'A_REB', 'A_AST', 'A_STL', 'A_BLK',
       'A_TOV', 'A_PF', 'A_EFGPerc', 'A_FTR', 'A_TOVPerc', 'Target'],
      dtype='object')

In [291]:
# Desired column order: game identifiers, then the home stats, then the same stats
# for the away side, and finally the target
columns = (
    ['DATE', 'MATCHUP', 'MIN', 'H_TEAM', 'A_TEAM']
    + [side + stat
       for side in ('H_', 'A_')
       for stat in ('FGM', 'FGA', 'FGPerc', '3PM', '3PA', 'ThreePPerc', 'FTM', 'FTA',
                    'FTPerc', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
                    'EFGPerc', 'FTR', 'TOVPerc')]
    + ['Target']
)

In [292]:
df.head()

Unnamed: 0,H_TEAM,DATE,MATCHUP,MIN,H_FGM,H_FGA,H_FGPerc,H_3PM,H_3PA,H_ThreePPerc,...,A_REB,A_AST,A_STL,A_BLK,A_TOV,A_PF,A_EFGPerc,A_FTR,A_TOVPerc,Target
0,GSW,6/5/19,GSW vs. TOR,241,36,91,39.6,12,36,33.3,...,40,30,9,10,14,22,0.628049,0.256098,13.302927,0
1,TOR,6/2/19,TOR vs. GSW,240,35,94,37.2,11,38,28.9,...,42,34,7,5,15,26,0.542683,0.280488,14.002987,0
2,TOR,5/30/19,TOR vs. GSW,241,39,77,50.6,13,33,39.4,...,38,29,6,2,16,27,0.512821,0.397436,14.864363,1
3,TOR,5/25/19,TOR vs. MIL,239,36,74,48.6,12,27,44.4,...,42,19,7,7,11,22,0.48,0.413333,11.039743,1
4,MIL,5/23/19,MIL vs. TOR,240,38,84,45.2,10,31,32.3,...,45,19,6,4,6,16,0.47619,0.369048,5.789271,0


In [293]:
# Reorder the frame to the order given by the `columns` list defined in the cell above.
# Plain selection replaces the pd.DataFrame(df, columns=[...]) rebuild for two reasons:
#   * the 46-name literal is no longer duplicated, so the two lists cannot drift apart;
#   * a name that is missing from df now raises KeyError instead of silently producing
#     an all-NaN column.
# .copy() yields an independent frame, so later column assignments do not act on a slice.
df = df[columns].copy()

In [294]:
df.columns

Index(['DATE', 'MATCHUP', 'MIN', 'H_TEAM', 'A_TEAM', 'H_FGM', 'H_FGA',
       'H_FGPerc', 'H_3PM', 'H_3PA', 'H_ThreePPerc', 'H_FTM', 'H_FTA',
       'H_FTPerc', 'H_OREB', 'H_DREB', 'H_REB', 'H_AST', 'H_STL', 'H_BLK',
       'H_TOV', 'H_PF', 'H_EFGPerc', 'H_FTR', 'H_TOVPerc', 'A_FGM', 'A_FGA',
       'A_FGPerc', 'A_3PM', 'A_3PA', 'A_ThreePPerc', 'A_FTM', 'A_FTA',
       'A_FTPerc', 'A_OREB', 'A_DREB', 'A_REB', 'A_AST', 'A_STL', 'A_BLK',
       'A_TOV', 'A_PF', 'A_EFGPerc', 'A_FTR', 'A_TOVPerc', 'Target'],
      dtype='object')

In [295]:
# Divide both free-throw percentage columns by 100.
# NOTE(review): running this cell a second time divides again, and the FGPerc /
# ThreePPerc columns are left on their original scale - confirm that is intended.
df['H_FTPerc'] = df['H_FTPerc'].div(100)
df['A_FTPerc'] = df['A_FTPerc'].div(100)

In [296]:
df.head()

Unnamed: 0,DATE,MATCHUP,MIN,H_TEAM,A_TEAM,H_FGM,H_FGA,H_FGPerc,H_3PM,H_3PA,...,A_REB,A_AST,A_STL,A_BLK,A_TOV,A_PF,A_EFGPerc,A_FTR,A_TOVPerc,Target
0,6/5/19,GSW vs. TOR,241,GSW,TOR,36,91,39.6,12,36,...,40,30,9,10,14,22,0.628049,0.256098,13.302927,0
1,6/2/19,TOR vs. GSW,240,TOR,GSW,35,94,37.2,11,38,...,42,34,7,5,15,26,0.542683,0.280488,14.002987,0
2,5/30/19,TOR vs. GSW,241,TOR,GSW,39,77,50.6,13,33,...,38,29,6,2,16,27,0.512821,0.397436,14.864363,1
3,5/25/19,TOR vs. MIL,239,TOR,MIL,36,74,48.6,12,27,...,42,19,7,7,11,22,0.48,0.413333,11.039743,1
4,5/23/19,MIL vs. TOR,240,MIL,TOR,38,84,45.2,10,31,...,45,19,6,4,6,16,0.47619,0.369048,5.789271,0


In [297]:
df.head()

Unnamed: 0,DATE,MATCHUP,MIN,H_TEAM,A_TEAM,H_FGM,H_FGA,H_FGPerc,H_3PM,H_3PA,...,A_REB,A_AST,A_STL,A_BLK,A_TOV,A_PF,A_EFGPerc,A_FTR,A_TOVPerc,Target
0,6/5/19,GSW vs. TOR,241,GSW,TOR,36,91,39.6,12,36,...,40,30,9,10,14,22,0.628049,0.256098,13.302927,0
1,6/2/19,TOR vs. GSW,240,TOR,GSW,35,94,37.2,11,38,...,42,34,7,5,15,26,0.542683,0.280488,14.002987,0
2,5/30/19,TOR vs. GSW,241,TOR,GSW,39,77,50.6,13,33,...,38,29,6,2,16,27,0.512821,0.397436,14.864363,1
3,5/25/19,TOR vs. MIL,239,TOR,MIL,36,74,48.6,12,27,...,42,19,7,7,11,22,0.48,0.413333,11.039743,1
4,5/23/19,MIL vs. TOR,240,MIL,TOR,38,84,45.2,10,31,...,45,19,6,4,6,16,0.47619,0.369048,5.789271,0


In [298]:
# Save the finished classification dataset to CSV (the row index is written as well, by default)
df.to_csv(path_or_buf='csv_files/FinishedDataset_Classification.csv')