In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, roc_curve


In [67]:
# Load the regular-season games table and drop its 'Unnamed: 0' and 'index' columns.
df = pd.read_csv('df_all_rs.csv')
df = df.drop(columns=['Unnamed: 0', 'index'])
df

Unnamed: 0,GAME_DATE_EST,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,TEAM_ID_home,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,TEAM_ID_away,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,2004-04-14,20301188,1610612746,1610612760,2003,1610612746,87.0,0.423,0.727,0.214,17.0,37.0,1610612760,118.0,0.542,1.000,0.375,32.0,34.0,0
1,2004-04-14,20301184,1610612759,1610612743,2003,1610612759,93.0,0.424,0.679,0.100,15.0,58.0,1610612743,67.0,0.325,0.611,0.222,11.0,47.0,1
2,2004-04-14,20301181,1610612754,1610612741,2003,1610612754,101.0,0.420,0.794,0.316,24.0,58.0,1610612741,96.0,0.420,0.667,0.357,20.0,41.0,1
3,2004-04-14,20301177,1610612764,1610612740,2003,1610612764,78.0,0.375,0.714,0.211,13.0,39.0,1610612740,94.0,0.451,0.600,0.364,24.0,48.0,0
4,2004-04-14,20301179,1610612752,1610612739,2003,1610612752,90.0,0.481,0.714,0.400,13.0,42.0,1610612739,100.0,0.488,0.900,0.364,22.0,40.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2018-10-17,21800011,1610612758,1610612762,2018,1610612758,117.0,0.516,0.667,0.368,17.0,37.0,1610612762,123.0,0.519,0.737,0.481,21.0,44.0,0
19394,2018-10-17,21800012,1610612746,1610612743,2018,1610612746,98.0,0.398,0.833,0.286,21.0,47.0,1610612743,107.0,0.379,0.786,0.333,20.0,56.0,0
19395,2018-10-17,21800013,1610612756,1610612742,2018,1610612756,121.0,0.543,0.875,0.559,35.0,44.0,1610612742,100.0,0.432,0.700,0.303,28.0,38.0,1
19396,2018-10-16,21800001,1610612738,1610612755,2018,1610612738,105.0,0.433,0.714,0.297,21.0,55.0,1610612755,87.0,0.391,0.609,0.192,18.0,47.0,1


In [68]:
# Load the teams table, keeping only the id and the two naming columns.
team_columns = ['TEAM_ID', 'ABBREVIATION', 'NICKNAME']
df_teams = pd.read_csv('teams.csv')[team_columns]
df_teams

Unnamed: 0,TEAM_ID,ABBREVIATION,NICKNAME
0,1610612737,ATL,Hawks
1,1610612738,BOS,Celtics
2,1610612740,NOP,Pelicans
3,1610612741,CHI,Bulls
4,1610612742,DAL,Mavericks
5,1610612743,DEN,Nuggets
6,1610612745,HOU,Rockets
7,1610612746,LAC,Clippers
8,1610612747,LAL,Lakers
9,1610612748,MIA,Heat


In [69]:
# Join the games table with the teams table so that every game carries the
# team names: the first merge looks up the home team, the second the visitor.
# In the second merge TEAM_ID / ABBREVIATION / NICKNAME exist on both sides, so
# pandas suffixes the home-team copies with '_x' and the visitor copies with '_y'.
# validate='many_to_one' makes pandas raise MergeError if df_teams ever holds a
# duplicated TEAM_ID, instead of silently duplicating game rows.
df_merge = pd.merge(left=df, right=df_teams,
                    left_on='HOME_TEAM_ID', right_on='TEAM_ID',
                    validate='many_to_one')
df_merge = pd.merge(left=df_merge, right=df_teams,
                    left_on='VISITOR_TEAM_ID', right_on='TEAM_ID',
                    validate='many_to_one')
df_merge

Unnamed: 0,GAME_DATE_EST,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,TEAM_ID_home,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,...,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,TEAM_ID_x,ABBREVIATION_x,NICKNAME_x,TEAM_ID_y,ABBREVIATION_y,NICKNAME_y
0,2004-04-14,20301188,1610612746,1610612760,2003,1610612746,87.0,0.423,0.727,0.214,...,0.375,32.0,34.0,0,1610612746,LAC,Clippers,1610612760,OKC,Thunder
1,2003-10-31,20300025,1610612746,1610612760,2003,1610612746,105.0,0.404,0.833,0.381,...,0.346,27.0,42.0,0,1610612746,LAC,Clippers,1610612760,OKC,Thunder
2,2005-01-12,20400514,1610612746,1610612760,2004,1610612746,103.0,0.494,0.815,0.429,...,0.200,18.0,44.0,1,1610612746,LAC,Clippers,1610612760,OKC,Thunder
3,2004-11-03,20400015,1610612746,1610612760,2004,1610612746,114.0,0.629,0.677,0.556,...,0.409,17.0,37.0,1,1610612746,LAC,Clippers,1610612760,OKC,Thunder
4,2006-04-16,20501201,1610612746,1610612760,2005,1610612746,98.0,0.455,0.781,0.375,...,0.438,29.0,36.0,0,1610612746,LAC,Clippers,1610612760,OKC,Thunder
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2014-11-24,21400198,1610612766,1610612746,2014,1610612766,92.0,0.413,0.800,0.100,...,0.469,28.0,43.0,0,1610612766,CHA,Hornets,1610612746,LAC,Clippers
19394,2015-12-30,21500474,1610612766,1610612746,2015,1610612766,117.0,0.432,0.889,0.300,...,0.500,27.0,39.0,0,1610612766,CHA,Hornets,1610612746,LAC,Clippers
19395,2017-02-11,21600808,1610612766,1610612746,2016,1610612766,102.0,0.435,0.750,0.351,...,0.412,26.0,50.0,0,1610612766,CHA,Hornets,1610612746,LAC,Clippers
19396,2017-11-18,21700228,1610612766,1610612746,2017,1610612766,102.0,0.391,0.793,0.423,...,0.276,18.0,44.0,1,1610612766,CHA,Hornets,1610612746,LAC,Clippers


In [71]:
# Drop the surplus team-id columns (the two added by the merges plus TEAM_ID_home).
surplus_id_columns = ['TEAM_ID_x', 'TEAM_ID_y', 'TEAM_ID_home']
df_merge = df_merge.drop(columns=surplus_id_columns)
df_merge

Unnamed: 0,GAME_DATE_EST,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,...,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,ABBREVIATION_x,NICKNAME_x,ABBREVIATION_y,NICKNAME_y
0,2004-04-14,20301188,1610612746,1610612760,2003,87.0,0.423,0.727,0.214,17.0,...,0.542,1.000,0.375,32.0,34.0,0,LAC,Clippers,OKC,Thunder
1,2003-10-31,20300025,1610612746,1610612760,2003,105.0,0.404,0.833,0.381,23.0,...,0.535,0.793,0.346,27.0,42.0,0,LAC,Clippers,OKC,Thunder
2,2005-01-12,20400514,1610612746,1610612760,2004,103.0,0.494,0.815,0.429,20.0,...,0.413,0.778,0.200,18.0,44.0,1,LAC,Clippers,OKC,Thunder
3,2004-11-03,20400015,1610612746,1610612760,2004,114.0,0.629,0.677,0.556,33.0,...,0.370,0.750,0.409,17.0,37.0,1,LAC,Clippers,OKC,Thunder
4,2006-04-16,20501201,1610612746,1610612760,2005,98.0,0.455,0.781,0.375,26.0,...,0.541,0.711,0.438,29.0,36.0,0,LAC,Clippers,OKC,Thunder
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2014-11-24,21400198,1610612766,1610612746,2014,92.0,0.413,0.800,0.100,21.0,...,0.511,0.800,0.469,28.0,43.0,0,CHA,Hornets,LAC,Clippers
19394,2015-12-30,21500474,1610612766,1610612746,2015,117.0,0.432,0.889,0.300,20.0,...,0.521,0.805,0.500,27.0,39.0,0,CHA,Hornets,LAC,Clippers
19395,2017-02-11,21600808,1610612766,1610612746,2016,102.0,0.435,0.750,0.351,24.0,...,0.471,0.650,0.412,26.0,50.0,0,CHA,Hornets,LAC,Clippers
19396,2017-11-18,21700228,1610612766,1610612746,2017,102.0,0.391,0.793,0.423,20.0,...,0.398,0.684,0.276,18.0,44.0,1,CHA,Hornets,LAC,Clippers


In [73]:
# Put the columns in the order we want: identifiers and team names first,
# then the home stats, the visitor stats and finally the outcome.
column_order = [
    'GAME_DATE_EST', 'GAME_ID', 'HOME_TEAM_ID', 'VISITOR_TEAM_ID',
    'ABBREVIATION_x', 'NICKNAME_x', 'ABBREVIATION_y', 'NICKNAME_y',
    'SEASON',
    'PTS_home', 'FG_PCT_home', 'FT_PCT_home', 'FG3_PCT_home', 'AST_home', 'REB_home',
    'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away', 'AST_away', 'REB_away',
    'HOME_TEAM_WINS',
]
df_merge = df_merge[column_order]
df_merge

Unnamed: 0,GAME_DATE_EST,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,ABBREVIATION_x,NICKNAME_x,ABBREVIATION_y,NICKNAME_y,SEASON,PTS_home,...,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,2004-04-14,20301188,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2003,87.0,...,0.214,17.0,37.0,118.0,0.542,1.000,0.375,32.0,34.0,0
1,2003-10-31,20300025,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2003,105.0,...,0.381,23.0,43.0,124.0,0.535,0.793,0.346,27.0,42.0,0
2,2005-01-12,20400514,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2004,103.0,...,0.429,20.0,36.0,92.0,0.413,0.778,0.200,18.0,44.0,1
3,2004-11-03,20400015,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2004,114.0,...,0.556,33.0,38.0,84.0,0.370,0.750,0.409,17.0,37.0,1
4,2006-04-16,20501201,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2005,98.0,...,0.375,26.0,43.0,114.0,0.541,0.711,0.438,29.0,36.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2014-11-24,21400198,1610612766,1610612746,CHA,Hornets,LAC,Clippers,2014,92.0,...,0.100,21.0,33.0,113.0,0.511,0.800,0.469,28.0,43.0,0
19394,2015-12-30,21500474,1610612766,1610612746,CHA,Hornets,LAC,Clippers,2015,117.0,...,0.300,20.0,42.0,122.0,0.521,0.805,0.500,27.0,39.0,0
19395,2017-02-11,21600808,1610612766,1610612746,CHA,Hornets,LAC,Clippers,2016,102.0,...,0.351,24.0,40.0,107.0,0.471,0.650,0.412,26.0,50.0,0
19396,2017-11-18,21700228,1610612766,1610612746,CHA,Hornets,LAC,Clippers,2017,102.0,...,0.423,20.0,49.0,87.0,0.398,0.684,0.276,18.0,44.0,1


In [74]:
# Rename some columns: shorten the date column and replace the merge
# suffixes ('_x' = home team, '_y' = visiting team) with explicit names.
column_renames = {
    'GAME_DATE_EST': 'GAME_DATE',
    'ABBREVIATION_x': 'HOME_TEAM_ABBREVIATION',
    'NICKNAME_x': 'HOME_TEAM_NICKNAME',
    'ABBREVIATION_y': 'VISITOR_TEAM_ABBREVIATION',
    'NICKNAME_y': 'VISITOR_TEAM_NICKNAME',
}
df_merge = df_merge.rename(columns=column_renames)
df_merge

Unnamed: 0,GAME_DATE,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_ABBREVIATION,VISITOR_TEAM_NICKNAME,SEASON,PTS_home,...,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,2004-04-14,20301188,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2003,87.0,...,0.214,17.0,37.0,118.0,0.542,1.000,0.375,32.0,34.0,0
1,2003-10-31,20300025,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2003,105.0,...,0.381,23.0,43.0,124.0,0.535,0.793,0.346,27.0,42.0,0
2,2005-01-12,20400514,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2004,103.0,...,0.429,20.0,36.0,92.0,0.413,0.778,0.200,18.0,44.0,1
3,2004-11-03,20400015,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2004,114.0,...,0.556,33.0,38.0,84.0,0.370,0.750,0.409,17.0,37.0,1
4,2006-04-16,20501201,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2005,98.0,...,0.375,26.0,43.0,114.0,0.541,0.711,0.438,29.0,36.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2014-11-24,21400198,1610612766,1610612746,CHA,Hornets,LAC,Clippers,2014,92.0,...,0.100,21.0,33.0,113.0,0.511,0.800,0.469,28.0,43.0,0
19394,2015-12-30,21500474,1610612766,1610612746,CHA,Hornets,LAC,Clippers,2015,117.0,...,0.300,20.0,42.0,122.0,0.521,0.805,0.500,27.0,39.0,0
19395,2017-02-11,21600808,1610612766,1610612746,CHA,Hornets,LAC,Clippers,2016,102.0,...,0.351,24.0,40.0,107.0,0.471,0.650,0.412,26.0,50.0,0
19396,2017-11-18,21700228,1610612766,1610612746,CHA,Hornets,LAC,Clippers,2017,102.0,...,0.423,20.0,49.0,87.0,0.398,0.684,0.276,18.0,44.0,1


In [76]:
# Sort the games by date, breaking ties with the game id.
sort_keys = ['GAME_DATE', 'GAME_ID']
df_merge = df_merge.sort_values(by=sort_keys)
df_merge

Unnamed: 0,GAME_DATE,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_ABBREVIATION,VISITOR_TEAM_NICKNAME,SEASON,PTS_home,...,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
11562,2003-10-28,20300001,1610612755,1610612748,PHI,76ers,MIA,Heat,2003,89.0,...,0.350,25.0,39.0,74.0,0.408,0.824,0.250,16.0,44.0,1
12344,2003-10-28,20300002,1610612759,1610612756,SAS,Spurs,PHX,Suns,2003,83.0,...,0.100,20.0,38.0,82.0,0.361,0.810,0.500,14.0,43.0,1
5505,2003-10-28,20300003,1610612747,1610612742,LAL,Lakers,DAL,Mavericks,2003,109.0,...,0.350,32.0,46.0,93.0,0.376,0.733,0.280,17.0,46.0,1
11151,2003-10-29,20300004,1610612738,1610612748,BOS,Celtics,MIA,Heat,2003,98.0,...,0.313,28.0,40.0,75.0,0.366,0.750,0.417,14.0,39.0,1
16941,2003-10-29,20300005,1610612752,1610612753,NYK,Knicks,ORL,Magic,2003,83.0,...,0.313,20.0,48.0,85.0,0.368,0.552,0.250,17.0,44.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,2019-04-10,21801226,1610612749,1610612760,MIL,Bucks,OKC,Thunder,2018,116.0,...,0.326,27.0,53.0,127.0,0.485,0.615,0.426,40.0,53.0,0
5243,2019-04-10,21801227,1610612759,1610612742,SAS,Spurs,DAL,Mavericks,2018,105.0,...,0.333,22.0,53.0,94.0,0.407,0.750,0.297,27.0,42.0,1
3105,2019-04-10,21801228,1610612743,1610612750,DEN,Nuggets,MIN,Timberwolves,2018,99.0,...,0.303,23.0,53.0,95.0,0.429,0.667,0.406,24.0,41.0,1
1971,2019-04-10,21801229,1610612746,1610612762,LAC,Clippers,UTA,Jazz,2018,143.0,...,0.429,34.0,52.0,137.0,0.443,0.879,0.400,31.0,57.0,1


In [77]:
# Renumber the rows 0..n-1 after sorting; the old index is discarded rather than kept as a column.
df_merge = df_merge.reset_index(drop=True)
df_merge

Unnamed: 0,GAME_DATE,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_ABBREVIATION,VISITOR_TEAM_NICKNAME,SEASON,PTS_home,...,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,2003-10-28,20300001,1610612755,1610612748,PHI,76ers,MIA,Heat,2003,89.0,...,0.350,25.0,39.0,74.0,0.408,0.824,0.250,16.0,44.0,1
1,2003-10-28,20300002,1610612759,1610612756,SAS,Spurs,PHX,Suns,2003,83.0,...,0.100,20.0,38.0,82.0,0.361,0.810,0.500,14.0,43.0,1
2,2003-10-28,20300003,1610612747,1610612742,LAL,Lakers,DAL,Mavericks,2003,109.0,...,0.350,32.0,46.0,93.0,0.376,0.733,0.280,17.0,46.0,1
3,2003-10-29,20300004,1610612738,1610612748,BOS,Celtics,MIA,Heat,2003,98.0,...,0.313,28.0,40.0,75.0,0.366,0.750,0.417,14.0,39.0,1
4,2003-10-29,20300005,1610612752,1610612753,NYK,Knicks,ORL,Magic,2003,83.0,...,0.313,20.0,48.0,85.0,0.368,0.552,0.250,17.0,44.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2019-04-10,21801226,1610612749,1610612760,MIL,Bucks,OKC,Thunder,2018,116.0,...,0.326,27.0,53.0,127.0,0.485,0.615,0.426,40.0,53.0,0
19394,2019-04-10,21801227,1610612759,1610612742,SAS,Spurs,DAL,Mavericks,2018,105.0,...,0.333,22.0,53.0,94.0,0.407,0.750,0.297,27.0,42.0,1
19395,2019-04-10,21801228,1610612743,1610612750,DEN,Nuggets,MIN,Timberwolves,2018,99.0,...,0.303,23.0,53.0,95.0,0.429,0.667,0.406,24.0,41.0,1
19396,2019-04-10,21801229,1610612746,1610612762,LAC,Clippers,UTA,Jazz,2018,143.0,...,0.429,34.0,52.0,137.0,0.443,0.879,0.400,31.0,57.0,1


In [78]:
# Save the merged, renamed and sorted games table. The row index is written
# as well (no index=False), so it comes back as an extra first column on reload.
merged_csv_path = 'df_merged_renamed_droped_sorted_rs.csv'
df_merge.to_csv(merged_csv_path)

In [79]:
# Reload the saved games table; 'Unnamed: 0' is the row index that to_csv
# wrote as the first column, so it is dropped again here.
df = pd.read_csv('df_merged_renamed_droped_sorted_rs.csv')
df = df.drop(columns='Unnamed: 0')
df

Unnamed: 0,GAME_DATE,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_ABBREVIATION,VISITOR_TEAM_NICKNAME,SEASON,PTS_home,...,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,2003-10-28,20300001,1610612755,1610612748,PHI,76ers,MIA,Heat,2003,89.0,...,0.350,25.0,39.0,74.0,0.408,0.824,0.250,16.0,44.0,1
1,2003-10-28,20300002,1610612759,1610612756,SAS,Spurs,PHX,Suns,2003,83.0,...,0.100,20.0,38.0,82.0,0.361,0.810,0.500,14.0,43.0,1
2,2003-10-28,20300003,1610612747,1610612742,LAL,Lakers,DAL,Mavericks,2003,109.0,...,0.350,32.0,46.0,93.0,0.376,0.733,0.280,17.0,46.0,1
3,2003-10-29,20300004,1610612738,1610612748,BOS,Celtics,MIA,Heat,2003,98.0,...,0.313,28.0,40.0,75.0,0.366,0.750,0.417,14.0,39.0,1
4,2003-10-29,20300005,1610612752,1610612753,NYK,Knicks,ORL,Magic,2003,83.0,...,0.313,20.0,48.0,85.0,0.368,0.552,0.250,17.0,44.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2019-04-10,21801226,1610612749,1610612760,MIL,Bucks,OKC,Thunder,2018,116.0,...,0.326,27.0,53.0,127.0,0.485,0.615,0.426,40.0,53.0,0
19394,2019-04-10,21801227,1610612759,1610612742,SAS,Spurs,DAL,Mavericks,2018,105.0,...,0.333,22.0,53.0,94.0,0.407,0.750,0.297,27.0,42.0,1
19395,2019-04-10,21801228,1610612743,1610612750,DEN,Nuggets,MIN,Timberwolves,2018,99.0,...,0.303,23.0,53.0,95.0,0.429,0.667,0.406,24.0,41.0,1
19396,2019-04-10,21801229,1610612746,1610612762,LAC,Clippers,UTA,Jazz,2018,143.0,...,0.429,34.0,52.0,137.0,0.443,0.879,0.400,31.0,57.0,1


In [80]:
# Display the reloaded games table.
df

Unnamed: 0,GAME_DATE,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_ABBREVIATION,VISITOR_TEAM_NICKNAME,SEASON,PTS_home,...,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,2003-10-28,20300001,1610612755,1610612748,PHI,76ers,MIA,Heat,2003,89.0,...,0.350,25.0,39.0,74.0,0.408,0.824,0.250,16.0,44.0,1
1,2003-10-28,20300002,1610612759,1610612756,SAS,Spurs,PHX,Suns,2003,83.0,...,0.100,20.0,38.0,82.0,0.361,0.810,0.500,14.0,43.0,1
2,2003-10-28,20300003,1610612747,1610612742,LAL,Lakers,DAL,Mavericks,2003,109.0,...,0.350,32.0,46.0,93.0,0.376,0.733,0.280,17.0,46.0,1
3,2003-10-29,20300004,1610612738,1610612748,BOS,Celtics,MIA,Heat,2003,98.0,...,0.313,28.0,40.0,75.0,0.366,0.750,0.417,14.0,39.0,1
4,2003-10-29,20300005,1610612752,1610612753,NYK,Knicks,ORL,Magic,2003,83.0,...,0.313,20.0,48.0,85.0,0.368,0.552,0.250,17.0,44.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2019-04-10,21801226,1610612749,1610612760,MIL,Bucks,OKC,Thunder,2018,116.0,...,0.326,27.0,53.0,127.0,0.485,0.615,0.426,40.0,53.0,0
19394,2019-04-10,21801227,1610612759,1610612742,SAS,Spurs,DAL,Mavericks,2018,105.0,...,0.333,22.0,53.0,94.0,0.407,0.750,0.297,27.0,42.0,1
19395,2019-04-10,21801228,1610612743,1610612750,DEN,Nuggets,MIN,Timberwolves,2018,99.0,...,0.303,23.0,53.0,95.0,0.429,0.667,0.406,24.0,41.0,1
19396,2019-04-10,21801229,1610612746,1610612762,LAC,Clippers,UTA,Jazz,2018,143.0,...,0.429,34.0,52.0,137.0,0.443,0.879,0.400,31.0,57.0,1


In [81]:
# Add the VISITOR_TEAM_WINS column, which we will need later on.
# It starts out as all-NaN and is filled in a following cell.
df = df.assign(VISITOR_TEAM_WINS=np.nan)
df

Unnamed: 0,GAME_DATE,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_ABBREVIATION,VISITOR_TEAM_NICKNAME,SEASON,PTS_home,...,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,VISITOR_TEAM_WINS
0,2003-10-28,20300001,1610612755,1610612748,PHI,76ers,MIA,Heat,2003,89.0,...,25.0,39.0,74.0,0.408,0.824,0.250,16.0,44.0,1,
1,2003-10-28,20300002,1610612759,1610612756,SAS,Spurs,PHX,Suns,2003,83.0,...,20.0,38.0,82.0,0.361,0.810,0.500,14.0,43.0,1,
2,2003-10-28,20300003,1610612747,1610612742,LAL,Lakers,DAL,Mavericks,2003,109.0,...,32.0,46.0,93.0,0.376,0.733,0.280,17.0,46.0,1,
3,2003-10-29,20300004,1610612738,1610612748,BOS,Celtics,MIA,Heat,2003,98.0,...,28.0,40.0,75.0,0.366,0.750,0.417,14.0,39.0,1,
4,2003-10-29,20300005,1610612752,1610612753,NYK,Knicks,ORL,Magic,2003,83.0,...,20.0,48.0,85.0,0.368,0.552,0.250,17.0,44.0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2019-04-10,21801226,1610612749,1610612760,MIL,Bucks,OKC,Thunder,2018,116.0,...,27.0,53.0,127.0,0.485,0.615,0.426,40.0,53.0,0,
19394,2019-04-10,21801227,1610612759,1610612742,SAS,Spurs,DAL,Mavericks,2018,105.0,...,22.0,53.0,94.0,0.407,0.750,0.297,27.0,42.0,1,
19395,2019-04-10,21801228,1610612743,1610612750,DEN,Nuggets,MIN,Timberwolves,2018,99.0,...,23.0,53.0,95.0,0.429,0.667,0.406,24.0,41.0,1,
19396,2019-04-10,21801229,1610612746,1610612762,LAC,Clippers,UTA,Jazz,2018,143.0,...,34.0,52.0,137.0,0.443,0.879,0.400,31.0,57.0,1,


In [82]:
def fill_visitor_team_wins(n):
    """Return the visitor's win flag derived from the home team's win flag.

    Args:
        n: value of HOME_TEAM_WINS for one game.

    Returns:
        0 when ``n`` equals 1, and 1 for any other value.
    """
    return 0 if n == 1 else 1

In [86]:
# Fill VISITOR_TEAM_WINS with the opposite of HOME_TEAM_WINS for every game.
home_team_wins = list(df['HOME_TEAM_WINS'].values)
visitor_team_wins = [fill_visitor_team_wins(home_flag) for home_flag in home_team_wins]
# Column assignment aligns on the index, so build the Series on df's own index;
# a default 0..n-1 index would misplace values (or leave NaN) whenever df is
# not indexed 0..n-1.
df['VISITOR_TEAM_WINS'] = pd.Series(visitor_team_wins, index=df.index)
df

Unnamed: 0,GAME_DATE,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_ABBREVIATION,VISITOR_TEAM_NICKNAME,SEASON,PTS_home,...,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,VISITOR_TEAM_WINS
0,2003-10-28,20300001,1610612755,1610612748,PHI,76ers,MIA,Heat,2003,89.0,...,25.0,39.0,74.0,0.408,0.824,0.250,16.0,44.0,1,0
1,2003-10-28,20300002,1610612759,1610612756,SAS,Spurs,PHX,Suns,2003,83.0,...,20.0,38.0,82.0,0.361,0.810,0.500,14.0,43.0,1,0
2,2003-10-28,20300003,1610612747,1610612742,LAL,Lakers,DAL,Mavericks,2003,109.0,...,32.0,46.0,93.0,0.376,0.733,0.280,17.0,46.0,1,0
3,2003-10-29,20300004,1610612738,1610612748,BOS,Celtics,MIA,Heat,2003,98.0,...,28.0,40.0,75.0,0.366,0.750,0.417,14.0,39.0,1,0
4,2003-10-29,20300005,1610612752,1610612753,NYK,Knicks,ORL,Magic,2003,83.0,...,20.0,48.0,85.0,0.368,0.552,0.250,17.0,44.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19393,2019-04-10,21801226,1610612749,1610612760,MIL,Bucks,OKC,Thunder,2018,116.0,...,27.0,53.0,127.0,0.485,0.615,0.426,40.0,53.0,0,1
19394,2019-04-10,21801227,1610612759,1610612742,SAS,Spurs,DAL,Mavericks,2018,105.0,...,22.0,53.0,94.0,0.407,0.750,0.297,27.0,42.0,1,0
19395,2019-04-10,21801228,1610612743,1610612750,DEN,Nuggets,MIN,Timberwolves,2018,99.0,...,23.0,53.0,95.0,0.429,0.667,0.406,24.0,41.0,1,0
19396,2019-04-10,21801229,1610612746,1610612762,LAC,Clippers,UTA,Jazz,2018,143.0,...,34.0,52.0,137.0,0.443,0.879,0.400,31.0,57.0,1,0


In [89]:
# First and last game day (both inclusive) of each regular season, keyed by
# the year in which the season starts.
REGULAR_SEASON_DATE_RANGES = {
    2003: ('2003-10-28', '2004-04-14'),
    2004: ('2004-11-02', '2005-04-20'),
    2005: ('2005-11-01', '2006-04-19'),
    2006: ('2006-10-31', '2007-04-18'),
    2007: ('2007-10-30', '2008-04-16'),
    2008: ('2008-10-28', '2009-04-16'),
    2009: ('2009-10-27', '2010-04-14'),
    2010: ('2010-10-26', '2011-04-13'),
    2011: ('2011-12-25', '2012-04-26'),
    2012: ('2012-10-30', '2013-04-17'),
    2013: ('2013-10-29', '2014-04-16'),
    2014: ('2014-10-28', '2015-04-15'),
    2015: ('2015-10-27', '2016-04-13'),
    2016: ('2016-10-25', '2017-04-12'),
    2017: ('2017-10-17', '2018-04-11'),
    2018: ('2018-10-16', '2019-04-10'),
}


def select_regular_season(games, first_day, last_day):
    """Return the rows of ``games`` whose GAME_DATE lies in [first_day, last_day].

    Args:
        games: DataFrame with a GAME_DATE column.
        first_day: first date to keep, as a 'YYYY-MM-DD' string.
        last_day: last date to keep, as a 'YYYY-MM-DD' string.

    Returns:
        The filtered DataFrame produced by ``DataFrame.query``.
    """
    return games.query(f"GAME_DATE >= '{first_day}' and GAME_DATE <='{last_day}'")


# One frame per season, in the same order as REGULAR_SEASON_DATE_RANGES.
regular_seasons = {
    season: select_regular_season(df, first_day, last_day)
    for season, (first_day, last_day) in REGULAR_SEASON_DATE_RANGES.items()
}

# Keep the individual per-season names; the unpacking relies on the insertion
# order of the dict above and fails loudly if the number of seasons changes.
(df_rs_03, df_rs_04, df_rs_05, df_rs_06,
 df_rs_07, df_rs_08, df_rs_09, df_rs_10,
 df_rs_11, df_rs_12, df_rs_13, df_rs_14,
 df_rs_15, df_rs_16, df_rs_17, df_rs_18) = regular_seasons.values()

In [90]:
# Display the games selected for the date range 2003-10-28 to 2004-04-14.
df_rs_03

Unnamed: 0,GAME_DATE,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,HOME_TEAM_ABBREVIATION,HOME_TEAM_NICKNAME,VISITOR_TEAM_ABBREVIATION,VISITOR_TEAM_NICKNAME,SEASON,PTS_home,...,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,VISITOR_TEAM_WINS
0,2003-10-28,20300001,1610612755,1610612748,PHI,76ers,MIA,Heat,2003,89.0,...,25.0,39.0,74.0,0.408,0.824,0.250,16.0,44.0,1,0
1,2003-10-28,20300002,1610612759,1610612756,SAS,Spurs,PHX,Suns,2003,83.0,...,20.0,38.0,82.0,0.361,0.810,0.500,14.0,43.0,1,0
2,2003-10-28,20300003,1610612747,1610612742,LAL,Lakers,DAL,Mavericks,2003,109.0,...,32.0,46.0,93.0,0.376,0.733,0.280,17.0,46.0,1,0
3,2003-10-29,20300004,1610612738,1610612748,BOS,Celtics,MIA,Heat,2003,98.0,...,28.0,40.0,75.0,0.366,0.750,0.417,14.0,39.0,1,0
4,2003-10-29,20300005,1610612752,1610612753,NYK,Knicks,ORL,Magic,2003,83.0,...,20.0,48.0,85.0,0.368,0.552,0.250,17.0,44.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1184,2004-04-14,20301185,1610612762,1610612756,UTA,Jazz,PHX,Suns,2003,84.0,...,20.0,43.0,89.0,0.479,0.609,0.438,18.0,39.0,0,1
1185,2004-04-14,20301186,1610612745,1610612742,HOU,Rockets,DAL,Mavericks,2003,89.0,...,24.0,50.0,92.0,0.408,0.676,0.389,22.0,40.0,0,1
1186,2004-04-14,20301187,1610612757,1610612747,POR,Trail Blazers,LAL,Lakers,2003,104.0,...,28.0,55.0,105.0,0.398,0.593,0.292,27.0,51.0,0,1
1187,2004-04-14,20301188,1610612746,1610612760,LAC,Clippers,OKC,Thunder,2003,87.0,...,17.0,37.0,118.0,0.542,1.000,0.375,32.0,34.0,0,1
