In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

import matplotlib.pyplot as plt
%matplotlib inline

data = pd.read_csv('./RegularSeasonCompactResults.csv')
detailed_data = pd.read_csv('./RegularSeasonDetailedResults.csv')
seed = pd.read_csv('./NCAATourneySeeds.csv')
team_names = pd.read_csv('./Teams.csv')

In [16]:
detailed_data.head()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
0,2003,10,1104,68,1328,62,N,0,27,58,...,10,16,22,10,22,8,18,9,2,20
1,2003,10,1272,70,1393,63,N,0,26,62,...,24,9,20,20,25,7,12,8,6,16
2,2003,11,1266,73,1437,61,N,0,24,58,...,26,14,23,31,22,9,12,2,5,23
3,2003,11,1296,56,1457,50,N,0,18,38,...,22,8,15,17,20,9,19,4,3,23
4,2003,11,1400,77,1208,71,N,0,30,61,...,16,17,27,21,15,12,10,7,1,14


In [47]:
detailed_data.columns.tolist()

['Season',
 'DayNum',
 'WTeamID',
 'WScore',
 'LTeamID',
 'LScore',
 'WLoc',
 'NumOT',
 'WFGM',
 'WFGA',
 'WFGM3',
 'WFGA3',
 'WFTM',
 'WFTA',
 'WOR',
 'WDR',
 'WAst',
 'WTO',
 'WStl',
 'WBlk',
 'WPF',
 'LFGM',
 'LFGA',
 'LFGM3',
 'LFGA3',
 'LFTM',
 'LFTA',
 'LOR',
 'LDR',
 'LAst',
 'LTO',
 'LStl',
 'LBlk',
 'LPF']

In [8]:
data.head()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0
3,1985,25,1165,70,1432,54,H,0
4,1985,25,1192,86,1447,74,H,0


In [3]:
data.to_dict().keys()

dict_keys(['Season', 'DayNum', 'WTeamID', 'WScore', 'LTeamID', 'LScore', 'WLoc', 'NumOT'])

In [7]:
data.to_dict('records')

[{'DayNum': 20,
  'LScore': 64,
  'LTeamID': 1328,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'N',
  'WScore': 81,
  'WTeamID': 1228},
 {'DayNum': 25,
  'LScore': 70,
  'LTeamID': 1354,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 77,
  'WTeamID': 1106},
 {'DayNum': 25,
  'LScore': 56,
  'LTeamID': 1223,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 63,
  'WTeamID': 1112},
 {'DayNum': 25,
  'LScore': 54,
  'LTeamID': 1432,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 70,
  'WTeamID': 1165},
 {'DayNum': 25,
  'LScore': 74,
  'LTeamID': 1447,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 86,
  'WTeamID': 1192},
 {'DayNum': 25,
  'LScore': 78,
  'LTeamID': 1337,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 79,
  'WTeamID': 1218},
 {'DayNum': 25,
  'LScore': 44,
  'LTeamID': 1226,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'N',
  'WScore': 64,
  'WTeamID': 1228},
 {'DayNum': 25,
  'LScore': 56,
  'LTeamID': 1268,
  'NumOT': 

In [10]:
data.head()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0
3,1985,25,1165,70,1432,54,H,0
4,1985,25,1192,86,1447,74,H,0


In [11]:
df = data.head(100).copy()

In [13]:
df.to_dict('records')

[{'DayNum': 20,
  'LScore': 64,
  'LTeamID': 1328,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'N',
  'WScore': 81,
  'WTeamID': 1228},
 {'DayNum': 25,
  'LScore': 70,
  'LTeamID': 1354,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 77,
  'WTeamID': 1106},
 {'DayNum': 25,
  'LScore': 56,
  'LTeamID': 1223,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 63,
  'WTeamID': 1112},
 {'DayNum': 25,
  'LScore': 54,
  'LTeamID': 1432,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 70,
  'WTeamID': 1165},
 {'DayNum': 25,
  'LScore': 74,
  'LTeamID': 1447,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 86,
  'WTeamID': 1192},
 {'DayNum': 25,
  'LScore': 78,
  'LTeamID': 1337,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 79,
  'WTeamID': 1218},
 {'DayNum': 25,
  'LScore': 44,
  'LTeamID': 1226,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'N',
  'WScore': 64,
  'WTeamID': 1228},
 {'DayNum': 25,
  'LScore': 56,
  'LTeamID': 1268,
  'NumOT': 

In [23]:
test_rows = df.to_dict('records')
test_rows[0:2]

[{'DayNum': 20,
  'LScore': 64,
  'LTeamID': 1328,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'N',
  'WScore': 81,
  'WTeamID': 1228},
 {'DayNum': 25,
  'LScore': 70,
  'LTeamID': 1354,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'H',
  'WScore': 77,
  'WTeamID': 1106}]

In [38]:
test_row

{'DayNum': 20,
 'LScore': 64,
 'LTeamID': 1328,
 'NumOT': 0,
 'Season': 1985,
 'WLoc': 'N',
 'WScore': 81,
 'WTeamID': 1228}

In [42]:
game_winner = {}
game_loser = {}

test_dict = []

for k,v in test_row.items():
    
    if k[0] == 'W':
        game_winner['Team' + k[1:]] = v
        game_loser['Opp' + k[1:]] = v
        
    elif k[0] == 'L':
        game_loser['Team' + k[1:]] = v
        game_winner['Opp' + k[1:]] = v
    
    elif k == 'WLoc':
        if v == 'A'
            game_winner['TeamLoc'] = 'A'
            game_loser['TeamLoc'] = 'H'
    
    else:
        game_loser[k] = v
        game_winner[k] = v
    
test_dict.append(game_winner)
test_dict.append(game_loser)
    
test_dict

[{'DayNum': 20,
  'NumOT': 0,
  'OppScore': 64,
  'OppTeamID': 1328,
  'Season': 1985,
  'TeamLoc': 'N',
  'TeamScore': 81,
  'TeamTeamID': 1228},
 {'DayNum': 20,
  'NumOT': 0,
  'OppLoc': 'N',
  'OppScore': 81,
  'OppTeamID': 1228,
  'Season': 1985,
  'TeamScore': 64,
  'TeamTeamID': 1328}]

In [44]:
pd.DataFrame(test_dict)

Unnamed: 0,DayNum,NumOT,OppLoc,OppScore,OppTeamID,Season,TeamLoc,TeamScore,TeamTeamID
0,20,0,,64,1328,1985,N,81,1228
1,20,0,N,81,1228,1985,,64,1328


In [40]:
test_row

{'DayNum': 20,
 'LScore': 64,
 'LTeamID': 1328,
 'NumOT': 0,
 'Season': 1985,
 'WLoc': 'N',
 'WScore': 81,
 'WTeamID': 1228}

In [41]:
games = []
game_dict = {}

for k,v in test_row.items():
    if k[0] == 'W':
        game_dict['Team' + k[1:]] = v
    elif k[0] == 'L':
        game_dict['Opp' + k[1:]] = v
    else:
        game_dict[k] = v
        
games.append(game_dict)
        
        
games

[{'DayNum': 20,
  'NumOT': 0,
  'OppScore': 64,
  'OppTeamID': 1328,
  'Season': 1985,
  'TeamLoc': 'N',
  'TeamScore': 81,
  'TeamTeamID': 1228}]

In [30]:
games = []
for perspective in ['W','L']:
    
    game_winner = {}
    game_loser = {}
    
    for k,v in test_row.items():

        if k[0] == perspective:
            game_winner[str(perspective + k[1:])] = v
        else:
            game_winner[k] = v
            
    games.append(game_winner)
    games.append(game_loser)

Season 1985
DayNum 20
WTeamID 1228
WScore 81
LTeamID 1328
LScore 64
WLoc N
NumOT 0
Season 1985
DayNum 20
WTeamID 1228
WScore 81
LTeamID 1328
LScore 64
WLoc N
NumOT 0


In [31]:
games

[{'DayNum': 20,
  'LScore': 64,
  'LTeamID': 1328,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'N',
  'WScore': 81,
  'WTeamID': 1228},
 {},
 {'DayNum': 20,
  'LScore': 64,
  'LTeamID': 1328,
  'NumOT': 0,
  'Season': 1985,
  'WLoc': 'N',
  'WScore': 81,
  'WTeamID': 1228},
 {}]

In [None]:
# Riley's Code:
games = []

for perspective in ['W', 'L']
    game = {}