In [2]:
from bs4 import BeautifulSoup
import requests
from IPython.core.display import display, HTML
import numpy as np
import re
import pickle
import pandas as pd
import re
import matplotlib.pyplot as plt

In [None]:
#create dictionary for results, which will be used for further calculations
r_keys = ['L', 'W', 'L\xa0(1 OT)', 'L\xa0(2 OT)', 'W\xa0(2 OT)','W\xa0(1 OT)', 'W\xa0(3 OT)', 'L\xa0(3 OT)']
r_wins = [0,1,0,0,1,1,1,0] #1 for win
r_OT = [0,0,1,2,2,1,3,3] #overtimes played
r_MP = [40,40,45,50,50,45,55,55] #minutes played

results_dict = dict(zip(r_keys,zip(r_wins,r_OT,r_MP)))

# Data Cleaning and Feature Engineering

## Data Cleaning

In [3]:
team_lookup = pickle.load(open('teams_lookup.pickle', "rb" ))

In [76]:
combo_log = pickle.load(open('combo_log.pickle', "rb"))

In [84]:


combo_col_w_blank = ['url','Team','Date','Court','Opponent','Result','Tm','Opp','FG','FGA','FG%','3P','3PA','3P%','FT','FTA',
           'FT%','ORB','TRB','AST','STL','BLK','TOV','PF','blank','FG_O','FGA_O','FG%_O','3P_O','3PA_O','3P%_O','FT_O',
           'FTA_O','FT%_O','ORB_O','TRB_O','AST_O','STL_O','BLK_O','TOV_O','PF_O','url2','Team2','Date2','Court2','Opponent2','Result2','Tm2','Opp2','ORtg','DRtg','Pace','FTr','3PAr','TS%','TRB%',
           'AST%','STL%','BLK%','blank1','OeFG%','OTOV%','ORB%','OFT/FGA','blank2','DeFG%','DTOV%','DRB%','DFT/FGA']
num_cols = ['Tm','Opp','FG','FGA','FG%','3P','3PA','3P%','FT','FTA',
           'FT%','ORB','TRB','AST','STL','BLK','TOV','PF','FG_O','FGA_O','FG%_O','3P_O','3PA_O','3P%_O','FT_O',
           'FTA_O','FT%_O','ORB_O','TRB_O','AST_O','STL_O','BLK_O','TOV_O','PF_O','ORtg','DRtg','Pace','FTr','3PAr','TS%','TRB%',
           'AST%','STL%','BLK%','OeFG%','OTOV%','ORB%','OFT/FGA','DeFG%','DTOV%','DRB%','DFT/FGA']

In [109]:
def clean_gamelog(gamelog = combo_log, col_names_wb = combo_col_w_blank,
                  col_names = num_cols):
    '''
    Takes in gamelog and cleans the data
    input: gamelog as dataframe and stat_type. stat_type can be basic or advanced
    output: dataframe with clean gamelog
    '''
    games = pd.DataFrame.from_dict(gamelog, orient = "index")
    games.columns = col_names_wb
    games['Date'] = pd.to_datetime(games['Date'])
    games = games.drop(columns=['blank','url2','Team2','Date2','Court2','Opponent2','Result2','Tm2','Opp2','blank1','blank2'])
    for i in col_names:
        games[i] = pd.to_numeric(games[i])
    return games

In [125]:
#adding columns to help with further data manipulation
def helper_columns(df,team_lookup=team_lookup):
    '''
    this helps to clean data. update this here
    games = gamelog information by team ______
    columns = names of columns in the dataframe
    team_lookup = to get the right name
    
    output: dataframe cleaned!
    with columns we want
    
    '''
    hot_court = pd.get_dummies(df.Court)
    df['Home'],df['Away'] = hot_court[""],hot_court["@"]
    df['Wins'] = [results_dict[x][0] for x in df.Result]
    df['OT'] = [results_dict[x][1] for x in df.Result]
    df['MP']= [results_dict[x][2] for x in df.Result]
    return df

In [126]:
combo_log = pickle.load(open('combo_log.pickle', "rb"))
combo_log = clean_gamelog(combo_log)
combo_log = helper_columns(combo_log)

## Additional columns
spreads (see below)\
D1\
Win\
OT\
MP

In [127]:
#create lookup for D1 feature gen
#D1 = indicate if opponent is in Division 1 or not (from the data collection, only D1 teams included for gamelog but they sometimes play non-D1 schools)

team_df = pd.DataFrame.from_dict(team_lookup, orient = 'index')
D1 = dict(zip(team_df.loc[:,0],np.repeat(1,len(team_df.loc[:,0]))))

#removing all non-D1 games
combo_log['D1'] = [D1.get(x) or 0 for x in combo_log['Opponent']]
combo_log = combo_log[combo_log.D1 == 1]

In [128]:
#calculate various spreads

def calc_spreads(df):
    df['P_S'] = df.eval('Tm - Opp')
    df['FG_S'] = df.eval('FG - FG_O')
    df['FGA_S'] = df.eval('FGA - FGA_O')
    df['FG%_S'] = df['FG%'] - df['FG%_O']
    df['3P_S'] = df['3P'] - df['3P_O']
    df['3PA_S'] = df['3PA'] - df['3PA_O']
    df['3P%_S'] = df['3P%'] - df['3P%_O']
    df['FT_S'] = df.eval('FT - FT_O')
    df['FTA_S'] = df.eval('FTA - FTA_O')
    df['FT%_S'] = df['FT%'] - df['FT%_O']
    df['TRB_S'] = df.eval('TRB- TRB_O')
    df['AST_S'] = df.eval('AST - AST_O')
    df['STL_S'] = df.eval('STL - STL_O')
    df['BLK_S'] = df.eval('BLK - BLK_O')
    df['TOV_S'] = df.eval('TOV - TOV_O')
    df['PF_S'] = df.eval('PF - PF_O')
    df['FTr_S'] = df.eval('FTr - FT_O/FGA_O') 
    df['3PAr_S'] = df['3PA']/df['FG'] - df['3PA_O']/df['FG_O']
    df['TS%_S'] = df['TS%'] - df.eval('Opp/(2*(FGA_O+0.44*FTA_O))')
    df['TRB%_S'] = 2*df['TRB%'] - 1
    df['AST%_S'] = df.eval('(AST - AST_O)/Pace')
    df['STL%_S'] = df.eval('(STL - STL_O)/Pace')
    df['BLK%_S'] = df.eval('(BLK - BLK_O)/Pace')
    return df

In [129]:
combo_log = calc_spreads(combo_log)

In [133]:
with open('clean_combo_19.pickle', 'wb') as handle:
    pickle.dump(combo_log, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [132]:
len(combo_log)

11206

In [130]:
combo_log.columns

Index(['url', 'Team', 'Date', 'Court', 'Opponent', 'Result', 'Tm', 'Opp', 'FG',
       'FGA', 'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'ORB', 'TRB',
       'AST', 'STL', 'BLK', 'TOV', 'PF', 'FG_O', 'FGA_O', 'FG%_O', '3P_O',
       '3PA_O', '3P%_O', 'FT_O', 'FTA_O', 'FT%_O', 'ORB_O', 'TRB_O', 'AST_O',
       'STL_O', 'BLK_O', 'TOV_O', 'PF_O', 'ORtg', 'DRtg', 'Pace', 'FTr',
       '3PAr', 'TS%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'OeFG%', 'OTOV%', 'ORB%',
       'OFT/FGA', 'DeFG%', 'DTOV%', 'DRB%', 'DFT/FGA', 'Home', 'Away', 'Wins',
       'OT', 'MP', 'D1', 'P_S', 'FG_S', 'FGA_S', 'FG%_S', '3P_S', '3PA_S',
       '3P%_S', 'FT_S', 'FTA_S', 'FT%_S', 'TRB_S', 'AST_S', 'STL_S', 'BLK_S',
       'TOV_S', 'PF_S', 'FTr_S', '3PAr_S', 'TS%_S', 'TRB%_S', 'AST%_S',
       'STL%_S', 'BLK%_S'],
      dtype='object')

## Preparing clean data file

In [136]:
def agg_df(games):
    '''
    This will compute the running stats for teams, with season-to-date info prior to each game
    '''
    df = pd.DataFrame(games['url'])
    df['Team'] = [team_lookup[x][0] for x in games['Team']]
    df['Date'] = games['Date']
    df['Opponent'] = games['Opponent']
    df['Spread'] = games['P_S']
    df['Win_Loss'] = games['Wins']
    df['Home'] = games['Home']
    df['Away'] = games['Away']
    
    # GP = Games Played prior to this game
    df['GP'] = games.groupby('Team')['Team'].transform(lambda x: x.expanding().count()-1)
    # get prior average stats for each team before the game
    for i in ['Wins','Tm', 'Opp', 'FG',
       'FGA', 'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'ORB', 'TRB',
       'AST', 'STL', 'BLK', 'TOV', 'PF', 'ORtg', 'DRtg', 'Pace', 'FTr',
       '3PAr', 'TS%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'OeFG%', 'OTOV%', 'ORB%',
       'OFT/FGA', 'DeFG%', 'DTOV%', 'DRB%', 'DFT/FGA','P_S', 'FG_S', 'FGA_S', 'FG%_S', '3P_S', '3PA_S',
       '3P%_S', 'FT_S', 'FTA_S', 'FT%_S', 'TRB_S', 'AST_S', 'STL_S', 'BLK_S','TOV_S', 'PF_S', 'FTr_S', '3PAr_S', 'TS%_S', 'TRB%_S', 'AST%_S',
       'STL%_S', 'BLK%_S']:
        df[i] = games.groupby('Team')[i].transform(lambda x: x.expanding().sum()-x)/df['GP']
    return df

In [137]:
df1 = agg_df(combo_log)

In [149]:
df1[0:40]

Unnamed: 0,url,Team,Date,Opponent,Spread,Win_Loss,Home,Away,GP,Wins,...,BLK_S,TOV_S,PF_S,FTr_S,3PAr_S,TS%_S,TRB%_S,AST%_S,STL%_S,BLK%_S
/cbb/boxscores/2018-11-09-23-california.htmlcalifornia,/cbb/boxscores/2018-11-09-23-california.html,California,2018-11-09,Yale,-17,0,0,0,0.0,,...,,,,,,,,,,
/cbb/boxscores/2018-11-13-22-california.htmlcalifornia,/cbb/boxscores/2018-11-13-22-california.html,California,2018-11-13,Hampton,14,1,1,0,1.0,0.0,...,-3.0,-3.0,-12.0,0.300077,-0.284444,-0.157175,86.6,-0.105263,-0.013158,-0.039474
/cbb/boxscores/2018-11-19-19-california.htmlcalifornia,/cbb/boxscores/2018-11-19-19-california.html,California,2018-11-19,St. John's (NY),-3,0,0,0,2.0,0.5,...,3.0,-1.0,-7.5,0.275736,-0.350264,0.025401,90.6,0.021592,0.000169,0.040992
/cbb/boxscores/2018-11-20-17-temple.htmlcalifornia,/cbb/boxscores/2018-11-20-17-temple.html,California,2018-11-20,Temple,-17,0,0,0,3.0,0.333333,...,2.0,-0.333333,-4.333333,0.187861,-0.267992,0.018871,91.333333,-0.01037,0.000112,0.027328
/cbb/boxscores/2018-11-26-21-california.htmlcalifornia,/cbb/boxscores/2018-11-26-21-california.html,California,2018-11-26,Santa Clara,12,1,1,0,4.0,0.25,...,0.5,0.5,-4.0,0.214716,-0.191778,-0.00782,91.35,-0.059476,-0.022072,0.005725
/cbb/boxscores/2018-12-01-22-saint-marys-ca.htmlcalifornia,/cbb/boxscores/2018-12-01-22-saint-marys-ca.html,California,2018-12-01,Saint Mary's,-13,0,0,1,5.0,0.4,...,0.0,-2.2,-2.4,0.18234,-0.252482,0.001186,89.36,-0.047581,0.013241,-0.001038
/cbb/boxscores/2018-12-05-23-california.htmlcalifornia,/cbb/boxscores/2018-12-05-23-california.html,California,2018-12-05,San Francisco,-19,0,1,0,6.0,0.333333,...,-0.333333,-2.5,-2.166667,0.149783,-0.253136,-0.031806,88.966667,-0.073825,0.008405,-0.006123
/cbb/boxscores/2018-12-08-22-california.htmlcalifornia,/cbb/boxscores/2018-12-08-22-california.html,California,2018-12-08,San Diego State,6,1,1,0,7.0,0.285714,...,0.142857,-1.857143,-2.142857,0.163037,-0.177047,-0.039429,87.971429,-0.072138,0.002775,0.001397
/cbb/boxscores/2018-12-15-22-california.htmlcalifornia,/cbb/boxscores/2018-12-15-22-california.html,California,2018-12-15,Cal Poly,1,1,1,0,8.0,0.375,...,0.25,-2.0,-2.0,0.157497,-0.124743,-0.035289,89.6,-0.063121,0.000438,0.003212
/cbb/boxscores/2018-12-19-22-fresno-state.htmlcalifornia,/cbb/boxscores/2018-12-19-22-fresno-state.html,California,2018-12-19,Fresno State,-22,0,0,1,9.0,0.444444,...,0.333333,-2.333333,-1.666667,0.169912,-0.1573,-0.031272,90.266667,-0.061291,0.009029,0.004583


## Append (vs) Opponent Stats 

In [167]:
def vs_df(df):
    '''
    This will compute the running stats for teams, with season-to-date info prior to each game
    '''
    # GP = Games Played prior to this game
    winners = df[df.Win_Loss == 1]
    losers = df[df.Win_Loss == 0]
    
    winner_merge = pd.merge(winners, losers, left_on='url',right_on='url',how='outer',suffixes=('','_vs'))
    loser_merge = pd.merge(losers, winners, left_on='url',right_on='url',how='outer',suffixes=('','_vs'))
    final = pd.concat([winner_merge,loser_merge])
    return final

In [168]:
df_final = vs_df(df1)

In [174]:
df_final

Unnamed: 0,url,Team,Date,Opponent,Spread,Win_Loss,Home,Away,GP,Wins,...,BLK_S_vs,TOV_S_vs,PF_S_vs,FTr_S_vs,3PAr_S_vs,TS%_S_vs,TRB%_S_vs,AST%_S_vs,STL%_S_vs,BLK%_S_vs
0,/cbb/boxscores/2018-11-13-22-california.html,California,2018-11-13,Hampton,14,1,1,0,1.0,0.000000,...,-1.000000,1.000000,1.000000,0.053667,0.074879,-0.122053,104.200000,-0.087940,-0.075377,-0.012563
1,/cbb/boxscores/2018-11-26-21-california.html,California,2018-11-26,Santa Clara,12,1,1,0,4.0,0.250000,...,1.000000,1.800000,5.000000,-0.027312,0.323610,-0.041497,91.680000,-0.019679,-0.006209,0.015796
2,/cbb/boxscores/2018-12-08-22-california.html,California,2018-12-08,San Diego State,6,1,1,0,7.0,0.285714,...,0.750000,0.500000,-2.625000,0.200167,-0.210165,0.055071,101.275000,0.027414,-0.015284,0.010068
3,/cbb/boxscores/2018-12-15-22-california.html,California,2018-12-15,Cal Poly,1,1,1,0,8.0,0.375000,...,0.000000,1.714286,-0.428571,0.035119,0.094773,-0.045731,91.314286,-0.069181,-0.008425,0.000097
4,/cbb/boxscores/2018-12-21-22-california.html,California,2018-12-21,San Jose State,8,1,1,0,10.0,0.400000,...,0.111111,3.111111,-2.666667,0.283569,-0.113571,-0.025478,102.977778,-0.019670,-0.062934,0.000974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5598,/cbb/boxscores/2019-01-26-19-cleveland-state.html,Youngstown State,2019-01-26,Cleveland State,-10,0,0,1,19.0,0.210526,...,-0.263158,2.157895,-0.684211,0.079369,0.153413,-0.029673,92.736842,-0.032423,-0.034945,-0.002696
5599,/cbb/boxscores/2019-02-21-19-northern-kentucky...,Youngstown State,2019-02-21,Northern Kentucky,-7,0,0,1,26.0,0.384615,...,0.240000,-0.520000,1.200000,0.102166,0.031735,0.043565,105.000000,0.077575,0.003338,0.003380
5600,/cbb/boxscores/2019-02-23-15-wright-state.html,Youngstown State,2019-02-23,Wright State,-28,0,0,1,27.0,0.370370,...,-1.115385,-1.846154,-2.961538,0.167148,0.050100,-0.014979,101.761538,0.037687,0.008731,-0.016557
5601,/cbb/boxscores/2019-03-02-18-youngstown-state....,Youngstown State,2019-03-02,Cleveland State,-9,0,1,0,28.0,0.357143,...,-0.214286,1.250000,-0.535714,0.059117,0.146171,-0.019909,91.521429,-0.019571,-0.019866,-0.002160


In [180]:
pd.set_option('display.max_columns', None)
df_final[df_final['url'] == '/cbb/boxscores/2019-03-07-23-stanford.html']

Unnamed: 0,url,Team,Date,Opponent,Spread,Win_Loss,Home,Away,GP,Wins,Tm,Opp,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,TRB,AST,STL,BLK,TOV,PF,ORtg,DRtg,Pace,FTr,3PAr,TS%,TRB%,AST%,STL%,BLK%,OeFG%,OTOV%,ORB%,OFT/FGA,DeFG%,DTOV%,DRB%,DFT/FGA,P_S,FG_S,FGA_S,FG%_S,3P_S,3PA_S,3P%_S,FT_S,FTA_S,FT%_S,TRB_S,AST_S,STL_S,BLK_S,TOV_S,PF_S,FTr_S,3PAr_S,TS%_S,TRB%_S,AST%_S,STL%_S,BLK%_S,Team_vs,Date_vs,Opponent_vs,Spread_vs,Win_Loss_vs,Home_vs,Away_vs,GP_vs,Wins_vs,Tm_vs,Opp_vs,FG_vs,FGA_vs,FG%_vs,3P_vs,3PA_vs,3P%_vs,FT_vs,FTA_vs,FT%_vs,ORB_vs,TRB_vs,AST_vs,STL_vs,BLK_vs,TOV_vs,PF_vs,ORtg_vs,DRtg_vs,Pace_vs,FTr_vs,3PAr_vs,TS%_vs,TRB%_vs,AST%_vs,STL%_vs,BLK%_vs,OeFG%_vs,OTOV%_vs,ORB%_vs,OFT/FGA_vs,DeFG%_vs,DTOV%_vs,DRB%_vs,DFT/FGA_vs,P_S_vs,FG_S_vs,FGA_S_vs,FG%_S_vs,3P_S_vs,3PA_S_vs,3P%_S_vs,FT_S_vs,FTA_S_vs,FT%_S_vs,TRB_S_vs,AST_S_vs,STL_S_vs,BLK_S_vs,TOV_S_vs,PF_S_vs,FTr_S_vs,3PAr_S_vs,TS%_S_vs,TRB%_S_vs,AST%_S_vs,STL%_S_vs,BLK%_S_vs
7,/cbb/boxscores/2019-03-07-23-stanford.html,California,2019-03-07,Stanford,5,1,0,1,29.0,0.241379,69.172414,78.344828,24.137931,56.206897,0.430517,6.586207,19.034483,0.343586,14.310345,19.586207,0.726517,6.862069,26.586207,11.034483,7.172414,2.965517,10.965517,18.655172,99.006897,112.075862,69.862069,0.352862,0.338793,0.529759,44.989655,45.396552,10.048276,9.041379,0.489586,14.262069,21.148276,0.257828,0.580552,17.065517,72.824138,0.27,-9.172414,-3.206897,1.103448,-0.068138,-2.37931,-3.931034,-0.039897,-0.37931,-0.793103,0.01769,-5.931034,-4.206897,2.241379,-0.206897,-2.448276,0.862069,0.082791,-0.04109,-0.085037,88.97931,-0.060665,0.030825,-0.002913,Stanford,2019-03-07,California,-5,0,1,0,29.0,0.517241,73.241379,71.862069,26.137931,56.655172,0.461759,6.758621,20.689655,0.317138,14.206897,21.0,0.679379,8.206897,33.241379,12.793103,5.965517,4.551724,14.793103,18.793103,99.482759,98.275862,73.096552,0.376621,0.365862,0.55,51.865517,48.848276,8.086207,10.148276,0.521069,18.327586,26.231034,0.255931,0.481897,14.631034,74.451724,0.223207,1.37931,-0.275862,-4.275862,0.026517,1.137931,4.103448,-0.019552,0.793103,1.517241,-0.00769,2.103448,2.0,-0.448276,1.413793,2.724138,-0.724138,0.153427,0.188796,0.031462,102.731034,0.02563,-0.007512,0.01876
37,/cbb/boxscores/2019-03-07-23-stanford.html,Stanford,2019-03-07,California,-5,0,1,0,29.0,0.517241,73.241379,71.862069,26.137931,56.655172,0.461759,6.758621,20.689655,0.317138,14.206897,21.0,0.679379,8.206897,33.241379,12.793103,5.965517,4.551724,14.793103,18.793103,99.482759,98.275862,73.096552,0.376621,0.365862,0.55,51.865517,48.848276,8.086207,10.148276,0.521069,18.327586,26.231034,0.255931,0.481897,14.631034,74.451724,0.223207,1.37931,-0.275862,-4.275862,0.026517,1.137931,4.103448,-0.019552,0.793103,1.517241,-0.00769,2.103448,2.0,-0.448276,1.413793,2.724138,-0.724138,0.153427,0.188796,0.031462,102.731034,0.02563,-0.007512,0.01876,California,2019-03-07,Stanford,5,1,0,1,29.0,0.241379,69.172414,78.344828,24.137931,56.206897,0.430517,6.586207,19.034483,0.343586,14.310345,19.586207,0.726517,6.862069,26.586207,11.034483,7.172414,2.965517,10.965517,18.655172,99.006897,112.075862,69.862069,0.352862,0.338793,0.529759,44.989655,45.396552,10.048276,9.041379,0.489586,14.262069,21.148276,0.257828,0.580552,17.065517,72.824138,0.27,-9.172414,-3.206897,1.103448,-0.068138,-2.37931,-3.931034,-0.039897,-0.37931,-0.793103,0.01769,-5.931034,-4.206897,2.241379,-0.206897,-2.448276,0.862069,0.082791,-0.04109,-0.085037,88.97931,-0.060665,0.030825,-0.002913


In [181]:
with open('final_19.pickle', 'wb') as handle:
    pickle.dump(df_final, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [188]:
df_final[0:10]

Unnamed: 0,url,Team,Date,Opponent,Spread,Win_Loss,Home,Away,GP,Wins,Tm,Opp,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,TRB,AST,STL,BLK,TOV,PF,ORtg,DRtg,Pace,FTr,3PAr,TS%,TRB%,AST%,STL%,BLK%,OeFG%,OTOV%,ORB%,OFT/FGA,DeFG%,DTOV%,DRB%,DFT/FGA,P_S,FG_S,FGA_S,FG%_S,3P_S,3PA_S,3P%_S,FT_S,FTA_S,FT%_S,TRB_S,AST_S,STL_S,BLK_S,TOV_S,PF_S,FTr_S,3PAr_S,TS%_S,TRB%_S,AST%_S,STL%_S,BLK%_S,Team_vs,Date_vs,Opponent_vs,Spread_vs,Win_Loss_vs,Home_vs,Away_vs,GP_vs,Wins_vs,Tm_vs,Opp_vs,FG_vs,FGA_vs,FG%_vs,3P_vs,3PA_vs,3P%_vs,FT_vs,FTA_vs,FT%_vs,ORB_vs,TRB_vs,AST_vs,STL_vs,BLK_vs,TOV_vs,PF_vs,ORtg_vs,DRtg_vs,Pace_vs,FTr_vs,3PAr_vs,TS%_vs,TRB%_vs,AST%_vs,STL%_vs,BLK%_vs,OeFG%_vs,OTOV%_vs,ORB%_vs,OFT/FGA_vs,DeFG%_vs,DTOV%_vs,DRB%_vs,DFT/FGA_vs,P_S_vs,FG_S_vs,FGA_S_vs,FG%_S_vs,3P_S_vs,3PA_S_vs,3P%_S_vs,FT_S_vs,FTA_S_vs,FT%_S_vs,TRB_S_vs,AST_S_vs,STL_S_vs,BLK_S_vs,TOV_S_vs,PF_S_vs,FTr_S_vs,3PAr_S_vs,TS%_S_vs,TRB%_S_vs,AST%_S_vs,STL%_S_vs,BLK%_S_vs
0,/cbb/boxscores/2018-11-13-22-california.html,California,2018-11-13,Hampton,14,1,1,0,1.0,0.0,59.0,76.0,18.0,51.0,0.353,2.0,10.0,0.2,21.0,32.0,0.656,7.0,28.0,6.0,6.0,1.0,16.0,17.0,77.6,100.0,76.0,0.627,0.196,0.446,43.8,33.3,7.9,3.2,0.373,19.5,18.9,0.412,0.567,22.9,77.8,0.327,-17.0,-7.0,-1.0,-0.128,-7.0,-11.0,-0.229,4.0,7.0,-0.024,-8.0,-8.0,-1.0,-3.0,-3.0,-12.0,0.300077,-0.284444,-0.157175,86.6,-0.105263,-0.013158,-0.039474,Hampton,2018-11-13,California,-14,0,0,1,1.0,0.0,57.0,69.0,18.0,62.0,0.29,3.0,17.0,0.176,18.0,24.0,0.75,12.0,41.0,7.0,2.0,3.0,17.0,26.0,71.3,86.3,79.6,0.387,0.274,0.388,52.6,38.9,2.5,8.8,0.315,18.8,26.7,0.29,0.472,18.9,87.9,0.333,-12.0,-5.0,8.0,-0.136,-2.0,-3.0,-0.074,0.0,-7.0,0.169,4.0,-7.0,-6.0,-1.0,1.0,1.0,0.053667,0.074879,-0.122053,104.2,-0.08794,-0.075377,-0.012563
1,/cbb/boxscores/2018-11-26-21-california.html,California,2018-11-26,Santa Clara,12,1,1,0,4.0,0.25,69.25,75.0,23.5,50.25,0.46725,6.5,14.25,0.437,15.75,23.25,0.6775,4.25,27.0,10.5,5.0,3.5,13.0,17.0,97.6,105.85,71.275,0.46075,0.28425,0.567,46.175,44.225,7.025,8.15,0.53225,17.5,14.275,0.31075,0.536,15.775,76.55,0.24625,-5.75,-3.25,-8.0,-0.00325,-0.75,-6.75,0.0915,1.5,4.25,-0.044,-4.5,-4.0,-1.5,0.5,0.5,-4.0,0.214716,-0.191778,-0.00782,91.35,-0.059476,-0.022072,0.005725,Santa Clara,2018-11-26,California,-12,0,0,1,5.0,0.2,63.6,73.4,23.4,57.8,0.4022,8.2,24.8,0.3282,8.6,15.4,0.5648,7.6,29.8,12.2,5.2,4.2,13.4,22.8,88.48,101.68,71.8,0.2702,0.4282,0.4884,46.34,52.76,7.18,10.22,0.4732,16.82,22.04,0.1516,0.485,14.3,70.98,0.2976,-9.8,-1.4,-0.8,-0.0248,1.4,6.0,-0.026,-8.4,-9.6,-0.119,-4.6,-1.4,-0.4,1.0,1.8,5.0,-0.027312,0.32361,-0.041497,91.68,-0.019679,-0.006209,0.015796
2,/cbb/boxscores/2018-12-08-22-california.html,California,2018-12-08,San Diego State,6,1,1,0,7.0,0.285714,69.428571,75.571429,24.0,51.714286,0.463143,6.142857,15.285714,0.395571,15.285714,21.142857,0.742429,5.0,25.428571,10.0,5.714286,3.285714,11.571429,17.428571,100.757143,110.171429,69.171429,0.412286,0.298143,0.562714,44.485714,42.528571,8.185714,9.114286,0.523143,15.771429,16.657143,0.295857,0.565714,17.271429,72.728571,0.249286,-6.142857,-2.857143,-4.0,-0.027143,-2.0,-7.0,0.035286,1.571429,2.714286,0.011286,-6.285714,-4.714286,0.285714,0.142857,-1.857143,-2.142857,0.163037,-0.177047,-0.039429,87.971429,-0.072138,0.002775,0.001397,San Diego State,2018-12-08,California,-6,0,0,1,8.0,0.625,75.25,69.625,24.375,54.625,0.442625,8.0,21.625,0.36225,18.5,25.375,0.734875,9.25,34.0,14.125,6.0,3.25,13.25,19.5,105.075,99.5,71.2,0.466875,0.398875,0.56175,51.1375,57.2625,8.125,9.4625,0.515875,16.5875,30.3,0.340875,0.4605,15.1625,69.7625,0.26675,5.625,1.125,-4.5,0.04725,0.375,-3.875,0.07025,3.0,3.375,0.019875,1.875,2.25,-0.875,0.75,0.5,-2.625,0.200167,-0.210165,0.055071,101.275,0.027414,-0.015284,0.010068
3,/cbb/boxscores/2018-12-15-22-california.html,California,2018-12-15,Cal Poly,1,1,1,0,8.0,0.375,71.875,76.5,24.625,52.125,0.471125,7.0,16.875,0.404125,15.625,21.0,0.762125,5.625,25.375,11.0,5.125,3.0,11.125,17.25,105.825,112.8625,68.375,0.40625,0.3245,0.578625,45.3,44.975,7.3625,8.3625,0.538375,15.175,19.3875,0.29975,0.5775,17.05,71.7875,0.24875,-4.625,-2.5,-3.25,-0.02625,-1.625,-5.25,0.0175,2.0,3.0,0.014,-5.375,-4.125,0.125,0.25,-2.0,-2.0,0.157497,-0.124743,-0.035289,89.6,-0.063121,0.000438,0.003212,Cal Poly,2018-12-15,California,-1,0,0,1,7.0,0.285714,66.0,74.285714,24.571429,59.571429,0.410429,7.857143,25.285714,0.306429,9.0,13.857143,0.666143,8.142857,30.714286,12.142857,5.285714,3.0,12.714286,16.428571,92.814286,104.914286,68.2,0.237857,0.422714,0.496143,46.157143,49.6,7.642857,8.014286,0.474857,16.371429,23.728571,0.154,0.512714,13.8,70.042857,0.202714,-8.285714,-2.285714,-1.857143,-0.026429,-1.142857,0.714286,-0.053571,-2.571429,-2.857143,-0.014143,-5.0,-4.714286,-0.714286,0.0,1.714286,-0.428571,0.035119,0.094773,-0.045731,91.314286,-0.069181,-0.008425,9.7e-05
4,/cbb/boxscores/2018-12-21-22-california.html,California,2018-12-21,San Jose State,8,1,1,0,10.0,0.4,71.5,77.3,24.6,52.2,0.4698,6.4,16.7,0.3727,15.9,21.0,0.7708,5.7,25.4,10.5,5.5,3.2,11.2,18.5,104.74,113.1,68.71,0.4066,0.3213,0.575,45.18,42.88,7.9,9.28,0.5315,15.23,19.71,0.3061,0.5718,17.14,71.1,0.273,-5.8,-2.2,-3.0,-0.0213,-2.3,-5.8,-0.0117,0.9,1.3,0.016,-5.5,-4.3,0.5,0.5,-2.1,-0.4,0.133643,-0.160412,-0.036878,89.36,-0.064397,0.006807,0.006764,San Jose State,2018-12-21,California,-8,0,0,1,9.0,0.222222,69.0,74.888889,22.555556,55.444444,0.412333,6.444444,19.777778,0.332444,17.444444,25.111111,0.690889,10.0,33.888889,12.777778,3.111111,2.777778,15.888889,19.333333,93.766667,101.766667,73.733333,0.467667,0.361778,0.519556,51.988889,57.722222,4.2,8.6,0.472667,19.311111,31.644444,0.329556,0.528667,15.444444,73.777778,0.184111,-5.888889,-4.888889,-5.444444,-0.041889,-2.555556,-7.0,-0.011444,6.444444,6.444444,0.118444,3.0,-1.444444,-4.555556,0.111111,3.111111,-2.666667,0.283569,-0.113571,-0.025478,102.977778,-0.01967,-0.062934,0.000974
5,/cbb/boxscores/2019-02-28-23-california.html,California,2019-02-28,Washington,3,1,1,0,27.0,0.185185,68.666667,78.888889,23.777778,56.148148,0.424519,6.62963,19.37037,0.338704,14.481481,19.62963,0.732852,6.851852,26.37037,10.62963,6.925926,2.925926,11.0,19.148148,98.355556,112.9,69.8,0.354741,0.344963,0.525963,44.585185,44.588889,9.711111,8.803704,0.484,14.3,20.977778,0.261556,0.582222,16.981481,72.32963,0.280926,-10.222222,-3.666667,1.222222,-0.077444,-2.111111,-3.148148,-0.042593,-0.777778,-1.333333,0.012815,-6.333333,-4.518519,1.925926,-0.111111,-2.333333,1.185185,0.073756,0.001792,-0.092536,88.17037,-0.065591,0.026555,-0.001503,Washington,2019-02-28,California,-3,0,0,1,27.0,0.814815,71.074074,63.592593,24.962963,54.37037,0.46363,7.37037,21.259259,0.346889,13.777778,19.481481,0.71337,8.407407,29.259259,11.814815,9.0,5.962963,13.222222,18.555556,103.151852,92.203704,68.996296,0.365852,0.393556,0.562444,48.466667,47.807407,13.103704,16.840741,0.532185,17.314815,27.440741,0.258037,0.465037,20.085185,67.925926,0.236111,7.481481,2.703704,-0.407407,0.054815,1.222222,2.259259,0.027963,0.851852,0.962963,0.016333,-1.666667,-1.185185,3.111111,3.814815,-2.592593,-0.259259,0.129763,-0.01194,0.055377,95.933333,-0.016611,0.045822,0.055072
6,/cbb/boxscores/2019-03-02-19-california.html,California,2019-03-02,Washington State,7,1,1,0,28.0,0.214286,68.928571,78.678571,24.0,56.071429,0.429214,6.678571,19.214286,0.345643,14.25,19.357143,0.7305,6.857143,26.428571,11.035714,6.964286,2.928571,10.964286,18.928571,99.017857,112.878571,69.625,0.35,0.342571,0.529929,44.992857,45.614286,9.803571,8.871429,0.489214,14.3,21.228571,0.2575,0.582536,16.875,72.746429,0.276071,-9.75,-3.428571,1.142857,-0.072357,-2.142857,-3.464286,-0.036571,-0.75,-1.321429,0.014214,-5.892857,-4.178571,1.964286,-0.178571,-2.25,1.142857,0.073855,-0.016129,-0.087949,88.985714,-0.060497,0.027257,-0.00255,Washington State,2019-03-02,California,-7,0,0,1,28.0,0.392857,76.25,78.571429,26.571429,58.535714,0.457357,9.392857,25.678571,0.363536,13.714286,18.107143,0.73925,7.571429,30.892857,14.964286,5.428571,2.678571,12.928571,17.392857,104.921429,108.782143,72.585714,0.31575,0.43825,0.570536,47.614286,55.121429,7.389286,7.389286,0.538107,16.307143,23.317857,0.240071,0.552786,14.582143,72.275,0.18875,-2.321429,-2.285714,-2.928571,-0.01725,-0.107143,-0.607143,-0.004786,2.357143,0.821429,0.079786,-2.785714,-0.5,-1.285714,0.214286,0.928571,0.178571,0.126956,0.020067,-0.003133,94.228571,-0.009281,-0.019181,0.002508
7,/cbb/boxscores/2019-03-07-23-stanford.html,California,2019-03-07,Stanford,5,1,0,1,29.0,0.241379,69.172414,78.344828,24.137931,56.206897,0.430517,6.586207,19.034483,0.343586,14.310345,19.586207,0.726517,6.862069,26.586207,11.034483,7.172414,2.965517,10.965517,18.655172,99.006897,112.075862,69.862069,0.352862,0.338793,0.529759,44.989655,45.396552,10.048276,9.041379,0.489586,14.262069,21.148276,0.257828,0.580552,17.065517,72.824138,0.27,-9.172414,-3.206897,1.103448,-0.068138,-2.37931,-3.931034,-0.039897,-0.37931,-0.793103,0.01769,-5.931034,-4.206897,2.241379,-0.206897,-2.448276,0.862069,0.082791,-0.04109,-0.085037,88.97931,-0.060665,0.030825,-0.002913,Stanford,2019-03-07,California,-5,0,1,0,29.0,0.517241,73.241379,71.862069,26.137931,56.655172,0.461759,6.758621,20.689655,0.317138,14.206897,21.0,0.679379,8.206897,33.241379,12.793103,5.965517,4.551724,14.793103,18.793103,99.482759,98.275862,73.096552,0.376621,0.365862,0.55,51.865517,48.848276,8.086207,10.148276,0.521069,18.327586,26.231034,0.255931,0.481897,14.631034,74.451724,0.223207,1.37931,-0.275862,-4.275862,0.026517,1.137931,4.103448,-0.019552,0.793103,1.517241,-0.00769,2.103448,2.0,-0.448276,1.413793,2.724138,-0.724138,0.153427,0.188796,0.031462,102.731034,0.02563,-0.007512,0.01876
8,/cbb/boxscores/2018-11-06-22-stanford.html,Stanford,2018-11-06,Seattle,22,1,1,0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Seattle,2018-11-06,Stanford,-22,0,0,1,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,/cbb/boxscores/2018-11-09-19-north-carolina-wi...,Stanford,2018-11-09,UNC Wilmington,13,1,0,1,1.0,1.0,96.0,74.0,29.0,56.0,0.518,11.0,15.0,0.733,27.0,33.0,0.818,7.0,35.0,12.0,4.0,6.0,13.0,23.0,128.0,98.7,75.3,0.589,0.268,0.67,57.4,41.4,5.3,13.0,0.616,15.4,31.8,0.482,0.484,11.9,71.8,0.188,22.0,1.0,-8.0,0.08,5.0,-3.0,0.4,15.0,12.0,0.247,9.0,5.0,2.0,5.0,3.0,-2.0,0.4015,-0.125616,0.164812,113.8,0.066401,0.02656,0.066401,UNC Wilmington,2018-11-09,Stanford,-13,0,1,0,1.0,0.0,93.0,97.0,29.0,63.0,0.46,15.0,29.0,0.517,20.0,27.0,0.741,10.0,42.0,18.0,3.0,2.0,18.0,20.0,112.0,116.9,73.4,0.429,0.46,0.613,59.2,62.1,3.6,5.1,0.579,19.2,33.3,0.317,0.549,7.7,78.0,0.25,-4.0,-4.0,-9.0,0.002,2.0,-4.0,0.123,2.0,3.0,-0.009,13.0,-3.0,-8.0,-1.0,11.0,3.0,0.179,0.0,0.025548,117.4,-0.040872,-0.108992,-0.013624


In [None]:
df_final.