In [1]:
import pandas as pd
import numpy as np

In [2]:
# Michigan State 2018 game-log page on sports-reference.com; this is the
# address handed to pd.read_html when the table is loaded.
msuurl = 'https://www.sports-reference.com/cbb/schools/michigan-state/2018-gamelogs.html#sgl-basic::none'

In [3]:
'''Read team gamelog'''
# pd.read_html returns a list of DataFrames (one per HTML table it parses);
# only the first table is kept. This cell needs network access to msuurl.
df = pd.read_html(msuurl)[0]

In [4]:
'''Remove opponent columns and divider rows'''
# Keep only the first 23 columns; everything to their right is discarded.
df = df.iloc[:, :23]

# Drop the rows whose labels sit at positions 20 and 21 of the index.
# NOTE(review): these positions are hard-coded — presumably where the page
# repeats its header inside the table; confirm if the table gains more rows.
df = df.drop(index=df.index[[20, 21]])

In [5]:
# df.columns

In [6]:
'''Remove Double column headers'''
# Every column label is indexed with [1] below, so each label is assumed to
# be a multi-part (tuple-like) header; only its second part is kept.
dubcols = df.columns.tolist()
cols = [col[1] for col in dubcols]
# Assigning a plain list replaces the column labels in place.
df.columns = cols

In [7]:
# Display the flattened labels. Note that 'Opp' appears twice in the output;
# the rename cell that follows gives the two columns distinct names.
df.columns

Index(['G', 'Date', ' ', 'Opp', 'W/L', 'Tm', 'Opp', 'FG', 'FGA', 'FG%', '3P',
       '3PA', '3P%', 'FT', 'FTA', 'FT%', 'ORB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF'],
      dtype='object')

In [8]:
'''Rename Columns'''
# Positional rename: one entry per existing column, in the same order.
# Compared with the scraped labels shown above, the changes are
# ' ' -> 'Blank', 'W/L' -> 'W', 'Tm' -> 'Pts', second 'Opp' -> 'PtsA',
# 'TRB' -> 'RB' and 'TOV' -> 'TO'; all other labels are unchanged.
newcols = ['G', 'Date', 'Blank', 'Opp', 'W', 'Pts', 'PtsA', 'FG', 'FGA', 
           'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'ORB', 'RB', 
           'AST', 'STL', 'BLK', 'TO', 'PF']
df.columns = newcols

In [9]:
def teams_dict(filepath):
    '''
    Build a lookup from raw school names to their formatted spellings.

    Parameters
    ----------
    filepath : anything pd.read_csv accepts
        CSV that contains at least the columns 'School' and 'School_format'.

    Returns
    -------
    dict
        Maps each 'School' value to the 'School_format' value on the same
        row. If a school is listed more than once, the last row wins.
    '''
    name_table = pd.read_csv(filepath)[['School', 'School_format']]
    raw_names = name_table['School'].tolist()
    formatted_names = name_table['School_format'].tolist()
    # Pair the two columns row by row and let dict() consume the pairs.
    return dict(zip(raw_names, formatted_names))

In [10]:
'''reformat Opponent team name column strings'''
# NOTE(review): absolute, machine-specific path — this cell will not run on
# another machine until the path is changed.
team_names_sos_filepath = '/Users/sec/galvanize/capstone1/team_list/sos_team_list_2018_final.csv'
# Series.map with a dict yields NaN for any opponent name that is not a key
# of the dictionary returned by teams_dict.
df['Opp'] = df['Opp'].map(teams_dict(team_names_sos_filepath))

In [11]:
'''Only take the first character in the W field, then map to 0's and 1's.
        (Ties and overtime have excess characters)'''
# Cast to str, keep the leading character, then translate it:
# 'W' -> 1, 'L' -> 0; any other leading character becomes NaN.
df['W'] = (
    df['W']
    .astype(str)
    .str[0]
    .map({'W': 1, 'L': 0})
)

In [12]:
'''Create win percentage and rolling average features'''
# Running win count, and that count divided by the game number in 'G'.
df['Ws'] = df['W'].cumsum(axis=0)
df['Wp'] = df['Ws'].astype(int) / df['G'].astype(int)

# (new column, source column) pairs for the 5-row trailing means.
# The list order is the order in which the new columns are appended.
rolling_specs = [
    ('ppg', 'Pts'),
    ('pApg', 'PtsA'),
    ('FGp', 'FG%'),
    ('3Pg', '3P%'),
    ('FTp', 'FT%'),
    ('ORBpg', 'ORB'),
    ('RBpg', 'RB'),
    ('ASTpg', 'AST'),
    ('STLpg', 'STL'),
    ('BLKpg', 'BLK'),
    ('TOpg', 'TO'),
    ('PFpg', 'PF'),
]
for rolled_col, source_col in rolling_specs:
    # center=False: each window ends on the current row, so the value at a
    # row averages that row and the four rows before it.
    df[rolled_col] = df[source_col].rolling(window=5, center=False).mean()

In [13]:
'''Drop the raw per-game columns once the rolling averages are computed'''
# Only the columns not listed here survive this cell.
df = df.drop(
    columns=[
        'G', 'Blank', 'Pts', 'PtsA',
        'FG', 'FGA', 'FG%',
        '3P', '3PA', '3P%',
        'FT', 'FTA', 'FT%',
        'ORB', 'RB', 'AST', 'STL', 'BLK', 'TO', 'PF',
    ]
)

In [14]:
# Drop every row containing at least one NaN. That covers the first four
# rows (their 5-row rolling windows are incomplete) and any row whose
# opponent name was not found when 'Opp' was mapped.
df = df.dropna()

In [17]:
'''Add Team Column'''
team = 'michigan-state'
# Scalar assignment: every row receives the same team identifier.
df['Tm'] = team

In [19]:
def sos_dict(filepath):
    '''
    Build a lookup from formatted school names to strength of schedule.

    Parameters
    ----------
    filepath : anything pd.read_csv accepts
        CSV that contains at least the columns 'School_format' and 'SOS'.

    Returns
    -------
    dict
        Maps each 'School_format' value to the 'SOS' value on the same row.
        If a school is listed more than once, the last row wins.
    '''
    sos_table = pd.read_csv(filepath)[['School_format', 'SOS']]
    school_names = sos_table['School_format'].tolist()
    ratings = sos_table['SOS'].tolist()
    # Walk both columns in lockstep, one (school, rating) pair per row.
    return {
        school_name: rating
        for school_name, rating in zip(school_names, ratings)
    }

In [20]:
'''Add SOS columns'''
# 'Tm' holds the same team identifier on every row, so every row receives
# that team's SOS value (NaN if the identifier is not a key of the dict).
df['sos'] = df['Tm'].map(sos_dict(team_names_sos_filepath))

In [21]:
# Display the resulting feature table (the whole frame is rendered).
df

Unnamed: 0,Date,Opp,W,Ws,Wp,ppg,pApg,FGp,3Pg,FTp,ORBpg,RBpg,ASTpg,STLpg,BLKpg,TOpg,PFpg,Tm,sos
4,2017-11-25,connecticut,1,4,0.8,84.4,66.6,0.5236,0.3644,0.6834,10.2,38.0,19.2,5.2,6.8,14.8,20.8,michigan-state,7.58
5,2017-11-26,north-carolina,1,5,0.833333,77.4,62.4,0.4888,0.3914,0.6656,9.6,37.8,18.0,4.2,6.2,15.0,21.2,michigan-state,7.58
7,2017-12-03,nebraska,1,7,0.875,76.0,54.6,0.4696,0.3912,0.6842,10.6,40.6,16.4,4.0,5.8,12.6,19.0,michigan-state,7.58
8,2017-12-05,rutgers,1,8,0.888889,73.8,54.8,0.4524,0.3804,0.6562,11.4,41.2,16.0,3.8,8.2,12.6,17.2,michigan-state,7.58
10,2017-12-16,oakland,1,10,0.909091,80.6,61.6,0.4738,0.3876,0.6966,12.8,43.0,18.8,4.4,9.6,12.2,15.0,michigan-state,7.58
15,2018-01-04,maryland,1,15,0.9375,103.8,59.2,0.6206,0.492,0.732,11.6,42.8,30.4,4.8,9.2,12.0,17.2,michigan-state,7.58
17,2018-01-10,rutgers,1,16,0.888889,90.0,65.2,0.5306,0.4532,0.7274,10.4,41.2,25.2,3.4,5.8,11.8,17.0,michigan-state,7.58
18,2018-01-13,michigan,0,16,0.842105,82.2,69.4,0.4918,0.384,0.7406,9.0,38.2,22.0,3.8,6.6,13.0,18.6,michigan-state,7.58
19,2018-01-19,indiana,1,17,0.85,77.6,70.4,0.4744,0.4112,0.761,8.4,35.6,19.6,3.0,6.6,12.2,18.6,michigan-state,7.58
22,2018-01-22,illinois,1,18,0.857143,76.8,73.0,0.4966,0.3858,0.7786,8.8,36.4,16.6,3.2,6.6,15.8,18.8,michigan-state,7.58
