In [241]:
import os
import pandas as pd
import numpy as np
import warnings; warnings.simplefilter('ignore')

In [242]:
## Load data

def load_data(directory, year):
    """Read one season's game-log file into a DataFrame.

    The file is expected at ``<directory>/gl1990-2018/GL<year>.TXT``; the
    sub-folder name is fixed regardless of the year requested.

    Parameters
    ----------
    directory : str
        Folder that contains the ``gl1990-2018`` sub-folder. A trailing
        path separator is optional.
    year : int
        Season to load; it is formatted with ``str(year)`` into the file name.

    Returns
    -------
    pandas.DataFrame
        The comma-separated file parsed without a header row, so columns are
        labelled 0..N-1.
    """
    # os.path.join supplies the separator itself, so both './' and '.' work
    # (plain string concatenation silently produced a wrong path for '.').
    file_path = os.path.join(directory, "gl1990-2018", "GL" + str(year) + ".TXT")
    return pd.read_csv(file_path, sep=',', header=None)

## Seasons to load: 1978 through 2018 inclusive (np.arange excludes the stop value).
year_range = np.arange(1978, 2019)
directory = './'

## Read every season first and concatenate once at the end; re-concatenating
## inside the loop copies the whole accumulated frame on every iteration.
season_frames = []
for num in year_range:
    curdf = load_data(directory, num)
    season_frames.append(curdf)

## Row labels are not reset, so each season keeps its own 0..n-1 labels,
## exactly as the incremental concat produced.
alldata = pd.concat(season_frames)

(92291, 161)

In [243]:
def HistoricalWinLoss(team, games=None): ##BAL, BOS, NYA, TOR
    """Build one team's game-by-game win/loss history and write it to ``<team>.csv``.

    Rows where ``team`` is the home team (column 6) or the visiting team
    (column 3) are selected, reduced to twelve named columns, and extended with:

    * ``PointDifferential`` - the team's score minus the opponent's score.
    * ``Winner`` - ``'W'`` / ``'L'``; empty when the differential is zero or unknown.
    * ``GameNumber`` - 1-based position of the game within its calendar year.
    * ``WinnerNumber`` - +1 for a win, -1 for a loss, 0 for a zero differential.
    * ``WinLossRatio`` - wins minus losses over all *earlier* games of the same
      year (0 for the first game). Despite the name it is a running difference,
      not a ratio.
    * ``WinLossRatioLast10`` - the same difference restricted to the (up to) ten
      games immediately before the current one.

    For every team except ``'TOR'`` the last two columns are prefixed with
    ``"<team>_"`` so several teams' files can later be merged side by side.

    Parameters
    ----------
    team : str
        Team code as it appears in columns 3 and 6 of the game log.
    games : pandas.DataFrame, optional
        Game-log frame with integer column labels and ``YYYYMMDD`` dates in
        column 0. Defaults to the notebook-level ``alldata`` frame.

    Returns
    -------
    None
        The result is written to ``<team>.csv`` in the working directory.
    """
    if games is None:
        games = alldata  ## notebook-level frame built by the loading cell

    ## home and away columns; copy so nothing below writes into the shared frame
    data = games.loc[(games[6] == team) | (games[3] == team)].copy()

    data[0] = pd.to_datetime(data[0], format='%Y%m%d')  ## convert to datetime
    data = data.set_index(0)  ## set datetime as index

    data = data[[1, 2, 3, 4, 6, 12, 13, 17, 23, 51, 9, 10]].copy()  ##Useful columns
    data.columns = ['GameType','DayOfWeek','VisitingTeam_Team','VisitingTeam_League','HomeTeam','DayNight','Completion','Attendance','VisitingTeam_Hits','HomeTeam_Hits','VisitingScore','HomeScore']

    data = data.loc[data['GameType'] != 1]  ## Exclude one game of double-header that has no attendance

    ## Everything below is "as of the previous game", so make the chronological
    ## order explicit. The sort is stable: rows sharing a date keep their order.
    data = data.sort_index(kind='mergesort')
    season = np.asarray(data.index.year)  ## plain array -> purely positional grouping

    ## PointDifferential is always from `team`'s point of view, so the
    ## calculation differs based on whether `team` is the home or away side.
    ## np.where works positionally and therefore also copes with repeated dates.
    is_visitor = data['VisitingTeam_Team'] == team
    home_margin = data['HomeScore'] - data['VisitingScore']
    away_margin = data['VisitingScore'] - data['HomeScore']
    data['PointDifferential'] = np.where(is_visitor, away_margin, home_margin).astype(float)

    ## +1 win, -1 loss, 0 level score, NaN when a score is missing
    outcome = np.sign(data['PointDifferential'])
    data['Winner'] = outcome.map({1.0: 'W', -1.0: 'L'})

    ## Assign a game number for each game of each season
    data['GameNumber'] = data.groupby(season).cumcount() + 1

    ## WinnerNumber column is useful for calculating WinLossRatio (see next).
    ## Nullable integer dtype: an unknown result is written as an empty CSV cell.
    data['WinnerNumber'] = outcome.astype('Int64')

    ## WinLossRatio: running (wins - losses) over all prior games in the season.
    ## Running total minus the current game's own result == total before the game.
    counted = outcome.fillna(0)  ## an unknown result contributes nothing
    prior_total = (counted.groupby(season).cumsum() - counted).astype(int)
    data['WinLossRatio'] = prior_total

    ## WinLossRatioLast10: the same figure over the previous (up to) 10 games,
    ## i.e. the prior total now minus the prior total ten games ago (0 when the
    ## season is not yet ten games old). Stored as float throughout.
    total_ten_games_ago = prior_total.groupby(season).shift(10).fillna(0)
    data['WinLossRatioLast10'] = (prior_total - total_ten_games_ago).astype(float)

    ##Eventually going to merge all winloss columns into single dataframe, they need different names
    if team != 'TOR':
        ## NOTE(review): index=str also turns the datetime row labels into
        ## strings, so these files appear to carry full timestamps as row labels
        ## while TOR.csv carries plain dates. Kept unchanged so existing
        ## consumers of the CSVs see the same labels - confirm before dropping.
        data = data.rename(index=str, columns={"WinLossRatio": team+"_WinLossRatio", 'WinLossRatioLast10' : team+"_WinLossRatioLast10"})

    data.to_csv(str(team) +'.csv')
        

In [244]:
## Write one win/loss CSV per team, in the same order as before.
for team_code in ('BAL', 'BOS', 'TOR', 'NYA'):
    HistoricalWinLoss(team_code)