In [5]:
import numpy as np
import pandas as pd
from datetime import *

In [28]:
def ptsWinsGames(teams, ind, prev_weeks): #Find pts scores, wins, games up to given week, for teams in a given game
    """Compute season-to-date scoring and win features for both teams of one game.

    For each team, the last row of ``prev_weeks`` in which the team appears
    (as home or visitor) is looked up. The running totals stored on that row
    are advanced by that game's own result and written under the side
    (``Home``/``Vis``) the team occupies in the current game.

    Parameters
    ----------
    teams : sequence
        Two team ids; the first is treated as the home team, the second as
        the visitor.
    ind : list of str
        Labels of the returned Series. Must contain the ``*PtsFPG`` and
        ``*PtsAPG`` labels read by the difference features at the end.
    prev_weeks : pandas.DataFrame
        Earlier games. Columns read here: ``HomeID``, ``VisID``,
        ``HomeFinal``, ``VisFinal``, ``Spread`` and, for both sides,
        ``PtsF``, ``PtsA``, ``Wins``, ``Games``. Rows are assumed to be in
        chronological order, since the *last* matching row is used.

    Returns
    -------
    pandas.Series
        Float features indexed by ``ind``. A team with no earlier game keeps
        0.0 for all of its per-team features.
    """
    game = pd.Series(0.0, index=ind) #pre-allocate array to return

    for i, team in enumerate(teams): #repeat for home and vis
        pos = 'Home' if i == 0 else 'Vis'
        appeared = (prev_weeks['HomeID'] == team) | (prev_weeks['VisID'] == team)
        prev_game = prev_weeks.loc[appeared].tail(1) #find last appearance
        if prev_game.empty: #skip if team has not yet played
            continue

        def last(col, row=prev_game):
            # Pull the scalar out of the one-row frame before converting:
            # int() applied directly to a single-element Series is deprecated.
            return int(row[col].iloc[0])

        if last('HomeID') == team: #if prev appearance was home game
            prev_pos, prev_opp = 'Home', 'Vis'
        else: #if prev appearance was vis game
            prev_pos, prev_opp = 'Vis', 'Home'

        # Running totals on the previous row exclude that game itself, so add its score.
        game['{}PtsF'.format(pos)] = last('{}PtsF'.format(prev_pos)) + last('{}Final'.format(prev_pos))
        game['{}PtsA'.format(pos)] = last('{}PtsA'.format(prev_pos)) + last('{}Final'.format(prev_opp))

        # Spread > 0 credits the home side of the previous game, Spread < 0 the
        # visitor; a Spread of 0 credits neither.
        spread = last('Spread')
        won = spread > 0 if prev_pos == 'Home' else spread < 0
        game['{}Wins'.format(pos)] = last('{}Wins'.format(prev_pos)) + (1 if won else 0)

        game['{}Games'.format(pos)] = last('{}Games'.format(prev_pos)) + 1 #add 1 to prev games played
        game['{}WinPct'.format(pos)] = int(game['{}Wins'.format(pos)])/int(game['{}Games'.format(pos)]) #get new win pct

        game['{}PtsFPG'.format(pos)] = game['{}PtsF'.format(pos)]/game['{}Games'.format(pos)]
        game['{}PtsAPG'.format(pos)] = game['{}PtsA'.format(pos)]/game['{}Games'.format(pos)]

    # Match-up features: each side's scoring rate against the opponent's
    # conceding rate, and the home-minus-visitor differences.
    game['HomeDiffOD'] = game['HomePtsFPG'] - game['VisPtsAPG']
    game['VisDiffOD'] = game['VisPtsFPG'] - game['HomePtsAPG']
    game['DiffPtsFPG'] = game['HomePtsFPG'] - game['VisPtsFPG']
    game['DiffPtsAPG'] = game['HomePtsAPG'] - game['VisPtsAPG']

    return game

def add_ptsWinsGames(scores): #initialize df for year
    """Add cumulative points/wins/games features to every game of one season.

    The frame is given zero-initialised feature columns and a ``Spread``
    column (``HomeFinal - VisFinal``), split by ``Week``, and each week after
    the first is filled row by row with ``ptsWinsGames`` using all earlier
    (already filled) weeks as history. The first week keeps its zeros.

    Parameters
    ----------
    scores : pandas.DataFrame
        Games of one season; columns read here are ``Week``, ``HomeID``,
        ``VisID``, ``HomeFinal`` and ``VisFinal``.

    Returns
    -------
    pandas.DataFrame
        The weekly pieces concatenated back together, in ``Week`` order.
    """
    # Column creation order (this fixes the column layout of the result).
    zeroed_cols = ['HomePtsF', 'HomePtsA', 'HomeWins', 'HomeGames', 'HomeWinPct', 'HomePtsFPG', 'HomePtsAPG',
                   'VisPtsF', 'VisPtsA', 'VisWins', 'VisGames', 'VisWinPct', 'VisPtsFPG', 'VisPtsAPG',
                   'HomeDiffOD', 'VisDiffOD', 'DiffPtsFPG', 'DiffPtsAPG']
    # Label order handed to ptsWinsGames and used for the column assignment below.
    pwg_ind = ['HomePtsF','HomePtsA','HomeWins','HomeGames','HomeWinPct','HomePtsFPG','HomePtsAPG','HomeDiffOD',
               'VisPtsF','VisPtsA','VisWins','VisGames','VisWinPct','VisPtsFPG','VisPtsAPG','VisDiffOD',
               'DiffPtsFPG','DiffPtsAPG']

    scores = (
        scores
        .assign(**dict.fromkeys(zeroed_cols, 0.0))
        .assign(Spread=lambda df: df['HomeFinal'] - df['VisFinal'])
    )

    weekly = [week_games for _, week_games in scores.groupby('Week')]

    # Start at position 1: the first week has no history to accumulate from.
    for wk in range(1, len(weekly)):
        history = pd.concat(weekly[:wk])
        current = weekly[wk]
        # Filled in place so that later weeks see this week's updated totals.
        current[pwg_ind] = current.apply(
            lambda row: ptsWinsGames((row['HomeID'], row['VisID']), pwg_ind, history),
            axis=1,
        )

    return pd.concat(weekly)

In [8]:
# Read the full games table, then split it into one DataFrame per Season value.
ultimate = pd.read_csv('data/ultimate/ultimate_names.csv')
ultimates = []
for _season, season_games in ultimate.groupby('Season'):
    ultimates.append(season_games)

In [29]:
# Build the cumulative points/wins/games features for the first season group.
ult2 = add_ptsWinsGames(ultimates[0])

In [22]:
# Shape of ult2's column index, i.e. how many columns the augmented frame has.
ult2.columns.shape

(185,)

In [32]:
# Load the merged conference table.
# NOTE(review): the next cell repeats this exact read, so this cell looks redundant.
conf = pd.read_csv('data/conferences/mergedConferences.csv')

In [41]:
# Load the conference table and duplicate its ID / Year / Conf columns under
# the Home*/Vis*/Season names used when merging onto the games table.
conf = pd.read_csv('data/conferences/mergedConferences.csv').assign(
    HomeID=lambda tbl: tbl['ID'],
    VisID=lambda tbl: tbl['ID'],
    Season=lambda tbl: tbl['Year'],
    HomeConf=lambda tbl: tbl['Conf'],
    VisConf=lambda tbl: tbl['Conf'],
)

# One conference table per Season value.
confs = [season_tbl for _, season_tbl in conf.groupby('Season')]

In [63]:
# For every season position in confs: attach the home and visiting conference
# to each game (left joins keep all games), then append one-hot conference
# columns for both sides.
# NOTE(review): ultimates[i] and confs[i] are paired purely by position - confirm
# both lists cover the same seasons in the same order.
merged = []
for i in range(len(confs)):
    season_games = ultimates[i].merge(confs[i][['HomeID', 'HomeConf']], on='HomeID', how='left')
    season_games = season_games.merge(confs[i][['VisID', 'VisConf']], on='VisID', how='left')
    conf_dummies = pd.get_dummies(season_games[['HomeConf','VisConf']])
    merged.append(pd.concat([season_games, conf_dummies], axis=1))

In [66]:
# Inspect the visiting-side 'NotMajor' dummy column for the second entry of merged.
merged[1]['VisConf_NotMajor']

0      0
1      0
2      0
3      0
4      0
5      0
6      0
7      1
8      1
9      1
10     1
11     0
12     0
13     1
14     1
15     1
16     0
17     1
18     1
19     0
20     1
21     1
22     0
23     0
24     0
25     0
26     0
27     0
28     1
29     0
      ..
742    0
743    0
744    0
745    0
746    0
747    0
748    0
749    0
750    0
751    0
752    0
753    0
754    0
755    0
756    0
757    0
758    0
759    0
760    0
761    0
762    0
763    0
764    0
765    0
766    0
767    0
768    0
769    0
770    0
771    0
Name: VisConf_NotMajor, Length: 772, dtype: uint8

In [62]:
def get_Teams(scores): #list of teams and conferences for given year
    """Return one row per team with a single ``Conf`` flag column.

    Teams are the sorted union of the ``HomeID`` and ``VisID`` values. The
    flag is taken from ``HomeConf_NotMajor`` where the team has a home row,
    otherwise from ``VisConf_NotMajor``. If only one of the two columns
    exists it is used alone; if neither exists ``Conf`` is 0 for every team.

    Parameters
    ----------
    scores : pandas.DataFrame
        Games table with ``HomeID`` and ``VisID`` columns and, optionally,
        ``HomeConf_NotMajor`` / ``VisConf_NotMajor``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Team`` with the single column ``Conf``.
    """
    team_ids = np.union1d(scores['HomeID'].unique(), scores['VisID'].unique())
    stats = pd.DataFrame({'Team': team_ids})

    sides_found = []
    for side in ('Home', 'Vis'):
        flag_col = '{}Conf_NotMajor'.format(side)
        if flag_col not in scores.columns:
            continue
        id_col = '{}ID'.format(side)
        # Distinct (team, flag) pairs seen on this side of the games table.
        lookup = (
            scores[[id_col, flag_col]]
            .rename(index=str, columns={id_col: 'Team'})
            .drop_duplicates()
        )
        stats = stats.merge(lookup, on='Team', how='left')
        sides_found.append(side)

    if len(sides_found) == 2:
        # Prefer the home-side flag; fall back to the visiting-side flag.
        stats['Conf'] = stats['HomeConf_NotMajor'].fillna(stats['VisConf_NotMajor'])
        stats = stats.drop(['VisConf_NotMajor', 'HomeConf_NotMajor'], axis=1)
    elif sides_found:
        # Exactly one side carried the flag column.
        only_col = '{}Conf_NotMajor'.format(sides_found[0])
        stats['Conf'] = stats[only_col]
        stats = stats.drop([only_col], axis=1)
    else:
        stats['Conf'] = 0

    return stats.set_index('Team')

In [81]:
# Scratch check on the second entry of merged: sorted union of home and
# visiting team ids, placed in a one-column 'Team' frame.
teams1 = np.union1d(merged[1]['HomeID'].unique(), merged[1]['VisID'].unique()) #teams
stats1 = pd.DataFrame({'Team':teams1}) 

In [75]:
# (rows, columns) of the per-team frame; `shapex` was a typo that raises AttributeError.
stats1.shape

(170, 1)

In [82]:
# Scratch check: distinct (team, flag) pairs from the visiting side of the
# second entry of merged, left-joined onto the per-team frame.
pos = 'Vis'
temp = (
    merged[1][['{}ID'.format(pos), '{}Conf_NotMajor'.format(pos)]]
    .rename(index=str, columns={'{}ID'.format(pos): 'Team'})
    .drop_duplicates()
)
stats1 = stats1.merge(temp, on='Team', how='left')

In [84]:
# Load the ultimate_2.csv table.
ultimates3 = pd.read_csv('data/ultimate/ultimate_2.csv')

In [87]:
# Every game involving team id 2 (as home or visitor), with both sides' Elo columns.
ultimates3.loc[
    ultimates3['HomeID'].eq(2) | ultimates3['VisID'].eq(2),
    ['HomeID', 'VisID', 'HomeElo', 'VisElo']
]

Unnamed: 0,HomeID,VisID,HomeElo,VisElo
18,2,153,1500,1500
44,2,2050,1480,1500
92,2,145,1516,1515
169,183,2,1522,1535
233,238,2,1472,1504
261,2,344,1516,1491
314,2,57,1526,1668
370,8,2,1466,1546
485,61,2,1536,1504
529,2,333,1527,1530
