In [1]:
import pandas as pd
import numpy as np
import copy

In [104]:
pan1 = pd.read_csv('cleaned_pan1.csv')
pan2 = pd.read_csv('cleaned_pan2.csv')
pan3 = pd.read_csv('cleaned_pan3.csv')

In [3]:
teams1 = ['ARS', 'BOU', 'BUR', 'CHE', 'CRY',
       'EVE', 'HUL', 'LEI', 'LIV', 'MCI', 'MID', 'MUN', 'SOU', 'STO', 'SUN',
       'SWA', 'TOT', 'WAT', 'WBA', 'WHU']
teams2 = ['ARS', 'BHA', 'BOU', 'BUR', 'CHE',
       'CRY', 'EVE', 'HUD', 'LEI', 'LIV', 'MCI', 'MUN', 'NEW', 'SOU', 'STO',
       'SWA', 'TOT', 'WAT', 'WBA', 'WHU']
teams3 = ['ARS', 'BOU', 'BHA', 'BUR', 'CAR', 'CHE', 'CRY', 'EVE',
       'FUL', 'HUD', 'LEI', 'LIV', 'MCI', 'MUN', 'NEW', 'SOU', 'TOT', 'WAT',
       'WHU', 'WOL']

Create dummies for winner or loser of games within row. Constructed from the sum of goals over week so not precise on double weeks but still a good metric for team performance over the week.

In [4]:
def winner(row):
    t = row['team_score']
    o = row['opp_score']
    if t>o:
        return 1
    else:
        return 0

def draw(row):
    t = row['team_score']
    o = row['opp_score']
    if t==o:
        return 1
    else:
        return 0

In [5]:
pan1['win']=pan1.apply(winner,axis=1)
pan1['draw']=pan1.apply(draw,axis=1)
pan2['win']=pan2.apply(winner,axis=1)
pan2['draw']=pan2.apply(draw,axis=1)
pan3['win']=pan3.apply(winner,axis=1)
pan3['draw']=pan3.apply(draw,axis=1)

I now construct features to describe the teams of a game. To controll for quality for both the players team and the opponents team I create a series of features for positional quality, overall quality and top 11 quality. The idea being that each row goes from describing a player in a dark room without any information on the match to describing the difficulty of the game and quality of teams at certain positions. Lastly I construct a percentage points the player got out off all the points awarded during the week of games they were involved in.

In [33]:
def link_to_team(row, df):
    '''
    returns a sub-df of the players team that week
    '''
    team = row['team']
    gw = row['round']
    return df[(df['team']==team)&(df['round']==gw)]

def link_to_opposition(row, df, teams):
    '''
    returns a sub-df of the players opposition that week
    '''
    gw = row['round']
    teams_played = []
    for team in teams:
        if row[team]==1:
            teams_played.append(team)
    if len(teams_played)==1:
        team_played = teams_played[0]
        sub_df = df[(df['team']==team_played)&(df['round']==gw)]
    else: #double week
        opp_1 = (df['team']==teams_played[0])
        opp_2 = (df['team']==teams_played[1])
        condition = (opp_1 | opp_2)
        sub_df = df[condition&(df['round']==gw)]
        
    return sub_df

def team_points(row, df):
    '''
    from sub-df sums total_points
    '''
    team = link_to_team(row, df)
    team_points = team['total_points'].sum()
    return team_points

def opp_points(row, df, teams):
    '''
    from sub-df sums total_points
    '''
    team = link_to_opposition(row, df, teams)
    team_points = team['total_points'].sum()
    return team_points

def pos_points(row, df, pos, opp=False, teams=None):
    '''
    from sub-df sums total_points
    '''
    if opp==False:
        team = link_to_team(row, df)
    else:
        team = link_to_opposition(row, df, teams)
    pos_points = team[team['position']==pos]['total_points'].sum()
    return pos_points

def perc_points(row):
    '''
    sums total_points in game to find percentage
    points player got from those generated
    '''
    t_points = row['team_points']
    o_points = row['opp_points']
    total = t_points + o_points
    pts = row['total_points']
    perc = (pts/total)*100
    return perc

def perc_of_sum(row, team=True):
    if team==True:
        total=row['team_points']
    else:
        total=row['opp_points']
        
    pts = row['total_points']
    perc=(pts/total)*100
    return perc
    

def opp_league_points(row, df):
    '''
    links to the opposition points, finds the mean
    league points across the sub-df. If double week
    will return the mean points of both opposition teams
    '''
    team = link_to_opposition(row, df)
    league_points = team['league_points_cum'].mean()
    return league_points

In [7]:
pan1['team_points']=pan1.apply(team_points, df=pan1, axis=1)
pan2['team_points']=pan2.apply(team_points, df=pan2, axis=1)
pan3['team_points']=pan3.apply(team_points, df=pan3, axis=1)

In [34]:
pan1['opp_points']=pan1.apply(opp_points, df=pan1, teams=teams1, axis=1)
pan2['opp_points']=pan2.apply(opp_points, df=pan2, teams=teams2, axis=1)
pan3['opp_points']=pan3.apply(opp_points, df=pan3, teams=teams3, axis=1)

In [36]:
pan1['perc_of_all_points']=pan1.apply(perc_points, axis=1)
pan2['perc_of_all_points']=pan2.apply(perc_points, axis=1)
pan3['perc_of_all_points']=pan3.apply(perc_points, axis=1)

In [37]:
pan1['perc_of_team_points']=pan1.apply(perc_of_sum, axis=1)
pan2['perc_of_team_points']=pan2.apply(perc_of_sum, axis=1)
pan3['perc_of_team_points']=pan3.apply(perc_of_sum, axis=1)

In [38]:
pan1['perc_of_opp_points']=pan1.apply(perc_of_sum, team=False, axis=1)
pan2['perc_of_opp_points']=pan2.apply(perc_of_sum, team=False, axis=1)
pan3['perc_of_opp_points']=pan3.apply(perc_of_sum, team=False, axis=1)

In [39]:
pan1['def_points']=pan1.apply(pos_points, df=pan1, pos='Defender', axis=1)
pan1['mid_points']=pan1.apply(pos_points, df=pan1, pos='Midfielder', axis=1)
pan1['for_points']=pan1.apply(pos_points, df=pan1, pos='Forward', axis=1)
pan1['gk_points']=pan1.apply(pos_points, df=pan1, pos='Goalkeeper', axis=1)
pan1['na_points']=pan1.apply(pos_points, df=pan1, pos='unkown', axis=1)

pan2['def_points']=pan2.apply(pos_points, df=pan2, pos='Defender', axis=1)
pan2['mid_points']=pan2.apply(pos_points, df=pan2, pos='Midfielder', axis=1)
pan2['for_points']=pan2.apply(pos_points, df=pan2, pos='Forward', axis=1)
pan2['gk_points']=pan2.apply(pos_points, df=pan2, pos='Goalkeeper', axis=1)
pan2['na_points']=pan2.apply(pos_points, df=pan2, pos='unkown', axis=1)

pan3['def_points']=pan3.apply(pos_points, df=pan3, pos='Defender', axis=1)
pan3['mid_points']=pan3.apply(pos_points, df=pan3, pos='Midfielder', axis=1)
pan3['for_points']=pan3.apply(pos_points, df=pan3, pos='Forward', axis=1)
pan3['gk_points']=pan3.apply(pos_points, df=pan3, pos='Goalkeeper', axis=1)
pan3['na_points']=pan3.apply(pos_points, df=pan3, pos='unkown', axis=1)

In [40]:
pan1['opp_def_points']=pan1.apply(pos_points, df=pan1, pos='Defender',opp=True, teams=teams1, axis=1)
pan1['opp_mid_points']=pan1.apply(pos_points, df=pan1, pos='Midfielder',opp=True, teams=teams1, axis=1)
pan1['opp_for_points']=pan1.apply(pos_points, df=pan1, pos='Forward',opp=True, teams=teams1, axis=1)
pan1['opp_gk_points']=pan1.apply(pos_points, df=pan1, pos='Goalkeeper',opp=True, teams=teams1, axis=1)
pan1['opp_na_points']=pan1.apply(pos_points, df=pan1, pos='unkown',opp=True, teams=teams1, axis=1)

pan2['opp_def_points']=pan2.apply(pos_points, df=pan2, pos='Defender',opp=True, teams=teams2, axis=1)
pan2['opp_mid_points']=pan2.apply(pos_points, df=pan2, pos='Midfielder',opp=True, teams=teams2, axis=1)
pan2['opp_for_points']=pan2.apply(pos_points, df=pan2, pos='Forward',opp=True, teams=teams2, axis=1)
pan2['opp_gk_points']=pan2.apply(pos_points, df=pan2, pos='Goalkeeper',opp=True, teams=teams2, axis=1)
pan2['opp_na_points']=pan2.apply(pos_points, df=pan2, pos='unkown',opp=True, teams=teams2, axis=1)

pan3['opp_def_points']=pan3.apply(pos_points, df=pan3, pos='Defender',opp=True, teams=teams3, axis=1)
pan3['opp_mid_points']=pan3.apply(pos_points, df=pan3, pos='Midfielder',opp=True, teams=teams3, axis=1)
pan3['opp_for_points']=pan3.apply(pos_points, df=pan3, pos='Forward',opp=True, teams=teams3, axis=1)
pan3['opp_gk_points']=pan3.apply(pos_points, df=pan3, pos='Goalkeeper',opp=True, teams=teams3, axis=1)
pan3['opp_na_points']=pan3.apply(pos_points, df=pan3, pos='unkown',opp=True, teams=teams3, axis=1)

In [41]:
pan1.to_csv('tmp_panal_1.csv')
pan2.to_csv('tmp_panal_2.csv')
pan3.to_csv('tmp_panal_3.csv')

In [42]:
team_key_2016 = {
                'ARS':'laurent koscielny',
                'BOU':'simon francis',
                'BUR':'tom heaton',
                'CHE':'john terry',
                'CRY':'scott dann',
                'EVE':'phil jagielka',
                'HUL':'michael dawson',
                'LEI':'wes morgan',
                'LIV':'jordan henderson',
                'MCI':'vincent kompany',
                'MUN':'wayne rooney',
                'MID':'grant leadbitter',
                'SOU':'steven davis',
                'STO':'ryan shawcross',
                'SUN':"john o'shea",
                'SWA':'leon britton',
                'TOT':'hugo lloris',
                'WAT':'troy deeney',
                'WBA':'darren fletcher',
                'WHU':'mark noble'
                }

team_key_2017 = {
                'ARS':'per mertesacker',
                'BOU':'simon francis',
                'BHA':'bruno saltor grau',
                'BUR':'tom heaton',
                'CHE':'gary cahill',
                'CRY': 'jason puncheon',
                'EVE':'phil jagielka',
                'HUD':'tommy smith',
                'LEI':'wes morgan',
                'LIV':'jordan henderson',
                'MCI':'vincent kompany',
                'MUN':'michael carrick',
                'NEW':'jamaal lascelles',
                'SOU':'steven davis',
                'STO':'ryan shawcross',
                'SWA':'angel rangel',
                'TOT':'hugo lloris',
                'WAT':'troy deeney',
                'WBA':'jonny evans',
                'WHU':'mark noble'
                }

team_key_2018 = {
                'ARS':'laurent koscielny',
                'BOU':'simon francis',
                'BHA':'bruno saltor grau',
                'BUR':'tom heaton',
                'CAR':'sean morrison',
                'CHE':'gary cahill',
                'CRY':'luka milivojevic',
                'EVE':'phil jagielka',
                'FUL':'tom cairney',
                'HUD':'tommy smith',
                'LEI':'wes morgan',
                'LIV':'jordan henderson',
                'MCI':'vincent kompany',
                'MUN':'antonio valencia',
                'NEW':'jamaal lascelles',
                'SOU':'pierre-emile højbjerg',
                'TOT':'hugo lloris',
                'WAT':'troy deeney',
                'WHU':'mark noble',
                'WOL':'conor coady'
                }

I now construct the rankings of teams within the league, as points of a team is not particlarly informative while rank is relative to its standing in the league. 

In this section I also create lags for all player performance features. While this may be overkill I can feature select later.

In [43]:
def create_league_table(df, team_key, loc): #48 for pan1&2, 69 for pan3
    table={}
    for team in df['team'].unique():
        team_season=[]
        cap = team_key[team]
        for gw in sorted(df[df['team']==team]['round'].unique()):
            points=df[(df['team']==team)&(df['round']==gw)&
                     (df['player']==cap)].iloc[0,loc]
            team_season.append((gw,points))
        table[team]=team_season
    return table

In [44]:
def rank_dict(league_table, seas_len):
    '''
    rank teams by week so each team has a (gw,rank) list
    '''
    rank_table={}
    #keyed by gw
    for gw in range(1, seas_len+1):
        gw_ranking=[]
        for team in league_table.keys():
            teams_gws = [r for r,p in league_table[team]]
            found=False
            tmp = copy.deepcopy(gw)
            while not found:
                #keep looking further back untill gameweek
                #team played in is found
                try:
                    gw_index = teams_gws.index(tmp)
                    found=True
                except:
                    tmp -= 1
            teams_points = league_table[team][gw_index]#indexing!!
            gw_ranking.append((team, teams_points[1]))
        
        gw_ranking.sort(key=lambda x: x[1], reverse=True) #from lowest to highets
        gw_ranking = [(tup[0], i+1) for i,tup in enumerate(gw_ranking)]
        #get index+1 for team in gw_ranking creating a list of
        #(team, rank) value with gw key
        rank_table[gw]=gw_ranking
    return rank_table
        
def assign_rank(row, rank_table):
    team=row['team']
    gw=row['round']
    for t in rank_table[gw]:
        if t[0]==team:
            return t[1]
    print('error')

In [45]:
table1=create_league_table(pan1, team_key_2016, loc=48)
table2=create_league_table(pan2, team_key_2017, loc=48)
table3=create_league_table(pan3, team_key_2018, loc=69)

In [46]:
rank1=rank_dict(table1, seas_len=38)
rank2=rank_dict(table2, seas_len=38)
rank3=rank_dict(table3, seas_len=34)

In [47]:
pan1['team_rank']=pan1.apply(assign_rank, rank_table=rank1, axis=1)

In [48]:
pan2['team_rank']=pan2.apply(assign_rank, rank_table=rank2, axis=1)

In [49]:
pan3['team_rank']=pan3.apply(assign_rank, rank_table=rank3, axis=1)

In [50]:
pan1.to_csv('tmp_panal_1.csv')
pan2.to_csv('tmp_panal_2.csv')
pan3.to_csv('tmp_panal_3.csv')

In [76]:
pan1 = pd.read_csv('tmp_panal_1.csv')
pan2 = pd.read_csv('tmp_panal_2.csv')
pan3 = pd.read_csv('tmp_panal_3.csv')

In [77]:
last_weeks1={}
for player in pan1.player.unique():
    last_week=pan1[pan1['player']==player]['round'].max()
    last_weeks1[player]=last_week
first_weeks1={}
for player in pan1.player.unique():
    first_week=pan1[pan1['player']==player]['round'].min()
    first_weeks1[player]=first_week
last_weeks2={}
for player in pan2.player.unique():
    last_week=pan2[pan2['player']==player]['round'].max()
    last_weeks2[player]=last_week
first_weeks2={}
for player in pan2.player.unique():
    first_week=pan2[pan2['player']==player]['round'].min()
    first_weeks2[player]=first_week
last_weeks3={}
for player in pan3.player.unique():
    last_week=pan3[pan3['player']==player]['round'].max()
    last_weeks3[player]=last_week
first_weeks3={}
for player in pan3.player.unique():
    first_week=pan3[pan3['player']==player]['round'].min()
    first_weeks3[player]=first_week

In [78]:
def get_rank_diff(row, df, team_key):
    team=row['team']
    gw=row['round']
    opps=[]
    for opp in df.team.unique():
        if row[opp]==1:
            opps.append(opp)
    if len(opps)==1:
        opp = opps[0]
        cap = team_key[opp]
        opp_rank = df[(df['team']==opp)&(df['round']==gw)&(df['player']==cap)]['team_rank'].values[0]
        return row['team_rank']-opp_rank
    else:
        opp_ranks=[]
        for opp in opps:
            cap = team_key[opp]
            opp_rank=opp_rank = df[(df['team']==opp)&(df['round']==gw)&(df['player']==cap)]['team_rank'].values[0]
            opp_ranks.append(opp_rank)
        mean_rank = np.mean(opp_ranks)
        return row['team_rank']-mean_rank
    
def get_lags(df, to_lag, deg=1):
    '''
    shifts down by n degrees and makes
    a new column returning the new df
    '''
    for col in lag_me:
        new_name = col+'_lag_'+str(deg)
        df[new_name]=df[col].shift(deg)
    return df

def drop_last(row, last_weeks):
    '''
    to stop other players next game entering
    the last observation of a new player I 
    need to nullify the last entry of every 
    player, can then later drop this row.
    '''
    player = row['player']
    gw = row['round']
    if gw == last_weeks[player]:
        return np.nan
    else:
        return row['next_week_points']

def drop_first(row, col, ext, first_weeks, how_many=1):
    '''
    to stop other players lag entering
    the first observation of a new player I 
    need to nullify the first entry of every 
    player, can then later drop this row. This
    needs to also drop the second row of every
    player. Which row to drop can be controlled by
    the how_many arg, dropping all rows up to the 
    entered int. 
    '''
    player = row['player']
    gw = row['round']
    for drops in range(how_many):
        if gw == first_weeks[player]+drops:
            return np.nan
        else:
            continue
    return row[col+ext]

In [83]:
pan1['rank_diff']=pan1.apply(get_rank_diff, df=pan1, team_key=team_key_2016, axis=1)
pan2['rank_diff']=pan2.apply(get_rank_diff, df=pan2, team_key=team_key_2017, axis=1)
pan3['rank_diff']=pan3.apply(get_rank_diff, df=pan3, team_key=team_key_2018, axis=1)

In [84]:
lag_me=['selected', 'transfers_balance',
       'transfers_in', 'transfers_out', 'value', 'is_double', 'bps', 'minutes', 'saves', 'assists', 'attempted_passes',
       'big_chances_created', 'big_chances_missed', 'bonus', 'clean_sheets',
       'clearances_blocks_interceptions', 'completed_passes', 'creativity',
       'dribbles', 'errors_leading_to_goal', 'errors_leading_to_goal_attempt',
       'fouls', 'goals_conceded', 'goals_scored', 'ict_index', 'influence',
       'key_passes', 'offside', 'open_play_crosses', 'own_goals',
       'penalties_conceded', 'penalties_missed', 'penalties_saved',
       'recoveries', 'red_cards', 'tackled', 'tackles', 'target_missed',
       'threat', 'total_points', 'winning_goals', 'yellow_cards', 'team_score',
       'opp_score', 'league_points_cum', 'was_home', 'win', 'draw',
       'team_points', 'opp_points', 'def_points', 'mid_points', 'for_points',
       'gk_points', 'na_points', 'opp_def_points', 'opp_mid_points', 'team_rank', 'rank_diff',
       'perc_of_all_points', 'perc_of_team_points', 'perc_of_opp_points']

In [85]:
lag_pan1=get_lags(pan1, lag_me, deg=1)
lag_pan1=get_lags(lag_pan1, lag_me, deg=2)

lag_pan2=get_lags(pan2, lag_me, deg=1)
lag_pan2=get_lags(lag_pan2, lag_me, deg=2)

lag_pan3=get_lags(pan3, lag_me, deg=1)
lag_pan3=get_lags(lag_pan3, lag_me, deg=2)

In [86]:
lag_pan1['next_week_points']=lag_pan1['total_points'].shift(-1)
lag_pan2['next_week_points']=lag_pan2['total_points'].shift(-1)
lag_pan3['next_week_points']=lag_pan3['total_points'].shift(-1)

In [87]:
lag_pan1['next_week_points']=lag_pan1.apply(drop_last, last_weeks=last_weeks1, axis=1)
lag_pan2['next_week_points']=lag_pan2.apply(drop_last, last_weeks=last_weeks2, axis=1)
lag_pan3['next_week_points']=lag_pan3.apply(drop_last, last_weeks=last_weeks3, axis=1)

In [88]:
for col in lag_me:
    ext1='_lag_1'
    ext2='_lag_2'
    lag_pan1[col+ext1]=lag_pan1.apply(drop_first,first_weeks=first_weeks1,col=col,ext=ext1,how_many=1,axis=1)
    lag_pan1[col+ext2]=lag_pan1.apply(drop_first,first_weeks=first_weeks1,col=col,ext=ext2,how_many=2,axis=1)
    
    lag_pan2[col+ext1]=lag_pan2.apply(drop_first,first_weeks=first_weeks2,col=col,ext=ext1,how_many=1,axis=1)
    lag_pan2[col+ext2]=lag_pan2.apply(drop_first,first_weeks=first_weeks2,col=col,ext=ext2,how_many=2,axis=1)
    
    lag_pan3[col+ext1]=lag_pan3.apply(drop_first,first_weeks=first_weeks3,col=col,ext=ext1,how_many=1,axis=1)
    lag_pan3[col+ext2]=lag_pan3.apply(drop_first,first_weeks=first_weeks3,col=col,ext=ext2,how_many=2,axis=1)

In [89]:
lag_pan1['adj_round']=lag_pan1['round']
lag_pan2['adj_round']=lag_pan2['round'].apply(lambda x: x+38)
lag_pan3['adj_round']=lag_pan3['round'].apply(lambda x: x+76)

In [90]:
lag_pan=pd.concat([lag_pan1, lag_pan2, lag_pan3])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [91]:
lag_pan.reset_index(drop=True,inplace=True)

Lastly I create features that describe the game next week. These are structually the same as the previous engineered features but they are for the team that the player plays next week rather than last week or the current week. The data used to construct these metrics uses only data from the current week or earlier.

In [92]:
def get_forward_team(row, df=lag_pan):
    i = row.name
    try:
        f_row = df.loc[i+1]
    except:
        return 'last_row'
    pot_opp=['EVE','TOT','BUR','NEW','HUL','ARS','CHE','SUN',
            'MUN','WOL','CRY','MCI','FUL','WHU','HUD','STO',
            'WBA','SOU','MID','WAT','LEI','LIV','BOU','BHA',
            'SWA','CAR']
    for team in pot_opp:
        if f_row[team]==1:
            opp=team
            break
    return opp

In [93]:
lag_pan['next_team']=lag_pan.apply(get_forward_team, axis=1)

In [94]:
def link_to_next_team(row, df):
    '''
    returns a sub-df of the players team that week
    '''
    n_team = row['next_team']
    gw = row['adj_round']
    return df[(df['team']==n_team)&(df['round']<=gw)]

def next_team_points(row, df):
    '''
    from sub-df cumalative sum of total_points
    '''
    n_team = link_to_next_team(row, df)
    team_points = n_team['total_points'].sum()
    return team_points

def pos_points(row, df, pos):
    '''
    from sub-df cumalative sum of total_points
    by position
    '''
    n_team = link_to_next_team(row, df)
    pos_points = n_team[n_team['position']==pos]['total_points'].sum()
    return pos_points

In [95]:
lag_pan['next_team_points']=lag_pan.apply(team_points, df=lag_pan, axis=1)

In [96]:
lag_pan['next_def_points']=lag_pan.apply(pos_points, df=lag_pan, pos='Defender', axis=1)
lag_pan['next_mid_points']=lag_pan.apply(pos_points, df=lag_pan, pos='Midfielder', axis=1)
lag_pan['next_for_points']=lag_pan.apply(pos_points, df=lag_pan, pos='Forward', axis=1)
lag_pan['next_gk_points']=lag_pan.apply(pos_points, df=lag_pan, pos='Goalkeeper', axis=1)
lag_pan['next_na_points']=lag_pan.apply(pos_points, df=lag_pan, pos='unkown', axis=1)

In [97]:
def get_top_(num, team_dict):
    '''
    returns the top # keys in 
    the dict team_dict from best
    to worst.
    '''
    count=0
    top=[]
    while count<num:
        largest=max(team_dict, key=team_dict.get)
        top.append(team_dict[largest])
        var_bin = team_dict.pop(largest)
        count+=1
    return top
        

def mean_before_round(player, adj_round, df=lag_pan):
    '''
    gets the mean points per game from games before this
    game for the player input.
    '''
    tmp = df[(df['adj_round']<=adj_round)&(df['player']==player)]
    player_mean=tmp['total_points'].mean()
    return player_mean

def team_mean_before_round(team, adj_round, df=lag_pan):
    '''
    gest the team of players to find means for returns
    dictionary of player and their mean.
    '''
    tmp = df[(df['team']==team)&(df['adj_round']==adj_round)]
    if tmp.shape[0]==0: #team dont play this week
        tmp = df[(df['team']==team)&(df['adj_round']==adj_round-1)]
    #to get players currently in the team this round
    adict={}
    for player in tmp['player'].unique():
        player_mean = mean_before_round(player, adj_round, df=df)
        adict[player]=player_mean

    return adict

def get_player_rows(row,opps=False, df=lag_pan):
    '''
    finds the top 10 of a team from their cummalitive
    mean and returns a df with 10 columns and a row for 
    every row in the original dataset that this one is 
    built off. The dfs can then be concatinated to add
    top 10 features. If opps==True then the top11 of the 
    opponent team is returned.
    '''
    pot_opp=['EVE','TOT','BUR','NEW','HUL','ARS','CHE','SUN',
            'MUN','WOL','CRY','MCI','FUL','WHU','HUD','STO',
            'WBA','SOU','MID','WAT','LEI','LIV','BOU','BHA',
            'SWA','CAR']
    team = row['team']
    adj_r = row['adj_round']
    player1 = row['player']
    if opps==True:
        opps=[]
        for po in pot_opp:
            if row['next_team']==po:
                opps.append(po)
            elif row['next_team']=='last_row':
                return [np.nan]*11
        if len(opps)==1:
            opps=opps[0]
            team_dict=team_mean_before_round(opps, adj_r, df=df)
            top11=get_top_(11, team_dict)
            return top11
        else:
            team_dict={}
            for opp in opps:
                team_dict.update(team_mean_before_round(opp, adj_r, df=df))
            top11=get_top_(11, team_dict)
            return top11
    else:
        team_dict=team_mean_before_round(team, adj_r, df=df)
        top10=get_top_(10, team_dict)
        return top10

In [98]:
opp_playerframe = lag_pan.apply(get_player_rows, opps=True, df=lag_pan,axis=1)
team_playerframe = lag_pan.apply(get_player_rows, opps=False, df=lag_pan,axis=1)

opp_playerframe_df=opp_playerframe.apply(pd.Series)
team_playerframe_df=team_playerframe.apply(pd.Series)

In [99]:
opp_playerframe_df.columns=['opp_1','opp_2','opp_3','opp_4','opp_5',
                           'opp_6','opp_7','opp_8','opp_9','opp_10',
                           'opp_11']

In [100]:
team_playerframe_df.columns=['mate_1','mate_2','mate_3','mate_4','mate_5',
                    'mate_6','mate_7','mate_8','mate_9','mate_10']

In [101]:
full_data=pd.concat([lag_pan,opp_playerframe_df,team_playerframe_df],axis=1)

In [102]:
full_data.to_csv('final_dataset.csv')

In [103]:
print('finished')

finished
