## Set-up the Enviornment

In [26]:
import numpy as np
import pandas as pd

## Import Team Results

In [27]:
team_results = pd.read_pickle('../data/pickle/team_results.pkl')

In [28]:
def standings(frame, result_col, goals_col, goals_opp_col, points_col):
    """This function takes in a DataFrame and strings identifying fields
    to calculate the league table.
    
    Making it generalized will allow us to calculate league tables for
    First Half Goals only. Second Half Goals only.
    """
    record = {}
    
    record['Played'] = np.size(frame[result_col])
    record['Won'] = np.sum(frame[result_col] == 'W')
    record['Drawn'] = np.sum(frame[result_col] == 'D')
    record['Lost'] = np.sum(frame[result_col] == 'L')
    record['GF'] = np.sum(frame[goals_col])
    record['GA'] = np.sum(frame[goals_opp_col])
    record['GD'] = record['GF'] - record['GA']
    record['Points'] = np.sum(frame[points_col])
    
    return pd.Series(record,
                     index=['Played', 'Won', 'Drawn', 'Lost', 'GF', 'GA', 'GD', "Points"])

In [35]:
# Get League Table
results_byteam = team_results.groupby(['Team'])

(results_byteam 
     .apply(standings,
            result_col='Result',
            goals_col='Goals',
            goals_opp_col='Goals_Opp',
            points_col='Points')
     .sort_values('Points', ascending=False))

Unnamed: 0_level_0,Played,Won,Drawn,Lost,GF,GA,GD,Points
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Man City,38,29,6,3,99,26,73,93
Liverpool,38,28,8,2,94,26,68,92
Chelsea,38,21,11,6,76,33,43,74
Tottenham,38,22,5,11,69,40,29,71
Arsenal,38,22,3,13,61,48,13,69
Man United,38,16,10,12,57,57,0,58
West Ham,38,16,8,14,60,51,9,56
Leicester,38,14,10,14,62,59,3,52
Brighton,38,12,15,11,42,44,-2,51
Wolves,38,15,6,17,38,43,-5,51


In [30]:
# Rank Teams in Standings
league_table = (results_byteam
                    .apply(standings,
                           result_col='Result',
                           goals_col='Goals',
                           goals_opp_col='Goals_Opp',
                           points_col='Points')
                    .sort_values(by=['Points', 'GD', 'GF'], ascending=False))

In [31]:
def rank_teams(league_table, team_list):
    """Return a Series of ranked teams, including those who have yet to play
    
    Args:
        * league_table - League Table DataFrame
        * team_list - List of all teams in league
    """
    
    # sort by tiebraker and rank
    team_rank = (league_table
                     .apply(lambda row: (row['Points'], row['GD'], row['GF']), axis=1)
                     .rank(method='min', ascending=False)
                     .astype(int))
    
    # if not all teams are ranked (i.e. some of them might have not have played yet)
    if team_rank.size < len(team_list):
        # get all teams that need to be added to the table
        ranked_teams = team_rank.index.values
        teams_to_add = {team for team in team_list if team not in ranked_teams}  
        
        # position to rank remaining teams
        rank_to_assign = team_rank.size + 1
        
        # add teams that haven't played a game to rankings
        team_pos = {}
        for team in teams_to_add:
            team_pos[team] = rank_to_assign
        team_rank = team_rank.append(pd.Series(data=team_pos))
    
    return team_rank

In [32]:
league_table_rank = (league_table
                         .apply(rank_teams))
league_table_rank

TypeError: rank_teams() missing 1 required positional argument: 'team_list'