In [1]:
from collections import Counter

import pandas as pd
from bokeh.layouts import row, gridplot
from bokeh.models.tools import HoverTool
from bokeh.plotting import figure, show, ColumnDataSource

In [20]:
def get_ranks_distribution(team_df, n_teams):
    '''
        Share of a team's games spent at each rank.

        team_df: data frame with team data; needs a 'Ranking' column, one
                 row per game;
        n_teams: number of ranks to report; ranks 0 .. n_teams-1 are checked;

        Returns a list of length n_teams whose i-th entry is the fraction of
        rows in team_df whose 'Ranking' equals i. An empty team_df gives all
        zeros instead of raising ZeroDivisionError.

        NOTE(review): ranks are matched against 0 .. n_teams-1. If 'Ranking'
        is 1-based, rank n_teams is never counted - confirm against the data.
    '''
    n_games = team_df.shape[0]
    if n_games == 0:
        # No games recorded: every rank has a zero share.
        return [0.0] * n_teams

    # Tally the column once instead of rescanning it for every rank.
    rank_counts = Counter(team_df['Ranking'])
    return [rank_counts[i] / n_games for i in range(n_teams)]

def get_ranks_agg_distribution(team_df, n_teams):
    '''
        Cumulative share of a team's games spent at or above each rank.

        team_df: data frame with team data; needs a 'Ranking' column, one
                 row per game;
        n_teams: number of ranks to report; ranks 0 .. n_teams-1 are checked;

        Returns a list of length n_teams whose i-th entry is the fraction of
        rows in team_df whose 'Ranking' is <= i. An empty team_df gives all
        zeros instead of raising ZeroDivisionError.

        NOTE(review): thresholds run over 0 .. n_teams-1. If 'Ranking' is
        1-based, the last entry excludes games at rank n_teams - confirm
        against the data.
    '''
    n_games = team_df.shape[0]
    if n_games == 0:
        # No games recorded: every cumulative share is zero.
        return [0.0] * n_teams

    # Materialize the column once; it does not change between thresholds.
    rankings = list(team_df['Ranking'])
    return [
        sum(1 for rank in rankings if rank <= i) / n_games
        for i in range(n_teams)
    ]

In [21]:
def plot_team_ts(figure_abs, figure_norm, team_df, tooltips):
    '''
        Draw one team's points time series on two figures.

        figure_abs: bokeh figure to plot line with absolute values on;
        figure_norm: bokeh figure to plot line with normalized values on;
        team_df: data frame with team data; read positionally (first column =
                 games, second-to-last = points, last = rankings) plus the
                 named columns Team, Opponent and Date;
        tooltips: list with tooltips to appear; not used inside this
                  function, kept so existing callers keep working;

        Points are normalized by the last points value in team_df. When that
        value is 0 every normalized value is 0.0 instead of raising
        ZeroDivisionError.
    '''

    # 1. Get data from team dataframe
    team_pts = list(team_df.iloc[:,-2])
    team_rankings = list(team_df.iloc[:,-1])
    team_gms = list(team_df.iloc[:,0])
    # Underscores in team names become spaces for display in the hover label
    team = [name.replace('_',' ') for name in team_df.Team]
    team_opp = list(team_df.Opponent)
    team_dts = list(team_df.Date)

    # Normalize by the final value; a final total of 0 would divide by zero
    final_pts = team_pts[-1] if team_pts else 0
    if final_pts:
        team_pts_norm = [x/final_pts for x in team_pts]
    else:
        team_pts_norm = [0.0 for _ in team_pts]

    # 2. Create data source (shared by both lines so hover fields match)
    src = ColumnDataSource(data=dict(
        Games=team_gms,
        Points=team_pts,
        Points_norm=team_pts_norm,
        Rankings=team_rankings,
        Team=team,
        Opponent=team_opp,
        Date=team_dts,
    ))

    # 3. Plot on each figure
    figure_abs.line('Games', 'Points', source=src, line_width=2, hover_line_color='red')
    figure_norm.line('Games', 'Points_norm', source=src, line_width=2, hover_line_color='red')

In [22]:
def plot_time_series(data, title):
    '''
        Build the absolute and normalized points figures for every team.

        data: data frame containing data from all teams; it is grouped by
              its 'Team' column and each group is drawn as one line;
        title: title for plot with absolute values; the normalized plot uses
               the same title followed by ' (normalized)';

        Returns the pair (absolute figure, normalized figure).

        NOTE(review): plot_height / plot_width are the older Bokeh keyword
        names - confirm they are accepted by the Bokeh version in use.
    '''
    hover_fields = [
        ('Team','@Team'),
        ('Opponent','@Opponent'),
        ('GP','@Games'),
        ('Date','@Date'),
        ('Points','@Points'),
        ('Points_norm','@Points_norm'),
        ('Ranking','@Rankings'),
    ]

    # Options shared by both figures; only title and y label differ.
    shared_opts = dict(x_axis_label='Game', tooltips=hover_fields,
                       plot_height=800, plot_width=800)

    abs_fig = figure(title=title, y_axis_label='Points', **shared_opts)
    norm_fig = figure(title=title+' (normalized)',
                      y_axis_label='Points (normalized)', **shared_opts)

    # One line per team on each figure; the group key itself is not needed.
    for _, team_rows in data.groupby(['Team']):
        plot_team_ts(abs_fig, norm_fig, team_rows, hover_fields)

    for fig in (abs_fig, norm_fig):
        fig.toolbar.autohide = True

    return abs_fig, norm_fig

In [23]:
def plot_team_rankings(fig_rankings, fig_rankings_agg, team_df, n_teams, tooltips):
    '''
        Draw one team's rank distribution and cumulative rank distribution.

        fig_rankings: bokeh figure to plot line with rank distribution;
        fig_rankings_agg: bokeh figure to plot line with rank cumulative distribution;
        team_df: data frame with team data;
        n_teams: number of ranks on the x axis (0 .. n_teams-1); also passed
                 to the two distribution helpers;
        tooltips: list with tooltips to appear; not used inside this
                  function, kept so existing callers keep working;
    '''

    # 1. Get data from team dataframe
    total_ranks = list(range(n_teams))
    team_ranks_dist = get_ranks_distribution(team_df, n_teams)
    team_ranks_agg_dist = get_ranks_agg_distribution(team_df, n_teams)
    # Repeat the team name once per rank so every point carries its label.
    # Underscores become spaces so the label matches the one plot_team_ts shows.
    team = [name.replace('_',' ') for name in set(team_df.Team)]*len(total_ranks)

    # 2. Create data source (shared by both lines so hover fields match)
    src = ColumnDataSource(data=dict(
        x=total_ranks,
        Rankings=team_ranks_dist,
        Rankings_agg=team_ranks_agg_dist,
        Team=team,
    ))

    # 3. Plot on each figure
    fig_rankings.line('x', 'Rankings', source=src, line_width=2, hover_line_color='red')
    fig_rankings_agg.line('x', 'Rankings_agg', source=src, line_width=2, hover_line_color='red')
    
def plot_rankings(data, title):
    '''
        Build the rank-distribution and cumulative rank-distribution figures.

        data: data frame containing data from all teams; it is grouped by
              its 'Team' column and each group is drawn as one line;
        title: title for the rank-distribution plot; the cumulative plot
               uses the same title followed by ' (aggregated)';

        Returns the pair (distribution figure, cumulative figure).
    '''
    hover_fields = [
        ('Team','@Team'),
        ('Ranking','@x'),
        ('Proportion','@Rankings'),
        ('Proportion_agg','@Rankings_agg')
    ]

    # Options shared by both figures; only title and y label differ.
    shared_opts = dict(x_axis_label='Ranking', tooltips=hover_fields,
                       plot_height=800, plot_width=800)

    dist_fig = figure(title=title, y_axis_label='Proportion', **shared_opts)
    cumulative_fig = figure(title=title+' (aggregated)',
                            y_axis_label='Proportion (aggregated)', **shared_opts)

    # The number of distinct teams sets how many ranks each line covers.
    team_count = len(set(data.Team))

    for _, team_rows in data.groupby(['Team']):
        plot_team_rankings(dist_fig, cumulative_fig, team_rows, team_count, hover_fields)

    for fig in (dist_fig, cumulative_fig):
        fig.toolbar.autohide = True

    return dist_fig, cumulative_fig

In [25]:
# Load the combined data file (read as CSV even though the path has no extension).
data = pd.read_csv(r'data/Premier_League_2021/all')
# Build the two points time-series figures and the two rank-distribution figures.
fig_abs, fig_norm = plot_time_series(data, 'Premier League 2020/21 season')
fig_ranks, fig_ranks_agg = plot_rankings(data, 'Premier League 2020/21 Rankings distributions')
# Arrange the four figures two per row and render the grid.
# NOTE(review): plot_width/plot_height here differ from the 800x800 given when
# the figures were created - presumably gridplot resizes them; confirm.
grid = gridplot([fig_abs, fig_norm, fig_ranks, fig_ranks_agg], ncols=2, plot_width=900, plot_height=450)
show(grid)