In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from typing import Dict
import pickle as pk

In [2]:
# Load the cleaned match results. Later cells read the Date, HomeTeam,
# AwayTeam, FTHG, FTAG and FTR columns from frames derived from this one.
df_cleaned = pd.read_csv('../data/cleaned_data.csv')

 
# Creating A Data Frame With Detailed Stats

### Calculating Points Secured In Each MatchWeek

In [3]:
# Build one match log per team (every fixture the team played, home or away),
# indexed by match week, with the points earned per match and a running total.
# Teams are collected from both the home and away columns so that a side which
# has not yet hosted a match is still included.
teams = sorted(set(df_cleaned['HomeTeam']) | set(df_cleaned['AwayTeam']))
team_stats_detailed: Dict[str, pd.DataFrame] = {}
for team in teams:
    team_df = df_cleaned[(df_cleaned['HomeTeam'] == team) | (df_cleaned['AwayTeam'] == team)].copy()

    #using matchweek as index
    # NOTE(review): rows are numbered 1..n in file order, so this assumes
    # df_cleaned is already sorted chronologically - confirm against the cleaning step.
    team_df.index = np.arange(1, len(team_df) + 1)
    team_df.index.name = "MatchWeek"

    #calculation of points based on results: 3 for a win, 1 for a draw, 0 otherwise
    played_home = team_df['HomeTeam'] == team
    won = (played_home & (team_df['FTR'] == 'H')) | (~played_home & (team_df['FTR'] == 'A'))
    drawn = team_df['FTR'] == 'D'
    team_df['Points'] = np.select([won, drawn], [3, 1], default=0)

    # running points total after each match
    team_df['TotalPoints'] = team_df['Points'].cumsum()
    team_stats_detailed[team] = team_df



### Calculating goals scored, conceded and goal difference

In [4]:
#Calculating cumulative goals for (GF), goals against (GA) and goal difference (GD)
for team in team_stats_detailed:
    df = team_stats_detailed[team].copy()

    # From this team's point of view: goals scored are FTHG when it played at
    # home and FTAG when it played away; goals conceded are the other column.
    played_home = df['HomeTeam'] == team
    scored = np.where(played_home, df['FTHG'], df['FTAG'])
    conceded = np.where(played_home, df['FTAG'], df['FTHG'])

    # Running totals after each match; GD is the running GF minus the running GA.
    df['GF'] = np.cumsum(scored)
    df['GA'] = np.cumsum(conceded)
    df['GD'] = df['GF'] - df['GA']

    team_stats_detailed[team] = df



In [5]:
# Persist the per-team detailed frames to disk as a single pickle file
with open('../data/team_stats_detailed.pkl', 'wb') as pickle_file:
    pk.dump(team_stats_detailed, pickle_file)

 
# Creating A Data Frame With a Few Stats
### Contains only the date, home/away teams and the final score

In [6]:
# Build the compact per-team view: match date, both sides and the score line
team_data_simple: Dict[str, pd.DataFrame] = {}
for team, detailed in team_stats_detailed.items():
    simple = detailed[['Date', 'HomeTeam', 'AwayTeam']].copy()

    # Score is rendered as "<home goals>-<away goals>", e.g. "2-1"
    simple['Final Score'] = [
        f"{home_goals}-{away_goals}"
        for home_goals, away_goals in zip(detailed['FTHG'], detailed['FTAG'])
    ]
    team_data_simple[team] = simple

#### Gets The Table (Detailed/Simple) For The Team Entered By The User

In [7]:
def get_team_data(team : str, simple_view: bool):
    """Return the stored per-team table for ``team``.

    The team is looked up in ``team_data_simple`` when ``simple_view`` is
    truthy, otherwise in ``team_stats_detailed``. A team that is missing from
    the chosen dictionary raises ``KeyError``.
    """
    tables = team_data_simple if simple_view else team_stats_detailed
    return tables[team]

#### Displaying A Simple Table For A Team

In [8]:
# Simple view for a single team (Man City used as the example)
df_team_req = get_team_data(team='Man City', simple_view=True)
df_team_req

Unnamed: 0_level_0,Date,HomeTeam,AwayTeam,Final Score
MatchWeek,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2024-08-18,Chelsea,Man City,0-2
2,2024-08-24,Man City,Ipswich,4-1
3,2024-08-31,West Ham,Man City,1-3
4,2024-09-14,Man City,Brentford,2-1
5,2024-09-22,Man City,Arsenal,2-2
6,2024-09-28,Newcastle,Man City,1-1
7,2024-10-05,Man City,Fulham,3-2
8,2024-10-20,Wolves,Man City,1-2
9,2024-10-26,Man City,Southampton,1-0
10,2024-11-02,Bournemouth,Man City,2-1


#### Displaying A Detailed Table For A Team

In [9]:
# Detailed view for a single team (Arsenal used as the example)
df_team_req = get_team_data(team='Arsenal', simple_view=False)
df_team_req

Unnamed: 0_level_0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,...,AC,HY,AY,HR,AR,Points,TotalPoints,GF,GA,GD
MatchWeek,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2024-08-17,Arsenal,Wolves,2,0,H,1,0,H,J Gillett,...,2,2,2,0,0,3,3,2,0,2
2,2024-08-24,Aston Villa,Arsenal,0,2,A,0,0,D,M Oliver,...,1,1,3,0,0,3,6,4,0,4
3,2024-08-31,Arsenal,Brighton,1,1,D,1,0,H,C Kavanagh,...,7,3,2,1,0,1,7,5,1,4
4,2024-09-15,Tottenham,Arsenal,0,1,A,0,0,D,J Gillett,...,6,5,3,0,0,3,10,6,1,5
5,2024-09-22,Man City,Arsenal,2,2,D,1,2,A,M Oliver,...,2,3,4,0,1,1,11,8,3,5
6,2024-09-28,Arsenal,Leicester,4,2,H,2,0,H,S Barrott,...,0,2,4,0,0,3,14,12,5,7
7,2024-10-05,Arsenal,Southampton,3,1,H,0,0,D,T Harrington,...,1,0,3,0,0,3,17,15,6,9
8,2024-10-19,Bournemouth,Arsenal,2,0,H,0,0,D,R Jones,...,4,1,1,0,1,0,17,15,8,7
9,2024-10-27,Arsenal,Liverpool,2,2,D,2,1,H,A Taylor,...,3,2,2,0,0,1,18,17,10,7
10,2024-11-02,Newcastle,Arsenal,1,0,H,1,0,H,J Brooks,...,6,4,4,0,0,0,18,17,11,6


# Constructing the final league table

In [10]:
#constructing final league table
# One summary record per team, taken from its detailed match log.
league_table = []
for team, df in team_stats_detailed.items():
    # TotalPoints, GF, GA and GD are running totals, so the last row of the
    # match log holds the team's totals over all matches in the data.
    tpc = df['TotalPoints'].iloc[-1]
    gfc = df['GF'].iloc[-1]
    gac = df['GA'].iloc[-1]
    gdc = df['GD'].iloc[-1]

    # Calculate wins, draws, defeats
    wins = (df['Points'] == 3).sum()
    draws = (df['Points'] == 1).sum()
    # Derive defeats from the matches actually played instead of assuming a
    # 38-match season, so partial seasons and other league sizes stay correct.
    defeats = len(df) - wins - draws

    league_table.append({
        'Team': team,
        'W': wins,
        'D': draws,
        'L': defeats,
        'Points': tpc,
        'GF': gfc,
        'GA': gac,
        'GD': gdc
    })

# Create DataFrame and sort by Points, GD, GF (all descending), then number
# the rows 1..n as the league position and use that as the index.
df_table = pd.DataFrame(league_table)
df_table = df_table.sort_values(by=['Points', 'GD', 'GF'], ascending=False).reset_index(drop=True)
df_table['Pos'] = np.arange(1, len(df_table) + 1)
df_table = df_table.set_index('Pos')
df_table.to_csv('../data/Final League Table.csv')
df_table

Unnamed: 0_level_0,Team,W,D,L,Points,GF,GA,GD
Pos,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Liverpool,25,9,4,84,86,41,45
2,Arsenal,20,14,4,74,69,34,35
3,Man City,21,8,9,71,72,44,28
4,Chelsea,20,9,9,69,64,43,21
5,Newcastle,20,6,12,66,68,47,21
6,Aston Villa,19,9,10,66,58,51,7
7,Nott'm Forest,19,8,11,65,58,46,12
8,Brighton,16,13,9,61,66,59,7
9,Bournemouth,15,11,12,56,58,46,12
10,Brentford,16,8,14,56,66,57,9
