In [1]:
import requests
import pandas as pd
import re
from bs4 import BeautifulSoup
import time
import datetime
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

In [2]:
matches_23_24 = pd.read_csv("data/teams_pl_23-24_fbref.csv")
matches_22_23 = pd.read_csv("data/teams_pl_22-23_fbref.csv")
matches_21_22 = pd.read_csv("data/teams_pl_21-22_fbref.csv")
matches_20_21 = pd.read_csv("data/teams_pl_20-21_fbref.csv")
matches_19_20 = pd.read_csv("data/teams_pl_19-20_fbref.csv")
matches_18_19 = pd.read_csv("data/teams_pl_18-19_fbref.csv")
matches = pd.concat([matches_23_24, matches_22_23, matches_21_22, matches_20_21, matches_19_20, matches_18_19], ignore_index=True)
stadiums = pd.read_csv("data/stadiums.csv")

In [3]:
matches.head(2)

Unnamed: 0,season,venue_date,venue_time,round,attendance_value,referee,home_manager,away_manager,home_captain,away_captain,...,away_fouls,away_fouled,away_offsides,away_pens_won,away_pens_conceded,away_own_goals,away_ball_recoveries,away_aerials_won,away_aerials_lost,away_aerials_won_pct
0,2023-2024,2023-08-11,20:00,1,21572.0,Craig Pawson,Vincent Kompany,Pep Guardiola,Josh Cullen,Kevin De Bruyne,...,8,11,1,0,0,0,54,13,13,50.0
1,2023-2024,2023-08-12,12:30,1,59984.0,Michael Oliver,Mikel Arteta,Steve Cooper,Martin Ødegaard,Joe Worrall,...,12,12,1,0,0,0,34,20,12,62.5


In [4]:
matches.rename(columns={"venue_date": "date", "venue_time": "time"}, inplace=True)
matches = matches.drop(columns = ["home_shirtnumber", "home_nationality", "home_position", "home_age", "away_shirtnumber", "away_nationality", "away_position", "away_age"])

In [5]:
matches["home_goals"] = matches["home_goals"] + matches["away_own_goals"]
matches["away_goals"] = matches["away_goals"] + matches["home_own_goals"]
matches["total_goals"] = matches["home_goals"] + matches["away_goals"]

#### Add stadium info to every match

In [6]:
matches_with_stadiums = pd.merge(matches, stadiums[stadiums["Closed"].isna()], left_on=['home_team'], right_on=['Club'], how='left').drop(columns=["Closed", "Opened"])
matches_with_stadiums.head(2)

Unnamed: 0,season,date,time,round,attendance_value,referee,home_manager,away_manager,home_captain,away_captain,...,away_aerials_won_pct,total_goals,Stadium,Club,Location,Capacity,Pitch length m,Pitch width m,latitude,longitude
0,2023-2024,2023-08-11,20:00,1,21572.0,Craig Pawson,Vincent Kompany,Pep Guardiola,Josh Cullen,Kevin De Bruyne,...,50.0,3,Turf Moor,Burnley,Burnley,21744.0,105.0,68.0,53°47′21″N,2°13′49″W
1,2023-2024,2023-08-12,12:30,1,59984.0,Michael Oliver,Mikel Arteta,Steve Cooper,Martin Ødegaard,Joe Worrall,...,62.5,3,Emirates Stadium,Arsenal,London,60704.0,105.0,68.0,51°33′18″N,000°06′31″W


#### Add columns with match outcome and points of each team

In [7]:
def determine_outcome_and_points(row):
    if row['home_goals'] > row['away_goals']:
        return 1, 3, 0  # Home win
    elif row['home_goals'] < row['away_goals']:
        return 2, 0, 3  # Away win
    else:
        return 0, 1, 1  # Draw  
    
matches_with_stadiums[['outcome', 'home_points', 'away_points']] = matches_with_stadiums.apply(lambda row: pd.Series(determine_outcome_and_points(row)), axis=1)

In [8]:
pd.set_option('display.max_columns', None)
matches_with_stadiums.head(2)

Unnamed: 0,season,date,time,round,attendance_value,referee,home_manager,away_manager,home_captain,away_captain,formation_home,formation_away,home_team,away_team,home_minutes,home_goals,home_assists,home_pens_made,home_pens_att,home_shots,home_shots_on_target,home_cards_yellow,home_cards_red,home_touches,home_tackles,home_interceptions,home_blocks,home_xg,home_npxg,home_xg_assist,home_sca,home_gca,home_passes_completed,home_passes,home_passes_pct,home_progressive_passes,home_carries,home_progressive_carries,home_take_ons,home_take_ons_won,home_passes_total_distance,home_passes_progressive_distance,home_passes_completed_short,home_passes_short,home_passes_pct_short,home_passes_completed_medium,home_passes_medium,home_passes_pct_medium,home_passes_completed_long,home_passes_long,home_passes_pct_long,home_pass_xa,home_assisted_shots,home_passes_into_final_third,home_passes_into_penalty_area,home_crosses_into_penalty_area,home_passes_live,home_passes_dead,home_passes_free_kicks,home_through_balls,home_passes_switches,home_crosses,home_throw_ins,home_corner_kicks,home_corner_kicks_in,home_corner_kicks_out,home_corner_kicks_straight,home_passes_offsides,home_passes_blocked,home_tackles_won,home_tackles_def_3rd,home_tackles_mid_3rd,home_tackles_att_3rd,home_challenge_tackles,home_challenges,home_challenge_tackles_pct,home_challenges_lost,home_blocked_shots,home_blocked_passes,home_tackles_interceptions,home_clearances,home_errors,home_touches_def_pen_area,home_touches_def_3rd,home_touches_mid_3rd,home_touches_att_3rd,home_touches_att_pen_area,home_touches_live_ball,home_take_ons_won_pct,home_take_ons_tackled,home_take_ons_tackled_pct,home_carries_distance,home_carries_progressive_distance,home_carries_into_final_third,home_carries_into_penalty_area,home_miscontrols,home_dispossessed,home_passes_received,home_progressive_passes_received,home_cards_yellow_red,home_fouls,home_fouled,home_offsides,home_pens_won,home_pens_conceded,home_own_goals,home_ball_recoveries,home_aerials_won,home_aerials_lost,home_aerials_won_pct,away_minutes,away_goals,away_assists,away_pens_made,away_pens_att,away_shots,away_shots_on_target,away_cards_yellow,away_cards_red,away_touches,away_tackles,away_interceptions,away_blocks,away_xg,away_npxg,away_xg_assist,away_sca,away_gca,away_passes_completed,away_passes,away_passes_pct,away_progressive_passes,away_carries,away_progressive_carries,away_take_ons,away_take_ons_won,away_passes_total_distance,away_passes_progressive_distance,away_passes_completed_short,away_passes_short,away_passes_pct_short,away_passes_completed_medium,away_passes_medium,away_passes_pct_medium,away_passes_completed_long,away_passes_long,away_passes_pct_long,away_pass_xa,away_assisted_shots,away_passes_into_final_third,away_passes_into_penalty_area,away_crosses_into_penalty_area,away_passes_live,away_passes_dead,away_passes_free_kicks,away_through_balls,away_passes_switches,away_crosses,away_throw_ins,away_corner_kicks,away_corner_kicks_in,away_corner_kicks_out,away_corner_kicks_straight,away_passes_offsides,away_passes_blocked,away_tackles_won,away_tackles_def_3rd,away_tackles_mid_3rd,away_tackles_att_3rd,away_challenge_tackles,away_challenges,away_challenge_tackles_pct,away_challenges_lost,away_blocked_shots,away_blocked_passes,away_tackles_interceptions,away_clearances,away_errors,away_touches_def_pen_area,away_touches_def_3rd,away_touches_mid_3rd,away_touches_att_3rd,away_touches_att_pen_area,away_touches_live_ball,away_take_ons_won_pct,away_take_ons_tackled,away_take_ons_tackled_pct,away_carries_distance,away_carries_progressive_distance,away_carries_into_final_third,away_carries_into_penalty_area,away_miscontrols,away_dispossessed,away_passes_received,away_progressive_passes_received,away_cards_yellow_red,away_fouls,away_fouled,away_offsides,away_pens_won,away_pens_conceded,away_own_goals,away_ball_recoveries,away_aerials_won,away_aerials_lost,away_aerials_won_pct,total_goals,Stadium,Club,Location,Capacity,Pitch length m,Pitch width m,latitude,longitude,outcome,home_points,away_points
0,2023-2024,2023-08-11,20:00,1,21572.0,Craig Pawson,Vincent Kompany,Pep Guardiola,Josh Cullen,Kevin De Bruyne,5-4-1,4-2-3-1,Burnley,Manchester City,989,0,0,0,0,6,1,0,1,496,12,7,12,0.3,0.3,0.3,10,0,313,395,79.2,18,225,10,15,3,5807,1910,133,148,89.9,134,151,88.7,40,74,54.1,0.2,5,14,3,0,356,39,9,0,4,7,15,6,2,3,0,0,8,8,5,5,2,3,6,50.0,3,5,7,19,16,0,89,224,192,81,14,496,20.0,12,80.0,1181,508,9,3,13,5,313,18,0,11,8,0,0,0,0,45,13,13,50.0,990,3,2,0,0,17,8,0,0,835,17,4,9,2.1,2.1,1.3,29,5,655,740,88.5,28,558,12,10,3,10790,3194,325,339,95.9,252,277,91.0,51,84,60.7,1.3,14,42,10,2,703,36,10,7,1,15,13,5,0,4,0,1,7,12,10,5,2,12,15,80.0,3,2,7,21,7,2,75,267,412,158,23,835,30.0,3,30.0,2724,1240,13,3,9,9,632,28,0,8,11,1,0,0,0,54,13,13,50.0,3,Turf Moor,Burnley,Burnley,21744.0,105.0,68.0,53°47′21″N,2°13′49″W,2,0,3
1,2023-2024,2023-08-12,12:30,1,59984.0,Michael Oliver,Mikel Arteta,Steve Cooper,Martin Ødegaard,Joe Worrall,4-3-3,3-4-3,Arsenal,Nottingham Forest,990,2,2,0,0,15,7,2,0,902,19,7,6,0.8,0.8,0.6,30,4,716,807,88.7,58,706,34,15,4,11400,3204,367,393,93.4,306,334,91.6,35,55,63.6,0.9,13,61,13,1,756,49,13,5,1,14,18,8,8,0,0,2,7,15,4,12,3,8,15,53.3,7,1,5,26,9,0,44,185,485,238,30,902,26.7,8,53.3,3482,1908,24,6,10,9,712,58,0,12,12,2,0,0,0,48,12,20,37.5,990,1,1,0,0,6,2,2,0,336,17,8,10,1.2,1.2,1.2,10,2,154,228,67.5,14,159,11,18,7,3239,1786,57,76,75.0,68,83,81.9,25,53,47.2,0.9,5,15,5,0,193,34,14,1,0,6,9,3,2,1,0,1,5,10,13,3,1,8,12,66.7,4,5,5,25,18,0,61,160,126,54,12,336,38.9,8,44.4,959,579,8,2,11,11,151,14,0,12,12,1,0,0,0,34,20,12,62.5,3,Emirates Stadium,Arsenal,London,60704.0,105.0,68.0,51°33′18″N,000°06′31″W,1,3,0


### Create dynamic table updated after every match played

In [18]:
cols = matches_with_stadiums.columns
attributes = []
for col in cols:
    if col.startswith("home_"):
        attributes.append(col.replace("home_", ""))
attributes = attributes[3:] # delete team, captain and manager attributes
# attributes consists of only number columns - not nominal columns

In [19]:
standings_columns = ['matches_played', 'wins', 'draws', 'defeats', 'goal_difference', 'goals_conceded'] + attributes
standings = pd.DataFrame(columns=['season', 'team'] + standings_columns)
matches_sorted = matches_with_stadiums.sort_values(by=['season', 'date'])

In [24]:
for season in matches_sorted['season'].unique():
    season_data = matches_sorted[matches_sorted['season'] == season]
    teams = season_data['home_team'].unique()
    
    standings_dict = {team: {attribute: 0 for attribute in standings_columns} for team in teams}

    # tu można dodać punkty karne, z którymi drużyny zaczynają sezon, bardzo niewiele to zmieni ale jednak w teorii dokładniejszy model!!!!!!
    
    for dt in season_data['date'].unique():
        round_data = season_data[season_data['date'] == dt]
        
        for index, row in round_data.iterrows():
            home_team = row['home_team']
            away_team = row['away_team']
            home_goals = row['home_goals']
            away_goals = row['away_goals']
            home_points = row['home_points']
            away_points = row['away_points']
            outcome = row['outcome']
            
            # Update home team standings
            standings_dict[home_team]['matches_played'] += 1
            standings_dict[home_team]['goals_conceded'] += away_goals
            
            for attr in attributes:
                standings_dict[home_team][attr] += row["home_" + attr]

            standings_dict[home_team]['goal_difference'] = standings_dict[home_team]['goals'] - standings_dict[home_team]['goals_conceded']
            
            # Update away team standings
            standings_dict[away_team]['matches_played'] += 1
            standings_dict[away_team]['goals_conceded'] += home_goals
            
            for attr in attributes:
                standings_dict[away_team][attr] += row["away_" + attr]

            standings_dict[away_team]['goal_difference'] = standings_dict[away_team]['goals'] - standings_dict[away_team]['goals_conceded']

            # Update W L D
            if outcome == 1:
                standings_dict[home_team]['wins'] += 1
                standings_dict[away_team]['defeats'] += 1
            elif outcome == 0:
                standings_dict[home_team]['draws'] += 1
                standings_dict[away_team]['draws'] += 1
            else:
                standings_dict[home_team]['defeats'] += 1
                standings_dict[away_team]['wins'] += 1

        temp_standings_data = {
        'season': season,
        'date': dt,
        'team': list(standings_dict.keys()),
        'matches_played': [standings_dict[team]['matches_played'] for team in standings_dict],
        'goals_conceded': [standings_dict[team]['goals_conceded'] for team in standings_dict],
        'goal_difference': [standings_dict[team]['goal_difference'] for team in standings_dict],
        'wins': [standings_dict[team]['wins'] for team in standings_dict],
        'draws': [standings_dict[team]['draws'] for team in standings_dict],
        'defeats': [standings_dict[team]['defeats'] for team in standings_dict]}


        for attr in attributes:
            temp_standings_data[attr] = [standings_dict[team][attr] for team in standings_dict]

        temp_standings = pd.DataFrame(temp_standings_data)

        standings = pd.concat([standings, temp_standings])
standings = standings.sort_values(by=['season', 'date', 'points', 'goal_difference', 'goals'], ascending=[True, True, False, False, False])
standings.reset_index(drop=True, inplace=True)

In [25]:
standings[standings["matches_played"] == 38].head(2)

Unnamed: 0,season,team,matches_played,wins,draws,defeats,goal_difference,goals_conceded,minutes,goals,assists,pens_made,pens_att,shots,shots_on_target,cards_yellow,cards_red,touches,tackles,interceptions,blocks,xg,npxg,xg_assist,sca,gca,passes_completed,passes,passes_pct,progressive_passes,carries,progressive_carries,take_ons,take_ons_won,passes_total_distance,passes_progressive_distance,passes_completed_short,passes_short,passes_pct_short,passes_completed_medium,passes_medium,passes_pct_medium,passes_completed_long,passes_long,passes_pct_long,pass_xa,assisted_shots,passes_into_final_third,passes_into_penalty_area,crosses_into_penalty_area,passes_live,passes_dead,passes_free_kicks,through_balls,passes_switches,crosses,throw_ins,corner_kicks,corner_kicks_in,corner_kicks_out,corner_kicks_straight,passes_offsides,passes_blocked,tackles_won,tackles_def_3rd,tackles_mid_3rd,tackles_att_3rd,challenge_tackles,challenges,challenge_tackles_pct,challenges_lost,blocked_shots,blocked_passes,tackles_interceptions,clearances,errors,touches_def_pen_area,touches_def_3rd,touches_mid_3rd,touches_att_3rd,touches_att_pen_area,touches_live_ball,take_ons_won_pct,take_ons_tackled,take_ons_tackled_pct,carries_distance,carries_progressive_distance,carries_into_final_third,carries_into_penalty_area,miscontrols,dispossessed,passes_received,progressive_passes_received,cards_yellow_red,fouls,fouled,offsides,pens_won,pens_conceded,own_goals,ball_recoveries,aerials_won,aerials_lost,aerials_won_pct,points,date
4280,2018-2019,Manchester City,38,32,2,4,72,23,37618,95,71,3,4,679,252,44,1,32238,518,362,319,85.7,82.7,65.8,1216,168,24678,28389,3293.1,2429,22457,1325,734,437,395041,113549,12249,13188,3521.6,9318,10379,3405.4,1907,2834,2555.4,63.4,516,1948,554,62,26660,1631,344,112,226,783,739,298,85,121,1,98,384,327,214,215,89,193,467,1565.9,274,63,256,880,570,15,1825,6098,16583,9800,1458,32234,2252.0,296,1544.1,118990,68197,1017,366,509,393,24519,2412,0,328,301,98,4,4,0,2000,540,522,1958.8,98,2019-05-12
4281,2018-2019,Manchester City,38,32,2,4,72,23,37618,95,71,3,4,679,252,44,1,32238,518,362,319,85.7,82.7,65.8,1216,168,24678,28389,3293.1,2429,22457,1325,734,437,395041,113549,12249,13188,3521.6,9318,10379,3405.4,1907,2834,2555.4,63.4,516,1948,554,62,26660,1631,344,112,226,783,739,298,85,121,1,98,384,327,214,215,89,193,467,1565.9,274,63,256,880,570,15,1825,6098,16583,9800,1458,32234,2252.0,296,1544.1,118990,68197,1017,366,509,393,24519,2412,0,328,301,98,4,4,0,2000,540,522,1958.8,98,2019-05-12


In [34]:
matches_filtered = matches_with_stadiums[matches_with_stadiums['round'] > 3]
df = matches_filtered.copy()
df.head(2)

Unnamed: 0,season,date,time,round,attendance_value,referee,home_manager,away_manager,home_captain,away_captain,formation_home,formation_away,home_team,away_team,home_minutes,home_goals,home_assists,home_pens_made,home_pens_att,home_shots,home_shots_on_target,home_cards_yellow,home_cards_red,home_touches,home_tackles,home_interceptions,home_blocks,home_xg,home_npxg,home_xg_assist,home_sca,home_gca,home_passes_completed,home_passes,home_passes_pct,home_progressive_passes,home_carries,home_progressive_carries,home_take_ons,home_take_ons_won,home_passes_total_distance,home_passes_progressive_distance,home_passes_completed_short,home_passes_short,home_passes_pct_short,home_passes_completed_medium,home_passes_medium,home_passes_pct_medium,home_passes_completed_long,home_passes_long,home_passes_pct_long,home_pass_xa,home_assisted_shots,home_passes_into_final_third,home_passes_into_penalty_area,home_crosses_into_penalty_area,home_passes_live,home_passes_dead,home_passes_free_kicks,home_through_balls,home_passes_switches,home_crosses,home_throw_ins,home_corner_kicks,home_corner_kicks_in,home_corner_kicks_out,home_corner_kicks_straight,home_passes_offsides,home_passes_blocked,home_tackles_won,home_tackles_def_3rd,home_tackles_mid_3rd,home_tackles_att_3rd,home_challenge_tackles,home_challenges,home_challenge_tackles_pct,home_challenges_lost,home_blocked_shots,home_blocked_passes,home_tackles_interceptions,home_clearances,home_errors,home_touches_def_pen_area,home_touches_def_3rd,home_touches_mid_3rd,home_touches_att_3rd,home_touches_att_pen_area,home_touches_live_ball,home_take_ons_won_pct,home_take_ons_tackled,home_take_ons_tackled_pct,home_carries_distance,home_carries_progressive_distance,home_carries_into_final_third,home_carries_into_penalty_area,home_miscontrols,home_dispossessed,home_passes_received,home_progressive_passes_received,home_cards_yellow_red,home_fouls,home_fouled,home_offsides,home_pens_won,home_pens_conceded,home_own_goals,home_ball_recoveries,home_aerials_won,home_aerials_lost,home_aerials_won_pct,away_minutes,away_goals,away_assists,away_pens_made,away_pens_att,away_shots,away_shots_on_target,away_cards_yellow,away_cards_red,away_touches,away_tackles,away_interceptions,away_blocks,away_xg,away_npxg,away_xg_assist,away_sca,away_gca,away_passes_completed,away_passes,away_passes_pct,away_progressive_passes,away_carries,away_progressive_carries,away_take_ons,away_take_ons_won,away_passes_total_distance,away_passes_progressive_distance,away_passes_completed_short,away_passes_short,away_passes_pct_short,away_passes_completed_medium,away_passes_medium,away_passes_pct_medium,away_passes_completed_long,away_passes_long,away_passes_pct_long,away_pass_xa,away_assisted_shots,away_passes_into_final_third,away_passes_into_penalty_area,away_crosses_into_penalty_area,away_passes_live,away_passes_dead,away_passes_free_kicks,away_through_balls,away_passes_switches,away_crosses,away_throw_ins,away_corner_kicks,away_corner_kicks_in,away_corner_kicks_out,away_corner_kicks_straight,away_passes_offsides,away_passes_blocked,away_tackles_won,away_tackles_def_3rd,away_tackles_mid_3rd,away_tackles_att_3rd,away_challenge_tackles,away_challenges,away_challenge_tackles_pct,away_challenges_lost,away_blocked_shots,away_blocked_passes,away_tackles_interceptions,away_clearances,away_errors,away_touches_def_pen_area,away_touches_def_3rd,away_touches_mid_3rd,away_touches_att_3rd,away_touches_att_pen_area,away_touches_live_ball,away_take_ons_won_pct,away_take_ons_tackled,away_take_ons_tackled_pct,away_carries_distance,away_carries_progressive_distance,away_carries_into_final_third,away_carries_into_penalty_area,away_miscontrols,away_dispossessed,away_passes_received,away_progressive_passes_received,away_cards_yellow_red,away_fouls,away_fouled,away_offsides,away_pens_won,away_pens_conceded,away_own_goals,away_ball_recoveries,away_aerials_won,away_aerials_lost,away_aerials_won_pct,total_goals,Stadium,Club,Location,Capacity,Pitch length m,Pitch width m,latitude,longitude,outcome,home_points,away_points
29,2023-2024,2023-09-01,20:00,4,10802.0,Paul Tierney,Rob Edwards,David Moyes,Carlton Morris,Kurt Zouma,5-3-2,4-2-3-1,Luton Town,West Ham United,990,1,1,0,0,16,1,1,0,547,14,7,14,1.4,1.4,1.1,29,2,295,419,70.4,37,219,12,24,9,5318,2199,146,171,85.4,108,137,78.8,35,90,38.9,1.5,12,27,10,5,361,58,18,0,2,31,21,9,6,1,2,0,10,12,8,3,3,8,16,50.0,8,2,12,21,25,0,60,158,206,188,30,547,37.5,14,58.3,1299,528,10,5,17,8,294,37,0,8,11,0,0,0,0,60,20,19,51.3,990,2,2,0,0,9,3,1,0,780,22,9,12,1.0,1.0,0.8,18,4,526,655,80.3,43,443,14,17,8,8895,2848,242,262,92.4,213,248,85.9,46,89,51.7,0.7,5,43,7,0,602,47,8,6,7,15,20,6,4,2,0,6,12,14,13,6,3,14,23,60.9,9,5,7,31,34,0,68,241,359,183,22,780,47.1,8,47.1,2194,1051,14,5,20,6,524,43,0,13,7,6,0,0,0,64,19,20,48.7,3,Kenilworth Road,Luton Town,Luton,10265.0,100.6,65.8,51°53′03″N,0°25′54″W,2,0,3
30,2023-2024,2023-09-02,12:30,4,31124.0,Andy Madley,Paul Heckingbottom,Sean Dyche,John Egan,James Tarkowski,3-5-2,4-4-1-1,Sheffield United,Everton,990,2,1,0,0,13,7,1,0,563,16,11,20,1.2,1.2,0.6,20,2,317,435,72.9,27,322,6,19,7,5283,2492,163,188,86.7,103,128,80.5,36,84,42.9,0.5,7,21,8,2,378,56,15,0,2,14,27,4,1,2,0,1,14,9,9,6,1,9,19,47.4,10,6,14,27,39,1,80,256,206,106,26,563,36.8,9,47.4,1343,583,6,2,10,14,314,27,0,11,13,1,0,0,0,51,18,19,48.6,990,2,1,0,0,16,6,2,0,635,23,7,16,2.6,2.6,2.1,25,3,383,518,73.9,41,388,22,20,10,6682,2601,182,207,87.9,147,187,78.6,43,87,49.4,0.8,10,32,5,1,469,47,10,1,4,24,26,6,2,3,1,2,15,18,13,9,1,9,16,56.3,7,2,14,30,18,0,52,174,301,163,24,635,50.0,9,45.0,1863,1028,16,8,9,7,380,41,0,13,11,2,0,0,1,59,19,18,51.4,4,Bramall Lane,Sheffield United,Sheffield,32050.0,101.0,68.0,53°22′13″N,001°28′15″W,0,1,1


### Liczenie agregacji statystyk na bazie tabeli

In [35]:
# Function to get team stats
def get_team_stats(season, team, matches_played):
    return standings[(standings["season"] == season) & (standings["team"] == team) & (standings["matches_played"] == matches_played)].iloc[0]


def calculate_rolling_stats(n, stats, stats_old, matches_played, real_matches_played):
    rolling_stats = {}
    for feature in standings_columns:
        if matches_played == 0:
            rolling_stats[feature] = stats[feature]
            if real_matches_played != 0:
                rolling_stats[feature] = rolling_stats[feature] / real_matches_played
        else:
            rolling_stats[feature] = stats[feature] - stats_old[feature]
            rolling_stats[feature] = rolling_stats[feature] / n
    return rolling_stats

def create_rolling_stats(n, df):
    for index, row in df.iterrows():
        date = row["date"]
        season = row["season"]
        home_team = row["home_team"]
        away_team = row["away_team"]

        home_team_matches_played = standings[(standings["date"] == date) & (standings["team"] == home_team)]["matches_played"].values[0] - 1
        away_team_matches_played = standings[(standings["date"] == date) & (standings["team"] == away_team)]["matches_played"].values[0] - 1

        home_team_stats = get_team_stats(season, home_team, home_team_matches_played)
        away_team_stats = get_team_stats(season, away_team, away_team_matches_played)

        home_team_matches_played_old = max(0, home_team_matches_played - n)
        away_team_matches_played_old = max(0, away_team_matches_played - n)

        if home_team_matches_played_old > 0:
            home_team_stats_old = get_team_stats(season, home_team, home_team_matches_played_old)
        if away_team_matches_played_old > 0:
            away_team_stats_old = get_team_stats(season, away_team, away_team_matches_played_old)

        home_rolling_stats = calculate_rolling_stats(n, home_team_stats, home_team_stats_old if home_team_matches_played_old > 0 else None, home_team_matches_played_old, home_team_matches_played)
        away_rolling_stats = calculate_rolling_stats(n, away_team_stats, away_team_stats_old if away_team_matches_played_old > 0 else None, away_team_matches_played_old, away_team_matches_played)

        for feature, value in home_rolling_stats.items():
            df.at[index, f"home_last{n}_{feature}"] = value
        for feature, value in away_rolling_stats.items():
            df.at[index, f"away_last{n}_{feature}"] = value

        for feature in ['wins', 'draws', 'defeats', 'goal_difference', 'goals_conceded']:
            if feature in standings_columns:
                df.at[index, f"home_last{n}_{feature}"] = home_team_stats[feature]
                df.at[index, f"away_last{n}_{feature}"] = away_team_stats[feature]
                if home_team_matches_played > 0:
                    df.at[index, f"home_last{n}_{feature}"] = home_team_stats[feature] / home_team_matches_played
                if away_team_matches_played > 0:
                    df.at[index, f"away_last{n}_{feature}"] = away_team_stats[feature] / away_team_matches_played

In [38]:
pd.options.mode.chained_assignment = None
create_rolling_stats(n = 5, df = df)

In [39]:
df.head(2)

Unnamed: 0,season,date,time,round,attendance_value,referee,home_manager,away_manager,home_captain,away_captain,formation_home,formation_away,home_team,away_team,home_minutes,home_goals,home_assists,home_pens_made,home_pens_att,home_shots,home_shots_on_target,home_cards_yellow,home_cards_red,home_touches,home_tackles,home_interceptions,home_blocks,home_xg,home_npxg,home_xg_assist,home_sca,home_gca,home_passes_completed,home_passes,home_passes_pct,home_progressive_passes,home_carries,home_progressive_carries,home_take_ons,home_take_ons_won,home_passes_total_distance,home_passes_progressive_distance,home_passes_completed_short,home_passes_short,home_passes_pct_short,home_passes_completed_medium,home_passes_medium,home_passes_pct_medium,home_passes_completed_long,home_passes_long,home_passes_pct_long,home_pass_xa,home_assisted_shots,home_passes_into_final_third,home_passes_into_penalty_area,home_crosses_into_penalty_area,home_passes_live,home_passes_dead,home_passes_free_kicks,home_through_balls,home_passes_switches,home_crosses,home_throw_ins,home_corner_kicks,home_corner_kicks_in,home_corner_kicks_out,home_corner_kicks_straight,home_passes_offsides,home_passes_blocked,home_tackles_won,home_tackles_def_3rd,home_tackles_mid_3rd,home_tackles_att_3rd,home_challenge_tackles,home_challenges,home_challenge_tackles_pct,home_challenges_lost,home_blocked_shots,home_blocked_passes,home_tackles_interceptions,home_clearances,home_errors,home_touches_def_pen_area,home_touches_def_3rd,home_touches_mid_3rd,home_touches_att_3rd,home_touches_att_pen_area,home_touches_live_ball,home_take_ons_won_pct,home_take_ons_tackled,home_take_ons_tackled_pct,home_carries_distance,home_carries_progressive_distance,home_carries_into_final_third,home_carries_into_penalty_area,home_miscontrols,home_dispossessed,home_passes_received,home_progressive_passes_received,home_cards_yellow_red,home_fouls,home_fouled,home_offsides,home_pens_won,home_pens_conceded,home_own_goals,home_ball_recoveries,home_aerials_won,home_aerials_lost,home_aerials_won_pct,away_minutes,away_goals,away_assists,away_pens_made,away_pens_att,away_shots,away_shots_on_target,away_cards_yellow,away_cards_red,away_touches,away_tackles,away_interceptions,away_blocks,away_xg,away_npxg,away_xg_assist,away_sca,away_gca,away_passes_completed,away_passes,away_passes_pct,away_progressive_passes,away_carries,away_progressive_carries,away_take_ons,away_take_ons_won,away_passes_total_distance,away_passes_progressive_distance,away_passes_completed_short,away_passes_short,away_passes_pct_short,away_passes_completed_medium,away_passes_medium,away_passes_pct_medium,away_passes_completed_long,away_passes_long,away_passes_pct_long,away_pass_xa,away_assisted_shots,away_passes_into_final_third,away_passes_into_penalty_area,away_crosses_into_penalty_area,away_passes_live,away_passes_dead,away_passes_free_kicks,away_through_balls,away_passes_switches,away_crosses,away_throw_ins,away_corner_kicks,away_corner_kicks_in,away_corner_kicks_out,away_corner_kicks_straight,away_passes_offsides,away_passes_blocked,away_tackles_won,away_tackles_def_3rd,away_tackles_mid_3rd,away_tackles_att_3rd,away_challenge_tackles,away_challenges,away_challenge_tackles_pct,away_challenges_lost,away_blocked_shots,away_blocked_passes,away_tackles_interceptions,away_clearances,away_errors,away_touches_def_pen_area,away_touches_def_3rd,away_touches_mid_3rd,away_touches_att_3rd,away_touches_att_pen_area,away_touches_live_ball,away_take_ons_won_pct,away_take_ons_tackled,away_take_ons_tackled_pct,away_carries_distance,away_carries_progressive_distance,away_carries_into_final_third,away_carries_into_penalty_area,away_miscontrols,away_dispossessed,away_passes_received,away_progressive_passes_received,away_cards_yellow_red,away_fouls,away_fouled,away_offsides,away_pens_won,away_pens_conceded,away_own_goals,away_ball_recoveries,away_aerials_won,away_aerials_lost,away_aerials_won_pct,total_goals,Stadium,Club,Location,Capacity,Pitch length m,Pitch width m,latitude,longitude,outcome,home_points,away_points,home_last5_matches_played,home_last5_wins,home_last5_draws,home_last5_defeats,home_last5_goal_difference,home_last5_goals_conceded,home_last5_minutes,home_last5_goals,home_last5_assists,home_last5_pens_made,home_last5_pens_att,home_last5_shots,home_last5_shots_on_target,home_last5_cards_yellow,home_last5_cards_red,home_last5_touches,home_last5_tackles,home_last5_interceptions,home_last5_blocks,home_last5_xg,home_last5_npxg,home_last5_xg_assist,home_last5_sca,home_last5_gca,home_last5_passes_completed,home_last5_passes,home_last5_passes_pct,home_last5_progressive_passes,home_last5_carries,home_last5_progressive_carries,home_last5_take_ons,home_last5_take_ons_won,home_last5_passes_total_distance,home_last5_passes_progressive_distance,home_last5_passes_completed_short,home_last5_passes_short,home_last5_passes_pct_short,home_last5_passes_completed_medium,home_last5_passes_medium,home_last5_passes_pct_medium,home_last5_passes_completed_long,home_last5_passes_long,home_last5_passes_pct_long,home_last5_pass_xa,home_last5_assisted_shots,home_last5_passes_into_final_third,home_last5_passes_into_penalty_area,home_last5_crosses_into_penalty_area,home_last5_passes_live,home_last5_passes_dead,home_last5_passes_free_kicks,home_last5_through_balls,home_last5_passes_switches,home_last5_crosses,home_last5_throw_ins,home_last5_corner_kicks,home_last5_corner_kicks_in,home_last5_corner_kicks_out,home_last5_corner_kicks_straight,home_last5_passes_offsides,home_last5_passes_blocked,home_last5_tackles_won,home_last5_tackles_def_3rd,home_last5_tackles_mid_3rd,home_last5_tackles_att_3rd,home_last5_challenge_tackles,home_last5_challenges,home_last5_challenge_tackles_pct,home_last5_challenges_lost,home_last5_blocked_shots,home_last5_blocked_passes,home_last5_tackles_interceptions,home_last5_clearances,home_last5_errors,home_last5_touches_def_pen_area,home_last5_touches_def_3rd,home_last5_touches_mid_3rd,home_last5_touches_att_3rd,home_last5_touches_att_pen_area,home_last5_touches_live_ball,home_last5_take_ons_won_pct,home_last5_take_ons_tackled,home_last5_take_ons_tackled_pct,home_last5_carries_distance,home_last5_carries_progressive_distance,home_last5_carries_into_final_third,home_last5_carries_into_penalty_area,home_last5_miscontrols,home_last5_dispossessed,home_last5_passes_received,home_last5_progressive_passes_received,home_last5_cards_yellow_red,home_last5_fouls,home_last5_fouled,home_last5_offsides,home_last5_pens_won,home_last5_pens_conceded,home_last5_own_goals,home_last5_ball_recoveries,home_last5_aerials_won,home_last5_aerials_lost,home_last5_aerials_won_pct,home_last5_points,away_last5_matches_played,away_last5_wins,away_last5_draws,away_last5_defeats,away_last5_goal_difference,away_last5_goals_conceded,away_last5_minutes,away_last5_goals,away_last5_assists,away_last5_pens_made,away_last5_pens_att,away_last5_shots,away_last5_shots_on_target,away_last5_cards_yellow,away_last5_cards_red,away_last5_touches,away_last5_tackles,away_last5_interceptions,away_last5_blocks,away_last5_xg,away_last5_npxg,away_last5_xg_assist,away_last5_sca,away_last5_gca,away_last5_passes_completed,away_last5_passes,away_last5_passes_pct,away_last5_progressive_passes,away_last5_carries,away_last5_progressive_carries,away_last5_take_ons,away_last5_take_ons_won,away_last5_passes_total_distance,away_last5_passes_progressive_distance,away_last5_passes_completed_short,away_last5_passes_short,away_last5_passes_pct_short,away_last5_passes_completed_medium,away_last5_passes_medium,away_last5_passes_pct_medium,away_last5_passes_completed_long,away_last5_passes_long,away_last5_passes_pct_long,away_last5_pass_xa,away_last5_assisted_shots,away_last5_passes_into_final_third,away_last5_passes_into_penalty_area,away_last5_crosses_into_penalty_area,away_last5_passes_live,away_last5_passes_dead,away_last5_passes_free_kicks,away_last5_through_balls,away_last5_passes_switches,away_last5_crosses,away_last5_throw_ins,away_last5_corner_kicks,away_last5_corner_kicks_in,away_last5_corner_kicks_out,away_last5_corner_kicks_straight,away_last5_passes_offsides,away_last5_passes_blocked,away_last5_tackles_won,away_last5_tackles_def_3rd,away_last5_tackles_mid_3rd,away_last5_tackles_att_3rd,away_last5_challenge_tackles,away_last5_challenges,away_last5_challenge_tackles_pct,away_last5_challenges_lost,away_last5_blocked_shots,away_last5_blocked_passes,away_last5_tackles_interceptions,away_last5_clearances,away_last5_errors,away_last5_touches_def_pen_area,away_last5_touches_def_3rd,away_last5_touches_mid_3rd,away_last5_touches_att_3rd,away_last5_touches_att_pen_area,away_last5_touches_live_ball,away_last5_take_ons_won_pct,away_last5_take_ons_tackled,away_last5_take_ons_tackled_pct,away_last5_carries_distance,away_last5_carries_progressive_distance,away_last5_carries_into_final_third,away_last5_carries_into_penalty_area,away_last5_miscontrols,away_last5_dispossessed,away_last5_passes_received,away_last5_progressive_passes_received,away_last5_cards_yellow_red,away_last5_fouls,away_last5_fouled,away_last5_offsides,away_last5_pens_won,away_last5_pens_conceded,away_last5_own_goals,away_last5_ball_recoveries,away_last5_aerials_won,away_last5_aerials_lost,away_last5_aerials_won_pct,away_last5_points
29,2023-2024,2023-09-01,20:00,4,10802.0,Paul Tierney,Rob Edwards,David Moyes,Carlton Morris,Kurt Zouma,5-3-2,4-2-3-1,Luton Town,West Ham United,990,1,1,0,0,16,1,1,0,547,14,7,14,1.4,1.4,1.1,29,2,295,419,70.4,37,219,12,24,9,5318,2199,146,171,85.4,108,137,78.8,35,90,38.9,1.5,12,27,10,5,361,58,18,0,2,31,21,9,6,1,2,0,10,12,8,3,3,8,16,50.0,8,2,12,21,25,0,60,158,206,188,30,547,37.5,14,58.3,1299,528,10,5,17,8,294,37,0,8,11,0,0,0,0,60,20,19,51.3,990,2,2,0,0,9,3,1,0,780,22,9,12,1.0,1.0,0.8,18,4,526,655,80.3,43,443,14,17,8,8895,2848,242,262,92.4,213,248,85.9,46,89,51.7,0.7,5,43,7,0,602,47,8,6,7,15,20,6,4,2,0,6,12,14,13,6,3,14,23,60.9,9,5,7,31,34,0,68,241,359,183,22,780,47.1,8,47.1,2194,1051,14,5,20,6,524,43,0,13,7,6,0,0,0,64,19,20,48.7,3,Kenilworth Road,Luton Town,Luton,10265.0,100.6,65.8,51°53′03″N,0°25′54″W,2,0,3,1.0,0.0,0.0,1.0,-3.0,3.5,990.0,0.5,0.0,0.5,0.5,9.5,1.5,2.5,0.0,411.0,17.5,7.0,8.5,0.95,0.55,0.45,18.0,0.5,222.5,307.0,72.1,28.5,224.0,10.5,11.5,4.0,4397.0,1863.5,94.5,112.5,83.4,94.5,108.5,86.3,30.5,71.0,41.95,0.8,6.5,21.5,4.0,1.5,263.0,42.5,12.5,0.0,4.5,20.0,10.5,5.5,1.5,4.0,0.0,1.5,6.5,11.0,10.0,5.5,2.0,8.0,14.5,55.0,6.5,5.0,3.5,24.5,23.5,1.0,72.0,156.5,160.0,100.5,18.0,410.5,33.7,5.5,48.45,1363.5,655.0,8.5,4.5,10.5,6.0,220.0,28.5,0.0,12.0,12.0,1.5,0.0,0.5,0.0,44.0,13.5,13.0,46.8,0.0,1.0,0.666667,0.333333,0.0,1.333333,1.0,982.0,2.333333,1.666667,0.333333,0.333333,13.0,4.666667,3.666667,0.333333,422.0,19.666667,14.333333,14.333333,1.966667,1.7,1.2,22.666667,4.333333,188.0,280.333333,66.933333,22.666667,173.333333,11.333333,17.0,8.0,3397.666667,1739.333333,87.333333,106.666667,82.0,62.666667,82.666667,75.466667,27.666667,63.0,43.733333,0.9,10.333333,14.666667,6.0,1.333333,237.0,41.0,9.0,2.333333,3.0,11.0,15.333333,3.666667,3.0,0.666667,0.0,2.333333,9.666667,12.0,11.333333,6.666667,1.666667,9.333333,20.0,47.133333,10.666667,5.666667,8.666667,34.0,34.0,0.333333,83.0,206.333333,133.333333,87.666667,23.666667,421.666667,50.333333,6.666667,38.866667,1041.666667,501.666667,6.666667,5.666667,17.333333,8.333333,184.333333,22.666667,0.333333,11.666667,9.333333,2.333333,0.333333,0.333333,0.0,49.0,17.666667,16.0,50.366667,2.333333
30,2023-2024,2023-09-02,12:30,4,31124.0,Andy Madley,Paul Heckingbottom,Sean Dyche,John Egan,James Tarkowski,3-5-2,4-4-1-1,Sheffield United,Everton,990,2,1,0,0,13,7,1,0,563,16,11,20,1.2,1.2,0.6,20,2,317,435,72.9,27,322,6,19,7,5283,2492,163,188,86.7,103,128,80.5,36,84,42.9,0.5,7,21,8,2,378,56,15,0,2,14,27,4,1,2,0,1,14,9,9,6,1,9,19,47.4,10,6,14,27,39,1,80,256,206,106,26,563,36.8,9,47.4,1343,583,6,2,10,14,314,27,0,11,13,1,0,0,0,51,18,19,48.6,990,2,1,0,0,16,6,2,0,635,23,7,16,2.6,2.6,2.1,25,3,383,518,73.9,41,388,22,20,10,6682,2601,182,207,87.9,147,187,78.6,43,87,49.4,0.8,10,32,5,1,469,47,10,1,4,24,26,6,2,3,1,2,15,18,13,9,1,9,16,56.3,7,2,14,30,18,0,52,174,301,163,24,635,50.0,9,45.0,1863,1028,16,8,9,7,380,41,0,13,11,2,0,0,1,59,19,18,51.4,4,Bramall Lane,Sheffield United,Sheffield,32050.0,101.0,68.0,53°22′13″N,001°28′15″W,0,1,1,1.0,0.0,0.0,1.0,-1.0,1.666667,990.0,0.666667,0.0,0.0,0.0,7.0,1.666667,3.333333,0.0,459.666667,18.333333,11.0,16.333333,0.566667,0.566667,0.3,9.666667,0.666667,228.333333,332.333333,67.833333,16.333333,192.666667,7.666667,16.0,4.666667,4118.666667,1844.0,112.0,139.0,79.8,79.333333,101.666667,76.4,30.666667,72.333333,43.7,0.433333,3.333333,18.333333,3.333333,1.0,287.0,43.666667,8.333333,0.666667,1.0,12.333333,17.333333,4.333333,1.333333,2.333333,0.333333,1.666667,9.0,11.0,9.666667,5.0,3.666667,8.666667,18.333333,44.366667,9.666667,6.666667,9.666667,29.333333,27.666667,0.0,70.666667,198.333333,164.666667,100.333333,15.666667,459.666667,28.966667,10.333333,64.4,918.666667,434.666667,7.333333,3.0,14.666667,7.333333,227.333333,16.333333,0.0,12.666667,6.0,1.666667,0.0,0.333333,0.0,46.0,12.666667,24.333333,32.966667,0.0,1.0,0.0,0.0,1.0,-2.0,2.0,990.0,0.0,0.0,0.0,0.0,14.333333,6.0,2.333333,0.0,525.333333,17.666667,9.666667,10.666667,1.566667,1.566667,1.3,23.666667,0.0,329.666667,428.333333,76.933333,29.333333,307.333333,16.0,13.666667,7.333333,5962.0,2209.666667,142.333333,159.0,89.466667,137.666667,159.0,86.566667,38.0,83.333333,45.566667,0.866667,9.666667,27.666667,7.0,1.666667,375.0,50.666667,11.0,1.0,3.333333,23.666667,21.666667,7.666667,4.333333,3.0,0.0,2.666667,7.333333,10.666667,8.333333,5.666667,3.666667,9.0,17.0,51.9,8.0,2.333333,8.333333,27.333333,11.333333,0.666667,47.0,147.0,230.0,154.0,23.0,525.333333,54.033333,5.0,36.1,1530.333333,766.333333,14.333333,4.333333,15.333333,7.333333,325.333333,29.0,0.0,11.333333,9.333333,2.666667,0.0,0.333333,0.0,54.0,10.333333,10.666667,49.033333,0.0


In [None]:
df.to_csv("data/matches_with_rolling_stats_pl.csv", index=False)