In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import missingno

In [30]:
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [31]:
data_folder = './data'

for entry in os.listdir(data_folder):
    entry_path = os.path.join(data_folder, entry)
    if os.path.isdir(entry_path):
        for file_name in os.listdir(entry_path):
            if file_name.endswith('.csv'):
                file_path = os.path.join(entry_path, file_name)
                df = pd.read_csv(file_path)
                df['league'] = entry
                df.to_csv(file_path, index=False)

In [32]:
data = pd.DataFrame()

for folder in os.listdir('./data'):
    if folder.endswith('csv'):
        continue
    for file in os.listdir(f'./data/{folder}'):
        df = pd.read_csv(f'./data/{folder}/{file}')
        data = pd.concat([data, df], ignore_index=True)

In [35]:
data.isna().sum()

timestamp                                  0
date_GMT                                   0
status                                     0
attendance                             30917
home_team_name                             0
away_team_name                             0
referee                                31947
Game Week                               6853
Pre-Match PPG (Home)                       0
Pre-Match PPG (Away)                       0
home_ppg                                   0
away_ppg                                   0
home_team_goal_count                       0
away_team_goal_count                       0
total_goal_count                           0
total_goals_at_half_time                   0
home_team_goal_count_half_time             0
away_team_goal_count_half_time             0
home_team_goal_timings                 19417
away_team_goal_timings                 27086
home_team_corner_count                     0
away_team_corner_count                     0
home_team_

In [33]:
def clean_data(data):
    #transforming date variable
    data['date_GMT'] = pd.to_datetime(data['date_GMT'])
    data['date'] = pd.to_datetime(data['date_GMT'].dt.date)
    data['time'] = data['date_GMT'].dt.time
    
    #adding aggregated variables
    data['corners_total'] = data['home_team_corner_count'] + data['away_team_corner_count']
    data['yellow_cards_total'] = data['home_team_yellow_cards'] + data['away_team_yellow_cards']
    data['red_cards_total'] = data['home_team_red_cards'] + data['away_team_red_cards']
    data['cards_total'] = data['yellow_cards_total'] + data['red_cards_total']
    data['shots_total'] = data['home_team_shots'] + data['away_team_shots']
    data['shots_on_target_total'] = data['home_team_shots_on_target'] + data['away_team_shots_on_target']
    data['shots_off_target_total'] = data['home_team_shots_off_target'] + data['away_team_shots_off_target']
    data['fouls_total'] = data['home_team_fouls'] + data['away_team_fouls']
    
    #getting stadium names without city in brackets
    # data['base_name'] = data['stadium_name'].str.replace(r" \(.*\)$", "", regex=True)

    # city_map = data[data['stadium_name'].str.contains(r"\(.*\)")].copy()
    # city_map['city'] = city_map['stadium_name'].str.extract(r"\((.*?)\)")[0]
    # city_map = city_map.groupby('base_name')['city'].agg(pd.Series.mode).to_dict()

    # #filling stadium names without city in brackets
    # data['normalized_stadium'] = data.apply(lambda row: f"{row['base_name']} ({city_map.get(row['base_name'], 'Unknown')})" if '(' not in row['stadium_name'] else row['stadium_name'], axis=1)

    #dropping unnecessary features
    data.drop(['timestamp', 'status', 'home_team_goal_timings', 'away_team_goal_timings', 'date_GMT'], axis=1, inplace=True)
    
    #setting one of the target variable - result of the game
    data['result'] = np.where(data['home_team_goal_count'] == data['away_team_goal_count'], 0, np.where(data['home_team_goal_count'] > data['away_team_goal_count'], 1, 2))
    
    def assign_season(date):
        year = date.year
        if date.month >= 8:  # Sezon zaczyna się w sierpniu
            return f'{str(year)[2:]}/{str(year+1)[2:]}'
        else:
            return f'{str(year-1)[2:]}/{str(year)[2:]}'
    
    data['season'] = data['date'].apply(assign_season)
    
    return data

In [9]:
data = clean_data(data)

  data['date_GMT'] = pd.to_datetime(data['date_GMT'])


In [10]:
data.shape

(83142, 74)

In [11]:
data.head()

Unnamed: 0,attendance,home_team_name,away_team_name,referee,Game Week,Pre-Match PPG (Home),Pre-Match PPG (Away),home_ppg,away_ppg,home_team_goal_count,away_team_goal_count,total_goal_count,total_goals_at_half_time,home_team_goal_count_half_time,away_team_goal_count_half_time,home_team_corner_count,away_team_corner_count,home_team_yellow_cards,home_team_red_cards,away_team_yellow_cards,away_team_red_cards,home_team_first_half_cards,home_team_second_half_cards,away_team_first_half_cards,away_team_second_half_cards,home_team_shots,away_team_shots,home_team_shots_on_target,away_team_shots_on_target,home_team_shots_off_target,away_team_shots_off_target,home_team_fouls,away_team_fouls,home_team_possession,away_team_possession,Home Team Pre-Match xG,Away Team Pre-Match xG,team_a_xg,team_b_xg,average_goals_per_match_pre_match,btts_percentage_pre_match,over_15_percentage_pre_match,over_25_percentage_pre_match,over_35_percentage_pre_match,over_45_percentage_pre_match,over_15_HT_FHG_percentage_pre_match,over_05_HT_FHG_percentage_pre_match,over_15_2HG_percentage_pre_match,over_05_2HG_percentage_pre_match,average_corners_per_match_pre_match,average_cards_per_match_pre_match,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,stadium_name,league,date,time,corners_total,yellow_cards_total,red_cards_total,cards_total,shots_total,shots_on_target_total,shots_off_target_total,fouls_total,result,season
0,8016.0,Austria Wien,Admira,,1.0,0.0,0.0,1.61,0.67,2,0,2,0,0,0,6,7,2,0,5,0,0,2,1,4,13,13,7,6,6,7,9,17,53,47,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.36,5.15,9.65,0.0,0.0,0.0,0.0,0.0,0.0,Generali Arena (Wien),austria,2013-07-20,14:30:00,13,7,0,7,26,13,13,26,1,12/13
1,2840.0,Grödig,Ried,,1.0,0.0,0.0,1.44,0.94,0,0,0,0,0,0,6,11,8,0,5,0,5,3,2,3,8,16,4,10,4,6,20,24,49,51,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,2.7,3.42,2.73,0.0,0.0,0.0,0.0,0.0,0.0,DAS.GOLDBERG Stadion,austria,2013-07-20,17:00:00,17,13,0,13,24,14,10,44,0,12/13
2,2696.0,Wiener Neustadt,Salzburg,,1.0,0.0,0.0,1.11,1.94,1,5,6,3,1,2,3,10,4,0,1,0,0,4,0,1,5,18,2,6,3,12,13,16,52,48,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,6.97,4.51,1.51,0.0,0.0,0.0,0.0,0.0,0.0,Teddybären&Plüsch-Stadion,austria,2013-07-20,17:00:00,13,5,0,5,23,8,15,29,2,12/13
3,6413.0,Wolfsberger AC,Rapid Wien,,1.0,0.0,0.0,1.33,1.33,2,2,4,2,0,2,6,3,2,0,4,0,0,2,2,2,7,13,3,6,4,7,8,26,51,49,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,3.29,3.37,2.33,0.0,0.0,0.0,0.0,0.0,0.0,Lavanttal Arena,austria,2013-07-20,17:00:00,9,6,0,6,20,9,11,34,0,12/13
4,7406.0,Wacker Innsbruck,Sturm Graz,,1.0,0.0,0.0,0.94,1.44,2,2,4,0,0,0,7,1,2,0,4,0,0,2,0,4,15,5,5,3,10,2,21,20,48,52,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,2.54,3.37,2.93,0.0,0.0,0.0,0.0,0.0,0.0,Tivoli Stadion Tirol,austria,2013-07-21,14:30:00,8,6,0,6,20,8,12,41,0,12/13


In [12]:
data.isna().sum()

attendance                             30917
home_team_name                             0
away_team_name                             0
referee                                31947
Game Week                               6853
Pre-Match PPG (Home)                       0
Pre-Match PPG (Away)                       0
home_ppg                                   0
away_ppg                                   0
home_team_goal_count                       0
away_team_goal_count                       0
total_goal_count                           0
total_goals_at_half_time                   0
home_team_goal_count_half_time             0
away_team_goal_count_half_time             0
home_team_corner_count                     0
away_team_corner_count                     0
home_team_yellow_cards                     0
home_team_red_cards                        0
away_team_yellow_cards                     0
away_team_red_cards                        0
home_team_first_half_cards                 0
home_team_

# Attendance

In [13]:
# Pierwsze wypełnienie: średnia dla gospodarza i sezonu
data['attendance'] = data['attendance'].fillna(data.groupby(['home_team_name', 'season'])['attendance'].transform('mean'))

In [15]:
data['attendance'].isna().sum()

16647

In [16]:
# Drugie wypełnienie: średnia dla ligi i sezonu
data['attendance'] = data['attendance'].fillna(data.groupby(['league', 'season'])['attendance'].transform('mean'))

In [18]:
data['attendance'].isna().sum()

8968

In [19]:
# Trzecie wypełnienie: średnia dla ligi na przestzeni całego datasetu
data['attendance'] = data['attendance'].fillna(data.groupby(['league'])['attendance'].transform('mean'))

In [20]:
data['attendance'].isna().sum()

0

# Rolling variables

In [21]:
# Funkcja do obliczania średniej strzałów dla drużyny (zarówno dom jak i wyjazd)
def rolling_shots_avg(data, team_col, shots_col):
    return data.groupby(team_col)[shots_col].transform(lambda x: x.rolling(window=5, min_periods=1).mean())

In [22]:
# Obliczanie średniej liczby strzałów dla drużyn domowych i wyjazdowych
data['home_team_shots_rolling_avg'] = rolling_shots_avg(data, 'home_team_name', 'home_team_shots')
data['away_team_shots_rolling_avg'] = rolling_shots_avg(data, 'away_team_name', 'away_team_shots')

In [24]:
# Tworzenie nowych kolumn dla sumy kartek
data['home_team_total_cards'] = data['home_team_yellow_cards'] + data['home_team_red_cards']
data['away_team_total_cards'] = data['away_team_yellow_cards'] + data['away_team_red_cards']

In [25]:
# Funkcja do obliczania średniej ruchomej dla danych statystyk
def rolling_avg(data, team_col, stat_col):
    return data.groupby(team_col)[stat_col].transform(lambda x: x.rolling(window=5, min_periods=1).mean())

# Obliczanie średnich dla goli, rożnych i kartek
# Dla goli
data['home_team_goal_count_rolling_avg'] = rolling_avg(data, 'home_team_name', 'home_team_goal_count')
data['away_team_goal_count_rolling_avg'] = rolling_avg(data, 'away_team_name', 'away_team_goal_count')

# Dla rzutów rożnych
data['home_team_corner_count_rolling_avg'] = rolling_avg(data, 'home_team_name', 'home_team_corner_count')
data['away_team_corner_count_rolling_avg'] = rolling_avg(data, 'away_team_name', 'away_team_corner_count')

# Dla kartek
data['home_team_cards_rolling_avg'] = rolling_avg(data, 'home_team_name', 'home_team_total_cards')
data['away_team_cards_rolling_avg'] = rolling_avg(data, 'away_team_name', 'away_team_total_cards')

In [26]:
data.head()

Unnamed: 0,attendance,home_team_name,away_team_name,referee,Game Week,Pre-Match PPG (Home),Pre-Match PPG (Away),home_ppg,away_ppg,home_team_goal_count,away_team_goal_count,total_goal_count,total_goals_at_half_time,home_team_goal_count_half_time,away_team_goal_count_half_time,home_team_corner_count,away_team_corner_count,home_team_yellow_cards,home_team_red_cards,away_team_yellow_cards,away_team_red_cards,home_team_first_half_cards,home_team_second_half_cards,away_team_first_half_cards,away_team_second_half_cards,home_team_shots,away_team_shots,home_team_shots_on_target,away_team_shots_on_target,home_team_shots_off_target,away_team_shots_off_target,home_team_fouls,away_team_fouls,home_team_possession,away_team_possession,Home Team Pre-Match xG,Away Team Pre-Match xG,team_a_xg,team_b_xg,average_goals_per_match_pre_match,btts_percentage_pre_match,over_15_percentage_pre_match,over_25_percentage_pre_match,over_35_percentage_pre_match,over_45_percentage_pre_match,over_15_HT_FHG_percentage_pre_match,over_05_HT_FHG_percentage_pre_match,over_15_2HG_percentage_pre_match,over_05_2HG_percentage_pre_match,average_corners_per_match_pre_match,average_cards_per_match_pre_match,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,stadium_name,league,date,time,corners_total,yellow_cards_total,red_cards_total,cards_total,shots_total,shots_on_target_total,shots_off_target_total,fouls_total,result,season,home_team_shots_rolling_avg,away_team_shots_rolling_avg,home_team_total_cards,away_team_total_cards,home_team_goal_count_rolling_avg,away_team_goal_count_rolling_avg,home_team_corner_count_rolling_avg,away_team_corner_count_rolling_avg,home_team_cards_rolling_avg,away_team_cards_rolling_avg
0,8016.0,Austria Wien,Admira,,1.0,0.0,0.0,1.61,0.67,2,0,2,0,0,0,6,7,2,0,5,0,0,2,1,4,13,13,7,6,6,7,9,17,53,47,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.36,5.15,9.65,0.0,0.0,0.0,0.0,0.0,0.0,Generali Arena (Wien),austria,2013-07-20,14:30:00,13,7,0,7,26,13,13,26,1,12/13,13.0,13.0,2,5,2.0,0.0,6.0,7.0,2.0,5.0
1,2840.0,Grödig,Ried,,1.0,0.0,0.0,1.44,0.94,0,0,0,0,0,0,6,11,8,0,5,0,5,3,2,3,8,16,4,10,4,6,20,24,49,51,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,2.7,3.42,2.73,0.0,0.0,0.0,0.0,0.0,0.0,DAS.GOLDBERG Stadion,austria,2013-07-20,17:00:00,17,13,0,13,24,14,10,44,0,12/13,8.0,16.0,8,5,0.0,0.0,6.0,11.0,8.0,5.0
2,2696.0,Wiener Neustadt,Salzburg,,1.0,0.0,0.0,1.11,1.94,1,5,6,3,1,2,3,10,4,0,1,0,0,4,0,1,5,18,2,6,3,12,13,16,52,48,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,6.97,4.51,1.51,0.0,0.0,0.0,0.0,0.0,0.0,Teddybären&Plüsch-Stadion,austria,2013-07-20,17:00:00,13,5,0,5,23,8,15,29,2,12/13,5.0,18.0,4,1,1.0,5.0,3.0,10.0,4.0,1.0
3,6413.0,Wolfsberger AC,Rapid Wien,,1.0,0.0,0.0,1.33,1.33,2,2,4,2,0,2,6,3,2,0,4,0,0,2,2,2,7,13,3,6,4,7,8,26,51,49,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,3.29,3.37,2.33,0.0,0.0,0.0,0.0,0.0,0.0,Lavanttal Arena,austria,2013-07-20,17:00:00,9,6,0,6,20,9,11,34,0,12/13,7.0,13.0,2,4,2.0,2.0,6.0,3.0,2.0,4.0
4,7406.0,Wacker Innsbruck,Sturm Graz,,1.0,0.0,0.0,0.94,1.44,2,2,4,0,0,0,7,1,2,0,4,0,0,2,0,4,15,5,5,3,10,2,21,20,48,52,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,2.54,3.37,2.93,0.0,0.0,0.0,0.0,0.0,0.0,Tivoli Stadion Tirol,austria,2013-07-21,14:30:00,8,6,0,6,20,8,12,41,0,12/13,15.0,5.0,2,4,2.0,2.0,7.0,1.0,2.0,4.0


# Game week

In [42]:
df = pd.read_csv('./data/premier league/england-premier-league-matches-2007-to-2008-stats.csv')

In [43]:
df

Unnamed: 0,timestamp,date_GMT,status,attendance,home_team_name,away_team_name,referee,Game Week,Pre-Match PPG (Home),Pre-Match PPG (Away),home_ppg,away_ppg,home_team_goal_count,away_team_goal_count,total_goal_count,total_goals_at_half_time,home_team_goal_count_half_time,away_team_goal_count_half_time,home_team_goal_timings,away_team_goal_timings,home_team_corner_count,away_team_corner_count,home_team_yellow_cards,home_team_red_cards,away_team_yellow_cards,away_team_red_cards,home_team_first_half_cards,home_team_second_half_cards,away_team_first_half_cards,away_team_second_half_cards,home_team_shots,away_team_shots,home_team_shots_on_target,away_team_shots_on_target,home_team_shots_off_target,away_team_shots_off_target,home_team_fouls,away_team_fouls,home_team_possession,away_team_possession,Home Team Pre-Match xG,Away Team Pre-Match xG,team_a_xg,team_b_xg,average_goals_per_match_pre_match,btts_percentage_pre_match,over_15_percentage_pre_match,over_25_percentage_pre_match,over_35_percentage_pre_match,over_45_percentage_pre_match,over_15_HT_FHG_percentage_pre_match,over_05_HT_FHG_percentage_pre_match,over_15_2HG_percentage_pre_match,over_05_2HG_percentage_pre_match,average_corners_per_match_pre_match,average_cards_per_match_pre_match,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,stadium_name,league
0,1186832700,Aug 11 2007 - 11:45am,complete,,Sunderland,Tottenham Hotspur,,,0.00,0.00,1.58,0.89,1,0,1,0,0,0,90'3,,-1,-1,1,0,1,0,0,1,1,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Stadium of Light (Sunderland),premier league
1,1186840800,Aug 11 2007 - 2:00pm,complete,,West Ham United,Manchester City,,,0.00,0.00,1.47,0.95,0,2,2,1,0,1,,1887,-1,-1,0,0,4,0,0,0,2,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Boleyn Ground (London),premier league
2,1186840800,Aug 11 2007 - 2:00pm,complete,,Middlesbrough,Blackburn Rovers,,,0.00,0.00,1.37,1.42,1,2,3,1,1,0,30,6379,-1,-1,3,0,4,0,2,1,0,4,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Riverside Stadium (Middlesbrough),premier league
3,1186840800,Aug 11 2007 - 2:00pm,complete,,Everton,Wigan Athletic,,,0.00,0.00,1.95,0.58,2,1,3,1,1,0,2675,80,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Goodison Park (Liverpool),premier league
4,1186840800,Aug 11 2007 - 2:00pm,complete,,Derby County,Portsmouth,,,0.00,0.00,0.42,1.47,2,2,4,2,1,1,484,2783,-1,-1,1,0,2,0,0,1,1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Pride Park Stadium (Derby),premier league
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,1210514400,May 11 2008 - 2:00pm,complete,,West Ham United,Aston Villa,,,1.50,1.44,1.47,1.42,2,2,4,2,1,1,888,1458,-1,-1,2,0,2,0,2,0,1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,2.94,61,86,56,28,9,28,78,48,86,0.0,3.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Boleyn Ground (London),premier league
376,1210514400,May 11 2008 - 2:00pm,complete,,Tottenham Hotspur,Liverpool,,,1.61,1.72,1.53,1.79,0,2,2,0,0,0,,6974,-1,-1,1,0,1,0,0,1,0,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,3.20,58,84,58,39,17,28,75,56,86,0.0,2.72,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,White Hart Lane (London),premier league
377,1210514400,May 11 2008 - 2:00pm,complete,,Sunderland,Arsenal,,,1.67,1.83,1.58,1.89,0,1,1,1,0,1,,24,-1,-1,1,0,1,0,0,1,0,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,2.75,64,81,53,28,12,28,73,39,84,0.0,3.97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Stadium of Light (Sunderland),premier league
378,1210514400,May 11 2008 - 2:00pm,complete,,Birmingham City,Blackburn Rovers,,,1.28,1.50,1.37,1.42,4,1,5,1,1,0,"32,73,90'1,90'3",49,-1,-1,3,0,2,0,2,1,1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,2.67,67,78,50,28,9,20,61,47,86,0.0,4.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,St Andrew's Trillion Trophy Stadium (Birmingham),premier league


In [46]:
import pandas as pd

# Funkcja do przetwarzania jednego pliku
def process_season(df):
    df['date_GMT'] = pd.to_datetime(df['date_GMT'], format='%b %d %Y - %I:%M%p')
    df = df.sort_values(by='date_GMT')
    if 'Game Week' not in df.columns or df['Game Week'].isna().any():
        df['Game Week'] = df['date_GMT'].rank(method='dense').astype(int)
    return df

# Przykład użycia funkcji
processed_season_df = process_season(df)
processed_season_df.to_csv('path_to_processed_season_csv_file.csv', index=False)

In [47]:
process_season = pd.read_csv('path_to_processed_season_csv_file.csv')

In [48]:
process_season

Unnamed: 0,timestamp,date_GMT,status,attendance,home_team_name,away_team_name,referee,Game Week,Pre-Match PPG (Home),Pre-Match PPG (Away),home_ppg,away_ppg,home_team_goal_count,away_team_goal_count,total_goal_count,total_goals_at_half_time,home_team_goal_count_half_time,away_team_goal_count_half_time,home_team_goal_timings,away_team_goal_timings,home_team_corner_count,away_team_corner_count,home_team_yellow_cards,home_team_red_cards,away_team_yellow_cards,away_team_red_cards,home_team_first_half_cards,home_team_second_half_cards,away_team_first_half_cards,away_team_second_half_cards,home_team_shots,away_team_shots,home_team_shots_on_target,away_team_shots_on_target,home_team_shots_off_target,away_team_shots_off_target,home_team_fouls,away_team_fouls,home_team_possession,away_team_possession,Home Team Pre-Match xG,Away Team Pre-Match xG,team_a_xg,team_b_xg,average_goals_per_match_pre_match,btts_percentage_pre_match,over_15_percentage_pre_match,over_25_percentage_pre_match,over_35_percentage_pre_match,over_45_percentage_pre_match,over_15_HT_FHG_percentage_pre_match,over_05_HT_FHG_percentage_pre_match,over_15_2HG_percentage_pre_match,over_05_2HG_percentage_pre_match,average_corners_per_match_pre_match,average_cards_per_match_pre_match,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,stadium_name,league
0,1186832700,2007-08-11 11:45:00,complete,,Sunderland,Tottenham Hotspur,,1,0.00,0.00,1.58,0.89,1,0,1,0,0,0,90'3,,-1,-1,1,0,1,0,0,1,1,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Stadium of Light (Sunderland),premier league
1,1186840800,2007-08-11 14:00:00,complete,,West Ham United,Manchester City,,2,0.00,0.00,1.47,0.95,0,2,2,1,0,1,,1887,-1,-1,0,0,4,0,0,0,2,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Boleyn Ground (London),premier league
2,1186840800,2007-08-11 14:00:00,complete,,Middlesbrough,Blackburn Rovers,,2,0.00,0.00,1.37,1.42,1,2,3,1,1,0,30,6379,-1,-1,3,0,4,0,2,1,0,4,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Riverside Stadium (Middlesbrough),premier league
3,1186840800,2007-08-11 14:00:00,complete,,Everton,Wigan Athletic,,2,0.00,0.00,1.95,0.58,2,1,3,1,1,0,2675,80,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Goodison Park (Liverpool),premier league
4,1186840800,2007-08-11 14:00:00,complete,,Derby County,Portsmouth,,2,0.00,0.00,0.42,1.47,2,2,4,2,1,1,484,2783,-1,-1,1,0,2,0,0,1,1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,0.00,0,0,0,0,0,0,0,0,0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Pride Park Stadium (Derby),premier league
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,1210514400,2008-05-11 14:00:00,complete,,Wigan Athletic,Manchester United,,200,1.61,1.78,1.53,1.84,0,2,2,1,0,1,,3380,-1,-1,3,0,2,0,0,3,1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,2.28,42,64,33,17,9,28,70,31,72,0.0,3.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,DW Stadium (Wigan),premier league
376,1210514400,2008-05-11 14:00:00,complete,,Portsmouth,Fulham,,200,1.61,0.72,1.53,0.84,0,1,1,0,0,0,,76,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,2.25,45,64,31,20,11,25,58,31,70,0.0,3.90,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fratton Park (Portsmouth),premier league
377,1210514400,2008-05-11 14:00:00,complete,,Derby County,Reading,,200,0.44,0.39,0.42,0.53,0,4,4,1,0,1,,"15,61,69,90'1",-1,-1,0,0,1,0,0,0,0,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,3.06,50,72,53,28,17,36,75,45,75,0.0,4.53,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Pride Park Stadium (Derby),premier league
378,1210514400,2008-05-11 14:00:00,complete,,Chelsea,Bolton Wanderers,,200,2.33,0.56,2.26,0.58,1,1,2,0,0,0,62,90'3,-1,-1,1,0,3,0,0,1,3,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0.0,0.0,0.0,0.0,2.61,44,70,39,28,17,31,67,42,67,0.0,4.34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Stamford Bridge (London),premier league
