In [1]:
import pandas as pd 
import numpy as np
import os

In [2]:
pd.set_option('display.max_columns', None)

# Games base  

In [105]:
games = pd.read_csv(os.getcwd() + "/artifacts/fetched_data/get_game_list.csv")
games.sort_values("kickoff", ascending=False).head(5)

Unnamed: 0,GW,id,home,team_h,away,team_a,finished,kickoff,team_h_score,team_a_score,season_start_year
378,38,379,MUN,14,FUL,9,True,2023-05-28T15:30:00Z,2,1,22
373,38,374,CHE,6,NEW,15,True,2023-05-28T15:30:00Z,1,1,22
370,38,371,ARS,1,WOL,20,True,2023-05-28T15:30:00Z,5,0,22
371,38,372,AVL,2,BHA,5,True,2023-05-28T15:30:00Z,2,1,22
372,38,373,BRE,4,MCI,13,True,2023-05-28T15:30:00Z,1,0,22


In [106]:
# an ID can get a new team for another season, so this could be used.
#team_id = pd.concat([games[['season_start_year', 'team_h', 'home']].rename(columns = {'home':'team', 'team_h':'team_id_season'}),
#                     games[['season_start_year', 'team_a', 'away']].rename(columns = {'away':'team', 'team_a':'team_id_season'})
#            ]).drop_duplicates()

In [109]:
games['kickoff'] = pd.to_datetime(games['kickoff'])
games['kickoff_date'] = games['kickoff'].dt.date
games['kickoff_year'] = games['kickoff'].dt.year
games['kickoff_month'] = games['kickoff'].dt.month
games['rounds_left'] = 38-games['GW']
games['label_1'] = np.where(games['team_h_score'] > games['team_a_score'], 1, 0)
games['label_X'] = np.where(games['team_h_score'] == games['team_a_score'], 1, 0)
games['label_2'] = np.where(games['team_h_score'] < games['team_a_score'], 1, 0)
games['train_score'] = np.where(games['finished'], 'train', 'score')

In [110]:
games_base = games[[
       # id
       'season_start_year', 
       'kickoff_date',
       'GW', 
       'id', 
       'team_h', 
       'team_a', 
       'train_score',

       # label
       'label_1', 
       'label_X', 
       'label_2', 

       # features
       'home',  
       'away', 
       'kickoff_year', 
       'kickoff_month', 
       'rounds_left']]

games_base.sort_values(['season_start_year', 'GW'])

Unnamed: 0,season_start_year,kickoff_date,GW,id,team_h,team_a,train_score,label_1,label_X,label_2,home,away,kickoff_year,kickoff_month,rounds_left
380,19,2019-08-09,1,1,10,14,train,1,0,0,LIV,NOR,2019,8,37
381,19,2019-08-10,1,8,19,11,train,0,0,1,WHU,MCI,2019,8,37
382,19,2019-08-10,1,2,3,15,train,0,1,0,BOU,SHU,2019,8,37
383,19,2019-08-10,1,3,5,16,train,1,0,0,BUR,SOU,2019,8,37
384,19,2019-08-10,1,4,7,8,train,0,1,0,CRY,EVE,2019,8,37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,22,2023-05-28,38,376,8,3,train,1,0,0,EVE,BOU,2023,5,0
376,22,2023-05-28,38,377,11,18,train,0,0,1,LEE,TOT,2023,5,0
377,22,2023-05-28,38,378,10,19,train,1,0,0,LEI,WHU,2023,5,0
378,22,2023-05-28,38,379,14,9,train,1,0,0,MUN,FUL,2023,5,0


# Games features

In [111]:
games = pd.read_csv(os.getcwd() + "/artifacts/fetched_data/get_game_list.csv")

home = games.rename(columns = {'home':'team', 'team_h':'team_id_season'}).drop(['away', 'team_a'], axis=1)
home['home'] = 1

away = games.rename(columns = {'away':'team', 'team_a':'team_id_season'}).drop(['home', 'team_h'], axis=1)
away['home'] = 0

team_games = pd.concat([home, away])

team_games['win'] = np.where((team_games.home == 1) & (team_games['team_h_score'] > team_games['team_a_score']), 1, 
                              np.where((team_games.home == 0) & (team_games['team_h_score'] < team_games['team_a_score']), 1, 0))
team_games['draw'] = np.where((team_games['team_h_score'] == team_games['team_a_score']), 1, 0)
team_games['loss'] = np.where((team_games.home == 1) & (team_games['team_h_score'] < team_games['team_a_score']), 1, 
                              np.where((team_games.home == 0) & (team_games['team_h_score'] > team_games['team_a_score']), 1, 0))

team_games['goals_scored'] = np.where(team_games.home == 1, team_games['team_h_score'], team_games['team_a_score'])
team_games['goals_conceded'] = np.where(team_games.home == 1, team_games['team_a_score'], team_games['team_h_score'])

team_games['kickoff'] = pd.to_datetime(team_games['kickoff'])
team_games = team_games.sort_values('kickoff').reset_index(drop=True)

team_games['next_id'] = team_games.groupby('team_id_season')['id'].shift(-1)

team_games = team_games.drop(['team_h_score', 'team_a_score'], axis=1)


## Overall form

In [112]:
team_games = team_games.sort_values(['season_start_year', 'team_id_season', 'kickoff']).reset_index(drop=True)

team_games['win_share_latest_5_games'] = team_games.groupby(['season_start_year', 'team_id_season'])['win'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
team_games['draw_share_latest_5_games'] = team_games.groupby(['season_start_year', 'team_id_season'])['draw'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
team_games['loss_share_latest_5_games'] = team_games.groupby(['season_start_year', 'team_id_season'])['loss'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)

team_games['avg_goals_scored_latest_5_games'] = team_games.groupby(['season_start_year', 'team_id_season'])['goals_scored'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
team_games['avg_goals_conceded_latest_5_games'] = team_games.groupby(['season_start_year', 'team_id_season'])['goals_conceded'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)


## Home form

In [113]:
home_games = team_games.loc[team_games.home == 1].sort_values(['season_start_year', 'team_id_season', 'kickoff']).reset_index(drop=True)
home_games['next_id_home'] = home_games.groupby('team_id_season')['id'].shift(-1)

home_games['win_share_latest_5_games_home_home_team'] = home_games.groupby(['season_start_year', 'team_id_season'])['win'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
home_games['draw_share_latest_5_games_home_home_team'] = home_games.groupby(['season_start_year', 'team_id_season'])['draw'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
home_games['loss_share_latest_5_games_home_home_team'] = home_games.groupby(['season_start_year', 'team_id_season'])['loss'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)

home_games['avg_goals_scored_latest_5_games_home_home_team'] = home_games.groupby(['season_start_year', 'team_id_season'])['goals_scored'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
home_games['avg_goals_conceded_latest_5_games_home_home_team'] = home_games.groupby(['season_start_year', 'team_id_season'])['goals_conceded'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)

home_games = home_games[['team_id_season', 
                         'next_id_home', 
                         'season_start_year', 
                         'win_share_latest_5_games_home_home_team', 
                         'draw_share_latest_5_games_home_home_team', 
                         'loss_share_latest_5_games_home_home_team', 
                         'avg_goals_scored_latest_5_games_home_home_team', 
                         'avg_goals_conceded_latest_5_games_home_home_team']]

## Away form

In [114]:
away_games = team_games.loc[team_games.home == 0].sort_values(['season_start_year', 'team_id_season', 'kickoff']).reset_index(drop=True)
away_games['next_id_away'] = away_games.groupby('team_id_season')['id'].shift(-1)

away_games['win_share_latest_5_games_away_away_team'] = away_games.groupby(['season_start_year', 'team_id_season'])['win'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
away_games['draw_share_latest_5_games_away_away_team'] = away_games.groupby(['season_start_year', 'team_id_season'])['draw'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
away_games['loss_share_latest_5_games_away_away_team'] = away_games.groupby(['season_start_year', 'team_id_season'])['loss'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)

away_games['avg_goals_scored_latest_5_games_away_away_team'] = away_games.groupby(['season_start_year', 'team_id_season'])['goals_scored'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)
away_games['avg_goals_conceded_latest_5_games_away_away_team'] = away_games.groupby(['season_start_year', 'team_id_season'])['goals_conceded'].rolling(window=5, min_periods=1).mean().reset_index(drop=True)

away_games = away_games[['team_id_season', 
                         'next_id_away', 
                         'season_start_year', 
                         'win_share_latest_5_games_away_away_team', 
                         'draw_share_latest_5_games_away_away_team', 
                         'loss_share_latest_5_games_away_away_team', 
                         'avg_goals_scored_latest_5_games_away_away_team', 
                         'avg_goals_conceded_latest_5_games_away_away_team']]

## Join on form

In [115]:
team_games_home = team_games.rename(columns={
                         'win_share_latest_5_games':'win_share_latest_5_games_overall_home_team', 
                         'draw_share_latest_5_games':'draw_share_latest_5_games_overall_home_team', 
                         'loss_share_latest_5_games':'loss_share_latest_5_games_overall_home_team', 
                         'avg_goals_scored_latest_5_games':'avg_goals_scored_latest_5_games_overall_home_team', 
                         'avg_goals_conceded_latest_5_games':'avg_goals_conceded_latest_5_games_overall_home_team'})
team_games_home = team_games_home[['team_id_season', 
                                    'next_id', 
                                    'season_start_year', 
                                    'win_share_latest_5_games_overall_home_team', 
                                    'draw_share_latest_5_games_overall_home_team', 
                                    'loss_share_latest_5_games_overall_home_team', 
                                    'avg_goals_scored_latest_5_games_overall_home_team', 
                                    'avg_goals_conceded_latest_5_games_overall_home_team']]

team_games_away = team_games.rename(columns={
                         'win_share_latest_5_games':'win_share_latest_5_games_overall_away_team', 
                         'draw_share_latest_5_games':'draw_share_latest_5_games_overall_away_team', 
                         'loss_share_latest_5_games':'loss_share_latest_5_games_overall_away_team', 
                         'avg_goals_scored_latest_5_games':'avg_goals_scored_latest_5_games_overall_away_team', 
                         'avg_goals_conceded_latest_5_games':'avg_goals_conceded_latest_5_games_overall_away_team'})
team_games_away = team_games_away[['team_id_season', 
                                    'next_id', 
                                    'season_start_year', 
                                    'win_share_latest_5_games_overall_away_team', 
                                    'draw_share_latest_5_games_overall_away_team', 
                                    'loss_share_latest_5_games_overall_away_team', 
                                    'avg_goals_scored_latest_5_games_overall_away_team', 
                                    'avg_goals_conceded_latest_5_games_overall_away_team']]

data = games_base.merge(team_games_home, left_on=['season_start_year', 'team_h', 'id'], right_on=['season_start_year', 'team_id_season', 'next_id'], how='inner').drop(['next_id', 'team_id_season'], axis=1)
data = data.merge(team_games_away, left_on=['season_start_year', 'team_a', 'id'], right_on=['season_start_year', 'team_id_season', 'next_id'], how='inner').drop(['next_id', 'team_id_season'], axis=1)
data = data.merge(home_games, left_on=['season_start_year', 'team_h', 'id'], right_on=['season_start_year', 'team_id_season', 'next_id_home'], how='inner').drop(['next_id_home', 'team_id_season'], axis=1)
data = data.merge(away_games, left_on=['season_start_year', 'team_a', 'id'], right_on=['season_start_year', 'team_id_season', 'next_id_away'], how='inner').drop(['next_id_away', 'team_id_season'], axis=1)

# Table features

In [116]:
team_games['points_from_game'] = np.where(team_games['win'] == 1, 3, np.where(team_games['draw'] == 1, 1, 0))
team_games['game'] = 1

table = team_games.sort_values(['season_start_year', 'team_id_season', 'kickoff']).reset_index(drop=True)
table['kickoff_date'] = table['kickoff'].dt.date
table['team_points'] = table.groupby(['season_start_year', 'team_id_season'])['points_from_game'].cumsum()
table['number_of_games'] = table.groupby(['season_start_year', 'team_id_season'])['game'].cumsum()
table = table[['season_start_year', 'kickoff', 'kickoff_date', 'team', 'team_id_season', 'number_of_games', 'points_from_game', 'team_points']]
table['games_left_season'] = 38 - table['number_of_games']

In [117]:
dates = pd.DataFrame(table[["season_start_year", "kickoff_date"]].drop_duplicates()).sort_values("kickoff_date").reset_index(drop=True)
#dates['next_kickoff_date'] = dates.groupby('season_start_year')['kickoff_date'].shift(-1)
dates = dates.rename(columns={'kickoff_date': 'next_kickoff_date'})
dates

Unnamed: 0,season_start_year,next_kickoff_date
0,19,2019-08-09
1,19,2019-08-10
2,19,2019-08-11
3,19,2019-08-17
4,19,2019-08-18
...,...,...
485,22,2023-05-21
486,22,2023-05-22
487,22,2023-05-24
488,22,2023-05-25


In [118]:
a = table.merge(dates, on=["season_start_year"])
a = a.loc[a.kickoff_date < a.next_kickoff_date]
a['rn'] = a.groupby(['team', 'next_kickoff_date'])['kickoff_date'].rank(ascending=False, method='first')
a = a.loc[a.rn == 1].drop("rn", axis=1)
a['position'] = a.groupby(['season_start_year', 'next_kickoff_date'])['team_points'].rank(ascending=False, method='first')
a = a.sort_values(["season_start_year", "next_kickoff_date", "position"], ascending=[True, True, False]).reset_index(drop=True)


In [119]:
a['points_to_team_above'] = (a['team_points'] - a.groupby('next_kickoff_date')['team_points'].shift(-1)).fillna(0)
a['points_to_team_below'] = (a['team_points'] - a.groupby('next_kickoff_date')['team_points'].shift()).fillna(0)

In [120]:
a['games_left_diff_above'] = (a['games_left_season'] - a.groupby('next_kickoff_date')['games_left_season'].shift(-1)).fillna(0)
a['games_left_diff_below'] = (a['games_left_season'] - a.groupby('next_kickoff_date')['games_left_season'].shift()).fillna(0)

In [121]:
win = a.loc[a['position'] == 1][['next_kickoff_date', 'team_points']].rename(columns={'team_points': 'win_points'})
champions_league = a.loc[a['position'] == 4][['next_kickoff_date', 'team_points']].rename(columns={'team_points': 'cl_points'})
euro = a.loc[a['position'] == 7][['next_kickoff_date', 'team_points']].rename(columns={'team_points': 'euro_points'})
regulation = a.loc[a['position'] == 18][['next_kickoff_date', 'team_points']].rename(columns={'team_points': 'regulation_points'})

a = a.merge(win, on="next_kickoff_date")
a = a.merge(champions_league, on="next_kickoff_date")
a = a.merge(euro, on="next_kickoff_date")
a = a.merge(regulation, on="next_kickoff_date")

a['points_to_win'] = a['team_points'] - a['win_points']
a['points_to_cl'] = a['team_points'] - a['cl_points']
a['points_to_euro'] = a['team_points'] - a['euro_points']
a['points_to_regulation'] = a['team_points'] - a['regulation_points']

a = a.drop(['win_points', 'cl_points', 'euro_points', 'regulation_points'], axis=1)

In [122]:
a.loc[(a.season_start_year == 22) & (pd.to_datetime(a.next_kickoff_date) == '2023-05-22')].sort_values("team_points", ascending=False)

Unnamed: 0,season_start_year,kickoff,kickoff_date,team,team_id_season,number_of_games,points_from_game,team_points,games_left_season,next_kickoff_date,position,points_to_team_above,points_to_team_below,games_left_diff_above,games_left_diff_below,points_to_win,points_to_cl,points_to_euro,points_to_regulation
9457,22,2023-05-21 15:00:00+00:00,2023-05-21,MCI,13,36,3,88,2,2023-05-22,1.0,0.0,7.0,0.0,1.0,0,19,30,57
9456,22,2023-05-20 16:30:00+00:00,2023-05-20,ARS,1,37,0,81,1,2023-05-22,2.0,-7.0,12.0,-1.0,-1.0,-7,12,23,50
9455,22,2023-05-20 14:00:00+00:00,2023-05-20,MUN,14,36,3,69,2,2023-05-22,3.0,-12.0,0.0,1.0,0.0,-19,0,11,38
9454,22,2023-05-18 18:30:00+00:00,2023-05-18,NEW,15,36,3,69,2,2023-05-22,4.0,0.0,3.0,0.0,1.0,-19,0,11,38
9453,22,2023-05-20 14:00:00+00:00,2023-05-20,LIV,12,37,1,66,1,2023-05-22,5.0,-3.0,5.0,-1.0,-1.0,-22,-3,8,35
9452,22,2023-05-21 13:00:00+00:00,2023-05-21,BHA,5,36,3,61,2,2023-05-22,6.0,-5.0,3.0,1.0,1.0,-27,-8,3,30
9451,22,2023-05-20 14:00:00+00:00,2023-05-20,AVL,2,37,1,58,1,2023-05-22,7.0,-3.0,1.0,-1.0,0.0,-30,-11,0,27
9450,22,2023-05-20 11:30:00+00:00,2023-05-20,TOT,18,37,0,57,1,2023-05-22,8.0,-1.0,1.0,0.0,0.0,-31,-12,-1,26
9449,22,2023-05-20 11:30:00+00:00,2023-05-20,BRE,4,37,3,56,1,2023-05-22,9.0,-1.0,4.0,0.0,0.0,-32,-13,-2,25
9448,22,2023-05-20 14:00:00+00:00,2023-05-20,FUL,9,37,1,52,1,2023-05-22,10.0,-4.0,8.0,0.0,0.0,-36,-17,-6,21


In [123]:
a = a.drop(['kickoff', 'kickoff_date', 'team'], axis=1).rename(columns=({'points_from_game':'points_from_last_game'}))

In [128]:
a.loc[(a.season_start_year == 22) & (a.team_id_season == 1)]

Unnamed: 0,season_start_year,team_id_season,number_of_games,points_from_last_game,team_points,games_left_season,next_kickoff_date,position,points_to_team_above,points_to_team_below,games_left_diff_above,games_left_diff_below,points_to_win,points_to_cl,points_to_euro,points_to_regulation
7257,22,1,1,3,3,37,2022-08-13,1.0,0.0,0.0,0.0,0.0,0,0,0,3
7277,22,1,2,3,6,36,2022-08-14,1.0,0.0,0.0,0.0,0.0,0,2,3,6
7297,22,1,2,3,6,36,2022-08-15,1.0,0.0,0.0,0.0,0.0,0,2,2,6
7317,22,1,2,3,6,36,2022-08-20,1.0,0.0,0.0,0.0,0.0,0,2,2,6
7337,22,1,3,3,9,35,2022-08-21,1.0,0.0,2.0,0.0,0.0,0,4,5,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9436,22,1,37,0,81,1,2023-05-21,2.0,-4.0,12.0,-2.0,-1.0,-4,12,23,50
9456,22,1,37,0,81,1,2023-05-22,2.0,-7.0,12.0,-1.0,-1.0,-7,12,23,50
9476,22,1,37,0,81,1,2023-05-24,2.0,-7.0,11.0,-1.0,0.0,-7,12,23,50
9496,22,1,37,0,81,1,2023-05-25,2.0,-8.0,11.0,0.0,0.0,-8,12,23,50


In [124]:
data

Unnamed: 0,season_start_year,kickoff_date,GW,id,team_h,team_a,train_score,label_1,label_X,label_2,home,away,kickoff_year,kickoff_month,rounds_left,win_share_latest_5_games_overall_home_team,draw_share_latest_5_games_overall_home_team,loss_share_latest_5_games_overall_home_team,avg_goals_scored_latest_5_games_overall_home_team,avg_goals_conceded_latest_5_games_overall_home_team,win_share_latest_5_games_overall_away_team,draw_share_latest_5_games_overall_away_team,loss_share_latest_5_games_overall_away_team,avg_goals_scored_latest_5_games_overall_away_team,avg_goals_conceded_latest_5_games_overall_away_team,win_share_latest_5_games_home_home_team,draw_share_latest_5_games_home_home_team,loss_share_latest_5_games_home_home_team,avg_goals_scored_latest_5_games_home_home_team,avg_goals_conceded_latest_5_games_home_home_team,win_share_latest_5_games_away_away_team,draw_share_latest_5_games_away_away_team,loss_share_latest_5_games_away_away_team,avg_goals_scored_latest_5_games_away_away_team,avg_goals_conceded_latest_5_games_away_away_team
0,22,2022-08-20,3,29,18,20,train,1,0,0,TOT,WOL,2022,8,35,0.5,0.5,0.0,3.0,1.5,0.0,0.5,0.5,0.5,1.0,1.0,0.0,0.0,4.0,1.0,0.0,0.0,1.0,1.0,2.0
1,22,2022-08-20,3,22,7,2,train,1,0,0,CRY,AVL,2022,8,35,0.0,0.5,0.5,0.5,1.5,0.5,0.0,0.5,1.0,1.5,0.0,0.0,1.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0
2,22,2022-08-20,3,23,8,16,train,0,1,0,EVE,NFO,2022,8,35,0.0,0.0,1.0,0.5,1.5,0.5,0.0,0.5,0.5,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0
3,22,2022-08-20,3,24,9,4,train,1,0,0,FUL,BRE,2022,8,35,0.0,1.0,0.0,1.0,1.0,0.5,0.5,0.0,3.0,1.0,0.0,1.0,0.0,2.0,2.0,0.0,1.0,0.0,2.0,2.0
4,22,2022-08-20,3,26,10,17,train,0,0,1,LEI,SOU,2022,8,35,0.0,0.5,0.5,2.0,3.0,0.0,0.5,0.5,1.5,3.0,0.0,1.0,0.0,2.0,2.0,0.0,0.0,1.0,1.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1434,21,2022-05-22,38,376,7,13,train,1,0,0,CRY,MUN,2022,5,0,0.4,0.4,0.2,1.2,1.0,0.2,0.2,0.6,1.0,2.4,0.4,0.6,0.0,1.0,0.2,0.0,0.0,1.0,0.4,3.2
1435,21,2022-05-22,38,377,9,16,train,1,0,0,LEI,SOU,2022,5,0,0.4,0.2,0.4,2.2,1.4,0.0,0.2,0.8,0.8,2.2,0.6,0.2,0.2,1.6,0.8,0.0,0.4,0.6,0.6,2.4
1436,21,2022-05-22,38,378,11,20,train,1,0,0,LIV,WOL,2022,5,0,0.8,0.2,0.0,1.6,0.6,0.0,0.4,0.6,0.8,2.4,0.8,0.2,0.0,2.0,0.2,0.2,0.2,0.6,0.6,1.0
1437,21,2022-05-22,38,379,12,2,train,1,0,0,MCI,AVL,2022,5,0,0.8,0.2,0.0,4.2,0.8,0.4,0.4,0.2,1.6,1.0,0.8,0.2,0.0,3.8,0.8,0.4,0.2,0.4,1.6,1.0


In [126]:
data = data.merge(a.add_prefix('tbl_home_'), left_on=['season_start_year', 'kickoff_date', 'team_h'], right_on=['tbl_home_season_start_year', 'tbl_home_next_kickoff_date', 'tbl_home_team_id_season'])
data = data.merge(a.add_prefix('tbl_away_'), left_on=['season_start_year', 'kickoff_date', 'team_h'], right_on=['tbl_away_season_start_year', 'tbl_away_next_kickoff_date', 'tbl_away_team_id_season'])

Unnamed: 0,season_start_year,kickoff_date,GW,id,team_h,team_a,train_score,label_1,label_X,label_2,home,away,kickoff_year,kickoff_month,rounds_left,win_share_latest_5_games_overall_home_team,draw_share_latest_5_games_overall_home_team,loss_share_latest_5_games_overall_home_team,avg_goals_scored_latest_5_games_overall_home_team,avg_goals_conceded_latest_5_games_overall_home_team,win_share_latest_5_games_overall_away_team,draw_share_latest_5_games_overall_away_team,loss_share_latest_5_games_overall_away_team,avg_goals_scored_latest_5_games_overall_away_team,avg_goals_conceded_latest_5_games_overall_away_team,win_share_latest_5_games_home_home_team,draw_share_latest_5_games_home_home_team,loss_share_latest_5_games_home_home_team,avg_goals_scored_latest_5_games_home_home_team,avg_goals_conceded_latest_5_games_home_home_team,win_share_latest_5_games_away_away_team,draw_share_latest_5_games_away_away_team,loss_share_latest_5_games_away_away_team,avg_goals_scored_latest_5_games_away_away_team,avg_goals_conceded_latest_5_games_away_away_team,tbl_home_season_start_year,tbl_home_team_id_season,tbl_home_number_of_games,tbl_home_points_from_last_game,tbl_home_team_points,tbl_home_games_left_season,tbl_home_next_kickoff_date,tbl_home_position,tbl_home_points_to_team_above,tbl_home_points_to_team_below,tbl_home_games_left_diff_above,tbl_home_games_left_diff_below,tbl_home_points_to_win,tbl_home_points_to_cl,tbl_home_points_to_euro,tbl_home_points_to_regulation,tbl_away_season_start_year,tbl_away_team_id_season,tbl_away_number_of_games,tbl_away_points_from_last_game,tbl_away_team_points,tbl_away_games_left_season,tbl_away_next_kickoff_date,tbl_away_position,tbl_away_points_to_team_above,tbl_away_points_to_team_below,tbl_away_games_left_diff_above,tbl_away_games_left_diff_below,tbl_away_points_to_win,tbl_away_points_to_cl,tbl_away_points_to_euro,tbl_away_points_to_regulation
0,22,2022-08-20,3,29,18,20,train,1,0,0,TOT,WOL,2022,8,35,0.5,0.5,0.0,3.0,1.5,0.0,0.5,0.5,0.5,1.0,1.0,0.0,0.0,4.0,1.0,0.0,0.0,1.0,1.0,2.0,22,18,2,1,4,36,2022-08-20,8.0,0.0,1.0,0.0,0.0,-2,0,0,4,22,18,2,1,4,36,2022-08-20,8.0,0.0,1.0,0.0,0.0,-2,0,0,4
1,22,2022-08-20,3,22,7,2,train,1,0,0,CRY,AVL,2022,8,35,0.0,0.5,0.5,0.5,1.5,0.5,0.0,0.5,1.0,1.5,0.0,0.0,1.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,22,7,2,1,1,36,2022-08-20,14.0,-1.0,0.0,0.0,0.0,-5,-3,-3,1,22,7,2,1,1,36,2022-08-20,14.0,-1.0,0.0,0.0,0.0,-5,-3,-3,1
2,22,2022-08-20,3,23,8,16,train,0,1,0,EVE,NFO,2022,8,35,0.0,0.0,1.0,0.5,1.5,0.5,0.0,0.5,0.5,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,22,8,2,0,0,36,2022-08-20,18.0,-1.0,0.0,0.0,0.0,-6,-4,-4,0,22,8,2,0,0,36,2022-08-20,18.0,-1.0,0.0,0.0,0.0,-6,-4,-4,0
3,22,2022-08-20,3,24,9,4,train,1,0,0,FUL,BRE,2022,8,35,0.0,1.0,0.0,1.0,1.0,0.5,0.5,0.0,3.0,1.0,0.0,1.0,0.0,2.0,2.0,0.0,1.0,0.0,2.0,2.0,22,9,2,1,2,36,2022-08-20,12.0,-1.0,0.0,0.0,0.0,-4,-2,-2,2,22,9,2,1,2,36,2022-08-20,12.0,-1.0,0.0,0.0,0.0,-4,-2,-2,2
4,22,2022-08-20,3,26,10,17,train,0,0,1,LEI,SOU,2022,8,35,0.0,0.5,0.5,2.0,3.0,0.0,0.5,0.5,1.5,3.0,0.0,1.0,0.0,2.0,2.0,0.0,0.0,1.0,1.0,4.0,22,10,2,0,1,36,2022-08-20,15.0,0.0,0.0,0.0,0.0,-5,-3,-3,1,22,10,2,0,1,36,2022-08-20,15.0,0.0,0.0,0.0,0.0,-5,-3,-3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1434,21,2022-05-22,38,376,7,13,train,1,0,0,CRY,MUN,2022,5,0,0.4,0.4,0.2,1.2,1.0,0.2,0.2,0.6,1.0,2.4,0.4,0.6,0.0,1.0,0.2,0.0,0.0,1.0,0.4,3.2,21,7,37,0,45,1,2022-05-22,14.0,0.0,5.0,0.0,0.0,-45,-23,-11,10,21,7,37,0,45,1,2022-05-22,14.0,0.0,5.0,0.0,0.0,-45,-23,-11,10
1435,21,2022-05-22,38,377,9,16,train,1,0,0,LEI,SOU,2022,5,0,0.4,0.2,0.4,2.2,1.4,0.0,0.2,0.8,0.8,2.2,0.6,0.2,0.2,1.6,0.8,0.0,0.4,0.6,0.6,2.4,21,9,37,1,49,1,2022-05-22,9.0,-2.0,1.0,0.0,0.0,-41,-19,-7,14,21,9,37,1,49,1,2022-05-22,9.0,-2.0,1.0,0.0,0.0,-41,-19,-7,14
1436,21,2022-05-22,38,378,11,20,train,1,0,0,LIV,WOL,2022,5,0,0.8,0.2,0.0,1.6,0.6,0.0,0.4,0.6,0.8,2.4,0.8,0.2,0.0,2.0,0.2,0.2,0.2,0.6,0.6,1.0,21,11,37,3,89,1,2022-05-22,2.0,-1.0,18.0,0.0,0.0,-1,21,33,54,21,11,37,3,89,1,2022-05-22,2.0,-1.0,18.0,0.0,0.0,-1,21,33,54
1437,21,2022-05-22,38,379,12,2,train,1,0,0,MCI,AVL,2022,5,0,0.8,0.2,0.0,4.2,0.8,0.4,0.4,0.2,1.6,1.0,0.8,0.2,0.0,3.8,0.8,0.4,0.2,0.4,1.6,1.0,21,12,37,1,90,1,2022-05-22,1.0,0.0,1.0,0.0,0.0,0,22,34,55,21,12,37,1,90,1,2022-05-22,1.0,0.0,1.0,0.0,0.0,0,22,34,55


## Save data

In [29]:
data.to_csv('artifacts/data.csv', index=False)

In [13]:
'hej_{0}__da'.format("heypa")

'hej_heypa__da'