In [100]:
import re

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

from fifaindex_scraping import get_basic_data, get_additional_data

## Importing and checking data

Import the scraped data

In [101]:
# Match-level data, one row per match; later cells iterate over these rows and
# read the columns listed in the output below.
all_matches = pd.read_csv("data/matches_soccerway.csv")
all_matches.columns

Index(['Season', 'Round', 'HomeTeam', 'AwayTeam', 'Date', 'Hour',
       'HomeTeamPlayers', 'AwayTeamPlayers', 'GoalsHome', 'GoalsAway',
       'CornersHome', 'CornersAway', 'ShotsTargetHome', 'ShotsTargetAway',
       'ShotsWideHome', 'ShotsWideAway', 'FoulsHome', 'FoulsAway',
       'OffsidesHome', 'OffsidesAway', 'HomeTeamPossession',
       'AwayTeamPossession'],
      dtype='object')

In [102]:
# Load the FIFA player table and discard its "ID" column in a single chain.
all_players = pd.read_csv("data/players_fifa.csv").drop(columns=["ID"])
all_players.columns

Index(['FIFA', 'Name', 'Nationality', 'OVR', 'POT', 'Positions', 'Age', 'Team',
       'Height', 'Weight', 'Foot', 'WorkRate', 'WeakFoot', 'SkillMoves',
       'Value', 'Wage', 'Ball_Control', 'Dribbling', 'Marking', 'Slide_Tackle',
       'Stand_Tackle', 'Aggression', 'Reactions', 'Att._Position',
       'Interceptions', 'Vision', 'Crossing', 'Short_Pass', 'Long_Pass',
       'Acceleration', 'Stamina', 'Strength', 'Balance', 'Sprint_Speed',
       'Agility', 'Jumping', 'Heading', 'Shot_Power', 'Finishing',
       'Long_Shots', 'Curve', 'FK_Acc.', 'Penalties', 'Volleys',
       'GK_Positioning', 'GK_Diving', 'GK_Handling', 'GK_Kicking',
       'GK_Reflexes'],
      dtype='object')

In [131]:
# Load the FIFA club table; "Unnamed: 0" is the index column stored in the CSV.
clubs = pd.read_csv("data/clubs_fifa.csv").drop(columns=["Unnamed: 0"])
clubs.columns

Index(['FIFA', 'Name', 'Rating', 'RivalTeam', 'Attack', 'Midfield', 'Defence',
       'TransferBudget'],
      dtype='object')

In [104]:
def get_missing_players(team, players, fifa, missing_players, last_names):
    """Collect data rows for line-up players that are absent from ``all_players``.

    For every name in ``players`` the function looks for a row of the global
    ``all_players`` frame with the same FIFA edition, a team name containing
    ``team`` and a player name containing the player's last name. When no such
    row exists it tries, in order:

    1. a row of the same team/last name from the previous or next FIFA edition
       (copied with the edition replaced by ``fifa``);
    2. a search on fifaindex.com, unless that last name was already handled.

    Args:
        team: Team name as used in the match data.
        players: Iterable of full player names.
        fifa: FIFA edition as a two-digit string or int (e.g. ``"12"``).
        missing_players: List extended in place with the recovered rows.
        last_names: List of already-handled last names, extended in place.

    Returns:
        None. Results are delivered through ``missing_players`` and
        ``last_names``.
    """
    BASE_URL = "https://www.fifaindex.com"
    fifa_version = int(fifa)

    # Names are matched literally (regex=False) so characters such as "." or
    # "(" cannot change or break the match; na=False keeps the masks boolean
    # when a name or team cell is missing.
    team_mask = all_players["Team"].str.contains(team, regex=False, na=False)
    same_edition = all_players["FIFA"] == fifa_version
    adjacent_edition = all_players["FIFA"].isin([fifa_version - 1, fifa_version + 1])

    # The context manager closes the session (and its pooled connections).
    with requests.Session() as s:
        for player_name in players:
            last_name = player_name.split(" ")[-1]
            name_mask = all_players["Name"].str.contains(last_name, regex=False, na=False)

            if (same_edition & team_mask & name_mask).any():
                continue  # already known for this edition

            found_player_other_season = all_players.loc[adjacent_edition & team_mask & name_mask]
            if len(found_player_other_season) > 0:
                # tolist() yields an independent, writable copy of the row.
                player_other_season = found_player_other_season.iloc[0].tolist()
                player_other_season[0] = fifa_version  # column 0 is "FIFA"
                missing_players.append(player_other_season)
                last_names.append(player_other_season[1].split(" ")[-1])  # column 1 is "Name"
                continue

            if last_name in last_names:
                continue
            last_names.append(last_name)

            # Let requests build (and percent-encode) the query string.
            req = s.get(f"{BASE_URL}/players/fifa{fifa}/",
                        params={"name": last_name, "order": "desc"})
            page_parsed = bs(req.text, 'html.parser')

            table = page_parsed.find("table", {"class": "table table-striped table-players"})
            tbody = table.find("tbody") if table is not None else None
            if tbody is None:
                continue  # no result table on the page (e.g. error page)

            rows = tbody.find_all("tr")
            if len(rows) <= 2:
                continue
            # NOTE(review): the row at index 2 is used as the player entry;
            # presumably the first rows are not player rows - confirm against
            # the page layout.
            player_row = rows[2]

            player_data = [fifa_version] + get_basic_data(player_row)

            profile_href = player_row.find("td", {"data-title": "Name"}).find("a").get("href")
            profile_parsed = bs(s.get(f"{BASE_URL}{profile_href}").text, 'html.parser')

            player_data.extend(get_additional_data(profile_parsed))
            player_data[7] = team  # column 7 is "Team"

            missing_players.append(player_data)

In [105]:
# Accumulators that get_missing_players fills in place.
missing_players = []
last_names = []

for match in all_matches.itertuples(index=False):
    # FIFA edition = last two digits of the season's second year,
    # e.g. "2011/2012" -> "12".
    fifa = match.Season.split("/")[1][-2:]

    line_ups = (
        (match.HomeTeam, match.HomeTeamPlayers.split("/")),
        (match.AwayTeam, match.AwayTeamPlayers.split("/")),
    )
    for team_name, team_players in line_ups:
        get_missing_players(team_name, team_players, fifa, missing_players, last_names)

In [106]:
# Turn the collected rows into a frame with the same columns as all_players and
# cache it on disk. The index is written as well (first, unnamed CSV column).
missing_players_df = pd.DataFrame(missing_players, columns=all_players.columns)
missing_players_df.to_csv("data/players_fifa_missing.csv")

In [107]:
# Reload the cached rows and add them to the player table.
# NOTE: re-running this cell adds the same rows again; reload all_players first.
missing_players_df = pd.read_csv("data/players_fifa_missing.csv")
missing_players_df = missing_players_df.drop(["Unnamed: 0"], axis = 1)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
all_players = pd.concat([all_players, missing_players_df], ignore_index=True)

In [108]:
# Display the player table (the notebook shows a truncated preview).
all_players

Unnamed: 0,FIFA,Name,Nationality,OVR,POT,Positions,Age,Team,Height,Weight,...,Long_Shots,Curve,FK_Acc.,Penalties,Volleys,GK_Positioning,GK_Diving,GK_Handling,GK_Kicking,GK_Reflexes
0,12,Franck Ribéry,France,89,91,LM/RW/RM,28,Bayern Munich,170,72,...,75,88,85,87,82,7,15,6.0,9.0,10.0
1,12,Arjen Robben,Netherlands,89,90,RM/LM/RW/LW,28,Bayern Munich,181,75,...,89,85,84,83,84,5,10,8.0,11.0,15.0
2,12,Manuel Neuer,Germany,87,91,GK,25,Bayern Munich,193,90,...,12,8,7,37,7,82,90,81.0,91.0,89.0
3,12,Bastian Schweinsteiger,Germany,87,88,CM/RM/CDM/LM,27,Bayern Munich,183,76,...,86,87,71,82,85,13,14,14.0,13.0,11.0
4,12,Philipp Lahm,Germany,87,88,LB/RB,28,Bayern Munich,170,64,...,67,77,59,69,66,14,11,12.0,5.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6822,21,Aleksandar Dragović,Austria,75,75,CB,29,Bayer Leverkusen,186,84,...,53,32,61,65,37,13,10,7.0,11.0,8.0
6823,21,Lautaro Martínez,Argentina,84,91,ST,23,Mainz 05,174,81,...,74,78,43,74,83,8,11,8.0,8.0,13.0
6824,21,Javairô Dilrosun,Netherlands,74,82,LM,22,Hertha BSC,174,74,...,42,60,49,55,60,7,8,15.0,11.0,11.0
6825,21,Marin Pongračić,Croatia,76,83,CB,23,Wolfsburg,190,95,...,22,31,25,30,25,8,6,6.0,14.0,5.0


For each season, check that there are 306 matches, each with a unique combination of home team and away team.

In [109]:
# For every season, count the ordered (home, away) pairs of first-round teams
# that occur exactly once in that season's matches.
seasons_to_check = [f"{year}/{year + 1}" for year in range(2011, 2021)]
for season in seasons_to_check:
    season_matches = all_matches.loc[all_matches['Season'] == season]
    opening_round = season_matches.loc[season_matches['Round'] == 1]
    teams_season = opening_round["HomeTeam"].tolist() + opening_round["AwayTeam"].tolist()
    count = sum(
        1
        for a in teams_season
        for b in teams_season
        if a != b
        and len(season_matches.loc[(season_matches['HomeTeam'] == a)
                                   & (season_matches["AwayTeam"] == b)]) == 1
    )
    print(season, count)

2011/2012 306
2012/2013 306
2013/2014 306
2014/2015 306
2015/2016 306
2016/2017 306
2017/2018 306
2018/2019 306
2019/2020 0
2020/2021 126


## Creating tables

We build a dictionary of league tables, with one table for each round of each season.

The function below creates the empty table for round 0 of a season (note that every team starts the season with a form of 1.0).

In [110]:
# Gamma values for the form model: one "Form_<gamma>" column is kept per value,
# and each form update is scaled by the corresponding gamma.
gammas = [0.1, 0.33, 0.5, 0.66]

In [111]:
def create_empty_table(matches):
    """Build the starting league table for the teams appearing in ``matches``.

    Every team listed in the ``HomeTeam`` column, followed by every team in
    the ``AwayTeam`` column, gets one row with all counters set to 0 and one
    ``Form_<gamma>`` column per entry of the global ``gammas``, set to 1.0.

    Args:
        matches: Frame with ``HomeTeam`` and ``AwayTeam`` columns.

    Returns:
        A new DataFrame with one row per listed team.
    """
    stat_columns = ["Won", "Drawn", "Lost", "GF", "GA", "Points",
                    "FoulsCommited", "Offsides", "ShotsTarget",
                    "Corners", "Possession"]
    form_columns = [f"Form_{gamma}" for gamma in gammas]

    teams = matches["HomeTeam"].tolist() + matches["AwayTeam"].tolist()
    table_rows = [
        [team] + [0] * len(stat_columns) + [1.0] * len(form_columns)
        for team in teams
    ]
    return pd.DataFrame(table_rows, columns=["Team"] + stat_columns + form_columns)

In [112]:
def update_table_win(tab, win_team, lose_team):
    """Record a decided match in ``tab`` (modified in place).

    The winner gets one win and three points, the loser one loss. For every
    gamma in the global ``gammas`` the amount ``gamma * <loser's form>`` is
    added to the winner's ``Form_<gamma>`` and subtracted from the loser's.

    Args:
        tab: League table with ``Team``, ``Won``, ``Lost``, ``Points`` and
            the ``Form_<gamma>`` columns.
        win_team: Name of the winning team.
        lose_team: Name of the losing team.

    Raises:
        ValueError: If ``lose_team`` does not match exactly one row.
    """
    winner_rows = tab["Team"] == win_team
    loser_rows = tab["Team"] == lose_team

    tab.loc[winner_rows, 'Won'] += 1
    tab.loc[loser_rows, 'Lost'] += 1
    tab.loc[winner_rows, 'Points'] += 3

    for gamma in gammas:
        form_column = f'Form_{gamma}'
        # .item() extracts the single value explicitly; float(Series) is
        # deprecated since pandas 2.1.
        lose_team_form = float(tab.loc[loser_rows, form_column].item())
        transferred = gamma * lose_team_form
        tab.loc[winner_rows, form_column] += transferred
        tab.loc[loser_rows, form_column] -= transferred

In [113]:
def update_table_draw(tab, h_team, a_team):
    """Record a drawn match in ``tab`` (modified in place).

    Both teams get one draw and one point. For every gamma in the global
    ``gammas`` each team's ``Form_<gamma>`` moves towards the other team's
    form by ``gamma`` times the difference between the two pre-match values.

    Args:
        tab: League table with ``Team``, ``Drawn``, ``Points`` and the
            ``Form_<gamma>`` columns.
        h_team: Name of the home team.
        a_team: Name of the away team.

    Raises:
        ValueError: If either team does not match exactly one row.
    """
    home_rows = tab["Team"] == h_team
    away_rows = tab["Team"] == a_team

    tab.loc[home_rows, 'Drawn'] += 1
    tab.loc[away_rows, 'Drawn'] += 1
    tab.loc[away_rows, 'Points'] += 1
    tab.loc[home_rows, 'Points'] += 1

    for gamma in gammas:
        form_column = f'Form_{gamma}'
        # Read both forms before writing either, so each update is based on
        # the pre-match values. float(Series) is deprecated since pandas 2.1,
        # hence the explicit .item().
        h_team_form = float(tab.loc[home_rows, form_column].item())
        a_team_form = float(tab.loc[away_rows, form_column].item())
        tab.loc[home_rows, form_column] -= (gamma * (h_team_form - a_team_form))
        tab.loc[away_rows, form_column] -= (gamma * (a_team_form - h_team_form))

In [114]:
def update_table(new_table, matches):
    """Apply a set of played matches to a league table.

    For each match the result-dependent columns are updated through
    ``update_table_win`` / ``update_table_draw``, then the match statistics
    are added to both teams' totals. ``new_table`` is modified in place.

    Args:
        new_table: League table to update.
        matches: Frame with the matches to apply.

    Returns:
        The updated table sorted by ``Points`` in descending order.
    """
    for match in matches.itertuples():
        home, away = match.HomeTeam, match.AwayTeam
        home_goals, away_goals = match.GoalsHome, match.GoalsAway

        if home_goals > away_goals:
            update_table_win(new_table, home, away)
        elif home_goals < away_goals:
            update_table_win(new_table, away, home)
        elif home_goals == away_goals:
            update_table_draw(new_table, home, away)

        # Table column -> (amount for the home team, amount for the away team).
        match_stats = {
            "GF": (home_goals, away_goals),
            "GA": (away_goals, home_goals),
            "Corners": (match.CornersHome, match.CornersAway),
            "Offsides": (match.OffsidesHome, match.OffsidesAway),
            "FoulsCommited": (match.FoulsHome, match.FoulsAway),
            "ShotsTarget": (match.ShotsTargetHome, match.ShotsTargetAway),
            "Possession": (match.HomeTeamPossession, match.AwayTeamPossession),
        }

        home_rows = new_table["Team"] == home
        away_rows = new_table["Team"] == away
        for column, (home_amount, away_amount) in match_stats.items():
            new_table.loc[home_rows, column] += home_amount
            new_table.loc[away_rows, column] += away_amount

    return new_table.sort_values(["Points"], ascending = False)

In [115]:
def create_tables():
    """Build the league table after every round of every covered season.

    Covers seasons 2011/2012 to 2018/2019 (rounds 1-34) and 2020/2021
    (rounds 1-14), reading matches from the global ``all_matches``.

    Returns:
        ``{season: {round: table}}`` where round 0 is the empty starting
        table and round ``r`` is the table after applying round ``r``.
    """
    seasons = [f"{year}/{year + 1}" for year in range(2011, 2019)]
    seasons.append("2020/2021")

    tables = {}
    for season in seasons:
        season_matches = all_matches.loc[all_matches['Season'] == season]
        opening_round = season_matches.loc[season_matches['Round'] == 1]

        # Exclusive upper bound of the round loop.
        end = 15 if season == "2020/2021" else 35

        season_tables = {0: create_empty_table(opening_round)}
        for rnd in range(1, end):
            round_matches = season_matches.loc[season_matches['Round'] == rnd]
            # Work on a copy so the previous round's table stays untouched.
            season_tables[rnd] = update_table(season_tables[rnd - 1].copy(), round_matches)
        tables[season] = season_tables
    return tables

In [116]:
# Build all league tables once; later cells look them up as tables[season][round].
tables = create_tables()

## Creating features

In [117]:
def get_last_k_rounds_table(season, rnd, k):
    """Return the per-team totals accumulated over the last ``k`` rounds.

    The result is the table after round ``rnd`` minus the table after round
    ``rnd - k`` (both taken from the global ``tables``), restricted to the
    team name and the cumulative columns.

    Args:
        season: Season key of ``tables``.
        rnd: Last round included in the window.
        k: Number of rounds in the window.

    Returns:
        A new DataFrame with ``Team`` plus the differenced columns, or
        ``None`` when ``k > rnd`` (not enough rounds played yet).
    """
    if k > rnd:
        return None

    # Defined before the loop so the final column selection also works when
    # the table has no rows.
    rel_columns = ["Won", "Drawn", "Lost", "GF", "GA", "Points", "FoulsCommited",
                   "Offsides", "ShotsTarget", "Corners", "Possession"]

    l_k_tab = tables[season][rnd].copy()  # copy: the stored table must stay intact
    m_k_tab = tables[season][rnd - k]

    for team in l_k_tab["Team"]:
        later_rows = l_k_tab["Team"] == team
        earlier_rows = m_k_tab["Team"] == team
        for column in rel_columns:
            # The subtraction aligns on the row index of the two tables.
            l_k_tab.loc[later_rows, column] -= m_k_tab.loc[earlier_rows, column]

    return l_k_tab[["Team"] + rel_columns]

In [118]:
def get_points_for_round(team, season, rnd):
    """Return the points ``team`` earned in round ``rnd`` of ``season``.

    The match is looked up in the global ``all_matches``, first with the team
    as home side and, if not found, as away side.

    Args:
        team: Team name.
        season: Season string as stored in ``all_matches``.
        rnd: Round number.

    Returns:
        3 for a win, 1 for a draw, 0 for a loss.

    Raises:
        ValueError: If the team does not have exactly one match in that round.
    """
    in_round = (all_matches['Season'] == season) & (all_matches['Round'] == rnd)

    match = all_matches.loc[in_round & (all_matches["HomeTeam"] == team)]
    home = len(match) > 0
    if not home:
        match = all_matches.loc[in_round & (all_matches["AwayTeam"] == team)]

    if len(match) != 1:
        raise ValueError(
            f"expected exactly one match for {team!r} in round {rnd} of {season}, found {len(match)}"
        )

    # Extract the scalars explicitly; int(Series) is deprecated since pandas 2.1.
    goals_home = int(match["GoalsHome"].iloc[0])
    goals_away = int(match["GoalsAway"].iloc[0])

    if goals_home == goals_away:
        return 1
    team_goals, opponent_goals = (goals_home, goals_away) if home else (goals_away, goals_home)
    return 3 if team_goals > opponent_goals else 0

In [119]:
def weighted_points(season, team, j, k):
    """Return the linearly weighted share of points won in rounds ``j-k`` .. ``j-1``.

    The oldest round of the window has weight 1 and the most recent weight
    ``k``. The weighted total is divided by the value reached when every
    round in the window is won (3 points each).

    Args:
        season: Season string.
        team: Team name.
        j: First round after the window.
        k: Window length in rounds.

    Returns:
        The normalised weighted points as a float.
    """
    first_round = j - k
    weighted_total = 0
    for weight, rnd in enumerate(range(first_round, j), start=1):
        weighted_total += weight * get_points_for_round(team, season, rnd)

    best_possible = 3 * k * ((k + 1) / 2)
    return weighted_total / best_possible

In [120]:
def get_players_data_for_match(fifa, team, last_names):
    """Select the rows of ``all_players`` for one team's line-up.

    A row is selected when its FIFA edition equals ``fifa``, its team name
    contains ``team`` and its player name contains at least one of
    ``last_names``. All names are matched literally.

    Args:
        fifa: FIFA edition as a two-digit string or int.
        team: Team name as used in the match data.
        last_names: Last names of the players in the line-up.

    Returns:
        The matching rows of ``all_players`` (empty when there are no usable
        last names).
    """
    # Escape every name so regex metacharacters are taken literally, and drop
    # empty names, which would otherwise match every player of the team.
    escaped_names = [re.escape(name) for name in last_names if name]
    if not escaped_names:
        return all_players.iloc[0:0]

    players_data = all_players.loc[
        (all_players["FIFA"] == int(fifa))
        & all_players['Team'].str.contains(team, regex=False, na=False)
        & all_players["Name"].str.contains('|'.join(escaped_names), na=False)
    ]
    return players_data

In [121]:
def _goalkeeper_average(squad, gk_attributes, fallback=70):
    """Mean of all goalkeeper attributes over the squad's goalkeepers (``fallback`` if none)."""
    keepers = squad.loc[squad["Positions"].str.contains("GK"), gk_attributes]
    if len(keepers) == 0:
        return fallback
    return np.average(keepers.values)


def _top_n_average(squad, attribute, n):
    """Mean of the ``n`` highest values of ``attribute`` in the squad."""
    ranked = squad.sort_values(attribute, ascending=False)[attribute]
    return np.average(ranked[0:n])


def _top_n_tackling_average(squad, tackling_attr, n):
    """Mean combined tackling score of the first ``n`` players when sorted by ``tackling_attr``."""
    best = squad.sort_values(tackling_attr, ascending=False)[tackling_attr][0:n]
    combined = [stand + slide
                for stand, slide in zip(best["Stand_Tackle"], best["Slide_Tackle"])]
    return sum(combined) / len(combined)


def _outfield_average(squad, attribute):
    """Mean of ``attribute`` over all players whose positions do not include GK."""
    is_outfield = ~squad["Positions"].str.contains("GK")
    return np.average(squad.loc[is_outfield, attribute].values)


def get_players_features(match):
    """Compute the player-based feature differences (home minus away) for a match.

    The line-ups are resolved through ``get_players_data_for_match`` using
    the last names listed in the match. The returned list contains, in order:

    * one value per physical/mental attribute (outfield averages);
    * one value per ball-skill attribute (top-n average against the
      opponent's top-n tackling);
    * one value per passing attribute (top-n average against the opponent's
      top-n marking);
    * one value per attacking attribute (outfield average against the
      opponent's goalkeeper average).

    Args:
        match: Row object exposing ``HomeTeam``, ``AwayTeam``, ``Season``,
            ``HomeTeamPlayers`` and ``AwayTeamPlayers``.

    Returns:
        List of 25 feature values.
    """
    n = 5  # number of best players considered per attribute

    fifa = match.Season.split("/")[1][-2:]
    home_surnames = [full_name.split(" ")[-1] for full_name in match.HomeTeamPlayers.split("/")]
    away_surnames = [full_name.split(" ")[-1] for full_name in match.AwayTeamPlayers.split("/")]

    h_players = get_players_data_for_match(fifa, match.HomeTeam, home_surnames)
    a_players = get_players_data_for_match(fifa, match.AwayTeam, away_surnames)

    gk_attributes = ["GK_Positioning", "GK_Diving", "GK_Handling", "GK_Kicking", "GK_Reflexes"]
    phys_ment_attributes = ["Acceleration", "Stamina", "Strength", "Balance",
                            "Sprint_Speed", "Agility", "Jumping", "Aggression", "Reactions",
                            "Att._Position", "Interceptions", "Vision"]
    pass_attributes = ["Crossing", "Short_Pass", "Long_Pass"]
    attack_attributes = ["Heading", "Shot_Power", "Finishing", "Long_Shots",
                         "Curve", "FK_Acc.", "Penalties", "Volleys"]
    tackling_attr = ["Stand_Tackle", "Slide_Tackle"]
    ball_skils_attr = ["Ball_Control", "Dribbling"]

    # Defensive reference values of each side.
    gk_avg_h = _goalkeeper_average(h_players, gk_attributes)
    gk_avg_a = _goalkeeper_average(a_players, gk_attributes)
    marking_h = _top_n_average(h_players, "Marking", n)
    marking_a = _top_n_average(a_players, "Marking", n)
    tackling_h = _top_n_tackling_average(h_players, tackling_attr, n)
    tackling_a = _top_n_tackling_average(a_players, tackling_attr, n)

    players_features = [
        _outfield_average(h_players, attr) - _outfield_average(a_players, attr)
        for attr in phys_ment_attributes
    ]

    # Ball skills are set against the opponent's tackling, passing against
    # the opponent's marking.
    duels = ((ball_skils_attr, tackling_h, tackling_a),
             (pass_attributes, marking_h, marking_a))
    for attributes, defence_h, defence_a in duels:
        for attr in attributes:
            home_edge = _top_n_average(h_players, attr, n) - defence_a
            away_edge = _top_n_average(a_players, attr, n) - defence_h
            players_features.append(home_edge - away_edge)

    # Attacking attributes are set against the opponent's goalkeeper.
    for attr in attack_attributes:
        home_edge = _outfield_average(h_players, attr) - gk_avg_a
        away_edge = _outfield_average(a_players, attr) - gk_avg_h
        players_features.append(home_edge - away_edge)

    return players_features

In [134]:
def create_features(k = 5):
    """Build one feature row for every match played after round ``k``.

    For each such match the row combines, as home-minus-away differences:
    the form values and standings from the table before the match, per-round
    averages over the last ``k`` rounds, FIFA club ratings, weighted points
    and the player-based features, followed by the match result.

    Reads the globals ``all_matches``, ``clubs`` and ``tables`` and prints
    the running match counter every 20 matches.

    Args:
        k: Number of preceding rounds used for the "last k" features.

    Returns:
        List of rows (lists) in the column order expected by
        ``export_features_to_csv``.

    Raises:
        ValueError: If a team does not match exactly one row of a lookup
            table.
    """
    def single(frame, column):
        # Value of ``column`` in a frame that must hold exactly one row.
        # Replaces int(Series)/float(Series), deprecated since pandas 2.1.
        return frame[column].item()

    def int_diff(home_rows, away_rows, column):
        return int(single(home_rows, column)) - int(single(away_rows, column))

    def float_diff(home_rows, away_rows, column):
        return float(single(home_rows, column)) - float(single(away_rows, column))

    dataset = []
    matches_after_k_round = all_matches.loc[all_matches["Round"] > k]
    for i, match in enumerate(matches_after_k_round.itertuples()):

        if i % 20 == 0:
            print(i)

        season = match.Season
        rnd = match.Round
        fifa_version = int(season.split("/")[1][-2:])
        home_team, away_team = match.HomeTeam, match.AwayTeam

        # Club names are matched literally (substring match, no regex).
        clubs_of_edition = clubs["FIFA"] == fifa_version
        home_team_fifa_data = clubs.loc[
            clubs_of_edition & clubs["Name"].str.contains(home_team, regex=False, na=False)]
        away_team_fifa_data = clubs.loc[
            clubs_of_edition & clubs["Name"].str.contains(away_team, regex=False, na=False)]

        # Standings before the match is played.
        table = tables[season][rnd - 1]
        home_team_table_data = table.loc[table["Team"] == home_team]
        away_team_table_data = table.loc[table["Team"] == away_team]

        # Select from the last-k table with a mask built from that same table.
        last_k_table = get_last_k_rounds_table(season, rnd - 1, k)
        h_last_k_table_data = last_k_table.loc[last_k_table["Team"] == home_team]
        a_last_k_table_data = last_k_table.loc[last_k_table["Team"] == away_team]

        att_diff = int_diff(home_team_fifa_data, away_team_fifa_data, "Attack")
        mid_diff = int_diff(home_team_fifa_data, away_team_fifa_data, "Midfield")
        def_diff = int_diff(home_team_fifa_data, away_team_fifa_data, "Defence")
        rat_diff = int_diff(home_team_fifa_data, away_team_fifa_data, "Rating")
        bud_diff = int_diff(home_team_fifa_data, away_team_fifa_data, "TransferBudget")

        h_gf, h_ga = int(single(home_team_table_data, "GF")), int(single(home_team_table_data, "GA"))
        a_gf, a_ga = int(single(away_team_table_data, "GF")), int(single(away_team_table_data, "GA"))
        # Difference of the two goal differences; the away side uses its own
        # goals against.
        g_diff = (h_gf - h_ga) - (a_gf - a_ga)
        points_diff = int_diff(home_team_table_data, away_team_table_data, "Points")

        last_k_corners_diff = int_diff(h_last_k_table_data, a_last_k_table_data, "Corners") / k
        last_k_goals_diff = int_diff(h_last_k_table_data, a_last_k_table_data, "GF") / k
        last_k_shots_diff = int_diff(h_last_k_table_data, a_last_k_table_data, "ShotsTarget") / k
        last_k_offsides_diff = int_diff(h_last_k_table_data, a_last_k_table_data, "Offsides") / k
        last_k_possession_diff = int_diff(h_last_k_table_data, a_last_k_table_data, "Possession") / k
        last_k_fouls_diff = int_diff(h_last_k_table_data, a_last_k_table_data, "FoulsCommited") / k
        last_k_points_diff = int_diff(h_last_k_table_data, a_last_k_table_data, "Points") / k
        last_k_points_w_diff = weighted_points(season, home_team, rnd, k) - weighted_points(season, away_team, rnd, k)

        form_diff_01 = float_diff(home_team_table_data, away_team_table_data, "Form_0.1")
        form_diff_033 = float_diff(home_team_table_data, away_team_table_data, "Form_0.33")
        form_diff_05 = float_diff(home_team_table_data, away_team_table_data, "Form_0.5")
        form_diff_066 = float_diff(home_team_table_data, away_team_table_data, "Form_0.66")

        fifa_players_features = get_players_features(match)

        result = "H"
        if match.GoalsHome < match.GoalsAway:
            result = "A"
        elif match.GoalsHome == match.GoalsAway:
            result = "D"

        resultGoals = match.GoalsHome - match.GoalsAway

        row = [season, rnd, home_team, away_team, form_diff_01, form_diff_033,
               form_diff_05, form_diff_066, last_k_points_diff,
               last_k_shots_diff, last_k_goals_diff, last_k_corners_diff,
               last_k_offsides_diff, last_k_possession_diff, last_k_fouls_diff,
               att_diff, mid_diff, def_diff, rat_diff, bud_diff, g_diff, points_diff,
               last_k_points_w_diff, *fifa_players_features, result, resultGoals]

        dataset.append(row)

    return dataset

In [135]:
def export_features_to_csv(dataset, k):
    """Label the feature rows, write them to disk and return them as a frame.

    Args:
        dataset: List of feature rows with 50 values each.
        k: Window length used to build the features; it is part of the
            output file name.

    Returns:
        The DataFrame that was written to ``data/learning_data_<k>.csv``.
    """
    match_columns = ["Season", "Round", "Home", "Away"]
    form_columns = ["FormDiff_01", "FormDiff_033", "FormDiff_05", "FormDiff_066"]
    last_k_columns = ["L_k_Pts_Diff", "L_k_St_A_Diff", "L_k_Gls_A_Diff",
                      "L_k_cor_A_Diff", "L_k_Off_A_Diff", "L_k_Poss_A_Diff",
                      "L_k_Fls_A_Diff"]
    club_columns = ["Att_Diff", "Mid_Diff", "Def_Diff", "Rat_Diff", "Bud_Diff"]
    standing_columns = ["Goal_Diff", "Pts_Diff", "W_Pts_Diff"]
    player_columns = ["Accel_Diff", "Stam_Diff", "Str_Diff", "Balance_Diff",
                      "Sprint_Diff", "Agility_Diff", "Jumping_Diff",
                      "Aggression_Diff", "Reactions_Diff", "Att._Position_Diff",
                      "Interceptions_Diff", "Vision_Diff", "Ball_Ctrl_Diff",
                      "Dribbling_Diff", "Crossing_Diff", "Short_Pass_Diff",
                      "Long_Pass_Diff", "Heading_Diff", "Shot_Power_Diff",
                      "Finishing_Diff", "Long_Shots_Diff", "Curve_Diff",
                      "FK_Acc._Diff", "Penalties_Diff", "Volleys_Diff"]
    target_columns = ["Result", "ResultGoals"]

    columns = (match_columns + form_columns + last_k_columns + club_columns
               + standing_columns + player_columns + target_columns)

    df = pd.DataFrame(dataset, columns = columns)
    df.to_csv(f"data/learning_data_{k}.csv")
    return df

In [136]:
# k is the number of preceding rounds used for the "last k" features;
# create_features only produces rows for matches played after round k.
k = 5
data = create_features(k)

0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560
580
600
620
640
660
680
700
720
740
760
780
800
820
840
860
880
900
920
940
960
980
1000
1020
1040
1060
1080
1100
1120
1140
1160
1180
1200
1220
1240
1260
1280
1300
1320
1340
1360
1380
1400
1420
1440
1460
1480
1500
1520
1540
1560
1580
1600
1620
1640
1660
1680
1700
1720
1740
1760
1780
1800
1820
1840
1860
1880
1900
1920
1940
1960
1980
2000
2020
2040
2060
2080
2100
2120
2140
2160


In [137]:
# Write the feature rows to data/learning_data_<k>.csv and display the resulting frame.
df = export_features_to_csv(data, k)
df

Unnamed: 0,Season,Round,Home,Away,FormDiff_01,FormDiff_033,FormDiff_05,FormDiff_066,L_k_Pts_Diff,L_k_St_A_Diff,...,Long_Pass_Diff,Heading_Diff,Shot_Power_Diff,Finishing_Diff,Long_Shots_Diff,Curve_Diff,FK_Acc._Diff,Penalties_Diff,Volleys_Diff,Result
0,2011/2012,6,Freiburg,Stuttgart,-0.198720,-0.596312,-0.750000,-0.713728,-0.6,-0.6,...,0.4,-10.934722,-9.059722,-10.802778,-2.087500,2.023611,-37.705556,-46.775000,-41.448611,A
1,2011/2012,6,Bayer Leverkusen,Köln,0.386190,1.087157,1.343750,1.308677,1.2,1.2,...,17.6,4.446154,-1.946154,5.507692,8.930769,7.846154,-9.807692,-9.315385,-7.630769,A
2,2011/2012,6,Hamburger SV,Borussia Mönchengladbach,-0.504000,-1.034748,-1.000000,-0.753984,-1.8,-2.0,...,1.2,-0.700000,-1.900000,-9.000000,-7.100000,-4.300000,-10.200000,-9.300000,-2.600000,A
3,2011/2012,6,Nürnberg,Werder Bremen,-0.193600,-0.647411,-1.000000,-1.275395,-0.6,-1.0,...,5.8,2.624762,-0.818095,-6.046667,-11.289524,-13.160952,-16.018095,-3.703810,-16.575238,D
4,2011/2012,6,Hoffenheim,Wolfsburg,0.182200,0.287322,-0.125000,-1.036073,0.6,-1.6,...,-7.4,-0.482500,5.852500,23.597500,14.877500,27.892500,-13.700000,-20.860000,-18.690000,H
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2164,2020/2021,14,Arminia Bielefeld,Borussia Mönchengladbach,-0.431660,-0.514908,-0.410278,-0.272522,0.0,-0.2,...,-17.8,-15.200000,-19.400000,-19.900000,-22.800000,-27.600000,-23.200000,-22.200000,-24.800000,A
2165,2020/2021,14,Hertha BSC,Schalke 04,0.401421,0.555587,0.374146,0.180599,1.0,0.4,...,-5.4,14.400000,18.075000,11.525000,15.875000,10.625000,9.700000,8.475000,14.475000,H
2166,2020/2021,14,Stuttgart,RB Leipzig,-0.471471,-0.909788,-0.958740,-0.903988,-0.8,1.4,...,-10.6,-5.000000,-15.400000,-20.100000,-21.700000,-20.800000,-30.100000,-14.900000,-20.400000,A
2167,2020/2021,14,Borussia Dortmund,Wolfsburg,-0.349894,-1.109683,-1.424805,-1.354143,-1.2,-0.2,...,16.6,3.666667,3.677778,4.900000,2.922222,11.855556,11.611111,8.877778,9.755556,H
