In [1]:
import pandas as pd
import numpy as np

# Show every column when a DataFrame is rendered (no column truncation).
pd.set_option('display.max_columns', None)

# NOTE(review): these are absolute, machine-specific paths; the notebook will
# not run elsewhere without editing them. Consider a single configurable
# data directory and paths built relative to it.
path = r'/Users/tom/Documents/Coding/AiCore/Projects/4. Football Match Outcome Predictor /Cleaned Datasets/premier_league_features_df.csv'
path_2 = r'/Users/tom/Documents/Coding/AiCore/Projects/4. Football Match Outcome Predictor /Cleaned Datasets/combined_elo_with_features_df.csv'
path_3 = r'/Users/tom/Documents/Coding/AiCore/Projects/4. Football Match Outcome Predictor /Cleaned Datasets/ML_split/epl_spd_populated_df.csv'
path_4 = r'/Users/tom/Documents/Coding/AiCore/Projects/4. Football Match Outcome Predictor /Cleaned Datasets/ML_split/segunda_division_populated_df.csv'



# Load the four cleaned CSV datasets used by the cells below.
premier_league_features_df = pd.read_csv(path)
combined_elo_with_features_df = pd.read_csv(path_2)
epl_spd_populated_df = pd.read_csv(path_3)
segunda_division_populated_df = pd.read_csv(path_4)




In [3]:
# Helper functions: decade/division splitting, running goal averages, and ML dataset preparation

def _split_dataset_into_decades(combined_elo_df):
    '''split_dataset_into_decades 
    Function to split dataset into three datasets each approximately 10 years long
    '''

    combined_elo_df = combined_elo_df.sort_values(by=['Season'], ascending=[True])

    decade_1 = (combined_elo_df["Season"].unique()[0:11])
    decade_2 = (combined_elo_df["Season"].unique()[10:21])
    decade_3 = (combined_elo_df["Season"].unique()[20:32])

    season_grouped_df = combined_elo_df.groupby("Season")

    decade_1_df = pd.DataFrame()
    decade_2_df = pd.DataFrame()
    decade_3_df = pd.DataFrame()

    for season in decade_1:
        decade_df = season_grouped_df.get_group(season)
        decade_1_df = pd.concat([decade_1_df, decade_df], axis=0, ignore_index=True)

    for season in decade_2:
        decade_df = season_grouped_df.get_group(season)
        decade_2_df = pd.concat([decade_2_df, decade_df], axis=0, ignore_index=True)

    for season in decade_3:
        decade_df = season_grouped_df.get_group(season)
        decade_3_df = pd.concat([decade_3_df, decade_df], axis=0, ignore_index=True)


    return decade_1_df, decade_2_df, decade_3_df

def _group_into_divisions(decade_df, division):
    '''_group_into_leagues _summary_

    Arguments:
        decade_df -- _description_
        division -- _description_
    '''

    league_grouped_df = decade_df.groupby("League")
    division_df = league_grouped_df.get_group(division)

    return division_df

def _create_average_feature_columns(split_df):
    '''create_new_feature_columns 
    Function to create columns filled with zeros for each required feature

    Arguments:
        combined_local_elo_df -- _description_
    '''
    # Get an average goals_scored and conceeded

    split_df["home_team_average_goals_scored_per_game"] = 0
    split_df["home_team_average_goals_conceeded_per_game"] = 0
    split_df["away_team_average_goals_scored_per_game"] = 0
    split_df["away_team_average_goals_conceeded_per_game"] = 0

    return split_df

def _create_split_averages_summary_template(split_df):
    '''create_summary_template 
    Function to create auxiliary template summary per decade

    Arguments:
        decade_df -- _description_

    Returns:
        _description_
    '''
    summary_df_template = pd.DataFrame(columns=
    [
        'team_name', 'league', 'total_goals_scored', 'total_goals_conceeded', 'games_played', 'goals_scored_per_game', 
        'goals_conceeded_per_game'
        ])
    team_group = split_df.groupby("Home_Team")
    home_teams = list(split_df["Home_Team"].unique())
    away_teams = list(split_df["Away_Team"].unique())
    missing_teams = list(set(away_teams) - set(home_teams))
    team_list = home_teams + missing_teams

    for team_name in team_list:
        league = team_group.first()["League"].unique()[0]
        team_stats_dict = {
            'team_name': team_name, 
            'league': league, 'total_goals_scored': 0, 
            'total_goals_conceeded': 0, 'games_played': 0, 'goals_scored_per_game':0, 
            'goals_conceeded_per_game':0
            }
        team_stats_dict_df = pd.DataFrame([team_stats_dict])
        summary_df_template = pd.concat([summary_df_template, team_stats_dict_df], ignore_index=True)


    print("Summary Template Complete")
    return summary_df_template

def populate_df_with_averages(split_df, summary_df_template):
    '''Fill the average-goals-per-game feature columns of a split.

    Steps:
    1. Order the split by season and round.
    2. Walk through each season, and each round within it, in order.
    3. At the start of a round, copy each playing team's current running
       averages from the summary table into that round's rows. This happens
       BEFORE the round's results are counted, so a match only ever sees
       averages from earlier matches.
    4. Add the round's results to the summary table (running totals, games
       played and per-game averages).
    5. After the last round of a season, write the season's rows back into
       the split.

    The running totals are never reset between seasons.

    Arguments:
        split_df -- DataFrame of matches with Season, Round, League,
            Home_Team, Away_Team, Home_Goals, Away_Goals and the four
            ``*_average_goals_*_per_game`` feature columns. The caller's
            frame is not modified; a sorted copy is.
        summary_df_template -- per-team running-totals table as built by
            ``_create_split_averages_summary_template``. It is updated in
            place and holds the final totals when the function returns.

    Returns:
        The split sorted by season and round, with the feature columns
        populated.
    '''
    # (helper, feature column in the split, source column in the summary)
    feature_map = (
        (_add_home_team_feature_total_to_df, "home_team_average_goals_scored_per_game", "goals_scored_per_game"),
        (_add_home_team_feature_total_to_df, "home_team_average_goals_conceeded_per_game", "goals_conceeded_per_game"),
        (_add_away_team_feature_total_to_df, "away_team_average_goals_scored_per_game", "goals_scored_per_game"),
        (_add_away_team_feature_total_to_df, "away_team_average_goals_conceeded_per_game", "goals_conceeded_per_game"),
    )

    split_df = split_df.sort_values(['Season', 'Round'], ascending=[True, True])

    season_df_grouped = split_df.groupby("Season")

    # Rows with a missing season or round cannot be grouped and are skipped.
    for season in split_df["Season"].dropna().unique():
        # Work on an explicit copy: get_group returns a slice of split_df, and
        # writing into it raises SettingWithCopyWarning.
        season_df = season_df_grouped.get_group(season).copy()
        round_df_grouped = season_df.groupby("Round")

        for round_number in season_df["Round"].dropna().unique():
            print(f"Starting round {round_number}, season {season}")

            round_df = round_df_grouped.get_group(round_number)
            teams_in_round = set(round_df["Home_Team"]) | set(round_df["Away_Team"])

            # Set the values carried over from the previous matches. Teams
            # that do not play this round have no rows to fill, so skip them.
            for team in list(summary_df_template["team_name"]):
                if team not in teams_in_round:
                    continue
                for add_feature, feature, summary_feature in feature_map:
                    add_feature(feature, summary_feature, team, season_df, summary_df_template, round_number)

            # Only now count this round's results towards the running totals.
            for _, match in round_df.iterrows():
                home_goals_scored = match["Home_Goals"]
                away_goals_scored = match["Away_Goals"]
                league = match["League"]

                #Home Team
                summary_df_template = _goals_and_averages_computer(summary_df_template, match["Home_Team"], home_goals_scored,
                away_goals_scored, league)

                #Away Team
                summary_df_template = _goals_and_averages_computer(summary_df_template, match["Away_Team"], away_goals_scored,
                home_goals_scored, league)

        split_df = merge_season_df_with_combined_df(split_df, season_df)

        print(f"Average Goals computed and added to df for season {season}")

    return split_df


def _goals_and_averages_computer(summary_df_template, team, goal_option_1, goal_option_2, league):
    '''_goals_and_averages_computer _summary_

    Arguments:
        summary_df_template -- _description_
        team -- _description_
        goal_option_1 -- _description_
        goal_option_2 -- _description_
        league -- _description_

    Returns:
        _description_
    '''

    summary_df_template.loc[((summary_df_template.team_name == team) & (
    summary_df_template.league == league)), "total_goals_scored"] += goal_option_1

    summary_df_template.loc[((summary_df_template.team_name == team) & (
    summary_df_template.league == league)), "total_goals_conceeded"] += goal_option_2

    summary_df_template.loc[((summary_df_template.team_name == team) & (
    summary_df_template.league == league)), "games_played"] += 1

    summary_df_template.loc[((summary_df_template.team_name == team) & (
    summary_df_template.league == league)), "goals_scored_per_game"] = summary_df_template.loc[((summary_df_template.team_name == team) & (
    summary_df_template.league == league)), "total_goals_scored"] / summary_df_template.loc[((summary_df_template.team_name == team) & (
    summary_df_template.league == league)), "games_played"]

    summary_df_template.loc[((summary_df_template.team_name == team) & (
    summary_df_template.league == league)), "goals_conceeded_per_game"] = summary_df_template.loc[((summary_df_template.team_name == team) & (
    summary_df_template.league == league)), "total_goals_conceeded"] / summary_df_template.loc[((summary_df_template.team_name == team) & (
    summary_df_template.league == league)), "games_played"]

    return summary_df_template


def _add_home_team_feature_total_to_df(feature,summary_feature, team, df, summary_df_template, round):
    '''_add_home_team_feature_total_to_df _summary_

    Arguments:
        feature -- _description_
        summary_feature -- _description_
        team -- _description_
        df -- _description_
        summary_df_template -- _description_
        round -- _description_

    Returns:
        _description_
    '''
    
    df.loc[(df.Home_Team == team) & ((
                df.Round == round)), feature] = summary_df_template.loc[(
                    summary_df_template.team_name == team), summary_feature].values[0]
    return df

def _add_away_team_feature_total_to_df(feature,summary_feature, team, df, summary_df_template, round):
    '''_add_away_team_feature_total_to_df _summary_

    Arguments:
        feature -- _description_
        summary_feature -- _description_
        team -- _description_
        df -- _description_
        summary_df_template -- _description_
        round -- _description_

    Returns:
        _description_
    '''
    
    df.loc[((df.Away_Team == team) & (
                df.Round == round)), feature] = summary_df_template.loc[((
                    summary_df_template.team_name == team)), summary_feature].values[0]
    return df

def merge_season_df_with_combined_df(split_df, populated_season_df):
    '''Write a populated season back into the full split.

    Arguments:
        split_df -- the full split; updated in place
        populated_season_df -- rows of one season with populated features,
            aligned to split_df by index label and column name

    Returns:
        The same split_df object, with matching cells overwritten by the
        non-NA values of populated_season_df.
    '''
    split_df.update(other=populated_season_df, overwrite=True)
    return split_df



# Function to remove partially completed seasons

def _remove_incomplete_seasons(missing_information_list, combined_elo_with_features_df):

    missing_info_set = set(missing_information_list)
    missing_info_deduplicated_list = list(missing_info_set)
    missing_info_deduplicated_list
    for item in missing_info_deduplicated_list:
        league = item.split(": ")[0]
        season = item.split(": ")[1]
        combined_elo_with_features_df.drop(combined_elo_with_features_df.index[(combined_elo_with_features_df.League == league)&(
                        combined_elo_with_features_df.Season == season)], inplace=True)
    
    return combined_elo_with_features_df

def _drop_loader_season(split_df, loader_season):
    '''drop_loader_season 
    Each season includes the year before for the average goals p/game statistic.
    This season is then dropped from the dataset so that the early rounds of each 
    season are no so heavily wegithed on the game prior


    Arguments:
        split_df -- _description_
        loader_season -- _description_

    Returns:
        _description_
    '''
    

    split_df.drop(split_df.index[(split_df.Season == loader_season)], inplace=True)

    return split_df

def _prepare_dataset_for_ML_models(df):


    df.drop(axis=1, columns=['Result','Round', 'Link', 'Home_Win', 'Away_Win', 'Home_Team', 'Away_Team', 'Season', 'League', 'City', 'Country', 'Stadium', 'Pitch', 'Capacity'], inplace=True)
    df["Home_Result"] = "unknown"
    df.loc[(df.Home_Goals > df.Away_Goals), "Home_Result"] = "win"
    df.loc[(df.Home_Goals < df.Away_Goals), "Home_Result"] = "loss"
    df.loc[(df.Home_Goals == df.Away_Goals), "Home_Result"] = "draw"
    df.drop(axis=1, columns=['Home_Goals', 'Away_Goals'], inplace=True)
    df.dropna(subset=["ELO_home"], inplace = True)
    
    return df

## For the shortened decade: English and Spanish top-flight datasets

In [None]:
# Split the combined EPL / Primera División dataset into three decade frames
# (only the third one is used below).
decade_1_df, decade_2_df, decade_3_df = _split_dataset_into_decades(epl_spd_populated_df)

# Separate the third decade into one frame per division

premiership = 'premier_league'
primera_division = 'primera_division'

premiership_decade_3_df = _group_into_divisions(decade_3_df, premiership)
primera_division_decade_3_df = _group_into_divisions(decade_3_df, primera_division) 

# Premier League: add the feature columns, build the per-team summary
# template, then populate the running goal averages.

premiership_decade_3_with_features_df = _create_average_feature_columns(premiership_decade_3_df)
premiership_summary_df_template = _create_split_averages_summary_template(premiership_decade_3_with_features_df)
populated_premiership_decade_3_df = populate_df_with_averages(premiership_decade_3_with_features_df, premiership_summary_df_template)

# Primera División: same three steps.
primera_division_decade_3_with_features_df = _create_average_feature_columns(primera_division_decade_3_df)
primera_division_summary_df_template = _create_split_averages_summary_template(primera_division_decade_3_with_features_df)
populated_primera_division_decade_3_df = populate_df_with_averages(primera_division_decade_3_with_features_df, primera_division_summary_df_template)

# Drop the 2010 loader season now that the averages are computed.
# NOTE: _drop_loader_season drops in place and returns its argument, so each
# *_loader_dropped_df name refers to the same object as the populated_* frame.
populated_premiership_decade_3_loader_dropped_df = _drop_loader_season(populated_premiership_decade_3_df, 2010)
populated_primera_division_decade_3_loader_dropped_df = _drop_loader_season(populated_primera_division_decade_3_df, 2010)


In [31]:
# Reduce both frames to model features plus the Home_Result label.
# NOTE: _prepare_dataset_for_ML_models modifies its argument in place and
# returns it, so the *_loader_dropped_df frames are changed too; re-running
# this cell on the already-prepared frames fails on the missing columns.
populated_premiership_decade_3_ml_df = _prepare_dataset_for_ML_models(populated_premiership_decade_3_loader_dropped_df)
populated_primera_division_decade_3_ml_df = _prepare_dataset_for_ML_models(populated_primera_division_decade_3_loader_dropped_df)

In [39]:
#path_1 = r'/Users/tom/Documents/Coding/AiCore/Projects/4. Football Match Outcome Predictor /Cleaned Datasets/ML_split/classification_premiership_decade_3_dataset.csv'
#path_2 = r'/Users/tom/Documents/Coding/AiCore/Projects/4. Football Match Outcome Predictor /Cleaned Datasets/ML_split/classification_primera_division_decade_3_dataset.csv'

#populated_premiership_decade_3_ml_df.to_csv(path_1, index=False)
#populated_primera_division_decade_3_ml_df.to_csv(path_2, index=False)


In [29]:
populated_premiership_decade_3_loader_dropped_df

Unnamed: 0,Home_Team,Away_Team,Result,Link,Season,Round,League,Home_Goals,Away_Goals,Home_Win,Away_Win,City,Country,Stadium,Capacity,Pitch,ELO_home,ELO_away,home_team_total_goals_scored_so_far,home_team_total_goals_conceeded_so_far,home_team_current_win_streak,home_team_current_loss_streak,home_team_total_points_so_far,home_team_current_goal_drought,home_team_total_wins_so_far,away_team_total_goals_scored_so_far,away_team_total_goals_conceeded_so_far,away_team_current_win_streak,away_team_current_loss_streak,away_team_total_points_so_far,away_team_current_goal_drought,away_team_total_wins_so_far,home_team_average_goals_scored_per_game,home_team_average_goals_conceeded_per_game,away_team_average_goals_scored_per_game,away_team_average_goals_conceeded_per_game
779,Liverpool,Arsenal,1-1,https://www.besoccer.com/match/liverpool/arsen...,2011.0,1.0,premier_league,1.0,1.0,False,False,Liverpool,England,Anfield,54074.0,Natural,93.0,93.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.605263,0.921053,2.184211,1.078947
785,Man. Utd,Newcastle,3-0,https://www.besoccer.com/match/manchester-unit...,2011.0,1.0,premier_league,3.0,0.0,True,False,Mánchester,England,Old Trafford,76212.0,Natural,97.0,77.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.263158,0.736842,0.000000,0.000000
815,Tottenham Hotspur,Man. City,0-0,https://www.besoccer.com/match/tottenham-hotsp...,2011.0,1.0,premier_league,0.0,0.0,False,False,Londres,England,Tottenham Hotspur Stadium,62062.0,Natural,86.0,83.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.763158,1.078947,1.921053,1.184211
838,Aston Villa,West Ham,3-0,https://www.besoccer.com/match/aston-villa-fc/...,2011.0,1.0,premier_league,3.0,0.0,True,False,Birmingham,England,Villa Park,42788.0,Natural,84.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.368421,1.026316,1.236842,1.736842
908,Chelsea,West Bromwich Albion,6-0,https://www.besoccer.com/match/chelsea-fc/west...,2011.0,1.0,premier_league,6.0,0.0,True,False,Londres,England,Stamford Bridge,41841.0,Natural,96.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.710526,0.842105,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8722,West Ham,Leicester,3-2,https://www.besoccer.com/match/west-ham-united...,2021.0,31.0,premier_league,3.0,2.0,True,False,Londres,England,London Stadium,80000.0,Natural,78.0,84.0,48.0,37.0,1.0,0.0,52.0,0.0,15.0,53.0,34.0,0.0,1.0,56.0,1.0,17.0,1.287805,1.521951,1.507752,1.306202
8823,Brighton & Hove Albion,Everton,0-0,https://www.besoccer.com/match/brighton-amp-ho...,2021.0,31.0,premier_league,0.0,0.0,False,False,,,,,,71.0,80.0,33.0,38.0,0.0,1.0,32.0,0.0,7.0,41.0,38.0,0.0,0.0,47.0,0.0,14.0,0.979167,1.430556,1.407159,1.252796
8853,Liverpool,Aston Villa,2-1,https://www.besoccer.com/match/liverpool/aston...,2021.0,31.0,premier_league,2.0,1.0,True,False,Liverpool,England,Anfield,54074.0,Natural,94.0,75.0,51.0,36.0,1.0,0.0,49.0,0.0,14.0,42.0,31.0,1.0,0.0,44.0,0.0,13.0,1.877232,1.073661,1.093093,1.537538
8864,Crystal Palace,Chelsea,1-4,https://www.besoccer.com/match/crystal-palace-...,2021.0,31.0,premier_league,1.0,4.0,False,True,Londres,England,Selhurst Park,26309.0,Natural,74.0,91.0,32.0,48.0,0.0,0.0,38.0,0.0,10.0,46.0,30.0,0.0,1.0,51.0,0.0,14.0,1.108108,1.415541,1.875000,1.017857


In [36]:
populated_premiership_decade_3_ml_df

Unnamed: 0,ELO_home,ELO_away,home_team_total_goals_scored_so_far,home_team_total_goals_conceeded_so_far,home_team_current_win_streak,home_team_current_loss_streak,home_team_total_points_so_far,home_team_current_goal_drought,home_team_total_wins_so_far,away_team_total_goals_scored_so_far,away_team_total_goals_conceeded_so_far,away_team_current_win_streak,away_team_current_loss_streak,away_team_total_points_so_far,away_team_current_goal_drought,away_team_total_wins_so_far,home_team_average_goals_scored_per_game,home_team_average_goals_conceeded_per_game,away_team_average_goals_scored_per_game,away_team_average_goals_conceeded_per_game,Home_Result
779,93.0,93.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.605263,0.921053,2.184211,1.078947,draw
785,97.0,77.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.263158,0.736842,0.000000,0.000000,win
815,86.0,83.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.763158,1.078947,1.921053,1.184211,draw
838,84.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.368421,1.026316,1.236842,1.736842,win
908,96.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.710526,0.842105,0.000000,0.000000,win
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8722,78.0,84.0,48.0,37.0,1.0,0.0,52.0,0.0,15.0,53.0,34.0,0.0,1.0,56.0,1.0,17.0,1.287805,1.521951,1.507752,1.306202,win
8823,71.0,80.0,33.0,38.0,0.0,1.0,32.0,0.0,7.0,41.0,38.0,0.0,0.0,47.0,0.0,14.0,0.979167,1.430556,1.407159,1.252796,draw
8853,94.0,75.0,51.0,36.0,1.0,0.0,49.0,0.0,14.0,42.0,31.0,1.0,0.0,44.0,0.0,13.0,1.877232,1.073661,1.093093,1.537538,win
8864,74.0,91.0,32.0,48.0,0.0,0.0,38.0,0.0,10.0,46.0,30.0,0.0,1.0,51.0,0.0,14.0,1.108108,1.415541,1.875000,1.017857,loss


## For the full ELO dataset

In [None]:
# Remove incomplete seasons from the full ELO dataset, then reduce it to
# model features plus the Home_Result label.
# NOTE(review): missing_information_list is not defined in any cell shown in
# this notebook; on a fresh kernel this cell raises NameError unless it is
# created elsewhere first. Both helpers modify combined_elo_with_features_df
# in place.
combined_elo_with_partial_seasons_removed_df = _remove_incomplete_seasons(missing_information_list, combined_elo_with_features_df)
combined_elo_numerical_df = _prepare_dataset_for_ML_models(combined_elo_with_partial_seasons_removed_df)



## For the Segunda División

In [9]:
# Split the Segunda División dataset into three decade frames.
# NOTE: this rebinds decade_1_df / decade_2_df / decade_3_df from the
# EPL / Primera División cell above.
decade_1_df, decade_2_df, decade_3_df = _split_dataset_into_decades(segunda_division_populated_df)


# Third decade only: add the feature columns, build the per-team summary
# template, then populate the running goal averages.
segunda_division_decade_3_with_features_df = _create_average_feature_columns(decade_3_df)
segunda_division_summary_df_template = _create_split_averages_summary_template(segunda_division_decade_3_with_features_df)
populated_segunda_division_decade_3_df = populate_df_with_averages(segunda_division_decade_3_with_features_df, segunda_division_summary_df_template)

# Drop the 2010 loader season (in place; both names refer to the same frame).
populated_segunda_division_decade_3_loader_dropped_df = _drop_loader_season(populated_segunda_division_decade_3_df, 2010)


Summary Template Complete
Starting round 1.0, season 2010.0
Starting round 2.0, season 2010.0
Starting round 3.0, season 2010.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[(df.Home_Team == team) & ((
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[((df.Away_Team == team) & (


Starting round 4.0, season 2010.0
Starting round 5.0, season 2010.0
Starting round 6.0, season 2010.0
Starting round 7.0, season 2010.0
Starting round 8.0, season 2010.0
Starting round 9.0, season 2010.0
Starting round 10.0, season 2010.0
Starting round 11.0, season 2010.0
Starting round 12.0, season 2010.0
Starting round 13.0, season 2010.0
Starting round 14.0, season 2010.0
Starting round 15.0, season 2010.0
Starting round 16.0, season 2010.0
Starting round 17.0, season 2010.0
Starting round 18.0, season 2010.0
Starting round 19.0, season 2010.0
Starting round 20.0, season 2010.0
Starting round 21.0, season 2010.0
Starting round 22.0, season 2010.0
Starting round 23.0, season 2010.0
Starting round 24.0, season 2010.0
Starting round 25.0, season 2010.0
Starting round 26.0, season 2010.0
Starting round 27.0, season 2010.0
Starting round 28.0, season 2010.0
Starting round 29.0, season 2010.0
Starting round 30.0, season 2010.0
Starting round 31.0, season 2010.0
Starting round 32.0, seaso

In [12]:
# Spot check of the 2021 Mirandés matches.
# NOTE: only the last expression of a cell is displayed, so the result of this
# first selection is computed and then discarded.
populated_segunda_division_decade_3_df.loc[(populated_segunda_division_decade_3_df.Season == 2021) & ((populated_segunda_division_decade_3_df.Home_Team == "Mirandés")|(populated_segunda_division_decade_3_df.Away_Team == "Mirandés"))]
# Displayed: the per-team running totals left in the template after populate_df_with_averages ran.
segunda_division_summary_df_template

Unnamed: 0,team_name,league,total_goals_scored,total_goals_conceeded,games_played,goals_scored_per_game,goals_conceeded_per_game
0,FC Cartagena,segunda_division,177.0,215.0,160,1.10625,1.34375
1,Recreativo,segunda_division,269.0,300.0,252,1.06746,1.190476
2,Real Murcia,segunda_division,196.0,218.0,168,1.166667,1.297619
3,Huesca,segunda_division,387.0,380.0,336,1.151786,1.130952
4,Real Betis,segunda_division,219.0,122.0,126,1.738095,0.968254
...,...,...,...,...,...,...,...
57,Extremadura,segunda_division,86.0,106.0,84,1.02381,1.261905
58,Málaga,segunda_division,118.0,102.0,118,1.0,0.864407
59,Fuenlabrada,segunda_division,83.0,77.0,76,1.092105,1.013158
60,UD Logroñés,segunda_division,25.0,42.0,34,0.735294,1.235294


In [16]:
# Reduce the Segunda División frame to model features plus the Home_Result
# label. The helper modifies its argument in place and returns it, so
# populated_segunda_division_decade_3_loader_dropped_df is changed as well.
populated_segunda_division_decade_3_ml_df = _prepare_dataset_for_ML_models(populated_segunda_division_decade_3_loader_dropped_df)



In [17]:
# NOTE(review): absolute, machine-specific output path.
path_sd = r'/Users/tom/Documents/Coding/AiCore/Projects/4. Football Match Outcome Predictor /Cleaned Datasets/ML_split/classification_segunda_division_decade_3_dataset.csv'

# Writes the *_loader_dropped_df frame. It only contains the ML-ready columns
# if the _prepare_dataset_for_ML_models cell above has already modified it in
# place; writing populated_segunda_division_decade_3_ml_df would make that
# dependency explicit.
populated_segunda_division_decade_3_loader_dropped_df.to_csv(path_sd, index=False)
