### Building league standings
This notebook loops through all games and, with the help of helper methods, generates team information that is later used for performance-dependent context.

Two steps:

1. Determine Match outcome - Win, Loss, Draw and the scored goals
2. Build League Standings for each game week - Based on the scored goals and outcomes we build the league standings for each week. With this we can determine team positions on the leaderboard as well as their performance as of that week

In [1]:
import os
import pandas as pd
import numpy as np
from datetime import datetime

### 1. Determine Match outcome
From our data, it is unclear who won each match and how many goals each team scored, which makes it necessary to extract this information ourselves.

In [8]:
from helpers.league_standings import get_match_outcome, track_match_score
from helpers.data_handlers import get_ordner

amount_gameweeks = 34
type_data = "EventData"

input_ordner = get_ordner("original")
output_ordner = get_ordner("modified")

# Columns that are really a previously saved index; they show up as
# 'Unnamed: ...' when a CSV written with its index is loaded again.
leftover_index_columns = ['Unnamed: 0', 'Unnamed: 0.1']

# Loop through each gameweek directory
for gameweek in range(1, amount_gameweeks + 1):
    gameweek_in_dir = input_ordner + str(gameweek)
    gameweek_out_dir = output_ordner + str(gameweek)

    # Make sure the output folder for this gameweek exists
    os.makedirs(gameweek_out_dir, exist_ok=True)

    # Get the files in the directory
    directory = os.listdir(gameweek_in_dir)

    # Loop through each file in the directory
    for fname in directory:
        # Only files of the configured data type are processed
        if type_data not in fname:
            continue

        print("Working on file: ", fname)
        # Create the file path and load the data
        file_path = os.path.join(gameweek_in_dir, fname)
        event_df = pd.read_csv(file_path)

        # Drop whichever leftover index columns are present. errors="ignore"
        # removes each column independently, so a file that has only one of
        # them is still cleaned, and unrelated errors are no longer swallowed.
        event_df = event_df.drop(columns=leftover_index_columns, errors="ignore")

        # Error logs are named after the current date and hour so repeated
        # runs within the same hour reuse the same log file name.
        current_time = datetime.now().strftime("%Y-%m-%d_%H")
        error_log_filename = f"/error_log_{current_time}"

        # Determine the match outcome.
        # NOTE(review): presumably this adds the MatchWon/MatchLost/MatchDraw
        # events consumed by the standings step - confirm in helpers.league_standings.
        event_df = get_match_outcome(
            event_df,
            error_log_path=gameweek_out_dir + error_log_filename + "_outcome.txt",
            update=True,
            n=15,
            validate_goal=True,
        )

        # Track the score over the course of the match (helper-defined behaviour)
        event_df = track_match_score(
            event_df,
            error_log_path=gameweek_out_dir + error_log_filename + "_score.txt",
            n=15,
        )

        # Save the dataframe under the same file name in the output gameweek
        # folder; an existing file of that name there is overwritten.
        event_df.to_csv(os.path.join(gameweek_out_dir, fname), index=False)
            

Working on file:  2023_2024_Bundesliga_GW1_Augsburg - Borussia M.Gladbach_EventData.csv
Working on file:  2023_2024_Bundesliga_GW1_Borussia Dortmund - FC Koln_EventData.csv
Working on file:  2023_2024_Bundesliga_GW1_Eintracht Frankfurt - Darmstadt_EventData.csv
Working on file:  2023_2024_Bundesliga_GW1_Hoffenheim - Freiburg_EventData.csv
Own goal/s detected. Reassigning goal to opposing team...
Error getting teamId for own goal: 'numpy.int64' object has no attribute 'unique'.
This is due to a single own goal being detected.
Team 1211 goals reassigned to team 50 (Freiburg)
Working on file:  2023_2024_Bundesliga_GW1_Leverkusen - RBL_EventData.csv
Working on file:  2023_2024_Bundesliga_GW1_Stuttgart - Bochum_EventData.csv
Working on file:  2023_2024_Bundesliga_GW1_Union Berlin - Mainz_EventData.csv
Working on file:  2023_2024_Bundesliga_GW1_Werder Bremen - Bayern_EventData.csv
Working on file:  2023_2024_Bundesliga_GW1_Wolfsburg - FC Heidenheim_EventData.csv
Working on file:  2023_2024_B

### 2. Build League Standings for each game week
We need the csv with all teams from the league to initialize the empty standings.
- if i_have_all_teams = True - then we load it in
- if i_have_all_teams = False and i_have_all_events = True - we extract it from the all_events.csv file and then save it
- if i_have_all_teams = False and i_have_all_events = False - please view the notebook 'building_all_data.ipynb' to build and save the 'all_events.csv'

In [2]:
i_have_all_teams = True #TODO
i_have_all_events = True #TODO

# One path for both reading and writing the teams file, so a file produced by
# the extraction branch is the one picked up when i_have_all_teams is True.
teams_csv_path = "data/all_teams.csv"

if i_have_all_teams:
    # The teams file already exists - just load it
    teams_df = pd.read_csv(teams_csv_path)
elif i_have_all_events:
    all_events_df = pd.read_csv("data/all_events.csv")
    # Extract unique teamId and teamName pairs
    teams_df = (
        all_events_df[['teamId', 'teamName']]
        .drop_duplicates()
        .reset_index(drop=True)
    )
    # Persist the result so later runs can load it directly
    teams_df.to_csv(teams_csv_path, index=False)
else:
    # Raise instead of print + exit(1): the cell stops with a clear error and
    # later cells cannot silently run without teams_df being defined.
    raise RuntimeError(
        "You need to have all the events data to generate the teams data. "
        "Please view the 'building_all_data.ipynb' notebook."
    )

In [None]:
from helpers.league_standings import get_standings, process_match_results, save_standings

input_ordner = "data/Bundesliga/modified/GW"
amount_gameweeks = 34
type_data = "EventData"

# Width of the banner printed for every gameweek (and once after the loop)
title_width = 50

# Walk through the gameweeks in order; each one yields a standings table
for gameweek in range(1, amount_gameweeks + 1):
    # Banner with the title centred between two rules
    title = f"Processing Gameweek {gameweek}"
    print("-" * title_width)
    print(" " * ((title_width - len(title)) // 2) + f"{title}")
    print("-" * title_width)

    # Standings table for this gameweek as provided by the helper
    # (the captured output shows a fresh, empty table for gameweek 1)
    gw_league_standings = get_standings(gameweek, input_ordner, teams_df)

    # All files stored for this gameweek
    directory = os.listdir(input_ordner + str(gameweek))

    for fname in directory:
        # Skip everything that is not of the configured data type
        if type_data not in fname:
            continue

        print(f"Working on file: {fname}")
        file_path = os.path.join(input_ordner + str(gameweek), fname)
        event_df = pd.read_csv(file_path)

        # Keep only the rows that carry the result of the match
        is_match_result = event_df['type'].isin(['MatchLost', 'MatchWon', 'MatchDraw'])
        match_results = event_df[is_match_result]
        gw_league_standings = process_match_results(match_results, gw_league_standings)

    # Rank teams: points first, then goal difference, then goals scored
    gw_league_standings = gw_league_standings.sort_values(
        by=['points', 'goalDifference', 'goalsFor'],
        ascending=[False, False, False],
    )
    gw_league_standings = gw_league_standings.reset_index(drop=True)

    # Write this gameweek's standings via the helper
    save_standings(gw_league_standings, gameweek, input_ordner)

print("-" * title_width)
print("You can find the final league standings in the 'data/Bundesliga/modified/GW' directory.\nThe files are named 'league_standings.csv'.")
print("Here is the final league standings for the last gameweek: ")

# Last expression of the cell: display the standings of the final gameweek
gw_league_standings

--------------------------------------------------
              Processing Gameweek 1
--------------------------------------------------
Initializing new empty for Gameweek 1...
Working on file: 2023_2024_Bundesliga_GW1_Augsburg - Borussia M.Gladbach_EventData.csv
Working on file: 2023_2024_Bundesliga_GW1_Borussia Dortmund - FC Koln_EventData.csv
Working on file: 2023_2024_Bundesliga_GW1_Eintracht Frankfurt - Darmstadt_EventData.csv
Working on file: 2023_2024_Bundesliga_GW1_Hoffenheim - Freiburg_EventData.csv
Working on file: 2023_2024_Bundesliga_GW1_Leverkusen - RBL_EventData.csv
Working on file: 2023_2024_Bundesliga_GW1_Stuttgart - Bochum_EventData.csv
Working on file: 2023_2024_Bundesliga_GW1_Union Berlin - Mainz_EventData.csv
Working on file: 2023_2024_Bundesliga_GW1_Werder Bremen - Bayern_EventData.csv
Working on file: 2023_2024_Bundesliga_GW1_Wolfsburg - FC Heidenheim_EventData.csv
League standings saved to: data/Bundesliga/modified/GW1\league_standings.csv
---------------------

Unnamed: 0,teamId,teamName,points,goalsFor,goalsAgainst,goalDifference,gamesWon,gamesDrawn,gamesLost,gamesPlayed,lastFive
0,36,Leverkusen,90,89,24,65,28,6,0,34,"['D', 'D', 'W', 'W', 'W']"
1,41,Stuttgart,73,78,39,39,23,4,7,34,"['L', 'D', 'W', 'W', 'W']"
2,37,Bayern,72,94,45,49,23,3,8,34,"['W', 'W', 'L', 'W', 'L']"
3,7614,RBL,65,77,39,38,19,8,7,34,"['W', 'W', 'D', 'D', 'D']"
4,44,Borussia Dortmund,63,68,43,25,18,9,7,34,"['D', 'L', 'W', 'L', 'W']"
5,45,Eintracht Frankfurt,47,51,50,1,11,14,9,34,"['W', 'L', 'L', 'D', 'D']"
6,1211,Hoffenheim,46,66,66,0,13,7,14,34,"['W', 'L', 'D', 'W', 'W']"
7,4852,FC Heidenheim,42,50,55,-5,10,12,12,34,"['L', 'W', 'D', 'D', 'W']"
8,42,Werder Bremen,42,48,54,-6,11,9,14,34,"['W', 'W', 'D', 'D', 'W']"
9,50,Freiburg,42,45,58,-13,11,9,14,34,"['D', 'L', 'D', 'D', 'L']"
