In [1]:
import functools
import os
from datetime import datetime, timedelta

import pandas as pd
# Directory that holds the per-club Elo CSV files (file names as listed in `Mapping`).
# NOTE(review): "GER" presumably stands for German clubs — confirm against the data layout.
PATH_ELO_CLUBS = './data/elo/elo-clubs/GER/'


In [2]:
# Load the previously pickled, merged match dataframe.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files from a trusted source.
df_merge = pd.read_pickle("./df_merge_club_transfer_data.pkl")
# Show the available columns as the cell output.
df_merge.columns

Index(['DATE', 'WEEKDAY', 'MONTH', 'SEASON', 'MATCHDAY', 'HOME_TEAM',
       'PLACE_HOME_TEAM', 'AWAY_TEAM', 'PLACE_AWAY_TEAM', 'WIN_PERC_HOME',
       'REMIS_PERC', 'WIN_PERC_AWAY', 'HOME_GOALS', 'AWAY_GOALS', 'RESULT',
       'REFEREE', 'HOME_PLAYERS_COUNT', 'HOME_PLAYERS_AVG_AGE',
       'HOME_LEGIONARIES_COUNT', 'HOME_AVG_MARKET_VALUE',
       'HOME_TOTAL_MARKET_VALUE', 'HOME_AVG_AGE_JOINING',
       'HOME_AVG_AGE_LEAVING', 'HOME_TOTAL_VALUE_JOINING_MIO',
       'HOME_TOTAL_VALUE_LEAVING_MIO', 'HOME_EXPENSES_JOINING_MIO',
       'HOME_REVENUE_LEAVING_MIO', 'AWAY_PLAYERS_COUNT',
       'AWAY_PLAYERS_AVG_AGE', 'AWAY_LEGIONARIES_COUNT',
       'AWAY_AVG_MARKET_VALUE', 'AWAY_TOTAL_MARKET_VALUE',
       'AWAY_AVG_AGE_JOINING', 'AWAY_AVG_AGE_LEAVING',
       'AWAY_TOTAL_VALUE_JOINING_MIO', 'AWAY_TOTAL_VALUE_LEAVING_MIO',
       'AWAY_EXPENSES_JOINING_MIO', 'AWAY_REVENUE_LEAVING_MIO'],
      dtype='object')

In [3]:
# Peek at the date, home team and away team of the first match.
# NOTE(review): a notebook cell only renders its last expression, so only the
# AWAY_TEAM lookup is displayed; the first two lookups are evaluated and discarded.
df_merge.iloc[0][['DATE']]
df_merge.iloc[0][['HOME_TEAM']]
df_merge.iloc[0][['AWAY_TEAM']]

AWAY_TEAM    FC Schalke 04
Name: 0, dtype: object

In [12]:
# Club name as used in the HOME_TEAM / AWAY_TEAM columns -> file name of that
# club's Elo CSV inside PATH_ELO_CLUBS.
# NOTE(review): 'Eintracht Braunschweing' looks misspelled; presumably it mirrors
# the spelling used in df_merge — confirm before changing the key.
Mapping = {
    "1.FC Kaiserslautern": "Lautern.csv",
    "1.FC Nürnberg": "Nuernberg.csv",
    "1.FSV Mainz 05": "Mainz.csv",
    "Arminia Bielefeld": "Bielefeld.csv",
    "Bayer 04 Leverkusen": "Leverkusen.csv",
    "Bayern München": "Bayern.csv",
    "Borussia Dortmund": "Dortmund.csv",
    "Borussia Mönchengladbach": "Gladbach.csv",
    "FC Schalke 04": "Schalke.csv",
    "Hamburger SV": "Hamburg.csv",
    "Hannover 96": "Hannover.csv",
    "Hansa Rostock": "Rostock.csv",
    "Hertha BSC": "Hertha.csv",
    "SC Freiburg": "Freiburg.csv",
    "VfB Stuttgart": "Stuttgart.csv",
    "VfL Bochum": "Bochum.csv",
    "VfL Wolfsburg": "Wolfsburg.csv",
    "Werder Bremen": "Werder.csv",
    "1.FC Köln": "Koeln.csv",
    "Eintracht Frankfurt": "Frankfurt.csv",
    "MSV Duisburg": "Duisburg.csv",
    "Alemannia Aachen": "Aachen.csv",
    "Energie Cottbus": "Cottbus.csv",
    "Karlsruher SC": "Karlsruhe.csv",
    "TSG Hoffenheim": "Hoffenheim.csv",
    "FC St. Pauli": "Pauli.csv",
    "FC Augsburg": "Augsburg.csv",
    "Fortuna Düsseldorf": "Duesseldorf.csv",
    "Greuther Fürth": "Fuerth.csv",
    "Eintracht Braunschweing": "Braunschweig.csv",
    "SC Paderborn": "Paderborn.csv",
    "Darmstadt 98": "Darmstadt.csv",
    "FC Ingolstadt": "Ingolstadt.csv",
    "RasenBallsport Leipzig": "RB Leipzig.csv",
    "Union Berlin": "Union Berlin.csv",
}

In [13]:
# Check that every club name in the merged dataframe has an entry in Mapping.
missing_mappings = set()

# A club only needs to be looked up once, so walk the distinct names of each
# column instead of iterating over every match row.
for column in ('HOME_TEAM', 'AWAY_TEAM'):
    for team in df_merge[column].unique():
        if team not in Mapping:
            missing_mappings.add(f"Für {team} fehlt ein Mapping in der Spalte {column}.")

if missing_mappings:
    # Sorted so the warnings are printed in the same order on every run.
    for msg in sorted(missing_mappings):
        print("Warnung:", msg)
else:
    print("Alle Werte in den Spalten HOME_TEAM und AWAY_TEAM haben ein Mapping.")

Alle Werte in den Spalten HOME_TEAM und AWAY_TEAM haben ein Mapping.


In [14]:
# Check that the Elo CSV file of every club in the merged dataframe exists.
missing_files = set()

existing_files = set(os.listdir(PATH_ELO_CLUBS))

# Home and away clubs are checked the same way; each distinct club name is
# inspected once per column instead of once per match row.
for column in ('HOME_TEAM', 'AWAY_TEAM'):
    for team in df_merge[column].unique():
        if team not in Mapping:
            # Without a mapping there is no file name to look for.
            missing_files.add(f"Mapping für {team} fehlt.")
        elif Mapping[team] not in existing_files:
            missing_files.add(f"Datei {Mapping[team]} für {team} fehlt.")

# Report the missing files or mappings (sorted for a stable output order).
if missing_files:
    for msg in sorted(missing_files):
        print("Warnung:", msg)
else:
    print("Alle CSV-Dateien sind vorhanden.")

Alle CSV-Dateien sind vorhanden.


In [15]:
@functools.lru_cache(maxsize=None)
def _load_elo_table(csv_path):
    """Read one club's Elo CSV and parse its 'From' / 'To' columns as datetimes.

    The result is cached per path so a club file is read and parsed only once,
    no matter how many matches are looked up. Callers must treat the returned
    frame as read-only. Call ``_load_elo_table.cache_clear()`` after the files
    on disk have changed.
    """
    table = pd.read_csv(csv_path)
    table['From'] = pd.to_datetime(table['From'])
    table['To'] = pd.to_datetime(table['To'])
    return table


def _elo_on_date(table, match_date):
    """Return the 'Elo' value of ``table`` that applies on ``match_date``, else 'N/A'."""
    covering = table[(table['From'] <= match_date) & (table['To'] >= match_date)]
    if not covering.empty:
        return covering.iloc[0]['Elo']

    # No rating period contains the date: fall back to the period that started
    # on or before the match and ended latest.
    earlier = table[table['From'] <= match_date].sort_values('To', ascending=False)
    if not earlier.empty:
        return earlier.iloc[0]['Elo']

    return 'N/A'


def elo_value(path, match_date, home_team, away_team):
    """Look up the Elo values of both clubs for the given match date.

    Parameters
    ----------
    path : str
        Directory containing the club CSV files named in ``Mapping``.
    match_date : anything accepted by ``pd.to_datetime``
        Date of the match.
    home_team, away_team : str
        Club names; both must be keys of ``Mapping``.

    Returns
    -------
    dict
        ``{'home': ..., 'away': ...}`` where each value is the club's 'Elo'
        entry, or the string ``'N/A'`` when the club's file has no rating
        period starting on or before ``match_date``.

    Raises
    ------
    KeyError
        If a club name is not present in ``Mapping``.
    """
    # Resolve both file paths first so an unknown club fails before any file I/O.
    home_team_path = os.path.join(path, Mapping[home_team])
    away_team_path = os.path.join(path, Mapping[away_team])
    match_date = pd.to_datetime(match_date)

    return {
        'home': _elo_on_date(_load_elo_table(home_team_path), match_date),
        'away': _elo_on_date(_load_elo_table(away_team_path), match_date),
    }


In [16]:
# Clubs whose matches are skipped when the Elo rating is computed for every match.
# NOTE(review): presumably their Elo files do not cover the match dates — confirm.
Not_available = ['FC St. Pauli', 'RasenBallsport Leipzig','Union Berlin']

In [17]:
def elo_rating(path, match_date, home_team, away_team, home_advantage=100):
    '''
    Expected match result derived from the clubs' Elo values, seen from the
    home team's perspective.

    0 : certain defeat
    1 : certain win

    Parameters
    ----------
    path, match_date, home_team, away_team
        Forwarded unchanged to ``elo_value``.
    home_advantage : number, default 100
        Points added to the home team's Elo value before the difference to the
        away team is taken.

    Returns
    -------
    The expected result ``1 / (10 ** (-dr / 400) + 1)`` with
    ``dr = home_advantage + elo_home - elo_away``, or ``None`` when
    ``elo_value`` reports ``'N/A'`` for either club.
    '''
    elo = elo_value(path, match_date, home_team, away_team)

    # Without both ratings no expectation can be computed.
    if elo['home'] == 'N/A' or elo['away'] == 'N/A':
        return None

    dr = home_advantage + elo['home'] - elo['away']
    return 1 / (10 ** (-dr / 400) + 1)

In [19]:
# Smoke test: every match between clubs that are not listed in Not_available
# must yield an Elo rating; abort on the first match that does not.
for index, match in df_merge.iterrows():
    clubs = (match['HOME_TEAM'], match['AWAY_TEAM'])
    if any(club in Not_available for club in clubs):
        continue
    rating = elo_rating(PATH_ELO_CLUBS, match['DATE'], *clubs)
    if rating is None:
        raise Exception("Kein Elo wert verfügbar")