# Predicting game results

In [45]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


In [46]:
# Read the match-results CSV (path is relative to the notebook's working
# directory) into `df`, which the cells below use as their input table.
df = pd.read_csv(filepath_or_buffer='../data/all_results.csv')

# Bare trailing expression: the notebook renders the frame as the cell output.
df

Unnamed: 0,Season,Spieltag,Datum,HeimTeam,GastTeam,HeimSieg,GastSieg,HeimTore,GastTore,ErsteLiga
0,1. Bundesliga - Saison 1963/1964,1. Spieltag,24.08.1963 15:30:00,1. FC Saarbrücken,1. FC Köln,False,True,0,2,True
1,1. Bundesliga - Saison 1963/1964,1. Spieltag,24.08.1963 15:30:00,Hertha BSC Berlin,1. FC Nürnberg,False,False,1,1,True
2,1. Bundesliga - Saison 1963/1964,1. Spieltag,24.08.1963 15:30:00,TSV 1860 München,Eintracht Braunschweig,False,False,1,1,True
3,1. Bundesliga - Saison 1963/1964,1. Spieltag,24.08.1963 15:30:00,Karlsruher SC,Meidericher SV,False,True,1,4,True
4,1. Bundesliga - Saison 1963/1964,1. Spieltag,24.08.1963 15:30:00,Preußen Münster,Hamburger SV,False,False,1,1,True
...,...,...,...,...,...,...,...,...,...,...
38221,2. Bundesliga SÜD - Saison 1974/1975,38. Spieltag,15.06.1975,Wormatia Worms,FSV Mainz 05,False,True,2,3,False
38222,2. Bundesliga SÜD - Saison 1974/1975,38. Spieltag,15.06.1975,1. FC Saarbrücken,Stuttgarter Kickers,False,False,1,1,False
38223,2. Bundesliga SÜD - Saison 1974/1975,38. Spieltag,15.06.1975,FC Schweinfurt 05,Chio Waldhof 07,True,False,2,0,False
38224,2. Bundesliga SÜD - Saison 1974/1975,38. Spieltag,15.06.1975,FC Homburg,SpVgg Bayreuth,True,False,1,0,False


In [47]:
# Per-team feature engineering on the match table `df` (loaded in an earlier
# cell; pandas is imported as `pd` in the notebook's import cell).
#
# Produces, among other intermediates:
#   summary   - one row per team: home win rate, away win rate, average goals
#               scored per season
#   head2head - one row per (Team, Opponent) pair: games, wins, win rate


def _venue_win_stats(matches, team_col, win_col, venue):
    """Count matches and wins per team for one venue and derive the win rate.

    Parameters
    ----------
    matches : pd.DataFrame
        Match table with one row per game.
    team_col : str
        Column holding the team to group by (e.g. 'HeimTeam').
    win_col : str
        Column flagging a win for that team. It is summed, so a boolean
        True counts as 1.
    venue : str
        Prefix used to build the output column names ('home' or 'away').

    Returns
    -------
    pd.DataFrame
        Indexed by team, with columns '<venue>_matches', '<venue>_wins'
        and 'avg_<venue>WinRate'.
    """
    matches_name = venue + '_matches'
    wins_name = venue + '_wins'
    stats = matches.groupby(team_col).agg(**{
        matches_name: (team_col, 'count'),
        wins_name: (win_col, 'sum'),
    })
    stats['avg_' + venue + 'WinRate'] = stats[wins_name] / stats[matches_name]
    return stats


def _goals_per_season(matches, team_col, goals_col, out_col):
    """Sum `goals_col` per (Season, team) and return it as a flat frame.

    The team column is renamed to 'Team' and the goal sum to `out_col`, so
    the home and away results can be merged on ['Season', 'Team'].
    """
    return (
        matches.groupby(['Season', team_col])[goals_col]
        .sum()
        .reset_index()
        .rename(columns={team_col: 'Team', goals_col: out_col})
    )


def _team_perspective(matches, team_col, opponent_col, win_col):
    """Return the matches as seen by one side: columns Team, Opponent, Win."""
    return matches[[team_col, opponent_col, win_col]].rename(
        columns={team_col: 'Team', opponent_col: 'Opponent', win_col: 'Win'}
    )


# 1. Average home win rate
home_stats = _venue_win_stats(df, 'HeimTeam', 'HeimSieg', 'home')

# 2. Average away win rate
away_stats = _venue_win_stats(df, 'GastTeam', 'GastSieg', 'away')

# Merge home and away rates into one frame indexed by team. The outer join
# keeps teams that appear on only one side; their missing rate becomes 0.
win_rates = pd.merge(home_stats[['avg_homeWinRate']],
                     away_stats[['avg_awayWinRate']],
                     left_index=True, right_index=True, how='outer').fillna(0)

# 3. Average goals per season
home_goals = _goals_per_season(df, 'HeimTeam', 'HeimTore', 'home_goals')
away_goals = _goals_per_season(df, 'GastTeam', 'GastTore', 'away_goals')

# Merge home and away goals per season (a missing side is filled with 0)
season_goals = pd.merge(home_goals, away_goals, on=['Season', 'Team'], how='outer').fillna(0)
season_goals['total_goals'] = season_goals['home_goals'] + season_goals['away_goals']

# Mean of the per-season totals, taken over the seasons each team appears in
avg_goals_season = (
    season_goals.groupby('Team')['total_goals']
    .mean()
    .reset_index()
    .rename(columns={'total_goals': 'avg_goals_per_season'})
)

# 4. Win rate against other teams (head-to-head)
# Every match contributes two records, one from each team's perspective.
home_records = _team_perspective(df, 'HeimTeam', 'GastTeam', 'HeimSieg')
away_records = _team_perspective(df, 'GastTeam', 'HeimTeam', 'GastSieg')
all_records = pd.concat([home_records, away_records], ignore_index=True)

# Group by team and opponent to calculate head-to-head statistics
head2head = all_records.groupby(['Team', 'Opponent']).agg(
    games=('Win', 'count'),
    wins=('Win', 'sum')
).reset_index()
head2head['winrate'] = head2head['wins'] / head2head['games']

# Example: win rate of one particular team against each of its opponents
team_winrates = head2head[head2head['Team'] == '1. FC Saarbrücken']

# Combine all per-team metrics into a single summary frame
summary = win_rates.merge(avg_goals_season.set_index('Team'),
                          left_index=True, right_index=True, how='outer').fillna(0)

print("Team summary metrics:")
print(summary)

# head2head now contains the win rates for each team against every opponent
print("\nHead-to-head win rates:")
print(head2head.head())


Team summary metrics:
                       avg_homeWinRate  avg_awayWinRate  avg_goals_per_season
1. FC Bocholt                 0.600000         0.117647             81.500000
1. FC Heidenheim              0.441176         0.234375             45.500000
1. FC Heidenheim 1846         0.578431         0.297030             51.333333
1. FC Kaiserslautern          0.570838         0.218452             83.684211
1. FC Köln                    0.533333         0.267130             85.836066
...                                ...              ...                   ...
Werder Bremen                 0.564004         0.278107             89.065574
Westfalia Herne               0.605263         0.191176            101.750000
Wormatia Worms                0.573913         0.175439             83.500000
Wuppertaler SV                0.494681         0.151163             91.700000
Würzburger Kickers            0.250000         0.085714             49.000000

[171 rows x 3 columns]

Head-to-head win 

In [48]:
# Bare expression: the notebook renders the per-team `summary` frame
# (built in the previous cell) as this cell's output.
summary

Unnamed: 0,avg_homeWinRate,avg_awayWinRate,avg_goals_per_season
1. FC Bocholt,0.600000,0.117647,81.500000
1. FC Heidenheim,0.441176,0.234375,45.500000
1. FC Heidenheim 1846,0.578431,0.297030,51.333333
1. FC Kaiserslautern,0.570838,0.218452,83.684211
1. FC Köln,0.533333,0.267130,85.836066
...,...,...,...
Werder Bremen,0.564004,0.278107,89.065574
Westfalia Herne,0.605263,0.191176,101.750000
Wormatia Worms,0.573913,0.175439,83.500000
Wuppertaler SV,0.494681,0.151163,91.700000


In [49]:
# Filter head2head down to the rows whose Team contains 'Werder Bremen'
# and whose Opponent contains 'Bayern München' (substring match on both).
head2head.loc[
    lambda pairs: pairs['Team'].str.contains('Werder Bremen')
    & pairs['Opponent'].str.contains('Bayern München')
]

Unnamed: 0,Team,Opponent,games,wins,winrate
7738,Werder Bremen,Bayern München,114,28,0.245614
