In [32]:
import pandas as pd
from glicko2 import Player

# Load the Excel file
file_path = '/users/marclambertes/Downloads/Men Database/Netherlands - Eredivisie.xlsx'
df = pd.read_excel(file_path)
df = df[(df['Minutes played'] >= 900) & (df['Position'] != 'GK')]

# Define a custom class for Aerial Duel Teams
class AerialDuelTeam(Player):
    def __init__(self, team, height, aerial_duels_per_90, aerial_win_pct, mu=1500, phi=350, sigma=0.06):
        super().__init__(mu, phi, sigma)
        self.team = team
        self.height = height
        self.aerial_duels_per_90 = aerial_duels_per_90
        self.aerial_win_pct = aerial_win_pct
        self.weighted_performance = None

    def calculate_weighted_performance(self):
        """Calculate the weighted performance score."""
        self.weighted_performance = (0.5 * self.aerial_duels_per_90) + (1 * self.height) + (1.5 * self.aerial_win_pct)

    def match_outcome(self, opponent):
        """Calculate the match outcome based on weighted performance score."""
        win_margin = self.weighted_performance - opponent.weighted_performance
        if win_margin > 5:
            return 1  # Win
        elif win_margin < -5:
            return 0  # Loss
        else:
            return 0.5  # Draw

    def constrained_update_team(self, opponent_ratings, opponent_rds, outcomes):
        """Update the team rating while applying constraints to prevent overflow errors."""
        opponent_rds = [min(max(rd, 30), 350) for rd in opponent_rds]

        try:
            self.update_player(opponent_ratings, opponent_rds, outcomes)
        except OverflowError:
            print(f"Overflow error encountered for team {self.team}. Adjusting volatility and RD values.")
            self.vol = min(max(self.vol, 0.01), 1.2)
            self.rd = min(max(self.rd, 30), 350)
            self.update_player(opponent_ratings, opponent_rds, outcomes)

# Create Team Objects from the Excel Data
team_objects = {}
for _, row in df.iterrows():
    team_name = row['Team within selected timeframe']
    if team_name not in team_objects:
        team_objects[team_name] = AerialDuelTeam(
            team=team_name,
            height=row['Height'],
            aerial_duels_per_90=row['Aerial duels per 90'],
            aerial_win_pct=row['Aerial duels won, %']
        )
    else:
        team = team_objects[team_name]
        team.aerial_duels_per_90 += row['Aerial duels per 90']
        team.aerial_win_pct += row['Aerial duels won, %']

# Normalize values for each team and calculate weighted performance
for team in team_objects.values():
    team_players_df = df[df['Team within selected timeframe'] == team.team]
    num_players = team_players_df.shape[0]
    
    if num_players > 0:
        team.aerial_duels_per_90 /= num_players
        team.aerial_win_pct /= num_players
        team.calculate_weighted_performance()
    else:
        print(f"Warning: No players found for team {team.team}. Skipping normalization.")

# Simulate matches (e.g., round-robin format)
teams = list(team_objects.values())
for team in teams:
    opponents = [op for op in teams if op != team]
    outcomes = [team.match_outcome(op) for op in opponents]
    
    opponent_ratings = [op.rating for op in opponents]
    opponent_rds = [op.rd for op in opponents]
    
    weights = [op.aerial_duels_per_90 for op in opponents]
    
    if sum(weights) > 0:
        weights = [w / sum(weights) for w in weights]
    else:
        weights = [1] * len(weights)

    team.constrained_update_team(opponent_ratings, opponent_rds, outcomes)

# Prepare the results with averaged data
updated_data = []
for team in teams:
    updated_data.append({
        "Team": team.team,
        "Height": team.height,
        "Rating": team.rating,
        "Deviation": team.rd,
        "Volatility": team.vol,
        "Aerial Duels per 90": team.aerial_duels_per_90,
        "Aerial Duels Won %": team.aerial_win_pct,
        "Weighted Performance": team.weighted_performance
    })

# Convert the updated data into a DataFrame for easier handling
updated_df = pd.DataFrame(updated_data)

# Group by Team and calculate average Rating, Deviation, Volatility, and Weighted Performance
grouped_df = updated_df.groupby('Team').agg({
    'Height': 'mean',  # Average height across players
    'Aerial Duels per 90': 'mean',
    'Aerial Duels Won %': 'mean',
    'Rating': 'mean',
    'Deviation': 'mean',
    'Volatility': 'mean',
    'Weighted Performance': 'mean'
}).reset_index()

# Calculate the final weighted rating using the specified weights
grouped_df['Weighted Rating'] = (0.5 * grouped_df['Aerial Duels per 90']) + \
                                (1 * grouped_df['Height']) + \
                                (1.5 * grouped_df['Aerial Duels Won %'])

# Display the results
print(grouped_df)

# Save the updated ratings back to an Excel file
grouped_df.to_excel('grouped_team_ratings_with_weighted_rating.xlsx', index=False)


                Team  Height  Aerial Duels per 90  Aerial Duels Won %  \
0                 AZ   185.0             3.467692           39.318462   
1               Ajax   182.0             2.864375           46.645000   
2        Almere City   180.0             4.585000           41.648571   
3          Excelsior   184.0             2.947333           40.839333   
4          Feyenoord   188.0             2.858667           50.087333   
5    Fortuna Sittard   191.0             3.727333           46.664000   
6    Go Ahead Eagles   180.0             3.102857           39.985714   
7         Heerenveen   169.0             2.945385           42.038462   
8           Heracles   182.0             2.674444           39.308333   
9                NEC   174.0             3.032353           43.878235   
10        PEC Zwolle   191.0             3.100769           41.215385   
11               PSV   181.0             2.522857           46.730714   
12      RKC Waalwijk   185.0             2.878571  

In [22]:
import pandas as pd

# Load the updated ratings from the Excel file
file_path = 'grouped_team_ratings_with_weighted_rating.xlsx'
df_ratings = pd.read_excel(file_path)

# Function to calculate win probability between two ratings
def calculate_win_probability(rating_a, rating_b):
    """Calculate the win probability of Team A against Team B."""
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

# Select two players by name
player_a_name = 'PSV'  # Replace with actual player name from the Excel file
player_b_name = 'Ajax'  # Replace with actual player name from the Excel file

# Retrieve ratings for the selected players
player_a = df_ratings[df_ratings['Team'] == player_a_name]
player_b = df_ratings[df_ratings['Team'] == player_b_name]

if not player_a.empty and not player_b.empty:
    rating_a = player_a.iloc[0]['Weighted Rating']
    rating_b = player_b.iloc[0]['Weighted Rating']
    
    # Calculate win probabilities
    win_prob_a_vs_b = calculate_win_probability(rating_a, rating_b) * 100
    win_prob_b_vs_a = calculate_win_probability(rating_b, rating_a) * 100
    
    # Print win probabilities as percentages
    print(f"Win probability of {player_a_name} against {player_b_name}: {win_prob_a_vs_b:.2f}%")
    print(f"Win probability of {player_b_name} against {player_a_name}: {win_prob_b_vs_a:.2f}%")
else:
    print("One or both players not found in the Excel file.")


Win probability of PSV against Ajax: 49.85%
Win probability of Ajax against PSV: 50.15%


In [26]:
import pandas as pd
from math import log, sqrt, pi, exp

# Load the updated ratings from the Excel file
file_path = 'grouped_team_ratings_with_weighted_rating.xlsx'
df_ratings = pd.read_excel(file_path)

# Glicko Constants
q = log(10) / 400

# Function to calculate win probability between two ratings
def calculate_win_probability(rating_a, rating_b):
    """Calculate the win probability of Team A against Team B."""
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

# Function to calculate the updated ratings based on the Glicko2 system
def update_glicko_ratings(rating_a, rd_a, rating_b, rd_b, outcome_a):
    """Update ratings using the Glicko2 system after a match result."""
    
    # Convert RD (Rating Deviation) to variance
    rd_a_var = rd_a ** 2
    rd_b_var = rd_b ** 2
    
    # Calculate g(RD)
    g_rd_b = 1 / sqrt(1 + (3 * q ** 2 * rd_b_var) / (pi ** 2))
    
    # Calculate the expected score (E)
    expected_score_a = 1 / (1 + 10 ** (-g_rd_b * (rating_a - rating_b) / 400))
    
    # Calculate the rating update
    delta_a = q / ((1 / rd_a_var) + (1 / (g_rd_b ** 2 * rd_b_var))) * (outcome_a - expected_score_a)
    
    # Update ratings
    new_rating_a = rating_a + delta_a
    
    # Return the updated ratings (the opponent's rating remains unchanged here)
    return new_rating_a

# Select two teams by name
team_a_name = 'PSV'  # Replace with actual team name from the Excel file
team_b_name = 'Ajax'  # Replace with actual team name from the Excel file

# Retrieve ratings for the selected teams
team_a = df_ratings[df_ratings['Team'] == team_a_name]
team_b = df_ratings[df_ratings['Team'] == team_b_name]

if not team_a.empty and not team_b.empty:
    # Store old ratings
    old_rating_a = team_a.iloc[0]['Weighted Rating']
    old_rating_b = team_b.iloc[0]['Weighted Rating']
    rd_a = team_a.iloc[0]['Deviation']
    rd_b = team_b.iloc[0]['Deviation']
    
    # Calculate win probabilities
    win_prob_a_vs_b = calculate_win_probability(old_rating_a, old_rating_b) * 100
    win_prob_b_vs_a = calculate_win_probability(old_rating_b, old_rating_a) * 100
    
    # Print win probabilities as percentages
    print(f"Win probability of {team_a_name} against {team_b_name}: {win_prob_a_vs_b:.2f}%")
    print(f"Win probability of {team_b_name} against {team_a_name}: {win_prob_b_vs_a:.2f}%")
    
    # Assume the outcome: team_a (PSV) wins (outcome_a = 1)
    outcome_a = 1  # PSV wins
    
    # Update ratings after the match
    new_rating_a = update_glicko_ratings(old_rating_a, rd_a, old_rating_b, rd_b, outcome_a)
    
    # Display old and new ratings
    print(f"\nOld rating for {team_a_name}: {old_rating_a:.2f}")
    print(f"New rating for {team_a_name}: {new_rating_a:.2f}")
    print(f"\nOld rating for {team_b_name}: {old_rating_b:.2f} (unchanged since the focus is on team A)")
    
    # Update the DataFrame with the new ratings
    df_ratings.loc[df_ratings['Team'] == team_a_name, 'Weighted Rating'] = new_rating_a

    # Save the updated ratings back to the Excel file
    df_ratings.to_excel('grouped_team_ratings_with_new_ratings.xlsx', index=False)
else:
    print("One or both teams not found in the Excel file.")


Win probability of PSV against Ajax: 49.85%
Win probability of Ajax against PSV: 50.15%

Old rating for PSV: 252.36
New rating for PSV: 269.57

Old rating for Ajax: 253.40 (unchanged since the focus is on team A)


In [30]:
import pandas as pd
from math import log, sqrt, pi

# Load the updated ratings from the Excel file
file_path = 'grouped_team_ratings_with_weighted_rating.xlsx'
df_ratings = pd.read_excel(file_path)

# Glicko Constants
q = log(10) / 400

# Function to calculate win probability between two ratings
def calculate_win_probability(rating_a, rating_b):
    """Calculate the win probability of Team A against Team B."""
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

# Function to calculate the updated ratings based on the Glicko2 system
def update_glicko_ratings(rating_a, rd_a, rating_b, rd_b, outcome_a):
    """Update ratings using the Glicko2 system after a match result."""
    
    # Convert RD (Rating Deviation) to variance
    rd_a_var = rd_a ** 2
    rd_b_var = rd_b ** 2
    
    # Calculate g(RD)
    g_rd_b = 1 / sqrt(1 + (3 * q ** 2 * rd_b_var) / (pi ** 2))
    
    # Calculate the expected score (E)
    expected_score_a = 1 / (1 + 10 ** (-g_rd_b * (rating_a - rating_b) / 400))
    
    # Calculate the rating update
    delta_a = q / ((1 / rd_a_var) + (1 / (g_rd_b ** 2 * rd_b_var))) * (outcome_a - expected_score_a)
    
    # Update ratings
    new_rating_a = rating_a + delta_a
    
    # Return the updated ratings (the opponent's rating remains unchanged here)
    return new_rating_a

# Define match fixtures for a round (adjust team names based on your data)
fixtures = [
    ('PSV', 'Ajax'),
    ('Feyenoord', 'AZ'),
    ('Utrecht', 'Vitesse'),
    ('Heerenveen', 'Excelsior'),
    ('Twente', 'Sparta Rotterdam'),
    ('Willem II', 'Heracles'),
    ('Fortuna Sittard', 'RKC Waalwijk'),
    ('PEC Zwolle', 'Go Ahead Eagles'),
    ('NEC Nijmegen', 'Cambuur')
]

# Loop through each match in the round
for match in fixtures:
    team_a_name, team_b_name = match
    
    # Retrieve ratings for the selected teams
    team_a = df_ratings[df_ratings['Team'] == team_a_name]
    team_b = df_ratings[df_ratings['Team'] == team_b_name]
    
    if not team_a.empty and not team_b.empty:
        # Store old ratings
        old_rating_a = team_a.iloc[0]['Weighted Rating']
        old_rating_b = team_b.iloc[0]['Weighted Rating']
        rd_a = team_a.iloc[0]['Deviation']
        rd_b = team_b.iloc[0]['Deviation']
        
        # Assume the outcome (e.g., 1 means team_a wins, 0.5 is a draw, 0 means team_b wins)
        outcome_a = 1  # You can modify this based on actual results

        # Update ratings after the match
        new_rating_a = update_glicko_ratings(old_rating_a, rd_a, old_rating_b, rd_b, outcome_a)
        new_rating_b = update_glicko_ratings(old_rating_b, rd_b, old_rating_a, rd_a, 1 - outcome_a)
        
        # Display old and new ratings
        print(f"\nMatch: {team_a_name} vs {team_b_name}")
        print(f"Old rating for {team_a_name}: {old_rating_a:.2f}, New rating: {new_rating_a:.2f}")
        print(f"Old rating for {team_b_name}: {old_rating_b:.2f}, New rating: {new_rating_b:.2f}")
        
        # Update the DataFrame with the new ratings
        df_ratings.loc[df_ratings['Team'] == team_a_name, 'Weighted Rating'] = new_rating_a
        df_ratings.loc[df_ratings['Team'] == team_b_name, 'Weighted Rating'] = new_rating_b
    else:
        print(f"One or both teams not found in the Excel file for match: {team_a_name} vs {team_b_name}")

# Save the updated ratings back to the Excel file after all matches in the round
df_ratings.to_excel('grouped_team_ratings_with_new_ratings.xlsx', index=False)



Match: PSV vs Ajax
Old rating for PSV: 252.36, New rating: 269.57
Old rating for Ajax: 253.40, New rating: 236.19

Match: Feyenoord vs AZ
Old rating for Feyenoord: 264.56, New rating: 280.86
Old rating for AZ: 245.71, New rating: 229.41

Match: Utrecht vs Vitesse
Old rating for Utrecht: 254.99, New rating: 270.98
Old rating for Vitesse: 253.83, New rating: 237.84

Match: Heerenveen vs Excelsior
Old rating for Heerenveen: 233.53, New rating: 251.07
Old rating for Excelsior: 246.73, New rating: 229.19

Match: Twente vs Sparta Rotterdam
Old rating for Twente: 252.68, New rating: 267.81
Old rating for Sparta Rotterdam: 240.35, New rating: 225.23
One or both teams not found in the Excel file for match: Willem II vs Heracles

Match: Fortuna Sittard vs RKC Waalwijk
Old rating for Fortuna Sittard: 262.86, New rating: 278.33
Old rating for RKC Waalwijk: 254.13, New rating: 238.66

Match: PEC Zwolle vs Go Ahead Eagles
Old rating for PEC Zwolle: 254.37, New rating: 272.46
Old rating for Go Ahead