### Import Libraries

In [1]:
import trueskill 
import pandas as pd
import numpy as np

### Load the match data (we will use the Monte Carlo method to predict each match one step ahead, before its result is used to update the ratings)

In [2]:
# Load the Serie A match results from CSV and preview the first rows.
# Columns used later in the notebook: team1, team2, score1, score2.
# NOTE(review): the date column header reads "yyyy-mm-dd" but the previewed values
# look like dd/mm/yyyy — confirm the format before parsing or sorting by date.
data = pd.read_csv("SerieA.csv")
print(data.head())

   yyyy-mm-dd  HH:MM     team1     team2  score1  score2
0  18/08/2018  18:00    Chievo  Juventus       2       3
1  18/08/2018  20:30     Lazio    Napoli       1       2
2  19/08/2018  18:00    Torino      Roma       0       1
3  19/08/2018  20:30  Sassuolo     Inter       1       0
4  19/08/2018  20:30     Parma   Udinese       2       2


### Initialize variables

In [3]:
# Per-team rating lookup (team name -> rating); filled in once the teams are known
ratings = dict()

# TrueSkill environment configured with a draw probability of zero
env = trueskill.TrueSkill(draw_probability=0)


In [4]:
# Collect every distinct team name that appears as either team1 or team2,
# in order of first appearance when the two columns are read row by row
teams = pd.unique(data.loc[:, ['team1', 'team2']].to_numpy().ravel())

print(teams)

['Chievo' 'Juventus' 'Lazio' 'Napoli' 'Torino' 'Roma' 'Sassuolo' 'Inter'
 'Parma' 'Udinese' 'Empoli' 'Cagliari' 'Bologna' 'Spal' 'Atalanta'
 'Frosinone' 'Milan' 'Sampdoria' 'Genoa' 'Fiorentina']


In [5]:
# Give every team a fresh rating created by the TrueSkill environment,
# so that all teams enter the first match on equal footing
for team in teams:
    ratings[team] = env.create_rating()

ratings # Shown as the cell output: each team starts with an equal rating

{'Chievo': trueskill.Rating(mu=25.000, sigma=8.333),
 'Juventus': trueskill.Rating(mu=25.000, sigma=8.333),
 'Lazio': trueskill.Rating(mu=25.000, sigma=8.333),
 'Napoli': trueskill.Rating(mu=25.000, sigma=8.333),
 'Torino': trueskill.Rating(mu=25.000, sigma=8.333),
 'Roma': trueskill.Rating(mu=25.000, sigma=8.333),
 'Sassuolo': trueskill.Rating(mu=25.000, sigma=8.333),
 'Inter': trueskill.Rating(mu=25.000, sigma=8.333),
 'Parma': trueskill.Rating(mu=25.000, sigma=8.333),
 'Udinese': trueskill.Rating(mu=25.000, sigma=8.333),
 'Empoli': trueskill.Rating(mu=25.000, sigma=8.333),
 'Cagliari': trueskill.Rating(mu=25.000, sigma=8.333),
 'Bologna': trueskill.Rating(mu=25.000, sigma=8.333),
 'Spal': trueskill.Rating(mu=25.000, sigma=8.333),
 'Atalanta': trueskill.Rating(mu=25.000, sigma=8.333),
 'Frosinone': trueskill.Rating(mu=25.000, sigma=8.333),
 'Milan': trueskill.Rating(mu=25.000, sigma=8.333),
 'Sampdoria': trueskill.Rating(mu=25.000, sigma=8.333),
 'Genoa': trueskill.Rating(mu=25.000, sigma=8.333),
 'Fiorentina': trueskill.Rating(mu=25.000, sigma=8.333)}

### Monte-Carlo Simulation

In [6]:
def monte_carlo_win_probability(team1_rating, team2_rating, num_simulations=10000, rng=None):
    """
    Estimates the probability that team1 wins against team2 using Monte Carlo simulation.

    Each simulation draws one performance value per team from a normal
    distribution parameterised by that team's rating (mean ``mu``, standard
    deviation ``sigma``); team1 "wins" a simulation when its draw is strictly
    greater than team2's draw.

    Parameters:
    - team1_rating: Rating object for team1; only its ``mu`` and ``sigma`` attributes are read.
    - team2_rating: Rating object for team2; only its ``mu`` and ``sigma`` attributes are read.
    - num_simulations: Number of simulations to run (positive integer).
    - rng: Optional random source exposing ``normal(loc, scale, size)``, e.g. a
      ``numpy.random.Generator``. When omitted, NumPy's global random state is
      used, so ``np.random.seed`` still controls the result.

    Returns:
    - Probability (float in [0, 1]) that team1 wins.

    Raises:
    - ValueError: if ``num_simulations`` is not positive.
    """
    if num_simulations <= 0:
        raise ValueError("num_simulations must be a positive integer")

    # Default to the global NumPy state so existing callers (and np.random.seed) keep working
    sampler = np.random if rng is None else rng

    # Extract mu and sigma for both teams
    mu1, sigma1 = team1_rating.mu, team1_rating.sigma
    mu2, sigma2 = team2_rating.mu, team2_rating.sigma

    # Draw all performances at once instead of looping in Python.
    # NOTE(review): only the rating uncertainty (sigma) is sampled; no extra
    # per-match performance noise is added — confirm this is the intended model.
    perf1 = sampler.normal(mu1, sigma1, size=num_simulations)
    perf2 = sampler.normal(mu2, sigma2, size=num_simulations)

    # Ties count as "team1 did not win", matching a strict comparison
    team1_wins = int(np.count_nonzero(perf1 > perf2))

    # Plain Python float, as a fraction of the simulations run
    return team1_wins / num_simulations


### Iterate Over the Matches and Make Predictions

In [7]:
# Fix the random seed so the Monte Carlo predictions (and therefore the
# reported accuracy) are reproducible across runs of this cell
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Start from fresh ratings so that re-running this cell does not predict
# matches with ratings that were already updated on those same matches
for team_name in list(ratings):
    ratings[team_name] = env.create_rating()

# Lists to store results for analysis
predictions = []
actual_results = []

# Iterate over each match in file order.
# NOTE(review): this assumes the CSV rows are already in chronological order — confirm.
for team1, team2, score1, score2 in zip(
    data['team1'], data['team2'], data['score1'], data['score2']
):
    # Get current ratings (i.e. before this match is taken into account)
    team1_rating = ratings[team1]
    team2_rating = ratings[team2]

    # Step 1: Predict the outcome using Monte Carlo simulation
    win_prob = monte_carlo_win_probability(team1_rating, team2_rating)

    # Make a deterministic prediction based on win probability:
    # +1 predicts a team1 win, -1 predicts a team2 win (including an exact 0.5)
    prediction = +1 if win_prob > 0.5 else -1
    predictions.append(prediction)

    # Step 2: Determine the actual outcome
    if score1 > score2:
        actual_result = +1  # Team 1 won
    elif score1 < score2:
        actual_result = -1  # Team 2 won
    else:
        # Match was a draw; it is excluded from the accuracy calculation below
        actual_result = 0

    actual_results.append(actual_result)

    # Step 3: Update the TrueSkill ratings with the actual match result.
    # rate_1vs1 takes the winner first; draws leave both ratings untouched.
    if actual_result == +1:
        ratings[team1], ratings[team2] = env.rate_1vs1(team1_rating, team2_rating)
    elif actual_result == -1:
        ratings[team2], ratings[team1] = env.rate_1vs1(team2_rating, team1_rating)


# Convert to numpy arrays
predictions = np.array(predictions)
actual_results = np.array(actual_results)

# Exclude matches that ended in a draw from accuracy calculation
non_draw_indices = actual_results != 0

# Calculate accuracy; report NaN explicitly when there is no decisive match,
# rather than taking the mean of an empty selection
if non_draw_indices.any():
    accuracy = np.mean(predictions[non_draw_indices] == actual_results[non_draw_indices])
else:
    accuracy = float('nan')

print(f'Prediction Accuracy (excluding draws): {accuracy * 100:.2f}%')


Prediction Accuracy (excluding draws): 65.44%
