# Reading Google Sheets File That Contain Match Infos

In [1]:
import pandas as pd

# UEFA Google Sheets URL
csv_url = "https://docs.google.com/spreadsheets/d/1WfEG-1icUjj6k7TGePJQEXH-w0TLEIcN/export?format=csv"

# Read the Google Sheet as a DataFrame
df = pd.read_csv(csv_url, dtype={'FTHG': 'Int64', 'FTAG': 'Int64', 'HTHG': 'Int64', 'HTAG': 'Int64'})

# Convert 'date' column to datetime
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')

# Find the latest date for each league
latest_dates = df.groupby('League')['Date'].max().rename('latest_date')

# Merge with the original DataFrame
df = df.merge(latest_dates, on='League')

# Calculate the time difference in days
df['time_diff'] = (df['latest_date'] - df['Date']).dt.days
df.head()

Unnamed: 0,Date,League,Home,Away,FTHG,FTAG,HTHG,HTAG,latest_date,time_diff
0,2025-09-16,UCL,PSV,USG,1,3,0,2,2025-10-01,15
1,2025-09-16,UCL,Athletic Club,Arsenal,0,2,0,0,2025-10-01,15
2,2025-09-16,UCL,Benfica,Qarabag,2,3,2,1,2025-10-01,15
3,2025-09-16,UCL,Tottenham,Villarreal,1,0,1,0,2025-10-01,15
4,2025-09-16,UCL,Juventus,Dortmund,4,4,0,0,2025-10-01,15


# Separating Previous and Future Matches and Tournaments

In [3]:
# Separate rows with NA values
next_matches = df[df.isna().any(axis=1)]
next_leagues = next_matches['League'].unique().tolist()

# Separate rows without NA values
previous_matches = df[~df.isna().any(axis=1)]

next_matches.head()

Unnamed: 0,Date,League,Home,Away,FTHG,FTAG,HTHG,HTAG,latest_date,time_diff
672,2025-03-23,UNL,France,Croatia,,,,,2025-03-24,1
673,2025-03-23,UNL,Germany,Italy,,,,,2025-03-24,1
674,2025-03-23,UNL,Portugal,Denmark,,,,,2025-03-24,1
675,2025-03-23,UNL,Spain,Netherlands,,,,,2025-03-24,1
676,2025-03-23,UNL,Georgia,Armenia,,,,,2025-03-24,1


# Calculating Functions Needed For Dixon-Coles Model

In [4]:
from scipy.optimize import minimize
from scipy.stats import poisson
import numpy as np

def rho_correction(x, y, lambda_x, mu_y, rho):
    if x==0 and y==0:
        return 1- (lambda_x * mu_y * rho)
    elif x==0 and y==1:
        return 1 + (lambda_x * rho)
    elif x==1 and y==0:
        return 1 + (mu_y * rho)
    elif x==1 and y==1:
        return 1 - rho
    else:
        return 1.0

def dc_log_like(x, y, alpha_x, beta_x, alpha_y, beta_y, rho, gamma):
    lambda_x, mu_y = np.exp(alpha_x + beta_y + gamma), np.exp(alpha_y + beta_x) 
    return (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) + 
            np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))

def solve_parameters_decay(dataset, half_or_full = 'full', xi=0.001, debug = False, init_vals=None, 
                           options={'disp': True, 'maxiter':100},
                     constraints = [{'type':'eq', 'fun': lambda x: sum(x[:20])-20}] , **kwargs):
    teams = np.sort(dataset['Home'].unique())
    # check for no weirdness in dataset
    away_teams = np.sort(dataset['Away'].unique())
    if not np.array_equal(teams, away_teams):
        raise ValueError("Home Teams Not Equal To Away Teams")
    n_teams = len(teams)
    if init_vals is None:
        # random initialisation of model parameters
        init_vals = np.concatenate((np.random.uniform(0,1,(n_teams)), # attack strength
                                      np.random.uniform(0,-1,(n_teams)), # defence strength
                                      np.array([0,1.0]) # rho (score correction), gamma (home advantage)
                                     ))
        
    def dc_log_like_decay(x, y, alpha_x, beta_x, alpha_y, beta_y, rho, gamma, t, xi=xi):
        lambda_x, mu_y = np.exp(alpha_x + beta_y + gamma), np.exp(alpha_y + beta_x) 
        return  np.exp(-xi*t) * (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) + 
                                  np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))

    def estimate_paramters(params):
        score_coefs = dict(zip(teams, params[:n_teams]))
        defend_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
        rho, gamma = params[-2:]
        if half_or_full == 'full':
            log_like = [dc_log_like_decay(row.FTHG, row.FTAG, score_coefs[row.Home], defend_coefs[row.Home],
                                      score_coefs[row.Away], defend_coefs[row.Away], 
                                      rho, gamma, row.time_diff, xi=xi) for row in dataset.itertuples()]
        elif half_or_full == 'half':
            log_like = [dc_log_like_decay(row.HTHG, row.HTAG, score_coefs[row.Home], defend_coefs[row.Home],
                                      score_coefs[row.Away], defend_coefs[row.Away], 
                                      rho, gamma, row.time_diff, xi=xi) for row in dataset.itertuples()]
        return -sum(log_like)
    opt_output = minimize(estimate_paramters, init_vals, options=options, constraints = constraints)
    if debug:
        # sort of hacky way to investigate the output of the optimisation process
        return opt_output
    else:
        return dict(zip(["attack_"+team for team in teams] + 
                        ["defence_"+team for team in teams] +
                        ['rho', 'home_adv'],
                        opt_output.x))

# Calculating Lambda Values for Dixon-Coles Model

In [5]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
stats_df = pd.DataFrame()
full_time_models = []
half_time_models = []

for league in next_leagues:
    league_df = previous_matches[previous_matches['League'] == league]
    
    full_time_estimates = solve_parameters_decay(league_df, half_or_full = 'full')
    full_time_models.append(full_time_estimates)

    half_time_estimates = solve_parameters_decay(league_df, half_or_full = 'half')
    half_time_models.append(half_time_estimates)

  np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))
  return  np.exp(-xi*t) * (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) +


Optimization terminated successfully    (Exit mode 0)
            Current function value: 373.84881157054286
            Iterations: 82
            Function evaluations: 9191
            Gradient evaluations: 82
Optimization terminated successfully    (Exit mode 0)
            Current function value: 248.9154690105896
            Iterations: 91
            Function evaluations: 10158
            Gradient evaluations: 91


# Calculating Probability Matrices for HT/FT

In [6]:
#First Function needs work to make it more understandable and a df rather than matrix!
def dixon_coles_simulate_match(params_dict, homeTeam, awayTeam, max_goals=10):
    team_avgs = [np.exp(params_dict['attack_'+homeTeam] + params_dict['defence_'+awayTeam] + params_dict['home_adv']),
                 np.exp(params_dict['defence_'+homeTeam] + params_dict['attack_'+awayTeam])]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in team_avgs]
    output_matrix = np.outer(np.array(team_pred[0]), np.array(team_pred[1]))
    correction_matrix = np.array([[rho_correction(home_goals, away_goals, team_avgs[0],
                                                   team_avgs[1], params_dict['rho']) for away_goals in range(2)]
                                   for home_goals in range(2)])
    output_matrix[:2,:2] = output_matrix[:2,:2] * correction_matrix
    return output_matrix

full_time_matrices = []
half_time_matrices = []

for i in range(len(next_matches)):
    my_league = next_matches['League'].iloc[i]
    league_index = next_leagues.index(my_league)
    ft_match_score_matrix = dixon_coles_simulate_match(full_time_models[league_index], 
                                                       next_matches['Home'].iloc[i], next_matches['Away'].iloc[i], max_goals = 8)
    ht_match_score_matrix = dixon_coles_simulate_match(half_time_models[league_index], 
                                                       next_matches['Home'].iloc[i], next_matches['Away'].iloc[i], max_goals = 4)
    full_time_matrices.append(ft_match_score_matrix)
    half_time_matrices.append(ht_match_score_matrix)

# Calculating Probabilities of Dixon-Coles Model

In [7]:
ft1, ftx, ft2, ft_score = [], [], [], []
over_15, over_25, under_35, under_45, btts = [], [], [], [], []
ht1, htx, ht2, ht_score, ht_over05, ht_under15 = [], [], [], [], [], []
ho05, ao05, ho15, ao15, hu25, au25 = [], [], [], [], [], []

# Helper function to calculate total goals for each score
def total_goals(i, j):
    return i + j

for i in range(len(next_matches)):
    my_matrix = full_time_matrices[i]
    ht_matrix = half_time_matrices[i]

    ft1.append(round(np.sum(np.tril(my_matrix, k=-1)) * 100, 2)) # Sum of lower triangular values (home win)
    ftx.append(round(np.sum(np.diag(my_matrix)) * 100, 2)) # Sum of diagonal values (draw)
    ft2.append(round(np.sum(np.triu(my_matrix, k=1)) * 100, 2)) # Sum of higher triangular values (away_win)
    
    max_score = np.unravel_index(np.argmax(my_matrix), my_matrix.shape) # Find the index of the maximum score
    home_goals, away_goals = max_score
    ft_score.append(f"{home_goals}-{away_goals}") # Format the score as 'home-away'

    # Calculate the probabilities
    over_15.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) > 1.5]) * 100, 2))
    over_25.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) > 2.5]) * 100, 2))
    under_35.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) <= 3.5]) * 100, 2))
    under_45.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) <= 4.5]) * 100, 2))

    # Calculate BTTS (both teams to score and goals != 0)
    btts.append(round(np.sum([my_matrix[i, j] for i in range(1, my_matrix.shape[0]) for j in range(1, my_matrix.shape[1])]) * 100, 2)) 

    # Calculate statistics for Half Time
    ht1.append(round(np.sum(np.tril(ht_matrix, k=-1)) * 100, 2)) # Sum of lower triangular values (home win)
    htx.append(round(np.sum(np.diag(ht_matrix)) * 100, 2)) # Sum of diagonal values (draw)
    ht2.append(round(np.sum(np.triu(ht_matrix, k=1)) * 100, 2)) # Sum of higher triangular values (away_win)

    ht_max_score = np.unravel_index(np.argmax(ht_matrix), ht_matrix.shape) # Find the index of the maximum score
    ht_hogs, ht_awgs = ht_max_score
    ht_score.append(f"{ht_hogs}-{ht_awgs}") # Format the score as 'home-away'

    ht_over05.append(round(np.sum([ht_matrix[i, j] for i in range(ht_matrix.shape[0]) for j in range(ht_matrix.shape[1]) if total_goals(i, j) > 0.5]) * 100, 2))   
    ht_under15.append(round(np.sum([ht_matrix[i, j] for i in range(ht_matrix.shape[0]) for j in range(ht_matrix.shape[1]) if total_goals(i, j) < 1.5]) * 100, 2)) 

    ho05.append(round(np.sum(my_matrix[1:,:]) * 100, 2))
    ao05.append(round(np.sum(my_matrix[:,1:]) * 100, 2))
    ho15.append(round(np.sum(my_matrix[2:,:]) * 100, 2))
    ao15.append(round(np.sum(my_matrix[:,2:]) * 100, 2))
    hu25.append(round(np.sum(my_matrix[:3,:]) * 100, 2))
    au25.append(round(np.sum(my_matrix[:,:3]) * 100, 2))
    

# Combine lists into a DataFrame
final_results = pd.DataFrame({
    'League': next_matches['League'], 'Home': next_matches['Home'], 'Away': next_matches['Away'],
    'FT1': ft1, 'FTX': ftx, 'FT2': ft2, 'FTR': ft_score,
    'DC1X': [x + y for x, y in zip(ft1, ftx)], 'DC12': [x + y for x, y in zip(ft1, ft2)], 'DCX2': [x + y for x, y in zip(ftx, ft2)],
    '1.5O': over_15, '2.5O': over_25, '3.5U': under_35, '4.5U': under_45, 'BTTS': btts,
    'HT1': ht1, 'HTX': htx, 'HT2': ht2, 'HTR': ht_score,
    'HTDC1X': [x + y for x, y in zip(ht1, htx)], 'HTDC12': [x + y for x, y in zip(ht1, ht2)], 'HTDCX2': [x + y for x, y in zip(htx, ht2)],
    'HT0.5O': ht_over05, 'HT1.5U': ht_under15, 'H0.5O':ho05, 'A0.5O':ao05, 'H1.5O':ho15, 'A1.5O':ao15, 'H2.5U':hu25, 'A2.5U':au25
})

# Function to highlight values higher than threshold
def highlight_values(value):
    if isinstance(value, str):
        return ''  # Return empty string for NaN values
    elif value > 70:
    #color = 'red'
        return 'background-color: red'
    else:
        return ''

# Apply the style
with pd.option_context('display.precision', 2):
    styled_df = final_results.style.applymap(highlight_values)
styled_df.to_excel("UEFA.xlsx", index = False)
# Display the styled DataFrame
from IPython.display import display, HTML
display(styled_df)

  styled_df = final_results.style.applymap(highlight_values)


Unnamed: 0,League,Home,Away,FT1,FTX,FT2,FTR,DC1X,DC12,DCX2,1.5O,2.5O,3.5U,4.5U,BTTS,HT1,HTX,HT2,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O,HT1.5U,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U
672,UNL,France,Croatia,26.51,30.74,42.75,1-1,57.25,69.26,73.49,68.31,40.36,79.96,91.64,47.23,37.99,37.96,20.2,1-1,75.95,58.19,58.16,83.86,13.62,62.62,73.13,25.84,37.82,92.26,85.38
673,UNL,Germany,Italy,91.27,3.45,0.88,4-0,94.72,92.15,4.33,91.99,83.83,25.34,42.58,37.72,21.27,55.53,23.12,0-0,76.8,44.39,78.65,63.38,58.65,94.57,37.98,89.87,8.8,16.5,94.18
674,UNL,Portugal,Denmark,26.77,38.72,34.51,0-0,65.49,61.28,73.23,47.15,20.29,92.84,97.92,30.18,41.46,55.58,2.92,0-0,97.04,44.38,58.5,48.72,82.68,50.66,56.83,15.8,20.57,96.51,94.66
675,UNL,Spain,Netherlands,74.26,15.6,9.88,2-1,89.86,84.14,25.48,90.08,73.63,46.34,65.82,60.29,46.45,45.46,7.48,0-0,91.91,53.93,52.94,71.84,47.47,93.9,63.32,77.33,26.63,45.95,91.58
676,UNL,Georgia,Armenia,91.26,5.74,1.42,3-0,97.0,92.68,7.16,91.19,78.01,38.68,58.07,35.45,70.5,26.03,0.48,1-0,96.53,70.98,26.51,78.84,41.04,96.17,36.01,87.61,7.58,27.03,97.32
677,UNL,Hungary,Turkiye,29.83,32.08,38.09,1-1,61.91,67.92,70.17,65.56,37.23,82.3,92.95,45.34,51.47,45.62,2.68,0-0,97.09,54.15,48.3,61.18,70.01,63.54,69.04,26.75,32.74,91.8,88.54
678,UNL,Iceland,Kosovo,44.68,24.89,30.42,1-1,69.57,75.1,55.31,85.17,63.97,58.12,76.33,65.75,30.14,55.19,14.58,0-0,85.33,44.72,69.77,61.86,61.79,83.75,77.18,54.24,43.48,72.58,81.42
679,UNL,Scotland,Greece,53.86,31.76,14.38,1-0,85.62,68.24,46.14,55.93,27.83,88.59,96.11,31.58,54.73,43.59,1.44,0-0,98.32,56.17,45.03,60.44,72.83,72.5,41.86,37.0,10.33,85.92,98.22
680,UNL,Serbia,Austria,38.54,38.48,22.99,0-0,77.02,61.53,61.47,46.6,19.9,93.04,97.99,29.13,0.89,43.44,55.46,0-0,44.33,56.35,98.9,58.99,75.44,59.33,46.82,22.74,13.24,93.72,97.37
681,UNL,Slovenia,Slovakia,44.83,37.31,17.86,0-0,82.14,62.69,55.17,46.38,19.79,93.09,98.01,27.2,30.51,56.44,13.0,0-0,86.95,43.51,69.44,58.41,67.17,63.13,41.09,26.34,9.92,92.01,98.33
