# Reading Google Sheets File That Contain Match Infos

In [1]:
import pandas as pd

# UEFA Google Sheets URL
csv_url = "https://docs.google.com/spreadsheets/d/1WfEG-1icUjj6k7TGePJQEXH-w0TLEIcN/export?format=csv"

# Read the Google Sheet as a DataFrame
df = pd.read_csv(csv_url, dtype={'FTHG': 'Int64', 'FTAG': 'Int64', 'HTHG': 'Int64', 'HTAG': 'Int64'})
df.head()

Unnamed: 0,Date,League,Home,Away,FTHG,FTAG,HTHG,HTAG
0,9/17/2024,UCL,Juventus,PSV,3,1,2,0
1,9/17/2024,UCL,Young Boys,Aston Villa,0,3,0,2
2,9/17/2024,UCL,Bayern,Dinamo Zagreb,9,2,3,0
3,9/17/2024,UCL,Milan,Liverpool,1,3,1,2
4,9/17/2024,UCL,Real Madrid,Stuttgart,3,1,0,0


# Separating Previous and Future Matches and Tournaments

In [2]:
# Separate rows with NA values
next_matches = df[df.isna().any(axis=1)]
next_leagues = next_matches['League'].unique().tolist()

# Separate rows without NA values
previous_matches = df[~df.isna().any(axis=1)]

next_matches.head()

Unnamed: 0,Date,League,Home,Away,FTHG,FTAG,HTHG,HTAG
408,10/12/2024,UCL,Dinamo Zagreb,Celtic,,,,
409,10/12/2024,UCL,Girona,Liverpool,,,,
410,10/12/2024,UCL,Atalanta,Real Madrid,,,,
411,10/12/2024,UCL,Leverkusen,Inter,,,,
412,10/12/2024,UCL,Club Brugge,Sporting,,,,


# Calculating Functions Needed For Dixon-Coles Model

In [3]:
from scipy.optimize import minimize
from scipy.stats import poisson
import numpy as np

def rho_correction(x, y, lambda_x, mu_y, rho):
    if x==0 and y==0:
        return 1- (lambda_x * mu_y * rho)
    elif x==0 and y==1:
        return 1 + (lambda_x * rho)
    elif x==1 and y==0:
        return 1 + (mu_y * rho)
    elif x==1 and y==1:
        return 1 - rho
    else:
        return 1.0

def dc_log_like(x, y, alpha_x, beta_x, alpha_y, beta_y, rho, gamma):
    lambda_x, mu_y = np.exp(alpha_x + beta_y + gamma), np.exp(alpha_y + beta_x) 
    return (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) + 
            np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))



def solve_parameters(dataset, half_or_full = 'full', debug = False, init_vals=None, options={'disp': True, 'maxiter':100},
                     constraints = [{'type':'eq', 'fun': lambda x: sum(x[:20])-20}] , **kwargs):
    teams = np.sort(dataset['Home'].unique())
    # check for no weirdness in dataset
    away_teams = np.sort(dataset['Away'].unique())
    if not np.array_equal(teams, away_teams):
        raise ValueError("Something's not right")
    n_teams = len(teams)
    if init_vals is None:
        # random initialisation of model parameters
        init_vals = np.concatenate((np.random.uniform(0,1,(n_teams)), # attack strength
                                      np.random.uniform(0,-1,(n_teams)), # defence strength
                                      np.array([0, 1.0]) # rho (score correction), gamma (home advantage)
                                     ))

    def estimate_paramters(params):
        score_coefs = dict(zip(teams, params[:n_teams]))
        defend_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
        rho, gamma = params[-2:]
        if half_or_full == 'full':
            log_like = [dc_log_like(row.FTHG, row.FTAG, score_coefs[row.Home], defend_coefs[row.Home],
                        score_coefs[row.Away], defend_coefs[row.Away], rho, gamma) for row in dataset.itertuples()]
        elif half_or_full == 'half':
            log_like = [dc_log_like(row.HTHG, row.HTAG, score_coefs[row.Home], defend_coefs[row.Home],
                        score_coefs[row.Away], defend_coefs[row.Away], rho, gamma) for row in dataset.itertuples()]

        return -sum(log_like)
    opt_output = minimize(estimate_paramters, init_vals, options=options, constraints = constraints, **kwargs)
    if debug:
        # sort of hacky way to investigate the output of the optimisation process
        return opt_output
    else:
        return dict(zip(["attack_"+team for team in teams] + 
                        ["defence_"+team for team in teams] +
                        ['rho', 'home_adv'],
                        opt_output.x))

# Calculating Lambda Values for Dixon-Coles Model

In [4]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
stats_df = pd.DataFrame()
full_time_models = []
half_time_models = []

for league in next_leagues:
    league_df = previous_matches[previous_matches['League'] == league]
    
    full_time_estimates = solve_parameters(league_df, half_or_full = 'full')
    full_time_models.append(full_time_estimates)

    half_time_estimates = solve_parameters(league_df, half_or_full = 'half')
    half_time_models.append(half_time_estimates)

  np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))
  return (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) +


Optimization terminated successfully    (Exit mode 0)
            Current function value: 229.20784974727704
            Iterations: 64
            Function evaluations: 4866
            Gradient evaluations: 64
Optimization terminated successfully    (Exit mode 0)
            Current function value: 154.10422524630445
            Iterations: 57
            Function evaluations: 4319
            Gradient evaluations: 57
Iteration limit reached    (Exit mode 9)
            Current function value: 224.09603444833354
            Iterations: 100
            Function evaluations: 7716
            Gradient evaluations: 100
Optimization terminated successfully    (Exit mode 0)
            Current function value: 149.3290389369651
            Iterations: 62
            Function evaluations: 4692
            Gradient evaluations: 62
Optimization terminated successfully    (Exit mode 0)
            Current function value: 167.41722771007124
            Iterations: 58
            Function evaluat

# Calculating Probability Matrices for HT/FT

In [5]:
#First Function needs work to make it more understandable and a df rather than matrix!
def dixon_coles_simulate_match(params_dict, homeTeam, awayTeam, max_goals=10):
    team_avgs = [np.exp(params_dict['attack_'+homeTeam] + params_dict['defence_'+awayTeam] + params_dict['home_adv']),
                 np.exp(params_dict['defence_'+homeTeam] + params_dict['attack_'+awayTeam])]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in team_avgs]
    output_matrix = np.outer(np.array(team_pred[0]), np.array(team_pred[1]))
    correction_matrix = np.array([[rho_correction(home_goals, away_goals, team_avgs[0],
                                                   team_avgs[1], params_dict['rho']) for away_goals in range(2)]
                                   for home_goals in range(2)])
    output_matrix[:2,:2] = output_matrix[:2,:2] * correction_matrix
    return output_matrix

full_time_matrices = []
half_time_matrices = []

for i in range(len(next_matches)):
    my_league = next_matches['League'].iloc[i]
    league_index = next_leagues.index(my_league)
    ft_match_score_matrix = dixon_coles_simulate_match(full_time_models[league_index], 
                                                       next_matches['Home'].iloc[i], next_matches['Away'].iloc[i], max_goals = 8)
    ht_match_score_matrix = dixon_coles_simulate_match(half_time_models[league_index], 
                                                       next_matches['Home'].iloc[i], next_matches['Away'].iloc[i], max_goals = 4)
    full_time_matrices.append(ft_match_score_matrix)
    half_time_matrices.append(ht_match_score_matrix)

# Calculating Probabilities of Dixon-Coles Model

In [6]:
ft1, ftx, ft2, ft_score = [], [], [], []
over_15, over_25, under_35, under_45, btts = [], [], [], [], []
ht1, htx, ht2, ht_score, ht_over05, ht_under15 = [], [], [], [], [], []
ho05, ao05, ho15, ao15, hu25, au25 = [], [], [], [], [], []

# Helper function to calculate total goals for each score
def total_goals(i, j):
    return i + j

for i in range(len(next_matches)):
    my_matrix = full_time_matrices[i]
    ht_matrix = half_time_matrices[i]

    ft1.append(round(np.sum(np.tril(my_matrix, k=-1)) * 100, 2)) # Sum of lower triangular values (home win)
    ftx.append(round(np.sum(np.diag(my_matrix)) * 100, 2)) # Sum of diagonal values (draw)
    ft2.append(round(np.sum(np.triu(my_matrix, k=1)) * 100, 2)) # Sum of higher triangular values (away_win)
    
    max_score = np.unravel_index(np.argmax(my_matrix), my_matrix.shape) # Find the index of the maximum score
    home_goals, away_goals = max_score
    ft_score.append(f"{home_goals}-{away_goals}") # Format the score as 'home-away'

    # Calculate the probabilities
    over_15.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) > 1.5]) * 100, 2))
    over_25.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) > 2.5]) * 100, 2))
    under_35.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) <= 3.5]) * 100, 2))
    under_45.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) <= 4.5]) * 100, 2))

    # Calculate BTTS (both teams to score and goals != 0)
    btts.append(round(np.sum([my_matrix[i, j] for i in range(1, my_matrix.shape[0]) for j in range(1, my_matrix.shape[1])]) * 100, 2)) 

    # Calculate statistics for Half Time
    ht1.append(round(np.sum(np.tril(ht_matrix, k=-1)) * 100, 2)) # Sum of lower triangular values (home win)
    htx.append(round(np.sum(np.diag(ht_matrix)) * 100, 2)) # Sum of diagonal values (draw)
    ht2.append(round(np.sum(np.triu(ht_matrix, k=1)) * 100, 2)) # Sum of higher triangular values (away_win)

    ht_max_score = np.unravel_index(np.argmax(ht_matrix), ht_matrix.shape) # Find the index of the maximum score
    ht_hogs, ht_awgs = ht_max_score
    ht_score.append(f"{ht_hogs}-{ht_awgs}") # Format the score as 'home-away'

    ht_over05.append(round(np.sum([ht_matrix[i, j] for i in range(ht_matrix.shape[0]) for j in range(ht_matrix.shape[1]) if total_goals(i, j) > 0.5]) * 100, 2))   
    ht_under15.append(round(np.sum([ht_matrix[i, j] for i in range(ht_matrix.shape[0]) for j in range(ht_matrix.shape[1]) if total_goals(i, j) < 1.5]) * 100, 2)) 

    ho05.append(round(np.sum(my_matrix[1:,:]) * 100, 2))
    ao05.append(round(np.sum(my_matrix[:,1:]) * 100, 2))
    ho15.append(round(np.sum(my_matrix[2:,:]) * 100, 2))
    ao15.append(round(np.sum(my_matrix[:,2:]) * 100, 2))
    hu25.append(round(np.sum(my_matrix[:3,:]) * 100, 2))
    au25.append(round(np.sum(my_matrix[:,:3]) * 100, 2))
    

# Combine lists into a DataFrame
final_results = pd.DataFrame({
    'League': next_matches['League'], 'Home': next_matches['Home'], 'Away': next_matches['Away'],
    'FT1': ft1, 'FTX': ftx, 'FT2': ft2, 'FTR': ft_score,
    'DC1X': [x + y for x, y in zip(ft1, ftx)], 'DC12': [x + y for x, y in zip(ft1, ft2)], 'DCX2': [x + y for x, y in zip(ftx, ft2)],
    '1.5O': over_15, '2.5O': over_25, '3.5U': under_35, '4.5U': under_45, 'BTTS': btts,
    'HT1': ht1, 'HTX': htx, 'HT2': ht2, 'HTR': ht_score,
    'HTDC1X': [x + y for x, y in zip(ht1, htx)], 'HTDC12': [x + y for x, y in zip(ht1, ht2)], 'HTDCX2': [x + y for x, y in zip(htx, ht2)],
    'HT0.5O': ht_over05, 'HT1.5U': ht_under15, 'H0.5O':ho05, 'A0.5O':ao05, 'H1.5O':ho15, 'A1.5O':ao15, 'H2.5U':hu25, 'A2.5U':au25
})

# Function to highlight values higher than threshold
def highlight_values(value):
    if isinstance(value, str):
        return ''  # Return empty string for NaN values
    elif value > 70:
    #color = 'red'
        return 'background-color: red'
    else:
        return ''

# Apply the style
with pd.option_context('display.precision', 2):
    styled_df = final_results.style.applymap(highlight_values)
styled_df.to_excel("UEFA.xlsx", index = False)
# Display the styled DataFrame
from IPython.display import display, HTML
display(styled_df)

  styled_df = final_results.style.applymap(highlight_values)


Unnamed: 0,League,Home,Away,FT1,FTX,FT2,FTR,DC1X,DC12,DCX2,1.5O,2.5O,3.5U,4.5U,BTTS,HT1,HTX,HT2,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O,HT1.5U,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U
408,UCL,Dinamo Zagreb,Celtic,33.03,18.06,48.83,1-2,51.09,81.86,66.89,91.73,79.73,38.35,57.72,76.3,38.34,25.09,35.98,1-0,63.43,74.32,61.07,87.85,46.62,85.25,90.33,57.12,67.86,69.79,58.39
409,UCL,Girona,Liverpool,0.21,1.88,95.32,0-4,2.09,95.53,97.2,89.71,76.72,39.07,58.49,7.5,2.48,26.82,69.65,0-1,29.3,72.13,96.47,73.91,61.07,7.69,95.82,0.31,89.24,97.4,21.79
410,UCL,Atalanta,Real Madrid,80.29,15.2,4.48,1-0,95.49,84.77,19.68,65.18,39.17,80.84,92.14,18.69,55.15,42.52,2.17,0-0,97.67,57.32,44.69,58.42,78.6,86.46,22.37,59.42,2.71,67.62,99.75
411,UCL,Leverkusen,Inter,0.0,63.69,36.31,0-0,63.69,36.31,100.0,7.58,1.1,99.88,99.99,0.0,0.0,95.21,4.79,0-0,95.21,4.79,100.0,4.79,99.88,0.0,36.31,0.0,7.58,100.0,98.9
412,UCL,Club Brugge,Sporting,22.03,18.97,58.98,0-1,41.0,81.01,77.95,80.63,60.4,61.92,79.38,57.1,53.73,36.64,9.4,0-0,90.37,63.13,46.04,67.85,71.48,67.42,86.49,30.89,59.47,89.58,67.58
413,UCL,Leipzig,Aston Villa,7.06,39.56,53.38,0-0,46.62,60.44,92.94,26.9,8.58,97.92,99.59,7.82,0.0,68.71,31.28,0-0,68.71,31.28,99.99,31.28,94.5,14.47,58.23,1.1,21.77,99.94,94.15
414,UCL,Salzburg,PSG,10.61,40.33,49.06,0-0,50.94,59.67,89.39,27.35,8.92,97.8,99.56,10.58,12.48,71.92,15.59,0-0,84.4,28.07,87.51,29.65,95.78,20.36,55.94,2.23,19.83,99.83,94.97
415,UCL,Shakhtar,Bayern,14.2,14.66,70.99,1-2,28.86,85.19,85.65,86.79,70.28,50.68,69.88,59.3,39.37,29.79,30.58,1-0,69.16,69.95,60.37,81.37,57.65,65.1,92.31,28.42,72.81,90.79,52.24
416,UCL,Brest,PSV,44.51,18.65,36.78,2-1,63.16,81.29,55.43,90.47,77.26,41.83,61.35,74.61,67.63,13.18,11.46,1-0,80.81,79.09,24.64,88.8,23.44,88.19,85.54,63.02,57.65,63.85,69.33
417,UCL,Athletico Madrid,Slovan,72.34,1.27,0.67,6-1,73.61,73.01,1.94,73.93,72.83,4.44,10.37,47.86,74.13,12.07,6.41,2-0,86.2,80.54,18.48,86.59,28.58,74.18,47.96,73.49,20.65,3.21,67.8
