# Reading Google Sheets File That Contain Match Infos

In [6]:
import pandas as pd

# UEFA Google Sheets URL
csv_url = "https://docs.google.com/spreadsheets/d/1WfEG-1icUjj6k7TGePJQEXH-w0TLEIcN/export?format=csv"

# Read the Google Sheet as a DataFrame
df = pd.read_csv(csv_url, dtype={'FTHG': 'Int64', 'FTAG': 'Int64', 'HTHG': 'Int64', 'HTAG': 'Int64'})
df.head()

Unnamed: 0,Date,League,Home,Away,FTHG,FTAG,HTHG,HTAG
0,9/17/2024,UCL,Juventus,PSV,3,1,2,0
1,9/17/2024,UCL,Young Boys,Aston Villa,0,3,0,2
2,9/17/2024,UCL,Bayern,Dinamo Zagreb,9,2,3,0
3,9/17/2024,UCL,Milan,Liverpool,1,3,1,2
4,9/17/2024,UCL,Real Madrid,Stuttgart,3,1,0,0


# Separating Previous and Future Matches and Tournaments

In [7]:
# Separate rows with NA values
next_matches = df[df.isna().any(axis=1)]
next_leagues = next_matches['League'].unique().tolist()

# Separate rows without NA values
previous_matches = df[~df.isna().any(axis=1)]

next_matches.head()

Unnamed: 0,Date,League,Home,Away,FTHG,FTAG,HTHG,HTAG
373,28/11/2024,UEL,Slavia Praha,Fenerbahce,,,,
374,28/11/2024,UEL,AZ Alkmaar,Galatasaray,,,,
375,28/11/2024,UEL,Athletic Bilbao,Elfsborg,,,,
376,28/11/2024,UEL,Besiktas,Maccabi Tel-Aviv,,,,
377,28/11/2024,UEL,Dynamo Kyiv,Viktoria Plzen,,,,


# Calculating Functions Needed For Dixon-Coles Model

In [8]:
from scipy.optimize import minimize
from scipy.stats import poisson
import numpy as np

def rho_correction(x, y, lambda_x, mu_y, rho):
    if x==0 and y==0:
        return 1- (lambda_x * mu_y * rho)
    elif x==0 and y==1:
        return 1 + (lambda_x * rho)
    elif x==1 and y==0:
        return 1 + (mu_y * rho)
    elif x==1 and y==1:
        return 1 - rho
    else:
        return 1.0

def dc_log_like(x, y, alpha_x, beta_x, alpha_y, beta_y, rho, gamma):
    lambda_x, mu_y = np.exp(alpha_x + beta_y + gamma), np.exp(alpha_y + beta_x) 
    return (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) + 
            np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))



def solve_parameters(dataset, half_or_full = 'full', debug = False, init_vals=None, options={'disp': True, 'maxiter':100},
                     constraints = [{'type':'eq', 'fun': lambda x: sum(x[:20])-20}] , **kwargs):
    teams = np.sort(dataset['Home'].unique())
    # check for no weirdness in dataset
    away_teams = np.sort(dataset['Away'].unique())
    if not np.array_equal(teams, away_teams):
        raise ValueError("Something's not right")
    n_teams = len(teams)
    if init_vals is None:
        # random initialisation of model parameters
        init_vals = np.concatenate((np.random.uniform(0,1,(n_teams)), # attack strength
                                      np.random.uniform(0,-1,(n_teams)), # defence strength
                                      np.array([0, 1.0]) # rho (score correction), gamma (home advantage)
                                     ))

    def estimate_paramters(params):
        score_coefs = dict(zip(teams, params[:n_teams]))
        defend_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
        rho, gamma = params[-2:]
        if half_or_full == 'full':
            log_like = [dc_log_like(row.FTHG, row.FTAG, score_coefs[row.Home], defend_coefs[row.Home],
                        score_coefs[row.Away], defend_coefs[row.Away], rho, gamma) for row in dataset.itertuples()]
        elif half_or_full == 'half':
            log_like = [dc_log_like(row.HTHG, row.HTAG, score_coefs[row.Home], defend_coefs[row.Home],
                        score_coefs[row.Away], defend_coefs[row.Away], rho, gamma) for row in dataset.itertuples()]

        return -sum(log_like)
    opt_output = minimize(estimate_paramters, init_vals, options=options, constraints = constraints, **kwargs)
    if debug:
        # sort of hacky way to investigate the output of the optimisation process
        return opt_output
    else:
        return dict(zip(["attack_"+team for team in teams] + 
                        ["defence_"+team for team in teams] +
                        ['rho', 'home_adv'],
                        opt_output.x))

# Calculating Lambda Values for Dixon-Coles Model

In [9]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
stats_df = pd.DataFrame()
full_time_models = []
half_time_models = []

for league in next_leagues:
    league_df = previous_matches[previous_matches['League'] == league]
    
    full_time_estimates = solve_parameters(league_df, half_or_full = 'full')
    full_time_models.append(full_time_estimates)

    half_time_estimates = solve_parameters(league_df, half_or_full = 'half')
    half_time_models.append(half_time_estimates)

  np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))
  return (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) +


Optimization terminated successfully    (Exit mode 0)
            Current function value: 169.63393028671206
            Iterations: 54
            Function evaluations: 4105
            Gradient evaluations: 54
Optimization terminated successfully    (Exit mode 0)
            Current function value: 104.74880450967626
            Iterations: 56
            Function evaluations: 4221
            Gradient evaluations: 56
Optimization terminated successfully    (Exit mode 0)
            Current function value: 121.0770933270585
            Iterations: 64
            Function evaluations: 4851
            Gradient evaluations: 64
Iteration limit reached    (Exit mode 9)
            Current function value: 23.916816803795673
            Iterations: 100
            Function evaluations: 7525
            Gradient evaluations: 100


# Calculating Probability Matrices for HT/FT

In [10]:
#First Function needs work to make it more understandable and a df rather than matrix!
def dixon_coles_simulate_match(params_dict, homeTeam, awayTeam, max_goals=10):
    team_avgs = [np.exp(params_dict['attack_'+homeTeam] + params_dict['defence_'+awayTeam] + params_dict['home_adv']),
                 np.exp(params_dict['defence_'+homeTeam] + params_dict['attack_'+awayTeam])]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in team_avgs]
    output_matrix = np.outer(np.array(team_pred[0]), np.array(team_pred[1]))
    correction_matrix = np.array([[rho_correction(home_goals, away_goals, team_avgs[0],
                                                   team_avgs[1], params_dict['rho']) for away_goals in range(2)]
                                   for home_goals in range(2)])
    output_matrix[:2,:2] = output_matrix[:2,:2] * correction_matrix
    return output_matrix

full_time_matrices = []
half_time_matrices = []

for i in range(len(next_matches)):
    my_league = next_matches['League'].iloc[i]
    league_index = next_leagues.index(my_league)
    ft_match_score_matrix = dixon_coles_simulate_match(full_time_models[league_index], 
                                                       next_matches['Home'].iloc[i], next_matches['Away'].iloc[i], max_goals = 8)
    ht_match_score_matrix = dixon_coles_simulate_match(half_time_models[league_index], 
                                                       next_matches['Home'].iloc[i], next_matches['Away'].iloc[i], max_goals = 4)
    full_time_matrices.append(ft_match_score_matrix)
    half_time_matrices.append(ht_match_score_matrix)

# Calculating Probabilities of Dixon-Coles Model

In [11]:
ft1, ftx, ft2, ft_score = [], [], [], []
over_15, over_25, under_35, under_45, btts = [], [], [], [], []
ht1, htx, ht2, ht_score, ht_over05, ht_under15 = [], [], [], [], [], []
ho05, ao05, ho15, ao15, hu25, au25 = [], [], [], [], [], []

# Helper function to calculate total goals for each score
def total_goals(i, j):
    return i + j

for i in range(len(next_matches)):
    my_matrix = full_time_matrices[i]
    ht_matrix = half_time_matrices[i]

    ft1.append(round(np.sum(np.tril(my_matrix, k=-1)) * 100, 2)) # Sum of lower triangular values (home win)
    ftx.append(round(np.sum(np.diag(my_matrix)) * 100, 2)) # Sum of diagonal values (draw)
    ft2.append(round(np.sum(np.triu(my_matrix, k=1)) * 100, 2)) # Sum of higher triangular values (away_win)
    
    max_score = np.unravel_index(np.argmax(my_matrix), my_matrix.shape) # Find the index of the maximum score
    home_goals, away_goals = max_score
    ft_score.append(f"{home_goals}-{away_goals}") # Format the score as 'home-away'

    # Calculate the probabilities
    over_15.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) > 1.5]) * 100, 2))
    over_25.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) > 2.5]) * 100, 2))
    under_35.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) <= 3.5]) * 100, 2))
    under_45.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) <= 4.5]) * 100, 2))

    # Calculate BTTS (both teams to score and goals != 0)
    btts.append(round(np.sum([my_matrix[i, j] for i in range(1, my_matrix.shape[0]) for j in range(1, my_matrix.shape[1])]) * 100, 2)) 

    # Calculate statistics for Half Time
    ht1.append(round(np.sum(np.tril(ht_matrix, k=-1)) * 100, 2)) # Sum of lower triangular values (home win)
    htx.append(round(np.sum(np.diag(ht_matrix)) * 100, 2)) # Sum of diagonal values (draw)
    ht2.append(round(np.sum(np.triu(ht_matrix, k=1)) * 100, 2)) # Sum of higher triangular values (away_win)

    ht_max_score = np.unravel_index(np.argmax(ht_matrix), ht_matrix.shape) # Find the index of the maximum score
    ht_hogs, ht_awgs = ht_max_score
    ht_score.append(f"{ht_hogs}-{ht_awgs}") # Format the score as 'home-away'

    ht_over05.append(round(np.sum([ht_matrix[i, j] for i in range(ht_matrix.shape[0]) for j in range(ht_matrix.shape[1]) if total_goals(i, j) > 0.5]) * 100, 2))   
    ht_under15.append(round(np.sum([ht_matrix[i, j] for i in range(ht_matrix.shape[0]) for j in range(ht_matrix.shape[1]) if total_goals(i, j) < 1.5]) * 100, 2)) 

    ho05.append(round(np.sum(my_matrix[1:,:]) * 100, 2))
    ao05.append(round(np.sum(my_matrix[:,1:]) * 100, 2))
    ho15.append(round(np.sum(my_matrix[2:,:]) * 100, 2))
    ao15.append(round(np.sum(my_matrix[:,2:]) * 100, 2))
    hu25.append(round(np.sum(my_matrix[:3,:]) * 100, 2))
    au25.append(round(np.sum(my_matrix[:,:3]) * 100, 2))
    

# Combine lists into a DataFrame
final_results = pd.DataFrame({
    'League': next_matches['League'], 'Home': next_matches['Home'], 'Away': next_matches['Away'],
    'FT1': ft1, 'FTX': ftx, 'FT2': ft2, 'FTR': ft_score,
    'DC1X': [x + y for x, y in zip(ft1, ftx)], 'DC12': [x + y for x, y in zip(ft1, ft2)], 'DCX2': [x + y for x, y in zip(ftx, ft2)],
    '1.5O': over_15, '2.5O': over_25, '3.5U': under_35, '4.5U': under_45, 'BTTS': btts,
    'HT1': ht1, 'HTX': htx, 'HT2': ht2, 'HTR': ht_score,
    'HTDC1X': [x + y for x, y in zip(ht1, htx)], 'HTDC12': [x + y for x, y in zip(ht1, ht2)], 'HTDCX2': [x + y for x, y in zip(htx, ht2)],
    'HT0.5O': ht_over05, 'HT1.5U': ht_under15, 'H0.5O':ho05, 'A0.5O':ao05, 'H1.5O':ho15, 'A1.5O':ao15, 'H2.5U':hu25, 'A2.5U':au25
})

# Function to highlight values higher than threshold
def highlight_values(value):
    if isinstance(value, str):
        return ''  # Return empty string for NaN values
    elif value > 70:
    #color = 'red'
        return 'background-color: red'
    else:
        return ''

# Apply the style
with pd.option_context('display.precision', 2):
    styled_df = final_results.style.applymap(highlight_values)
styled_df.to_excel("UEFA.xlsx", index = False)
# Display the styled DataFrame
from IPython.display import display, HTML
display(styled_df)

  styled_df = final_results.style.applymap(highlight_values)


Unnamed: 0,League,Home,Away,FT1,FTX,FT2,FTR,DC1X,DC12,DCX2,1.5O,2.5O,3.5U,4.5U,BTTS,HT1,HTX,HT2,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O,HT1.5U,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U
373,UEL,Slavia Praha,Fenerbahce,71.26,22.8,5.92,2-0,94.06,77.18,28.72,69.63,41.6,78.99,91.08,31.92,41.84,48.9,9.2,0-0,90.74,51.04,58.1,56.64,79.22,85.14,35.5,56.82,7.22,70.17,98.97
374,UEL,AZ Alkmaar,Galatasaray,51.45,15.74,31.04,3-2,67.19,82.49,46.78,97.58,94.54,9.84,20.15,91.27,6.58,43.41,49.86,0-0,49.99,56.44,93.27,61.49,74.81,95.8,93.3,86.77,78.52,28.23,41.84
375,UEL,Athletic Bilbao,Elfsborg,79.01,15.99,4.85,2-0,95.0,83.86,20.84,83.72,61.32,60.83,78.52,42.14,21.32,31.22,46.61,0-1,52.54,67.93,77.83,83.88,42.74,92.36,44.12,72.94,11.62,52.07,97.71
376,UEL,Besiktas,Maccabi Tel-Aviv,91.01,6.35,1.16,3-0,97.36,92.17,7.51,91.18,77.34,39.78,59.23,34.93,0.0,74.98,25.02,0-0,74.98,25.02,100.0,25.02,96.57,96.17,35.25,87.31,7.23,27.8,97.5
377,UEL,Dynamo Kyiv,Viktoria Plzen,0.0,1.43,95.61,0-4,1.43,95.61,97.04,89.52,76.6,38.73,58.13,0.0,0.0,0.02,7.52,0-4,0.02,7.52,7.54,7.52,0.2,0.0,95.61,0.0,89.52,97.04,20.45
378,UEL,Lazio,Ludogorets,72.48,24.84,2.67,1-0,97.32,75.15,27.51,52.08,24.65,90.47,96.95,12.93,15.54,77.32,7.15,0-0,92.86,22.69,84.47,23.99,96.81,78.76,15.18,45.86,1.22,79.63,99.93
379,UEL,Qarabag,Lyon,4.04,11.38,83.9,0-3,15.42,87.94,95.28,91.23,75.81,42.96,62.5,50.64,5.14,20.78,70.76,0-1,25.92,75.9,91.54,83.43,39.36,51.67,95.58,16.67,83.29,95.51,36.24
380,UEL,RFS,PAOK,40.03,45.16,14.81,0-0,85.19,54.84,59.97,34.55,11.36,96.89,99.31,19.33,29.77,70.22,0.0,0-0,99.99,29.77,70.22,29.77,95.05,54.28,31.98,18.5,5.77,95.5,99.28
381,UEL,Anderlecht,Porto,67.93,15.61,15.63,3-1,83.54,83.56,31.24,96.27,87.74,24.75,41.84,79.61,65.18,17.78,9.27,2-0,82.96,74.45,27.05,86.53,20.98,95.82,81.64,84.43,51.26,34.07,74.24
382,UEL,Midtjylland,Frankfurt,15.54,35.69,48.76,0-0,51.23,64.3,84.45,55.85,26.63,89.31,96.44,33.48,15.77,61.04,23.19,0-0,76.81,38.96,84.23,43.76,88.37,44.29,69.98,11.7,33.86,97.83,87.88
