# Reading Google Sheets File That Contain Match Infos

In [8]:
import pandas as pd

# UEFA Google Sheets URL
csv_url = "https://docs.google.com/spreadsheets/d/1WfEG-1icUjj6k7TGePJQEXH-w0TLEIcN/export?format=csv"

# Read the Google Sheet as a DataFrame
df = pd.read_csv(csv_url, dtype={'FTHG': 'Int64', 'FTAG': 'Int64', 'HTHG': 'Int64', 'HTAG': 'Int64'})

# Convert 'date' column to datetime
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')

# Find the latest date for each league
latest_dates = df.groupby('League')['Date'].max().rename('latest_date')

# Merge with the original DataFrame
df = df.merge(latest_dates, on='League')

# Calculate the time difference in days
df['time_diff'] = (df['latest_date'] - df['Date']).dt.days
df.head()

Unnamed: 0,Date,League,Home,Away,FTHG,FTAG,HTHG,HTAG,latest_date,time_diff
0,2024-09-17,UCL,Juventus,PSV,3,1,2,0,2025-03-12,176
1,2024-09-17,UCL,Young Boys,Aston Villa,0,3,0,2,2025-03-12,176
2,2024-09-17,UCL,Bayern,Dinamo Zagreb,9,2,3,0,2025-03-12,176
3,2024-09-17,UCL,Milan,Liverpool,1,3,1,2,2025-03-12,176
4,2024-09-17,UCL,Real Madrid,Stuttgart,3,1,0,0,2025-03-12,176


# Separating Previous and Future Matches and Tournaments

In [9]:
# Separate rows with NA values
next_matches = df[df.isna().any(axis=1)]
next_leagues = next_matches['League'].unique().tolist()

# Separate rows without NA values
previous_matches = df[~df.isna().any(axis=1)]

next_matches.head()

Unnamed: 0,Date,League,Home,Away,FTHG,FTAG,HTHG,HTAG,latest_date,time_diff
624,2025-03-11,UCL,Barcelona,Benfica,,,,,2025-03-12,1
625,2025-03-11,UCL,Leverkusen,Bayern,,,,,2025-03-12,1
626,2025-03-11,UCL,Inter,Feyenoord,,,,,2025-03-12,1
627,2025-03-11,UCL,Liverpool,PSG,,,,,2025-03-12,1
628,2025-03-12,UCL,Lille,Dortmund,,,,,2025-03-12,0


# Calculating Functions Needed For Dixon-Coles Model

In [10]:
from scipy.optimize import minimize
from scipy.stats import poisson
import numpy as np

def rho_correction(x, y, lambda_x, mu_y, rho):
    if x==0 and y==0:
        return 1- (lambda_x * mu_y * rho)
    elif x==0 and y==1:
        return 1 + (lambda_x * rho)
    elif x==1 and y==0:
        return 1 + (mu_y * rho)
    elif x==1 and y==1:
        return 1 - rho
    else:
        return 1.0

def dc_log_like(x, y, alpha_x, beta_x, alpha_y, beta_y, rho, gamma):
    lambda_x, mu_y = np.exp(alpha_x + beta_y + gamma), np.exp(alpha_y + beta_x) 
    return (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) + 
            np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))

def solve_parameters_decay(dataset, half_or_full = 'full', xi=0.001, debug = False, init_vals=None, 
                           options={'disp': True, 'maxiter':100},
                     constraints = [{'type':'eq', 'fun': lambda x: sum(x[:20])-20}] , **kwargs):
    teams = np.sort(dataset['Home'].unique())
    # check for no weirdness in dataset
    away_teams = np.sort(dataset['Away'].unique())
    if not np.array_equal(teams, away_teams):
        raise ValueError("Home Teams Not Equal To Away Teams")
    n_teams = len(teams)
    if init_vals is None:
        # random initialisation of model parameters
        init_vals = np.concatenate((np.random.uniform(0,1,(n_teams)), # attack strength
                                      np.random.uniform(0,-1,(n_teams)), # defence strength
                                      np.array([0,1.0]) # rho (score correction), gamma (home advantage)
                                     ))
        
    def dc_log_like_decay(x, y, alpha_x, beta_x, alpha_y, beta_y, rho, gamma, t, xi=xi):
        lambda_x, mu_y = np.exp(alpha_x + beta_y + gamma), np.exp(alpha_y + beta_x) 
        return  np.exp(-xi*t) * (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) + 
                                  np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))

    def estimate_paramters(params):
        score_coefs = dict(zip(teams, params[:n_teams]))
        defend_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
        rho, gamma = params[-2:]
        if half_or_full == 'full':
            log_like = [dc_log_like_decay(row.FTHG, row.FTAG, score_coefs[row.Home], defend_coefs[row.Home],
                                      score_coefs[row.Away], defend_coefs[row.Away], 
                                      rho, gamma, row.time_diff, xi=xi) for row in dataset.itertuples()]
        elif half_or_full == 'half':
            log_like = [dc_log_like_decay(row.HTHG, row.HTAG, score_coefs[row.Home], defend_coefs[row.Home],
                                      score_coefs[row.Away], defend_coefs[row.Away], 
                                      rho, gamma, row.time_diff, xi=xi) for row in dataset.itertuples()]
        return -sum(log_like)
    opt_output = minimize(estimate_paramters, init_vals, options=options, constraints = constraints)
    if debug:
        # sort of hacky way to investigate the output of the optimisation process
        return opt_output
    else:
        return dict(zip(["attack_"+team for team in teams] + 
                        ["defence_"+team for team in teams] +
                        ['rho', 'home_adv'],
                        opt_output.x))

# Calculating Lambda Values for Dixon-Coles Model

In [11]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
stats_df = pd.DataFrame()
full_time_models = []
half_time_models = []

for league in next_leagues:
    league_df = previous_matches[previous_matches['League'] == league]
    
    full_time_estimates = solve_parameters_decay(league_df, half_or_full = 'full')
    full_time_models.append(full_time_estimates)

    half_time_estimates = solve_parameters_decay(league_df, half_or_full = 'half')
    half_time_models.append(half_time_estimates)

  np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))
  return  np.exp(-xi*t) * (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) +


Iteration limit reached    (Exit mode 9)
            Current function value: 431.583482744006
            Iterations: 100
            Function evaluations: 7635
            Gradient evaluations: 100
Optimization terminated successfully    (Exit mode 0)
            Current function value: 290.0164873616159
            Iterations: 58
            Function evaluations: 4412
            Gradient evaluations: 58
Iteration limit reached    (Exit mode 9)
            Current function value: 411.7144346357842
            Iterations: 100
            Function evaluations: 7656
            Gradient evaluations: 100
Iteration limit reached    (Exit mode 9)
            Current function value: 282.6966321827495
            Iterations: 100
            Function evaluations: 7772
            Gradient evaluations: 100
Iteration limit reached    (Exit mode 9)
            Current function value: 312.6181203791248
            Iterations: 100
            Function evaluations: 7674
            Gradient evaluat

# Calculating Probability Matrices for HT/FT

In [12]:
#First Function needs work to make it more understandable and a df rather than matrix!
def dixon_coles_simulate_match(params_dict, homeTeam, awayTeam, max_goals=10):
    team_avgs = [np.exp(params_dict['attack_'+homeTeam] + params_dict['defence_'+awayTeam] + params_dict['home_adv']),
                 np.exp(params_dict['defence_'+homeTeam] + params_dict['attack_'+awayTeam])]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in team_avgs]
    output_matrix = np.outer(np.array(team_pred[0]), np.array(team_pred[1]))
    correction_matrix = np.array([[rho_correction(home_goals, away_goals, team_avgs[0],
                                                   team_avgs[1], params_dict['rho']) for away_goals in range(2)]
                                   for home_goals in range(2)])
    output_matrix[:2,:2] = output_matrix[:2,:2] * correction_matrix
    return output_matrix

full_time_matrices = []
half_time_matrices = []

for i in range(len(next_matches)):
    my_league = next_matches['League'].iloc[i]
    league_index = next_leagues.index(my_league)
    ft_match_score_matrix = dixon_coles_simulate_match(full_time_models[league_index], 
                                                       next_matches['Home'].iloc[i], next_matches['Away'].iloc[i], max_goals = 8)
    ht_match_score_matrix = dixon_coles_simulate_match(half_time_models[league_index], 
                                                       next_matches['Home'].iloc[i], next_matches['Away'].iloc[i], max_goals = 4)
    full_time_matrices.append(ft_match_score_matrix)
    half_time_matrices.append(ht_match_score_matrix)

# Calculating Probabilities of Dixon-Coles Model

In [13]:
ft1, ftx, ft2, ft_score = [], [], [], []
over_15, over_25, under_35, under_45, btts = [], [], [], [], []
ht1, htx, ht2, ht_score, ht_over05, ht_under15 = [], [], [], [], [], []
ho05, ao05, ho15, ao15, hu25, au25 = [], [], [], [], [], []

# Helper function to calculate total goals for each score
def total_goals(i, j):
    return i + j

for i in range(len(next_matches)):
    my_matrix = full_time_matrices[i]
    ht_matrix = half_time_matrices[i]

    ft1.append(round(np.sum(np.tril(my_matrix, k=-1)) * 100, 2)) # Sum of lower triangular values (home win)
    ftx.append(round(np.sum(np.diag(my_matrix)) * 100, 2)) # Sum of diagonal values (draw)
    ft2.append(round(np.sum(np.triu(my_matrix, k=1)) * 100, 2)) # Sum of higher triangular values (away_win)
    
    max_score = np.unravel_index(np.argmax(my_matrix), my_matrix.shape) # Find the index of the maximum score
    home_goals, away_goals = max_score
    ft_score.append(f"{home_goals}-{away_goals}") # Format the score as 'home-away'

    # Calculate the probabilities
    over_15.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) > 1.5]) * 100, 2))
    over_25.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) > 2.5]) * 100, 2))
    under_35.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) <= 3.5]) * 100, 2))
    under_45.append(round(np.sum([my_matrix[i, j] for i in range(my_matrix.shape[0]) for j in range(my_matrix.shape[1]) if total_goals(i, j) <= 4.5]) * 100, 2))

    # Calculate BTTS (both teams to score and goals != 0)
    btts.append(round(np.sum([my_matrix[i, j] for i in range(1, my_matrix.shape[0]) for j in range(1, my_matrix.shape[1])]) * 100, 2)) 

    # Calculate statistics for Half Time
    ht1.append(round(np.sum(np.tril(ht_matrix, k=-1)) * 100, 2)) # Sum of lower triangular values (home win)
    htx.append(round(np.sum(np.diag(ht_matrix)) * 100, 2)) # Sum of diagonal values (draw)
    ht2.append(round(np.sum(np.triu(ht_matrix, k=1)) * 100, 2)) # Sum of higher triangular values (away_win)

    ht_max_score = np.unravel_index(np.argmax(ht_matrix), ht_matrix.shape) # Find the index of the maximum score
    ht_hogs, ht_awgs = ht_max_score
    ht_score.append(f"{ht_hogs}-{ht_awgs}") # Format the score as 'home-away'

    ht_over05.append(round(np.sum([ht_matrix[i, j] for i in range(ht_matrix.shape[0]) for j in range(ht_matrix.shape[1]) if total_goals(i, j) > 0.5]) * 100, 2))   
    ht_under15.append(round(np.sum([ht_matrix[i, j] for i in range(ht_matrix.shape[0]) for j in range(ht_matrix.shape[1]) if total_goals(i, j) < 1.5]) * 100, 2)) 

    ho05.append(round(np.sum(my_matrix[1:,:]) * 100, 2))
    ao05.append(round(np.sum(my_matrix[:,1:]) * 100, 2))
    ho15.append(round(np.sum(my_matrix[2:,:]) * 100, 2))
    ao15.append(round(np.sum(my_matrix[:,2:]) * 100, 2))
    hu25.append(round(np.sum(my_matrix[:3,:]) * 100, 2))
    au25.append(round(np.sum(my_matrix[:,:3]) * 100, 2))
    

# Combine lists into a DataFrame
final_results = pd.DataFrame({
    'League': next_matches['League'], 'Home': next_matches['Home'], 'Away': next_matches['Away'],
    'FT1': ft1, 'FTX': ftx, 'FT2': ft2, 'FTR': ft_score,
    'DC1X': [x + y for x, y in zip(ft1, ftx)], 'DC12': [x + y for x, y in zip(ft1, ft2)], 'DCX2': [x + y for x, y in zip(ftx, ft2)],
    '1.5O': over_15, '2.5O': over_25, '3.5U': under_35, '4.5U': under_45, 'BTTS': btts,
    'HT1': ht1, 'HTX': htx, 'HT2': ht2, 'HTR': ht_score,
    'HTDC1X': [x + y for x, y in zip(ht1, htx)], 'HTDC12': [x + y for x, y in zip(ht1, ht2)], 'HTDCX2': [x + y for x, y in zip(htx, ht2)],
    'HT0.5O': ht_over05, 'HT1.5U': ht_under15, 'H0.5O':ho05, 'A0.5O':ao05, 'H1.5O':ho15, 'A1.5O':ao15, 'H2.5U':hu25, 'A2.5U':au25
})

# Function to highlight values higher than threshold
def highlight_values(value):
    if isinstance(value, str):
        return ''  # Return empty string for NaN values
    elif value > 70:
    #color = 'red'
        return 'background-color: red'
    else:
        return ''

# Apply the style
with pd.option_context('display.precision', 2):
    styled_df = final_results.style.applymap(highlight_values)
styled_df.to_excel("UEFA.xlsx", index = False)
# Display the styled DataFrame
from IPython.display import display, HTML
display(styled_df)

  styled_df = final_results.style.applymap(highlight_values)


Unnamed: 0,League,Home,Away,FT1,FTX,FT2,FTR,DC1X,DC12,DCX2,1.5O,2.5O,3.5U,4.5U,BTTS,HT1,HTX,HT2,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O,HT1.5U,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U
624,UCL,Barcelona,Benfica,65.36,15.34,19.12,2-1,80.7,84.48,34.46,90.0,76.58,42.59,62.12,68.59,35.37,35.53,28.96,0-0,70.9,64.33,64.49,75.87,62.25,92.8,74.8,74.14,40.18,50.46,83.6
625,UCL,Leverkusen,Bayern,35.55,21.61,42.84,0-1,57.16,78.39,64.45,75.45,53.48,68.79,84.49,55.19,47.04,41.15,11.67,0-0,88.19,58.71,52.82,64.85,73.73,73.5,77.45,38.31,43.87,85.05,81.14
626,UCL,Inter,Feyenoord,83.84,14.18,1.96,1-0,98.02,85.8,16.14,61.82,35.22,83.72,93.71,8.59,67.95,31.38,0.0,1-0,99.33,67.95,31.38,67.95,67.75,86.42,10.39,59.32,0.56,67.72,99.96
627,UCL,Liverpool,PSG,59.07,23.51,17.42,1-0,82.58,76.49,40.93,59.3,33.93,84.63,94.19,33.38,32.35,45.48,22.14,0-0,77.83,54.49,67.62,62.19,76.9,76.44,45.85,42.38,12.63,82.24,97.55
628,UCL,Lille,Dortmund,48.93,18.6,32.43,2-1,67.53,81.36,51.03,87.63,72.34,48.27,67.66,69.84,5.37,35.56,58.71,0-1,40.93,64.08,94.27,67.81,69.54,87.55,81.11,61.66,49.66,65.31,76.53
629,UCL,Arsenal,PSV,90.41,2.59,1.28,4-0,93.0,91.69,3.87,91.24,84.92,21.12,37.05,41.63,67.08,8.15,4.58,3-0,75.23,71.66,12.73,77.88,12.61,93.47,42.18,89.6,11.28,14.02,92.16
630,UCL,Aston Villa,Club Brugge,67.36,17.53,15.09,1-0,84.89,82.45,32.62,75.04,52.48,69.71,85.13,45.31,20.69,35.52,43.6,0-0,56.21,64.29,79.12,74.54,63.59,86.42,54.07,59.34,18.34,67.71,95.54
631,UCL,Athletico Madrid,Real Madrid,35.69,19.02,45.27,1-2,54.71,80.96,64.29,86.93,71.11,49.81,69.09,69.27,49.88,28.63,20.85,1-0,78.51,70.73,49.48,83.09,50.51,82.08,85.89,51.3,58.29,75.16,68.73
632,UEL,Athletic Bilbao,Roma,47.15,25.81,27.04,1-0,72.96,74.19,52.85,70.76,45.32,76.0,89.27,49.1,29.25,49.84,20.87,0-0,79.09,50.12,70.71,60.99,72.84,76.69,64.37,42.75,27.6,81.97,91.37
633,UEL,Frankfurt,Ajax,56.3,21.91,21.77,1-1,78.21,78.07,43.68,79.44,57.06,65.33,81.98,55.96,23.64,54.26,22.08,0-0,77.9,45.72,76.34,54.62,79.04,84.84,66.21,56.25,29.56,70.71,90.32
