# [Building a Simple Football Prediction Model](https://medium.com/geekculture/building-a-simple-football-prediction-model-using-machine-learning-f061e607bec5)

See also: [How to Compute Football Implied Probabilities From Bookmakers Odds](https://octosport.medium.com/how-to-compute-football-implied-probabilities-from-bookmakers-odds-bbb33ccf7c1d)

In [1]:
import warnings
warnings.filterwarnings('ignore')

import sqlite3
import pandas as pd
from datetime import *
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from logistic_model import LogisticRegression, LogisticModel
from implied import ImpliedProbability
from sklearn.metrics import plot_confusion_matrix, accuracy_score

In [2]:
def connect_database(db):
    """Open the SQLite database ``db`` and return ``(cursor, connection)``."""
    connection = sqlite3.connect(db)
    return connection.cursor(), connection


def leer_partidos(con):
    """Read every row of the ``partidos`` table into a DataFrame.

    ``con`` is the open database connection handed to pandas.
    """
    query = 'SELECT * FROM partidos'
    return pd.read_sql_query(query, con)

# Relative path of the SQLite database that holds the `partidos` table.
DB_PATH = '../../Clasificacion.db'

cur, con = connect_database(DB_PATH)
partidos = leer_partidos(con)

In [3]:
def _epoch_to_date(value):
    """Convert an epoch-seconds value to a ``date``.

    Values that already are ``date`` objects are returned unchanged, so the
    conversion can be applied again to a column that was converted before.
    """
    if isinstance(value, date):
        return value
    return datetime.fromtimestamp(int(value)).date()


# The column is overwritten in place. Without the pass-through guard above, a
# second execution of this cell would call int() on the date objects written
# by the first execution and raise TypeError.
partidos['timestamp'] = partidos['timestamp'].map(_epoch_to_date)
partidos['fecha'] = partidos['timestamp']
partidos = partidos.sort_values(by='fecha')

In [4]:
# Display the matches of season '2021-22', jornada 8.
is_season = partidos['temporada'] == '2021-22'
is_jornada = partidos['jornada'] == 8
partidos[is_season & is_jornada]

Unnamed: 0,id_partido,temporada,division,jornada,equipo_local,equipo_visitante,goles_local,goles_visitante,fecha,timestamp,...,visitante_puntos_antes,visitante_jugados_antes,visitante_ganados_antes,visitante_empatados_antes,visitante_perdidos_antes,visitante_goles_favor_antes,visitante_goles_contra_antes,visitante_racha_partidos,visitante_racha_gf,visitante_racha_gc
39750,39837,2021-22,2,8,Ponferradina,Valladolid,2,2,2021-10-01,2021-10-01,...,10,7,3,1,3,7,7,,,
39856,39742,2021-22,1,8,Atletico de Bilbao,Alaves,1,0,2021-10-01,2021-10-01,...,3,6,1,0,5,2,11,,,
39859,39745,2021-22,1,8,Cadiz,Valencia,0,0,2021-10-02,2021-10-02,...,11,7,3,2,2,12,8,,,
39754,39841,2021-22,2,8,Alcorcon,Real Sociedad-B,1,4,2021-10-02,2021-10-02,...,6,7,1,3,3,4,7,,,
39753,39840,2021-22,2,8,Zaragoza,Oviedo,0,0,2021-10-02,2021-10-02,...,10,7,2,4,1,8,6,,,
39752,39839,2021-22,2,8,Huesca,Tenerife,1,2,2021-10-02,2021-10-02,...,11,7,3,2,2,8,6,,,
39751,39838,2021-22,2,8,Amorebieta,Sporting de Gijon,1,1,2021-10-02,2021-10-02,...,16,7,5,1,1,11,7,,,
39857,39743,2021-22,1,8,Osasuna,Rayo Vallecano,1,0,2021-10-02,2021-10-02,...,13,7,4,1,2,13,7,,,
39860,39746,2021-22,1,8,Atletico de Madrid,Barcelona,2,0,2021-10-02,2021-10-02,...,12,6,3,3,0,11,5,,,
39858,39744,2021-22,1,8,Mallorca,Levante,1,0,2021-10-02,2021-10-02,...,4,7,0,4,3,6,12,,,


In [5]:
# Matches dated on or after 2021-10-06, previewed with their first 10 rows.
part_test = partidos[partidos['fecha'] >= date(2021, 10, 6)]
part_test.head(10)

Unnamed: 0,id_partido,temporada,division,jornada,equipo_local,equipo_visitante,goles_local,goles_visitante,fecha,timestamp,...,visitante_puntos_antes,visitante_jugados_antes,visitante_ganados_antes,visitante_empatados_antes,visitante_perdidos_antes,visitante_goles_favor_antes,visitante_goles_contra_antes,visitante_racha_partidos,visitante_racha_gf,visitante_racha_gc
39761,39848,2021-22,2,9,Valladolid,Malaga,1,1,2021-10-08,2021-10-08,...,11,8,3,2,3,7,10,,,
39764,39851,2021-22,2,9,Almeria,Las Palmas,1,1,2021-10-09,2021-10-09,...,13,8,3,4,1,12,9,,,
39762,39849,2021-22,2,9,Lugo,Girona,1,0,2021-10-09,2021-10-09,...,8,8,2,2,4,6,8,,,
39763,39850,2021-22,2,9,Mirandes,Eibar,3,3,2021-10-09,2021-10-09,...,14,8,4,2,2,12,10,,,
39765,39852,2021-22,2,9,Oviedo,Sporting de Gijon,1,1,2021-10-09,2021-10-09,...,17,8,5,2,1,12,8,,,
39766,39853,2021-22,2,9,Real Sociedad-B,Ponferradina,1,1,2021-10-10,2021-10-10,...,16,8,5,1,2,12,7,,,
39770,39857,2021-22,2,9,Tenerife,Amorebieta,2,1,2021-10-10,2021-10-10,...,7,8,1,4,3,7,11,,,
39769,39856,2021-22,2,9,Fuenlabrada,Leganes,2,1,2021-10-10,2021-10-10,...,9,8,2,3,3,7,8,,,
39768,39855,2021-22,2,9,Alcorcon,Burgos-Cf,1,0,2021-10-10,2021-10-10,...,9,8,2,3,3,6,6,,,
39767,39854,2021-22,2,9,Cartagena,Ibiza-Eivissa,5,1,2021-10-10,2021-10-10,...,11,8,2,5,1,10,9,,,


In [6]:
# Split boundaries, defined once so the train and test sides always agree on
# the cutoff date.
TRAIN_START = date(2016, 8, 1)
SPLIT_DATE = date(2021, 10, 21)

partidos = partidos.sort_values(by='fecha')

# Test set: division-1 matches dated on or after SPLIT_DATE.
# NOTE: this is derived from `partidos` before it is truncated below; running
# this cell a second time starts from the truncated frame and leaves
# part_test empty.
part_test = partidos[(partidos['division'] == 1) & (partidos['fecha'] >= SPLIT_DATE)]

# Train set: matches of every division dated strictly between TRAIN_START and
# SPLIT_DATE.
partidos = partidos[(partidos['fecha'] > TRAIN_START) & (partidos['fecha'] < SPLIT_DATE)]

# Second name for the training-period frame; the JAX export cell reads it.
partidos_original = partidos

In [7]:
# Keep only the columns the model consumes: both team names and both scores.
model_columns = ['equipo_local', 'equipo_visitante', 'goles_local', 'goles_visitante']
train = partidos.filter(items=model_columns)
test = part_test.filter(items=model_columns)

In [8]:
# Distinct home-team names that appear in the training matches.
equipos = train['equipo_local'].unique()

In [9]:
# Preview the first rows of the test frame.
test.head()

Unnamed: 0,equipo_local,equipo_visitante,goles_local,goles_visitante
39882,Elche,Espanol,0,0
39874,Atletico de Bilbao,Villarreal,0,0
39877,Getafe,Celta,0,0
39876,Betis,Rayo Vallecano,0,0
39879,Osasuna,Granada,0,0


## Entrenamos el Modelo

In [10]:
# Fit the model on the training matches: home team, away team, and the goals
# scored by each side, passed in that order.
model = LogisticModel()
model.fit(
    train['equipo_local'],
    train['equipo_visitante'],
    train['goles_local'],
    train['goles_visitante'],
)

In [11]:
def result(row):
    """Return the label of the actual match outcome.

    The label is the home team's name when the home side scored more goals,
    the away team's name when the away side scored more, and the string
    'draw' otherwise.
    """
    home_goals, away_goals = row['goles_local'], row['goles_visitante']
    if home_goals > away_goals:
        return row['equipo_local']
    if home_goals < away_goals:
        return row['equipo_visitante']
    return 'draw'

def correct(row):
    """Return 1 when the forecast winner equals the real winner, else 0."""
    return 1 if row['forecast_winner'] == row['real_winner'] else 0


In [12]:
# Model forecasts for every test match (home team first, away team second).
test['forecast_winner'] = test.apply(
    lambda match: model.predict_winner(match['equipo_local'], match['equipo_visitante']),
    axis=1,
)
test['forecast_probs'] = test.apply(
    lambda match: model.predict_proba(match['equipo_local'], match['equipo_visitante']),
    axis=1,
)

# Actual outcome label and a 1/0 flag marking whether the forecast matched it.
test['real_winner'] = test.apply(result, axis=1)
test['correct'] = test.apply(correct, axis=1)

In [13]:
# Fraction of test matches whose forecast winner equals the real outcome label.
# NOTE(review): the test rows previewed by test.head() all show 0-0 scores; if
# those are unplayed fixtures with placeholder goals, real_winner is 'draw' for
# every row and this score is not a meaningful accuracy — confirm with the data.
accuracy_score(test.real_winner, test.forecast_winner)

0.1

## Extract for ELO Rating with JAX

In [14]:
# Source column -> exported column name, in export order.
export_columns = {
    'fecha': 'date',
    'equipo_local': 'home',
    'equipo_visitante': 'away',
    'goles_local': 'home_goals',
    'goles_visitante': 'away_goals',
}

# Division-1 rows of the training-period frame, reduced to the exported columns.
jax_export = (
    partidos_original[partidos_original['division'] == 1]
    .filter(list(export_columns))
    .rename(columns=export_columns)
)
jax_export.to_csv('jax_elo_dataset.csv', index=False)

In [15]:
# Show the last 10 exported rows.
jax_export.tail(10)

Unnamed: 0,date,home,away,home_goals,away_goals
39861,2021-10-03,Elche,Celta,1,0
39864,2021-10-03,Villarreal,Betis,2,0
39866,2021-10-16,Levante,Getafe,0,0
39867,2021-10-16,Real Sociedad,Mallorca,1,0
39868,2021-10-17,Rayo Vallecano,Elche,2,1
39870,2021-10-17,Villarreal,Osasuna,1,2
39871,2021-10-17,Barcelona,Valencia,3,1
39869,2021-10-17,Celta,Sevilla,0,1
39872,2021-10-18,Alaves,Betis,0,1
39873,2021-10-18,Espanol,Cadiz,2,0


## Copa del Rey 2020-21

In [16]:
# Outcome probabilities for Barcelona vs Valencia; arguments follow the same
# (home, away) order used when forecasting the test set.
model.predict_proba('Barcelona', 'Valencia')

Unnamed: 0,Valencia,draw,Barcelona
probability,0.08082,0.167257,0.751923


In [17]:
# Predicted winner label for the same Barcelona (home) vs Valencia (away) pairing.
model.predict_winner('Barcelona', 'Valencia')

'Barcelona'

In [18]:
# Fetch the coefficient table from the model and preview its first 15 rows.
coefs = model.get_coef()
coefs.head(15)

Unnamed: 0,away wins,draw,home wins
home_Alaves,-0.735698,-0.506906,0.127076
home_Albacete,-0.100594,-0.067132,-0.788215
home_Alcorcon,0.120855,-0.584575,-0.528946
home_Almeria,-0.552523,-0.408833,-0.109377
home_Amorebieta,-0.419513,0.512954,-0.756649
home_Atletico de Bilbao,-1.389957,-0.364323,0.534396
home_Atletico de Madrid,-2.425315,-0.807631,1.493554
home_Barcelona,-2.479543,-1.133026,1.782596
home_Barcelona B,0.765682,-0.54306,-1.096059
home_Betis,-0.994814,-0.588989,0.434259


In [39]:
# Outcome probabilities for Barcelona (home) vs Real Madrid (away).
model.predict_proba('Barcelona', 'Real Madrid')

Unnamed: 0,Real Madrid,draw,Barcelona
probability,0.291694,0.183434,0.524872


In [40]:
# Predicted winner label for Atletico de Bilbao (home) vs Villarreal (away).
model.predict_winner('Atletico de Bilbao', 'Villarreal')

'Atletico de Bilbao'

In [41]:
# Fetch the coefficient table again (same call as the earlier coefs cell).
coefs = model.get_coef()

In [43]:
# Coefficient row labelled 'home_Osasuna'.
coefs.loc['home_Osasuna']

Unnamed: 0,away wins,draw,home wins
home_Osasuna,-0.729587,-0.377976,-0.012127


In [44]:
# Coefficient row labelled 'away_Granada'.
coefs.loc['away_Granada']

Unnamed: 0,away wins,draw,home wins
away_Granada,-0.584111,-0.51393,-0.032324


In [26]:
# Coefficient row labelled 'home_Barcelona'.
coefs.loc['home_Barcelona']

Unnamed: 0,away wins,draw,home wins
home_Barcelona,-2.479543,-1.133026,1.782596


In [27]:
# Coefficient row labelled 'away_Real Madrid'.
coefs.loc['away_Real Madrid']

Unnamed: 0,away wins,draw,home wins
away_Real Madrid,1.388931,-0.53896,-1.972955


In [28]:
# Coefficient row labelled 'home_Atletico de Bilbao'.
coefs.loc['home_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
home_Atletico de Bilbao,-1.389957,-0.364323,0.534396


In [46]:
# Coefficient row labelled 'away_Villarreal'.
coefs.loc['away_Villarreal']

Unnamed: 0,away wins,draw,home wins
away_Villarreal,0.490662,-0.291027,-1.102285


In [32]:
# 'away wins' column of the 'away_Barcelona' row; the displayed result is a
# one-element Series still indexed by the row label, not a bare scalar.
coefs.loc['away_Barcelona']['away wins']

away_Barcelona    1.384098
Name: away wins, dtype: float64

In [33]:
# Print the three outcome coefficients of the 'away_Barcelona' row.
# Each column lookup yields a one-element Series indexed by the row label, so
# the value is taken by position with .iloc[0]; plain [0] on a label-indexed
# Series depends on a positional fallback that pandas has deprecated.
away_barcelona = coefs.loc['away_Barcelona']
for outcome in ('away wins', 'draw', 'home wins'):
    print(away_barcelona[outcome].iloc[0])

1.3840977199634135
-0.5158283298870145
-2.0015859415800605


In [34]:
# Type of a single coefficient value; .iloc[0] selects it by position instead
# of relying on the deprecated positional fallback of [0] on a labelled Series.
type(coefs.loc['away_Barcelona']['away wins'].iloc[0])

numpy.float64

## Obtener probabilidades de las casas de apuestas

In [35]:
# One converter per method: the default plus the three named explicitly.
probs = ImpliedProbability()
probs_mul = ImpliedProbability('multiplicative')
probs_add = ImpliedProbability('additive')
probs_pow = ImpliedProbability('power')

# Convert the same odds triple with every converter to compare the methods.
sample_odds = (5.75, 4.20, 1.53)
for converter in (probs, probs_mul, probs_add, probs_pow):
    print(converter.convert(*sample_odds).implied_probabilities)

[0.15500646 0.2180107  0.62698284]
[0.16320622 0.22343708 0.6133567 ]
[0.15204536 0.21622755 0.63172709]
[0.15271821 0.21401535 0.63326645]


In [36]:
# Implied probabilities for the odds (5.25, 4.00, 1.60) with the default converter.
probs.convert(5.25, 4.00, 1.60).implied_probabilities

array([0.17132532, 0.22978496, 0.59888973])

In [37]:
# Implied probabilities for the odds (5.50, 4.00, 1.57) with the default converter.
probs.convert(5.50, 4.00, 1.57).implied_probabilities

array([0.1618835 , 0.22876829, 0.60934822])

In [38]:
# Implied probabilities for the odds (1.75, 3.70, 4.75) with the default converter.
probs.convert(1.75, 3.70, 4.75).implied_probabilities

array([0.55120443, 0.25386094, 0.19493462])