# [Building a Simple Football Prediction Model](https://medium.com/geekculture/building-a-simple-football-prediction-model-using-machine-learning-f061e607bec5)

and [How to Compute Football Implied Probabilities From Bookmakers Odds](https://octosport.medium.com/how-to-compute-football-implied-probabilities-from-bookmakers-odds-bbb33ccf7c1d)

In [1]:
import warnings
warnings.filterwarnings('ignore')

import sqlite3
import pandas as pd
from datetime import *
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from logistic_model import LogisticRegression, LogisticModel
from implied import ImpliedProbability
from sklearn.metrics import plot_confusion_matrix, accuracy_score

In [2]:
def connect_database(db):
    """Open the SQLite database at ``db``.

    Returns a ``(cursor, connection)`` tuple; the cursor belongs to the
    returned connection. The caller is responsible for closing the connection.
    """
    conexion = sqlite3.connect(db)
    return conexion.cursor(), conexion


def leer_partidos(con):
    """Read every row and column of the ``partidos`` table into a DataFrame.

    ``con`` is an open database connection accepted by ``pd.read_sql_query``.
    """
    consulta = 'SELECT * FROM partidos'
    return pd.read_sql_query(consulta, con)

# Open the SQLite file and load the whole `partidos` table into a DataFrame.
RUTA_BD = '../../Clasificacion.db'
cur, con = connect_database(RUTA_BD)
partidos = leer_partidos(con)

In [3]:
# Turn the epoch value stored in 'timestamp' into a calendar date, mirror it in 'fecha'
# and order the matches chronologically.
# Values that are already dates are passed through untouched, so re-running this cell
# no longer raises TypeError from int(<date>); the result of a first run is unchanged.
# NOTE(review): datetime.fromtimestamp converts using the machine's local time zone, so
# a match close to midnight can land on a different day on another machine — confirm
# this is intended.
partidos['timestamp'] = partidos['timestamp'].map(
    lambda valor: valor if isinstance(valor, date) else datetime.fromtimestamp(int(valor)).date()
)
partidos['fecha'] = partidos['timestamp']
partidos = partidos.sort_values(by='fecha')

In [4]:
# Display the rows whose season is '2021-22' and whose matchday ('jornada') is 17.
partidos[(partidos['temporada']=='2021-22') & (partidos['jornada']==17)]

Unnamed: 0,id_partido,temporada,division,jornada,equipo_local,equipo_visitante,goles_local,goles_visitante,fecha,timestamp,...,visitante_puntos_antes,visitante_jugados_antes,visitante_ganados_antes,visitante_empatados_antes,visitante_perdidos_antes,visitante_goles_favor_antes,visitante_goles_contra_antes,visitante_racha_partidos,visitante_racha_gf,visitante_racha_gc
39849,40007,2021-22,2,17,Fuenlabrada,Mirandes,1,1,2021-11-19,2021-11-19,...,18,16,5,3,8,23,27,,,
39852,40010,2021-22,2,17,Almeria,Valladolid,3,1,2021-11-20,2021-11-20,...,28,16,8,4,4,24,17,,,
39851,40009,2021-22,2,17,Malaga,Las Palmas,2,1,2021-11-20,2021-11-20,...,27,16,7,6,3,24,18,,,
39850,40008,2021-22,2,17,Oviedo,Amorebieta,2,0,2021-11-20,2021-11-20,...,13,16,2,7,7,19,25,,,
39855,40013,2021-22,2,17,Huesca,Ibiza-Eivissa,0,0,2021-11-21,2021-11-21,...,22,16,5,7,4,18,19,,,
39857,40015,2021-22,2,17,Ponferradina,Sporting de Gijon,4,1,2021-11-21,2021-11-21,...,22,16,6,4,6,16,17,,,
39856,40014,2021-22,2,17,Tenerife,Alcorcon,1,0,2021-11-21,2021-11-21,...,8,16,2,2,12,13,33,,,
39853,40011,2021-22,2,17,Cartagena,Burgos-Cf,1,0,2021-11-21,2021-11-21,...,19,16,5,4,7,14,17,,,
39854,40012,2021-22,2,17,Lugo,Eibar,2,2,2021-11-21,2021-11-21,...,31,16,9,4,3,22,17,,,
39858,40016,2021-22,2,17,Real Sociedad-B,Girona,1,2,2021-11-22,2021-11-22,...,21,16,6,3,7,18,18,,,


In [5]:
# Preview of the hold-out period: every match dated 2021-12-10 or later,
# without restricting the division.
part_test = partidos[partidos['fecha'] >= date(2021, 12, 10)]
part_test.head(10)

Unnamed: 0,id_partido,temporada,division,jornada,equipo_local,equipo_visitante,goles_local,goles_visitante,fecha,timestamp,...,visitante_puntos_antes,visitante_jugados_antes,visitante_ganados_antes,visitante_empatados_antes,visitante_perdidos_antes,visitante_goles_favor_antes,visitante_goles_contra_antes,visitante_racha_partidos,visitante_racha_gf,visitante_racha_gc
40050,50006,2021-22,1,17,Real Madrid,Atletico de Madrid,0,0,2021-12-12,2021-12-12,...,29,15,8,5,2,27,16,,,
40049,50005,2021-22,1,17,Osasuna,Barcelona,0,0,2021-12-12,2021-12-12,...,23,15,6,5,4,23,17,,,
40048,50004,2021-22,1,17,Espanol,Levante,0,0,2021-12-12,2021-12-12,...,8,16,0,8,8,13,28,,,
40047,50003,2021-22,1,17,Betis,Real Sociedad,0,0,2021-12-12,2021-12-12,...,29,16,8,5,3,19,13,,,
40046,50002,2021-22,1,17,Atletico de Bilbao,Sevilla,0,0,2021-12-12,2021-12-12,...,31,15,9,4,2,25,11,,,
40051,50007,2021-22,1,17,Valencia,Elche,0,0,2021-12-12,2021-12-12,...,15,16,3,6,7,15,22,,,
40052,50008,2021-22,1,17,Villarreal,Rayo Vallecano,0,0,2021-12-12,2021-12-12,...,27,16,8,3,5,24,16,,,
39882,50009,2021-22,2,20,Almeria,Zaragoza,0,0,2021-12-12,2021-12-12,...,26,19,5,11,3,17,16,,,
39883,50010,2021-22,2,20,Eibar,Malaga,0,0,2021-12-12,2021-12-12,...,26,19,7,5,7,18,24,,,
39884,50011,2021-22,2,20,Valladolid,Oviedo,0,0,2021-12-12,2021-12-12,...,28,19,6,10,3,22,17,,,


In [6]:
# Chronological order before splitting.
partidos = partidos.sort_values(by='fecha')

# Test set: division 1 matches dated 2021-12-10 or later.
part_test = (
    partidos
    .loc[lambda df: df['division'] == 1]
    .loc[lambda df: df['fecha'] >= date(2021, 12, 10)]
)

# Train set: matches strictly after 2016-08-01 and strictly before 2021-12-10.
# No division filter is applied here, unlike the test set.
# NOTE(review): `partidos` is rebound to the training window, so running this cell a
# second time leaves `part_test` empty (no remaining row reaches 2021-12-10).
partidos = partidos.loc[
    lambda df: (df['fecha'] > date(2016, 8, 1)) & (df['fecha'] < date(2021, 12, 10))
]

# Same object as the training frame (not a copy, and already date-filtered).
partidos_original = partidos

In [7]:
# Keep only the four columns handed to the model: both team names and both goal counts.
columnas_modelo = ['equipo_local', 'equipo_visitante', 'goles_local', 'goles_visitante']
train = partidos.filter(items=columnas_modelo)
test = part_test.filter(items=columnas_modelo)

In [8]:
# Distinct home-team names present in the training frame.
# `equipos` is not referenced again in the cells visible in this notebook.
equipos = train['equipo_local'].unique()

In [9]:
# Show the first rows of the test frame.
test.head()

Unnamed: 0,equipo_local,equipo_visitante,goles_local,goles_visitante
40048,Espanol,Levante,0,0
40051,Valencia,Elche,0,0
40046,Atletico de Bilbao,Sevilla,0,0
40047,Betis,Real Sociedad,0,0
40049,Osasuna,Barcelona,0,0


## Entrenamos el Modelo

In [10]:
# Fit the project's LogisticModel on the training columns, passed positionally as
# home team, away team, home goals, away goals.
model = LogisticModel()
model.fit(
    train['equipo_local'],
    train['equipo_visitante'],
    train['goles_local'],
    train['goles_visitante'],
)

In [11]:
def result(row):
    """Return the outcome label of a match row.

    The label is the home team's name when it scored more goals, the away
    team's name when it scored more, and the string 'draw' otherwise.
    """
    goles_casa = row['goles_local']
    goles_fuera = row['goles_visitante']
    if goles_casa > goles_fuera:
        return row['equipo_local']
    if goles_casa < goles_fuera:
        return row['equipo_visitante']
    return 'draw'

def correct(row):
    """Return 1 when the row's forecast winner equals its real winner, else 0."""
    return 1 if row['forecast_winner'] == row['real_winner'] else 0


In [12]:
# Model outputs for each test fixture, queried as (home team, away team).
test['forecast_winner'] = test.apply(
    lambda fila: model.predict_winner(fila['equipo_local'], fila['equipo_visitante']),
    axis=1,
)
test['forecast_probs'] = test.apply(
    lambda fila: model.predict_proba(fila['equipo_local'], fila['equipo_visitante']),
    axis=1,
)

# Recorded outcome of each fixture and a 1/0 flag for whether the forecast matched it.
test['real_winner'] = test.apply(result, axis=1)
test['correct'] = test.apply(correct, axis=1)

In [13]:
# Share of test fixtures whose forecast winner equals the recorded outcome.
# NOTE(review): the test rows displayed earlier all carry 0-0 scores, which result()
# labels 'draw'; presumably these are fixtures not yet played with placeholder goals —
# confirm before reading anything into this figure.
accuracy_score(test.real_winner, test.forecast_winner)

0.0

## Extract for ELO Rating with JAX

In [14]:
# Division 1 rows of the training window, reduced to date, teams and goals and renamed
# to English column names, then written to CSV without the index.
jax_export = (
    partidos_original[partidos_original['division'] == 1]
    .filter(['fecha', 'equipo_local', 'equipo_visitante', 'goles_local', 'goles_visitante'])
    .rename(columns={
        'fecha': 'date',
        'equipo_local': 'home',
        'equipo_visitante': 'away',
        'goles_local': 'home_goals',
        'goles_visitante': 'away_goals',
    })
)
jax_export.to_csv('jax_elo_dataset.csv', index=False)

In [15]:
# Show the last ten exported rows (the most recent ones, since the frame is date-sorted).
jax_export.tail(10)

Unnamed: 0,date,home,away,home_goals,away_goals
40035,2021-12-03,Granada,Alaves,2,1
40039,2021-12-04,Real Sociedad,Real Madrid,0,2
40036,2021-12-04,Sevilla,Villarreal,1,0
40037,2021-12-04,Barcelona,Betis,0,1
40038,2021-12-04,Atletico de Madrid,Mallorca,1,2
40043,2021-12-05,Celta,Valencia,1,2
40040,2021-12-05,Rayo Vallecano,Espanol,1,0
40042,2021-12-05,Levante,Osasuna,0,0
40041,2021-12-05,Elche,Cadiz,3,1
40044,2021-12-06,Getafe,Atletico de Bilbao,0,0


## Copa del Rey 2020-21

In [16]:
# Outcome probabilities with Barcelona as the first (home) team and Valencia as the second (away).
model.predict_proba('Barcelona', 'Valencia')

Unnamed: 0,Valencia,draw,Barcelona
probability,0.093691,0.171107,0.735202


In [17]:
# Forecast winner for the same pairing: Barcelona (home) vs Valencia (away).
model.predict_winner('Barcelona', 'Valencia')

'Barcelona'

In [18]:
# Fetch the fitted coefficient table from the model and show its first rows.
# Rows are labelled like 'home_<team>'; columns are 'away wins', 'draw', 'home wins'.
coefs = model.get_coef()
coefs.head(15)

Unnamed: 0,away wins,draw,home wins
home_Alaves,-0.735506,-0.53523,0.149991
home_Albacete,-0.108845,-0.058308,-0.787266
home_Alcorcon,0.091796,-0.497242,-0.578562
home_Almeria,-0.62224,-0.419811,-0.053957
home_Amorebieta,-0.084382,0.185982,-0.848928
home_Atletico de Bilbao,-1.378778,-0.351755,0.511796
home_Atletico de Madrid,-2.368186,-0.792323,1.447319
home_Barcelona,-2.336086,-1.104222,1.692915
home_Barcelona B,0.757372,-0.536444,-1.094403
home_Betis,-1.006686,-0.632067,0.476282


In [24]:
# Outcome probabilities with Osasuna as the first (home) team and Barcelona as the second (away).
model.predict_proba('Osasuna','Barcelona')

Unnamed: 0,Barcelona,draw,Osasuna
probability,0.609443,0.281329,0.109228


In [21]:
# Forecast winner with Real Madrid as the home team and Atletico de Madrid as the away team.
model.predict_winner('Real Madrid', 'Atletico de Madrid')

'Real Madrid'

In [22]:
# Fetch the coefficient table again (same call as in the earlier coefficient cell).
coefs = model.get_coef()

In [25]:
# Coefficient row labelled 'home_Osasuna'.
coefs.loc['home_Osasuna']

Unnamed: 0,away wins,draw,home wins
home_Osasuna,-0.734806,-0.321699,-0.062413


In [26]:
# Coefficient row labelled 'away_Osasuna'.
coefs.loc['away_Osasuna']

Unnamed: 0,away wins,draw,home wins
away_Osasuna,-0.145183,-0.376145,-0.501861


In [27]:
# Coefficient row labelled 'home_Barcelona'.
coefs.loc['home_Barcelona']

Unnamed: 0,away wins,draw,home wins
home_Barcelona,-2.336086,-1.104222,1.692915


In [28]:
# Coefficient row labelled 'away_Real Madrid'.
coefs.loc['away_Real Madrid']

Unnamed: 0,away wins,draw,home wins
away_Real Madrid,1.471618,-0.603556,-2.014672


In [29]:
# Coefficient row labelled 'home_Atletico de Bilbao'.
coefs.loc['home_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
home_Atletico de Bilbao,-1.378778,-0.351755,0.511796


In [30]:
# Coefficient row labelled 'away_Villarreal'.
coefs.loc['away_Villarreal']

Unnamed: 0,away wins,draw,home wins
away_Villarreal,0.454348,-0.319403,-1.031561


In [31]:
# 'away wins' coefficient from the 'away_Barcelona' row; the result is a one-element
# Series rather than a scalar.
coefs.loc['away_Barcelona']['away wins']

away_Barcelona    1.363731
Name: away wins, dtype: float64

In [32]:
# Print the three coefficients of the 'away_Barcelona' row as plain scalars.
# Each column lookup gives a one-element Series, so the value is taken by position with
# .iloc[0]; a bare [0] on a string-labelled Series depends on pandas' deprecated
# positional fallback for integer keys.
print(coefs.loc['away_Barcelona']['away wins'].iloc[0])
print(coefs.loc['away_Barcelona']['draw'].iloc[0])
print(coefs.loc['away_Barcelona']['home wins'].iloc[0])

1.3637312471482528
-0.5204277174894957
-1.9597088462550631


In [33]:
# Type of a single extracted coefficient. The element is taken by position with .iloc[0]
# instead of [0], which on a string-labelled Series relies on a deprecated fallback.
type(coefs.loc['away_Barcelona']['away wins'].iloc[0])

numpy.float64

## Obtener probabilidades de las Casas de Apuestas

In [34]:
# One converter with the default settings and one per explicitly named method.
probs = ImpliedProbability()
probs_mul = ImpliedProbability('multiplicative')
probs_add = ImpliedProbability('additive')
probs_pow = ImpliedProbability('power')

# Convert the same three odds with each converter and print the implied probabilities,
# in the order: default, multiplicative, additive, power.
for conversor in (probs, probs_mul, probs_add, probs_pow):
    print(conversor.convert(5.75, 4.20, 1.53).implied_probabilities)

[0.15500646 0.2180107  0.62698284]
[0.16320622 0.22343708 0.6133567 ]
[0.15204536 0.21622755 0.63172709]
[0.15271821 0.21401535 0.63326645]


In [35]:
# Implied probabilities for odds 5.25 / 4.00 / 1.60 using the default-configured converter.
probs.convert(5.25, 4.00, 1.60).implied_probabilities

array([0.17132532, 0.22978496, 0.59888973])

In [36]:
# Implied probabilities for odds 5.50 / 4.00 / 1.57 using the default-configured converter.
probs.convert(5.50, 4.00, 1.57).implied_probabilities

array([0.1618835 , 0.22876829, 0.60934822])

In [37]:
# Implied probabilities for odds 1.75 / 3.70 / 4.75 using the default-configured converter.
probs.convert(1.75, 3.70, 4.75).implied_probabilities

array([0.55120443, 0.25386094, 0.19493462])