# [Building a Simple Football Prediction Model](https://medium.com/geekculture/building-a-simple-football-prediction-model-using-machine-learning-f061e607bec5)

and [How to Compute Football Implied Probabilities From Bookmakers Odds](https://octosport.medium.com/how-to-compute-football-implied-probabilities-from-bookmakers-odds-bbb33ccf7c1d)

In [1]:
import warnings
warnings.filterwarnings('ignore')

import sqlite3
import pandas as pd
from datetime import *
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from logistic_model import LogisticRegression, LogisticModel
from implied import ImpliedProbability
from sklearn.metrics import plot_confusion_matrix, accuracy_score

In [2]:
def connect_database(db):
    """Open the SQLite database at ``db``.

    Returns a ``(cursor, connection)`` tuple; the cursor belongs to the
    returned connection.
    """
    connection = sqlite3.connect(db)
    return connection.cursor(), connection


def leer_partidos(con):
    """Read every row of the ``partidos`` table into a DataFrame.

    ``con`` is an open database connection; the frame keeps pandas'
    default integer index.
    """
    consulta = 'SELECT * FROM partidos'
    return pd.read_sql_query(consulta, con)

# Open the SQLite file; the path is relative, so it is resolved against the
# notebook's working directory. `cur` is not used by any later cell visible
# in this notebook, and the connection is never closed explicitly.
cur, con = connect_database('../../Clasificacion.db')

# Load the complete `partidos` table into a DataFrame.
partidos = leer_partidos(con)

In [3]:
def _epoch_to_date(valor):
    """Convert a POSIX timestamp to a local-time ``date``.

    Values that are already ``date`` objects are returned unchanged, so
    re-running this cell no longer fails with ``TypeError`` on the column
    it converted during a previous run.
    """
    if isinstance(valor, date):
        return valor
    return datetime.fromtimestamp(int(valor)).date()


# `timestamp` is replaced by its calendar date and mirrored into `fecha`,
# the column the later cells use for ordering and for the train/test cut.
partidos['timestamp'] = partidos['timestamp'].map(_epoch_to_date)
partidos['fecha'] = partidos['timestamp']
partidos = partidos.sort_values(by='fecha')

In [4]:
# Spot check: all matches of matchday 8 in the 2021-22 season.
es_temporada_2021_22 = partidos['temporada'] == '2021-22'
es_jornada_8 = partidos['jornada'] == 8
partidos[es_temporada_2021_22 & es_jornada_8]

Unnamed: 0,id_partido,temporada,division,jornada,equipo_local,equipo_visitante,goles_local,goles_visitante,fecha,timestamp,...,visitante_puntos_antes,visitante_jugados_antes,visitante_ganados_antes,visitante_empatados_antes,visitante_perdidos_antes,visitante_goles_favor_antes,visitante_goles_contra_antes,visitante_racha_partidos,visitante_racha_gf,visitante_racha_gc
39750,39829,2021-22,2,8,Ponferradina,Valladolid,2,2,2021-10-01,2021-10-01,...,10,7,3,1,3,7,7,,,
39834,39742,2021-22,1,8,Atletico de Bilbao,Alaves,1,0,2021-10-01,2021-10-01,...,3,6,1,0,5,2,11,,,
39754,39833,2021-22,2,8,Alcorcon,Real Sociedad-B,1,4,2021-10-02,2021-10-02,...,6,7,1,3,3,4,7,,,
39752,39831,2021-22,2,8,Huesca,Tenerife,1,2,2021-10-02,2021-10-02,...,11,7,3,2,2,8,6,,,
39751,39830,2021-22,2,8,Amorebieta,Sporting de Gijon,1,1,2021-10-02,2021-10-02,...,16,7,5,1,1,11,7,,,
39838,39746,2021-22,1,8,Atletico de Madrid,Barcelona,2,0,2021-10-02,2021-10-02,...,12,6,3,3,0,11,5,,,
39837,39745,2021-22,1,8,Cadiz,Valencia,0,0,2021-10-02,2021-10-02,...,11,7,3,2,2,12,8,,,
39836,39744,2021-22,1,8,Mallorca,Levante,1,0,2021-10-02,2021-10-02,...,4,7,0,4,3,6,12,,,
39835,39743,2021-22,1,8,Osasuna,Rayo Vallecano,1,0,2021-10-02,2021-10-02,...,13,7,4,1,2,13,7,,,
39753,39832,2021-22,2,8,Zaragoza,Oviedo,0,0,2021-10-02,2021-10-02,...,10,7,2,4,1,8,6,,,


In [5]:
# Preview of the rows dated on or after 2021-10-06 (all divisions at this point).
desde_corte = partidos['fecha'] >= date(2021, 10, 6)
part_test = partidos.loc[desde_corte]
part_test.head(10)

Unnamed: 0,id_partido,temporada,division,jornada,equipo_local,equipo_visitante,goles_local,goles_visitante,fecha,timestamp,...,visitante_puntos_antes,visitante_jugados_antes,visitante_ganados_antes,visitante_empatados_antes,visitante_perdidos_antes,visitante_goles_favor_antes,visitante_goles_contra_antes,visitante_racha_partidos,visitante_racha_gf,visitante_racha_gc
39848,50005,2021-22,1,9,Real Madrid,Atletico de Bilbao,0,0,2021-10-06,2021-10-06,...,13,8,3,4,1,7,4,,,
39849,50006,2021-22,1,9,Levante,Getafe,0,0,2021-10-06,2021-10-06,...,1,8,0,1,7,3,13,,,
39850,50007,2021-22,1,9,Real Sociedad,Mallorca,0,0,2021-10-06,2021-10-06,...,11,8,3,2,3,7,12,,,
39851,50008,2021-22,1,9,Rayo Vallecano,Elche,0,0,2021-10-06,2021-10-06,...,9,8,2,3,3,5,8,,,
39847,50004,2021-22,1,9,Granada,Atletico de Madrid,0,0,2021-10-06,2021-10-06,...,17,8,5,2,1,11,6,,,
39846,50003,2021-22,1,9,Celta,Sevilla,0,0,2021-10-06,2021-10-06,...,14,7,4,2,1,10,3,,,
39764,50013,2021-22,2,9,Las Palmas,Tenerife,0,0,2021-10-06,2021-10-06,...,14,8,4,2,2,10,7,,,
39844,50001,2021-22,1,9,Alaves,Betis,0,0,2021-10-06,2021-10-06,...,12,8,3,3,2,11,9,,,
39852,50009,2021-22,1,9,Villarreal,Osasuna,0,0,2021-10-06,2021-10-06,...,14,8,4,2,2,11,11,,,
39765,50014,2021-22,2,9,Leganes,Valladolid,0,0,2021-10-06,2021-10-06,...,11,8,3,2,3,9,9,,,


In [6]:
# Cut-off dates for the split, named once instead of repeated inline.
inicio_train = date(2019, 8, 1)
inicio_test = date(2021, 10, 6)

partidos = partidos.sort_values(by='fecha')

# Test set: division-1 matches dated on or after the test cut-off.
es_primera = partidos['division'] == 1
part_test = partidos[es_primera & (partidos['fecha'] >= inicio_test)]

# Train set: matches of every division strictly between the two cut-offs.
# NOTE: `partidos` is overwritten here, so running this cell a second time
# would build `part_test` from the already-trimmed frame.
en_ventana_train = (partidos['fecha'] > inicio_train) & (partidos['fecha'] < inicio_test)
partidos = partidos[en_ventana_train]

In [7]:
# Keep only the team names and the goals scored by each side.
columnas_modelo = ['equipo_local', 'equipo_visitante', 'goles_local', 'goles_visitante']
train = partidos.filter(items=columnas_modelo)
test = part_test.filter(items=columnas_modelo)

In [8]:
# Distinct home-team names found in the training rows.
# Not referenced by any later cell visible in this notebook.
equipos = train['equipo_local'].unique()

In [9]:
# Peek at the test rows; in the recorded output every displayed row has a 0-0 score.
test.head()

Unnamed: 0,equipo_local,equipo_visitante,goles_local,goles_visitante
39852,Villarreal,Osasuna,0,0
39844,Alaves,Betis,0,0
39845,Barcelona,Valencia,0,0
39846,Celta,Sevilla,0,0
39847,Granada,Atletico de Madrid,0,0


## Entrenamos el Modelo

In [10]:
# LogisticModel is imported from the project-local `logistic_model` module.
model = LogisticModel()

# Fit on the training rows: home team, away team, home goals, away goals.
model.fit(train['equipo_local'],train['equipo_visitante'],train['goles_local'],train['goles_visitante'])

In [11]:
def result(row):
    """Return the actual outcome of a match row.

    The home team's name when it scored more goals, the away team's name
    when it scored more, and the string 'draw' otherwise.
    """
    goles_casa = row['goles_local']
    goles_fuera = row['goles_visitante']
    if goles_casa > goles_fuera:
        return row['equipo_local']
    if goles_casa < goles_fuera:
        return row['equipo_visitante']
    return 'draw'

def correct(row):
    """Return 1 when the forecast winner equals the real winner, else 0."""
    acierto = row['forecast_winner'] == row['real_winner']
    return 1 if acierto else 0


In [12]:
# Model forecast for every test match (home team first, away team second).
test['forecast_winner'] = test.apply(
    lambda partido: model.predict_winner(partido['equipo_local'], partido['equipo_visitante']),
    axis=1,
)
test['forecast_probs'] = test.apply(
    lambda partido: model.predict_proba(partido['equipo_local'], partido['equipo_visitante']),
    axis=1,
)

# Actual outcome from the goals columns, then a 1/0 flag for a matching forecast.
test['real_winner'] = test.apply(result, axis=1)
test['correct'] = test.apply(correct, axis=1)

In [13]:
# Share of test matches whose forecast winner equals the label built by `result`.
# NOTE(review): the recorded 0.0 looks like an artefact rather than a real score.
# Every test row displayed earlier has goles_local == goles_visitante == 0, which
# `result` labels as 'draw'. Presumably those fixtures had not been played when
# the table was loaded — confirm, and evaluate only on matches with a final score.
accuracy_score(test.real_winner, test.forecast_winner)

0.0

## Copa del Rey 2020-21

In [37]:
# Outcome probabilities for Barcelona (first argument) against Valencia (second).
# The test-set cell passes equipo_local first and equipo_visitante second.
model.predict_proba('Barcelona', 'Valencia')

Unnamed: 0,Valencia,draw,Barcelona
probability,0.060772,0.202524,0.736705


In [15]:
# Predicted outcome for the same pairing and argument order.
model.predict_winner('Barcelona', 'Valencia')

'Barcelona'

In [42]:
# Coefficient table of the fitted model. The recorded output has one column per
# outcome ('away wins', 'draw', 'home wins') and rows named like 'home_<team>'.
coefs = model.get_coef()
coefs.head(15)

Unnamed: 0,away wins,draw,home wins
home_Alaves,-0.394794,-0.529341,-0.099709
home_Albacete,0.268748,-0.140793,-1.1094
home_Alcorcon,0.603509,-0.971927,-0.737635
home_Almeria,-0.493066,-0.521706,-0.04895
home_Amorebieta,-0.854705,0.697015,-0.455524
home_Atletico de Bilbao,-0.747723,-0.575367,0.27924
home_Atletico de Madrid,-2.174411,-0.601763,1.232301
home_Barcelona,-1.73715,-0.961083,1.314974
home_Betis,-0.877183,-0.607204,0.422898
home_Burgos-Cf,-0.177762,-0.355991,0.035422


In [16]:
# Same two teams with the argument order swapped (Valencia first, Barcelona second).
model.predict_proba('Valencia', 'Barcelona')

Unnamed: 0,Barcelona,draw,Valencia
probability,0.358857,0.305869,0.335275


In [17]:
# Predicted outcome with Valencia as first argument and Barcelona as second.
model.predict_winner('Valencia', 'Barcelona')

'Barcelona'

In [18]:
# Fetch the coefficient table from the fitted model (the same call appears in an earlier cell).
coefs = model.get_coef()

In [35]:
# First rows of the coefficient table.
coefs.head(15)

Unnamed: 0,away wins,draw,home wins
home_Alaves,-0.394794,-0.529341,-0.099709
home_Albacete,0.268748,-0.140793,-1.1094
home_Alcorcon,0.603509,-0.971927,-0.737635
home_Almeria,-0.493066,-0.521706,-0.04895
home_Amorebieta,-0.854705,0.697015,-0.455524
home_Atletico de Bilbao,-0.747723,-0.575367,0.27924
home_Atletico de Madrid,-2.174411,-0.601763,1.232301
home_Barcelona,-1.73715,-0.961083,1.314974
home_Betis,-0.877183,-0.607204,0.422898
home_Burgos-Cf,-0.177762,-0.355991,0.035422


In [20]:
# Coefficients stored under the row label 'away_Barcelona'.
coefs.loc['away_Barcelona']

Unnamed: 0,away wins,draw,home wins
away_Barcelona,0.873309,-0.659378,-1.269697


In [21]:
# Coefficients stored under the row label 'home_Atletico de Bilbao'.
coefs.loc['home_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
home_Atletico de Bilbao,-0.747723,-0.575367,0.27924


In [22]:
# Coefficients stored under the row label 'home_Barcelona'.
coefs.loc['home_Barcelona']

Unnamed: 0,away wins,draw,home wins
home_Barcelona,-1.73715,-0.961083,1.314974


In [23]:
# Coefficients stored under the row label 'away_Real Madrid'.
coefs.loc['away_Real Madrid']

Unnamed: 0,away wins,draw,home wins
away_Real Madrid,1.123155,-0.391393,-1.949364


In [24]:
# Repeats the 'home_Atletico de Bilbao' lookup, for comparison with the away row in the next cell.
coefs.loc['home_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
home_Atletico de Bilbao,-0.747723,-0.575367,0.27924


In [25]:
# Coefficients stored under the row label 'away_Atletico de Bilbao'.
coefs.loc['away_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
away_Atletico de Bilbao,-0.576982,0.223387,-0.724771


In [26]:
# Repeats the 'away_Barcelona' lookup; the following cells inspect its type and values.
coefs.loc['away_Barcelona']

Unnamed: 0,away wins,draw,home wins
away_Barcelona,0.873309,-0.659378,-1.269697


In [27]:
# The row lookup yields a DataFrame rather than a Series (see the recorded output).
type(coefs.loc['away_Barcelona'])

pandas.core.frame.DataFrame

In [28]:
# Selecting one column from that row lookup gives a single-element Series.
coefs.loc['away_Barcelona']['away wins']

away_Barcelona    0.873309
Name: away wins, dtype: float64

In [29]:
# Print the three coefficients of the 'away_Barcelona' row. The row lookup
# yields a one-row frame here, so each column is a Series. `.iloc[0]` takes
# its first element explicitly; `series[0]` relies on a positional fallback
# for label-indexed Series that is deprecated in pandas 2.x.
fila_barcelona = coefs.loc['away_Barcelona']
for columna in ('away wins', 'draw', 'home wins'):
    print(fila_barcelona[columna].iloc[0])

0.873308721769958
-0.6593775995171697
-1.2696973892075505


In [30]:
# Type of a single coefficient. `.iloc[0]` is used for the positional access;
# `series[0]` on a label-indexed Series relies on a fallback that is
# deprecated in pandas 2.x.
type(coefs.loc['away_Barcelona']['away wins'].iloc[0])

numpy.float64

## Obtener probabilidades de las casas de apuestas

In [31]:
# One converter per method: the default plus the three named explicitly.
probs = ImpliedProbability()
probs_mul = ImpliedProbability('multiplicative')
probs_add = ImpliedProbability('additive')
probs_pow = ImpliedProbability('power')

# Convert the same odds triple with each converter to compare the results.
cuotas = (5.75, 4.20, 1.53)
for conversor in (probs, probs_mul, probs_add, probs_pow):
    print(conversor.convert(*cuotas).implied_probabilities)

[0.15500646 0.2180107  0.62698284]
[0.16320622 0.22343708 0.6133567 ]
[0.15204536 0.21622755 0.63172709]
[0.15271821 0.21401535 0.63326645]


In [32]:
# Implied probabilities for the odds (5.25, 4.00, 1.60) with the default converter.
probs.convert(5.25, 4.00, 1.60).implied_probabilities

array([0.17132532, 0.22978496, 0.59888973])

In [33]:
# Implied probabilities for the odds (5.50, 4.00, 1.57) with the default converter.
probs.convert(5.50, 4.00, 1.57).implied_probabilities

array([0.1618835 , 0.22876829, 0.60934822])

In [34]:
# Implied probabilities for the odds (1.75, 3.70, 4.75) with the default converter.
probs.convert(1.75, 3.70, 4.75).implied_probabilities

array([0.55120443, 0.25386094, 0.19493462])