# [Building a Simple Football Prediction Model](https://medium.com/geekculture/building-a-simple-football-prediction-model-using-machine-learning-f061e607bec5)

This notebook also draws on [How to Compute Football Implied Probabilities From Bookmakers Odds](https://octosport.medium.com/how-to-compute-football-implied-probabilities-from-bookmakers-odds-bbb33ccf7c1d).

In [2]:
import warnings
warnings.filterwarnings('ignore')

import sqlite3
import pandas as pd
from datetime import *
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from logistic_model import LogisticRegression, LogisticModel
from implied import ImpliedProbability
from sklearn.metrics import plot_confusion_matrix, accuracy_score

In [3]:
def connect_database(db):
    """Open the SQLite database at ``db``.

    Returns a ``(cursor, connection)`` tuple -- note that the cursor comes
    first. The connection is not closed here; the caller keeps ownership.
    """
    connection = sqlite3.connect(db)
    return connection.cursor(), connection


def leer_partidos(con):
    """Load every row and column of the ``partidos`` table into a DataFrame.

    ``con`` is an open database connection accepted by
    ``pandas.read_sql_query``. The frame keeps pandas' default integer index.
    """
    consulta = 'SELECT * FROM partidos'
    return pd.read_sql_query(consulta, con)

# Location of the SQLite file, relative to the notebook's working directory.
DB_PATH = '../../Clasificacion.v2.db'

# Open the database and pull the full match table into memory.
cur, con = connect_database(DB_PATH)
partidos = leer_partidos(con)

In [4]:
def _epoch_to_date(value):
    """Convert an epoch-seconds value to a ``date``; pass existing dates through.

    The pass-through makes this cell safe to re-run: after the first run the
    column already holds ``date`` objects, which ``int()`` would reject with
    a TypeError.
    """
    if isinstance(value, date):
        return value
    # fromtimestamp() without a tz argument interprets the epoch in the
    # machine's local timezone, so the calendar date can differ across hosts.
    return datetime.fromtimestamp(int(value)).date()


# NOTE: 'timestamp' is overwritten with dates (the raw epoch values are not
# kept), and 'fecha' is a second column holding the same dates.
partidos['timestamp'] = partidos['timestamp'].map(_epoch_to_date)
partidos['fecha'] = partidos['timestamp']

In [5]:
# Cut-off dates for the chronological split. Both comparisons below are
# strict, so matches dated exactly SPLIT_DATE end up in neither set.
TRAIN_START = date(2019, 8, 1)
SPLIT_DATE = date(2021, 2, 8)

partidos = partidos.sort_values(by='fecha')

# Test Set: division-1 matches played after the split date.
primera_division = partidos[partidos['division'] == 1]
part_test = primera_division[primera_division['fecha'] > SPLIT_DATE]

# Train Set: matches from every division inside the open window
# (TRAIN_START, SPLIT_DATE). `partidos` is narrowed to that window from here on.
en_ventana = (partidos['fecha'] > TRAIN_START) & (partidos['fecha'] < SPLIT_DATE)
partidos = partidos[en_ventana]

In [6]:
# Only the team names and final scores are passed on to the model.
columnas_modelo = ['equipo_local', 'equipo_visitante', 'goles_local', 'goles_visitante']

train = partidos.filter(items=columnas_modelo)
test = part_test.filter(items=columnas_modelo)

In [7]:
# Distinct home-team names in the training set (not referenced again in the
# cells visible in this notebook).
equipos = train['equipo_local'].unique()

In [8]:
# Sanity check: preview the first rows of the test set.
test.head()

Unnamed: 0,equipo_local,equipo_visitante,goles_local,goles_visitante
39214,Real Madrid,Getafe,2,0
39426,Celta,Elche,3,1
39430,Barcelona,Alaves,5,1
39429,Eibar,Valladolid,1,1
39428,Sevilla,Huesca,1,0


## Entrenamos el Modelo

In [9]:
# Fit on the training window. Arguments are passed positionally:
# home teams, away teams, home goals, away goals.
model = LogisticModel()
model.fit(
    train['equipo_local'],
    train['equipo_visitante'],
    train['goles_local'],
    train['goles_visitante'],
)

In [10]:
def result(row):
    """Return the actual outcome label for one match row.

    Gives the home team's name when it scored more goals, the away team's
    name when it scored more, and the literal string ``'draw'`` in every
    other case.
    """
    goles_casa = row['goles_local']
    goles_fuera = row['goles_visitante']

    if goles_casa > goles_fuera:
        return row['equipo_local']
    if goles_casa < goles_fuera:
        return row['equipo_visitante']
    return 'draw'

def correct(row):
    """Return 1 when the row's forecast matches the real outcome, else 0."""
    return 1 if row['forecast_winner'] == row['real_winner'] else 0


In [11]:
def _por_partido(predictor):
    """Adapt a ``predictor(home, away)`` callable so it can be applied row-wise."""
    return lambda row: predictor(row['equipo_local'], row['equipo_visitante'])


# Model output per test match.
test['forecast_winner'] = test.apply(_por_partido(model.predict_winner), axis=1)
test['forecast_probs'] = test.apply(_por_partido(model.predict_proba), axis=1)

# Ground truth and hit flag. Order matters: `correct` reads both the
# 'forecast_winner' and 'real_winner' columns created above.
test['real_winner'] = test.apply(result, axis=1)
test['correct'] = test.apply(correct, axis=1)

In [12]:
# Share of test matches whose predicted outcome label equals the real one.
accuracy_score(test['real_winner'], test['forecast_winner'])

0.5176470588235295

## Copa del Rey 2020-21

In [13]:
# Outcome probabilities with Barcelona passed first -- the argument position
# used for the home team when the test set was scored.
model.predict_proba('Barcelona', 'Atletico de Bilbao')

Unnamed: 0,Atletico de Bilbao,draw,Barcelona
probability,0.070792,0.250406,0.678802


In [14]:
# Predicted winner for the same pairing and argument order.
model.predict_winner('Barcelona', 'Atletico de Bilbao')

'Barcelona'

In [15]:
# Same pairing with the argument order swapped (Atletico de Bilbao first).
model.predict_proba('Atletico de Bilbao', 'Barcelona')

Unnamed: 0,Barcelona,draw,Atletico de Bilbao
probability,0.534195,0.140028,0.325776


In [16]:
# Predicted winner with Atletico de Bilbao passed first.
model.predict_winner('Atletico de Bilbao', 'Barcelona')

'Barcelona'

In [17]:
# Retrieve the fitted model's coefficient table for inspection.
coefs = model.get_coef()

In [18]:
# Preview: one row per home_/away_ team feature, one column per outcome
# ('away wins', 'draw', 'home wins').
coefs.head()

Unnamed: 0,away wins,draw,home wins
home_Alaves,-0.44117,-0.362029,-0.18925
home_Albacete,0.052886,0.025916,-1.054371
home_Alcorcon,0.614489,-0.825257,-0.853375
home_Almeria,-0.292885,-0.701416,-0.067901
home_Atletico de Bilbao,-0.468034,-1.0043,0.389299


In [19]:
# Coefficients of the 'away_Barcelona' feature.
coefs.loc['away_Barcelona']

Unnamed: 0,away wins,draw,home wins
away_Barcelona,0.723668,-0.748245,-1.036212


In [20]:
# Coefficients of the 'home_Atletico de Bilbao' feature.
coefs.loc['home_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
home_Atletico de Bilbao,-0.468034,-1.0043,0.389299


In [21]:
# Coefficients of the 'home_Barcelona' feature.
coefs.loc['home_Barcelona']

Unnamed: 0,away wins,draw,home wins
home_Barcelona,-1.797002,-1.013848,1.439541


In [22]:
# Coefficients of the 'away_Real Madrid' feature.
coefs.loc['away_Real Madrid']

Unnamed: 0,away wins,draw,home wins
away_Real Madrid,0.98434,-0.35328,-1.762583


In [23]:
# Repeated lookup: coefficients of the 'home_Atletico de Bilbao' feature.
coefs.loc['home_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
home_Atletico de Bilbao,-0.468034,-1.0043,0.389299


In [24]:
# Coefficients of the 'away_Atletico de Bilbao' feature.
coefs.loc['away_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
away_Atletico de Bilbao,-0.689353,0.027985,-0.411139


In [25]:
# Repeated lookup: coefficients of the 'away_Barcelona' feature.
coefs.loc['away_Barcelona']

Unnamed: 0,away wins,draw,home wins
away_Barcelona,0.723668,-0.748245,-1.036212


In [26]:
# .loc with this single label yields a (one-row) DataFrame, not a Series.
type(coefs.loc['away_Barcelona'])

pandas.core.frame.DataFrame

In [27]:
# Picking one column from that one-row frame gives a length-1 Series,
# still not a bare scalar.
coefs.loc['away_Barcelona']['away wins']

away_Barcelona    0.723668
Name: away wins, dtype: float64

In [28]:
# Pull the scalar out of each length-1 coefficient Series with .iloc[0].
# Plain [0] on a label-indexed Series relies on pandas' positional fallback,
# which is deprecated (the FutureWarning is hidden here by the global
# warnings filter) and stops working once the fallback is removed.
barcelona_away = coefs.loc['away_Barcelona']
for outcome in ('away wins', 'draw', 'home wins'):
    print(barcelona_away[outcome].iloc[0])

0.7236678113885112
-0.7482449972391303
-1.0362115151315918


In [29]:
# .iloc[0] is explicit positional access; [0] on this label-indexed Series
# only works through a deprecated pandas fallback.
type(coefs.loc['away_Barcelona']['away wins'].iloc[0])

numpy.float64

## Obtener probabilidades de las Casas de Apuestas

In [30]:
# One converter per method: the default plus three explicitly named ones.
probs = ImpliedProbability()
probs_mul = ImpliedProbability('multiplicative')
probs_add = ImpliedProbability('additive')
probs_pow = ImpliedProbability('power')

# Convert the same three bookmaker odds with each method to compare results.
# NOTE(review): which outcome each odds position stands for is not visible
# here -- confirm against ImpliedProbability.convert.
for conversor in (probs, probs_mul, probs_add, probs_pow):
    print(conversor.convert(5.75, 4.20, 1.53).implied_probabilities)

[0.15500646 0.2180107  0.62698284]
[0.16320622 0.22343708 0.6133567 ]
[0.15204536 0.21622755 0.63172709]
[0.15271821 0.21401535 0.63326645]


In [31]:
# Default-method implied probabilities for a second set of odds.
probs.convert(5.25, 4.00, 1.60).implied_probabilities

array([0.17132532, 0.22978496, 0.59888973])

In [32]:
# Default-method implied probabilities for a third set of odds.
probs.convert(5.50, 4.00, 1.57).implied_probabilities

array([0.1618835 , 0.22876829, 0.60934822])

In [33]:
# Default-method implied probabilities where the first odds value is the shortest.
probs.convert(1.75, 3.70, 4.75).implied_probabilities

array([0.55120443, 0.25386094, 0.19493462])