# [Building a Simple Football Prediction Model](https://medium.com/geekculture/building-a-simple-football-prediction-model-using-machine-learning-f061e607bec5)

In [62]:
import warnings
warnings.filterwarnings('ignore')

import sqlite3
import pandas as pd
from datetime import *
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from logistic_model import LogisticRegression, LogisticModel
from implied import ImpliedProbability

In [2]:
def connect_database(db):
    con = sqlite3.connect(db)
    cursor = con.cursor()
    return cursor, con


def leer_partidos(con):
    partidos = pd.read_sql_query('SELECT * FROM partidos', con) #, index_col='fecha')
    return partidos

cur, con = connect_database('../../Clasificacion.v2.db')

partidos = leer_partidos(con)

In [5]:
partidos['timestamp'] = partidos['timestamp'].map(lambda X: datetime.fromtimestamp(int(X)).date())
partidos['fecha'] = partidos['timestamp']

In [6]:
partidos = partidos.sort_values(by='fecha')
#partidos = partidos[partidos['division'] == 1]
partidos = partidos[partidos['fecha'] > date(2017, 8, 1)]
#partidos = partidos[partidos['resultado'] < 1]

In [10]:
match = partidos.filter(['equipo_local', 'equipo_visitante', 'goles_local', 'goles_visitante'])

In [11]:
equipos = match['equipo_local'].unique()

In [12]:
match.head

array(['Tenerife', 'Lorca FC', 'Leganes', 'Valencia', 'Alcorcon',
       'Cordoba', 'Osasuna', 'Numancia', 'Valladolid', 'Sevilla',
       'Girona', 'Celta', 'Barcelona', 'Atletico de Bilbao', 'Deportivo',
       'Oviedo', 'Granada', 'Gimnastic de Tarragona', 'Malaga', 'Levante',
       'Lugo', 'Betis', 'Real Sociedad', 'Albacete', 'Sevilla Atletico',
       'Las Palmas', 'Alaves', 'Almeria', 'Rayo Vallecano',
       'Cultural Leonesa', 'Huesca', 'Getafe', 'Real Madrid', 'Espanol',
       'Eibar', 'Cadiz', 'Zaragoza', 'Reus', 'Sporting de Gijon',
       'Barcelona B', 'Villarreal', 'Atletico de Madrid', 'Elche',
       'Mallorca', 'Extremadura', 'Rayo Majadahonda', 'Racing',
       'Mirandes', 'Ponferradina', 'Fuenlabrada', 'Cd-Castellon',
       'Cartagena', 'Ud-Logrones', 'Sabadell'], dtype=object)

## Calcular Ganador y Probabilidades

In [21]:
model = LogisticModel()

model.fit(match['equipo_local'],match['equipo_visitante'],match['goles_local'],match['goles_visitante'])

In [22]:
model.predict_proba('Barcelona', 'Atletico de Bilbao')

Unnamed: 0,Atletico de Bilbao,draw,Barcelona
probability,0.04722,0.224476,0.728305


In [23]:
model.predict_winner('Barcelona', 'Atletico de Bilbao')

'Barcelona'

In [24]:
model.predict_proba('Atletico de Bilbao', 'Barcelona', )

Unnamed: 0,Barcelona,draw,Atletico de Bilbao
probability,0.519797,0.311126,0.169077


In [25]:
model.predict_winner('Atletico de Bilbao', 'Barcelona', )

'Barcelona'

In [35]:
model.get_coef()

Unnamed: 0,away wins,draw,home wins
home_Alaves,-0.578817,-0.606291,0.106665
home_Albacete,-0.170786,-0.002540,-0.807630
home_Alcorcon,0.094411,-0.407084,-0.659266
home_Almeria,-0.414674,-0.428499,-0.197040
home_Atletico de Bilbao,-1.155807,-0.294518,0.324450
...,...,...,...
away_Ud-Logrones,-0.650414,-0.349721,0.082864
away_Valencia,0.125722,-0.451196,-0.643834
away_Valladolid,-0.485941,-0.113341,-0.479335
away_Villarreal,0.423085,-0.463079,-0.902529


In [50]:
model.get_coef().loc['home_Barcelona']

Unnamed: 0,away wins,draw,home wins
home_Barcelona,-2.520553,-1.153776,1.842495


In [51]:
model.get_coef().loc['away_Barcelona']

Unnamed: 0,away wins,draw,home wins
away_Barcelona,1.257917,-0.484447,-1.903471


In [54]:
model.get_coef().loc['home_Real Madrid']

Unnamed: 0,away wins,draw,home wins
home_Real Madrid,-1.559612,-1.348904,1.446553


In [55]:
model.get_coef().loc['away_Real Madrid']

Unnamed: 0,away wins,draw,home wins
away_Real Madrid,1.082021,-0.360532,-1.772322


In [58]:
model.get_coef().loc['home_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
home_Atletico de Bilbao,-1.155807,-0.294518,0.32445


In [57]:
model.get_coef().loc['away_Atletico de Bilbao']

Unnamed: 0,away wins,draw,home wins
away_Atletico de Bilbao,-0.420754,-0.007689,-0.614396


In [59]:
model.get_coef().loc['away_Barcelona']

Unnamed: 0,away wins,draw,home wins
away_Barcelona,1.257917,-0.484447,-1.903471


## Obtener probabilidades Casas de Apuestas

In [67]:
probs = ImpliedProbability()
probs.convert(5.75, 4.20, 1.53).implied_probabilities

array([0.15500646, 0.2180107 , 0.62698284])