In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plFuns import *
# Default size for every figure in this notebook, given as [width, height];
# matplotlib interprets `figure.figsize` in inches.
plt.rcParams['figure.figsize']=[32,18]

In [58]:
# Every league CSV this notebook knows how to fetch, keyed by country code.
# NOTE(review): the "1718" path segment presumably denotes the 2017/18 season -- confirm.
all_urls = {
    'EN': 'http://www.football-data.co.uk/mmz4281/1718/E0.csv',
    'ES': 'http://www.football-data.co.uk/mmz4281/1718/SP1.csv',
    'IT': 'http://www.football-data.co.uk/mmz4281/1718/I1.csv',
    'FR': 'http://www.football-data.co.uk/mmz4281/1718/F1.csv',
    'DE': 'http://www.football-data.co.uk/mmz4281/1718/D1.csv',
    'NL': 'http://www.football-data.co.uk/mmz4281/1718/N1.csv',
}

# Leagues actually downloaded and processed by the cells below. Previously the
# full dict was assigned to `urls` and then silently overwritten by a
# two-league dict; the selection is now explicit. Extend this tuple to
# include more leagues.
selected_leagues = ('EN', 'ES')
urls = {country: all_urls[country] for country in selected_leagues}

In [59]:
# Fetch one results table per selected league and collect the set of team
# names that appear in it (as either home or away side).
all_data = {}
all_team_names = {}
for country, url in urls.items():
    results = pd.read_csv(url, usecols=['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'])
    all_data[country] = results
    all_team_names[country] = set(results['HomeTeam']) | set(results['AwayTeam'])

In [48]:
class Team(object):
    """A team described by discrete belief distributions over two rate components.

    Attributes:
        name: Display name of the team.
        country: Country code the team is filed under.
        lmbd_set, p: Grid of candidate values for the team's "lambda" component
            and the probability currently assigned to each grid point.
        tau_set, q: Grid of candidate values for the team's "tau" component
            and the probability currently assigned to each grid point.

    In a match, the goals of a side are modeled as Poisson with rate
    (that side's lambda) + (the opponent's tau); see `vs` and `scored_against`.
    """

    def __init__(self, name='team name',country='SH'):
        """Start with uniform beliefs for lambda and tau on 1001 points in [0, 5]."""
        self.name = name
        self.country = country
        self.lmbd_set = np.linspace(0, 5, 1001)
        self.p = np.ones_like(self.lmbd_set)
        self.p = self.p / self.p.sum()
        self.tau_set = np.linspace(0, 5, 1001)
        self.q = np.ones_like(self.tau_set)
        self.q = self.q / self.q.sum()

    def simplify(self, threshold=1e-10):
        """Drop grid points whose probability is not above `threshold`, then renormalize.

        Applied independently to the lambda grid (`lmbd_set`/`p`) and the tau
        grid (`tau_set`/`q`).
        """
        ind = self.p > threshold
        self.lmbd_set = self.lmbd_set[ind]
        self.p = self.p[ind]
        self.p = self.p / self.p.sum()
        ind = self.q > threshold
        self.tau_set = self.tau_set[ind]
        self.q = self.q[ind]
        self.q = self.q / self.q.sum()

    def __add__(self, other_team, n_scenarios=int(1e4)):
        """Simulate `self` (home) against `other_team` (away).

        Returns:
            Array of three fractions over the simulated matches:
            [home win, draw, away win].
        """
        GH, GA, _ = self.vs(other_team, n=n_scenarios)
        return np.array([(GH > GA).sum(), (GH == GA).sum(), (GH < GA).sum()]) / n_scenarios

    def vs(self, other_team, n=int(1e4)):
        """Draw `n` simulated scorelines with `self` at home and `other_team` away.

        For every scenario the rate components are sampled from the current
        beliefs, and goals are then drawn from a Poisson with the summed rate.

        Returns:
            (home goals array, away goals array, match description string).
        """
        # Home rate: own lambda plus the opponent's tau.
        lH = (np.random.choice(self.lmbd_set, size=n, p=self.p)
              + np.random.choice(other_team.tau_set, size=n, p=other_team.q))
        gH = np.random.poisson(lH)
        # Away rate: own tau plus the opponent's lambda.
        lA = (np.random.choice(self.tau_set, size=n, p=self.q)
              + np.random.choice(other_team.lmbd_set, size=n, p=other_team.p))
        gA = np.random.poisson(lA)
        match_des = self.name + ' vs ' + other_team.name
        return gH, gA, match_des

    def plt(self):
        """Plot the current lambda and tau distributions on the active matplotlib axes."""
        plt.plot(self.lmbd_set, self.p, label=self.name + ' lmbda')
        plt.plot(self.tau_set, self.q, label=self.name + ' tau')
        plt.legend()
        plt.grid(True)
        l, t = self.means()
        plt.title('lambda: {:0.2f} tau: {:0.2f}'.format(l, t))

    def means(self):
        """Return the expected values (mean lambda, mean tau) under the current beliefs."""
        return self.p.dot(self.lmbd_set), self.q.dot(self.tau_set)

    def scored_against(self, other, k):
        """Update beliefs after `self` scored `k` goals against `other`.

        Updates `self.p` (lambda of the scoring team) and `other.q` (tau of
        the conceding team) with the Poisson likelihood of `k` at rate
        lambda + tau. Each marginal is obtained by averaging the likelihood
        over the *prior* of the other variable.
        """
        # rate[i, j] = other.tau_set[i] + self.lmbd_set[j]
        rate = self.lmbd_set[np.newaxis, :] + other.tau_set[:, np.newaxis]
        # Poisson likelihood up to the constant 1/k!, which cancels on normalization.
        likelihood = np.exp(-rate) * rate ** k

        # Compute both updates from the priors before assigning anything.
        # Previously `self.p` was overwritten first, so the update of `other.q`
        # used the posterior of lambda and counted the observation twice.
        prior_p, prior_q = self.p, other.q
        new_p = (likelihood * prior_q[:, np.newaxis]).sum(axis=0) * prior_p
        new_q = (likelihood * prior_p[np.newaxis, :]).sum(axis=1) * prior_q

        self.p = new_p / new_p.sum()
        other.q = new_q / new_q.sum()
    

In [132]:
class Season:
    """Per-fixture goal arrays for every ordered (home, away) pairing of teams.

    Attributes:
        teams: The iterable of team names passed in (iterating a dict yields its keys).
        n_scenarios: Length of each per-fixture goal array.
        home_goals, away_goals: Dicts keyed by "<home> v <away>" holding an
            integer array of length `n_scenarios`, initialized to -1.
    """

    def __init__(self, teams, n_scenarios=100000):
        """Create goal arrays filled with -1 for every fixture between distinct teams.

        Args:
            teams: Iterable of team names (e.g. a dict keyed by name).
            n_scenarios: Number of entries per fixture array; defaults to the
                previously hard-coded 100000.
        """
        self.teams = teams
        self.n_scenarios = n_scenarios
        fixtures = [home + ' v ' + away for home in teams for away in teams if home != away]
        # NOTE(review): -1 presumably marks "no result yet" -- confirm with downstream use.
        self.home_goals = {fixture: np.full(n_scenarios, -1) for fixture in fixtures}
        self.away_goals = {fixture: np.full(n_scenarios, -1) for fixture in fixtures}

    def process_current_results(self, data):
        """Overwrite the goal arrays of every fixture found in `data` with its result.

        Args:
            data: DataFrame with columns 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'.

        Rows where any of these four fields is missing are skipped. A row
        naming a fixture that is not part of this season raises KeyError.
        """
        columns = (data['HomeTeam'], data['AwayTeam'], data['FTHG'], data['FTAG'])
        for home, away, hg, ag in zip(*columns):
            # A missing team name or score cannot be stored in the integer
            # arrays (and a NaN name cannot be concatenated), so skip the row.
            if pd.isna(home) or pd.isna(away) or pd.isna(hg) or pd.isna(ag):
                continue
            match = home + ' v ' + away
            self.home_goals[match][:] = hg
            self.away_goals[match][:] = ag
        
        

In [60]:
# Build one Team object per team name, tagged with the league it came from.
# The dict is keyed by name only, so a name that occurs in two leagues keeps
# the Team created last.
teams = {}
for _country, _team_names in all_team_names.items():
    for _team_name in _team_names:
        teams[_team_name] = Team(name=_team_name, country=_country)
    

In [134]:
# Select the teams filed under 'EN' and load their played results into a Season.
EnglishTeams = {name: team for name, team in teams.items() if team.country == 'EN'}
PremierLeague = Season(EnglishTeams)
PremierLeague.process_current_results(all_data['EN'])

In [77]:
# Replay every recorded match, league by league, updating both teams' beliefs.
for _country, _data in all_data.items():
    print(_country)
    for index, row in _data.iterrows():
        home_team, away_team = teams[row['HomeTeam']], teams[row['AwayTeam']]
        hg, ag = row['FTHG'], row['FTAG']
        # Home goals update the home side's lambda and the away side's tau;
        # away goals update the away side's lambda and the home side's tau.
        home_team.scored_against(away_team, hg)
        away_team.scored_against(home_team, ag)
        # Prune negligible grid points so later updates work on smaller arrays.
        for _side in (home_team, away_team):
            _side.simplify()

EN
ES
