In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plFuns import *
# Use a large default figure size for every plot drawn in this notebook.
plt.rcParams['figure.figsize']=[32,18]

In [2]:
# Every league result file this notebook knows about, keyed by country code.
_all_league_urls = dict(
    EN='http://www.football-data.co.uk/mmz4281/1718/E0.csv',
    ES='http://www.football-data.co.uk/mmz4281/1718/SP1.csv',
    IT='http://www.football-data.co.uk/mmz4281/1718/I1.csv',
    FR='http://www.football-data.co.uk/mmz4281/1718/F1.csv',
    DE='http://www.football-data.co.uk/mmz4281/1718/D1.csv',
    NL='http://www.football-data.co.uk/mmz4281/1718/N1.csv',
)
# Only this subset is actually downloaded and modelled below.
urls = {_code: _all_league_urls[_code] for _code in ('EN', 'ES')}

In [3]:
# Download each league's results and collect the set of team names per country.
all_data = {}
all_team_names = {}
for country in urls:
    _results = pd.read_csv(
        urls[country],
        usecols=['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'],
    )
    all_data[country] = _results
    # A team is anyone who appears as either the home or the away side.
    all_team_names[country] = set(_results['HomeTeam']) | set(_results['AwayTeam'])

In [4]:
class Team(object):
    """Discrete Bayesian model of one team's scoring and conceding rates.

    Two independent discrete distributions are kept on a grid:

    * ``lmbd_set`` / ``p`` -- candidate values and probabilities of the
      team's own contribution to the goals it scores.
    * ``tau_set`` / ``q`` -- candidate values and probabilities of the
      team's contribution to the goals it concedes.

    In a match the goals a side scores are Poisson distributed with rate
    ``scorer.lmbd + conceder.tau`` (see ``vs``).  Both distributions start
    uniform on 1001 points in [0, 5].
    """

    def __init__(self, name='team name',country='SH'):
        """Create a team with uniform priors over both rate grids.

        Args:
            name: Team name, used in plot labels and match descriptions.
            country: Country code the team is filed under.
        """
        self.name = name
        self.country=country
        self.lmbd_set = np.linspace(0, 5, 1001)
        self.p = self.lmbd_set * 0 + 1
        self.p = self.p / self.p.sum()
        self.tau_set = np.linspace(0, 5, 1001)
        self.q = self.tau_set * 0 + 1
        self.q = self.q / self.q.sum()

    @staticmethod
    def _prune(support, weights, threshold):
        """Drop grid points whose weight is <= threshold and renormalise.

        If no point survives the threshold the distribution is returned
        unchanged, so it can never be reduced to an empty support.
        """
        keep = weights > threshold
        if not keep.any():
            return support, weights
        kept_weights = weights[keep]
        return support[keep], kept_weights / kept_weights.sum()

    def simplify(self, threshold=1e-10):
        """Discard negligible grid points from both distributions.

        Args:
            threshold: Points with probability not above this are removed;
                the remaining probabilities are renormalised to sum to 1.
        """
        self.lmbd_set, self.p = self._prune(self.lmbd_set, self.p, threshold)
        self.tau_set, self.q = self._prune(self.tau_set, self.q, threshold)

    def __add__(self, other_team, n_scenarios=int(1e4)):
        """Estimate outcome probabilities of ``self`` (home) vs ``other_team``.

        Returns:
            Array ``[P(home win), P(draw), P(away win)]`` estimated from
            ``n_scenarios`` simulated matches.
        """
        GH, GA, match_des = self.vs(other_team,n=n_scenarios)
        return np.array([(GH > GA).sum(), (GH == GA).sum(), (GH < GA).sum()]) / n_scenarios

    def vs(self, other_team, n=int(1e4)):
        """Simulate ``n`` matches with ``self`` at home against ``other_team``.

        For every scenario a rate is drawn from each relevant distribution
        and goals are Poisson sampled with the summed rate.  Uses the global
        ``np.random`` state, so results are not reproducible unless seeded.

        Returns:
            Tuple ``(home_goals, away_goals, description)`` where the goal
            arrays have length ``n`` and the description is
            ``'<home> vs <away>'``.
        """
        # Home goals: own scoring rate plus the opponent's conceding rate.
        lH = np.random.choice(self.lmbd_set, size=n, p=self.p) + np.random.choice(other_team.tau_set, size=n,
                                                                                  p=other_team.q)
        gH = np.random.poisson(lH)
        # Away goals: own conceding rate plus the opponent's scoring rate.
        lA = np.random.choice(self.tau_set, size=n, p=self.q) + np.random.choice(other_team.lmbd_set, size=n,
                                                                                 p=other_team.p)
        gA = np.random.poisson(lA)
        match_des = self.name + ' vs ' + other_team.name
        return gH, gA, match_des

    def plt(self):
        """Plot both distributions on the current matplotlib axes."""
        plt.plot(self.lmbd_set, self.p, label=self.name + ' lmbda')
        plt.plot(self.tau_set, self.q, label=self.name + ' tau')
        plt.legend()
        plt.grid(True)
        l, t = self.means()
        plt.title('lambda: {:0.2f} tau: {:0.2f}'.format(l, t))

    def means(self):
        """Return ``(mean scoring rate, mean conceding rate)``."""
        return self.p.dot(self.lmbd_set), self.q.dot(self.tau_set)

    def scored_against(self,other,k):
        """Bayesian update after ``self`` scored ``k`` goals against ``other``.

        Updates ``self.p`` (scoring rate) and ``other.q`` (conceding rate)
        in place with the Poisson likelihood of ``k`` goals at rate
        ``lmbd + tau``.  The ``1/k!`` factor is dropped because it cancels
        in the normalisation.

        Both posteriors are the marginals of the same joint posterior, so
        each one is computed from the *prior* of the other distribution;
        updating ``other.q`` from the already-updated ``self.p`` would count
        the observation twice.
        """
        # rate[i, j] = other.tau_set[i] + self.lmbd_set[j]
        rate = self.lmbd_set + other.tau_set[:, np.newaxis]
        likelihood = np.exp(-rate) * (rate ** k)
        # Marginalise the joint posterior over tau (for p) and lambda (for q),
        # using the priors on both sides before either is overwritten.
        new_p = likelihood.T.dot(other.q) * self.p
        new_q = likelihood.dot(self.p) * other.q
        self.p = new_p / new_p.sum()
        other.q = new_q / new_q.sum()
    

In [5]:
# One Team object per club, keyed by club name; each remembers its country.
teams = {}
for _country, _team_names in all_team_names.items():
    teams.update(
        {_team_name: Team(name=_team_name, country=_country) for _team_name in _team_names}
    )

In [6]:
class Season:
    """Bookkeeping for one league season: played results plus simulations.

    The fixture list is every ordered (home, away) pair of distinct teams,
    keyed as ``'<home> v <away>'``.
    """

    def __init__(self,teams):
        """Set up an empty season.

        Args:
            teams: Mapping of team name to a team object exposing
                ``vs(other, n=...)`` returning ``(home_goals, away_goals, _)``.
        """
        self.teams=teams
        self.nr_teams=len(teams)

        self.all_matches={home+' v '+away:{'Done':False,'Home':home,'Away':away} for home in teams for away in teams if home!=away }
        # Until results are processed, every fixture is still to be played.
        self.matches_to_sim=dict(self.all_matches)
        self.match_id=dict()
        self.simulated_home_goals=None
        self.simulated_away_goals=None
        self.simulated_home_points=None
        self.simulated_away_points=None
        # Row index of each team in per-team result arrays.
        self.team_id={_team: i for i, _team in enumerate(teams)}
        self.current_goals={_team: 0 for _team in teams}
        self.current_points={_team: 0 for _team in teams}

    def process_current_results(self,data):
        """Add played matches to the running goal and points totals.

        Args:
            data: DataFrame with columns ``HomeTeam``, ``AwayTeam``,
                ``FTHG`` and ``FTAG``.  Rows whose score is NaN are treated
                as not played yet and are skipped.

        Raises:
            KeyError: If a row names a team or fixture not in this season.

        Note:
            Totals are accumulated, so processing the same rows twice
            counts them twice.
        """
        rows = zip(data['HomeTeam'], data['AwayTeam'], data['FTHG'], data['FTAG'])
        for home_team, away_team, home_goals, away_goals in rows:
            if np.isnan(home_goals) or np.isnan(away_goals):
                continue
            match=home_team+' v '+away_team
            self.current_goals[home_team]+=home_goals
            self.current_goals[away_team]+=away_goals
            if home_goals>away_goals:
                self.current_points[home_team]+=3
            elif home_goals<away_goals:
                self.current_points[away_team]+=3
            else:
                self.current_points[home_team]+=1
                self.current_points[away_team]+=1
            self.all_matches[match]['Done'] = True
        # Rebuilt once after the loop, so it is also defined for empty data.
        self.matches_to_sim={x: self.all_matches[x] for x in  self.all_matches if not self.all_matches[x]['Done']}

    def simulate_season(self,n_scenarios=10000):
        """Simulate every fixture that has not been played yet.

        Fills ``simulated_home_goals`` and ``simulated_away_goals`` with
        arrays of shape ``(matches to simulate, n_scenarios)`` and records
        each fixture's row index in ``match_id``.

        Args:
            n_scenarios: Number of simulated outcomes per fixture.
        """
        nr_matches_to_sim=len(self.matches_to_sim)
        self.match_id=dict()
        self.simulated_home_goals=np.zeros([nr_matches_to_sim,n_scenarios])
        self.simulated_away_goals=np.zeros([nr_matches_to_sim,n_scenarios])
        for i, (match, details) in enumerate(self.matches_to_sim.items()):
            self.match_id[match]=i
            home_team=self.teams[details['Home']]
            away_team=self.teams[details['Away']]
            gH, gA, _ = home_team.vs(away_team,n=n_scenarios)
            self.simulated_home_goals[i,:]=gH
            self.simulated_away_goals[i,:]=gA
        
        
        

In [7]:
# Build the English season from the teams filed under 'EN', feed it the
# results played so far, then simulate the remaining fixtures.
EnglishTeams = {
    _name: _team for _name, _team in teams.items() if _team.country == 'EN'
}
PremierLeague = Season(EnglishTeams)
PremierLeague.process_current_results(all_data['EN'])
PremierLeague.simulate_season()

In [8]:
# Shape is (fixtures still to simulate, scenarios per fixture).
PremierLeague.simulated_home_goals.shape

(120, 10000)

In [None]:

# Turn the simulated fixtures into per-scenario league tables.
# Every array below has shape (teams, scenarios); rows follow PremierLeague.team_id.
n_scenarios=PremierLeague.simulated_home_goals.shape[1]
points_per_team = np.zeros([PremierLeague.nr_teams,n_scenarios])
place_per_team = np.zeros([PremierLeague.nr_teams,n_scenarios])
goals_per_team = np.zeros([PremierLeague.nr_teams,n_scenarios])
goals_against_per_team = np.zeros([PremierLeague.nr_teams,n_scenarios])

# Start every scenario from the points already earned in played matches.
for _team in PremierLeague.teams:
    team_id = PremierLeague.team_id[_team]
    points_per_team[team_id,:]=PremierLeague.current_points[_team]

# Add the outcome of each simulated fixture.
# NOTE: the goal tallies only contain simulated goals; goals from matches
# already played are not added here, so the tie-breakers below ignore them.
for _match in PremierLeague.matches_to_sim:
    _details=PremierLeague.matches_to_sim[_match]
    _home = _details['Home']
    _home_id=PremierLeague.team_id[_home]
    _away = _details['Away']
    _away_id=PremierLeague.team_id[_away]
    match_id = PremierLeague.match_id[_match]
    home_goals=PremierLeague.simulated_home_goals[match_id]
    away_goals=PremierLeague.simulated_away_goals[match_id]
    goals_per_team[_home_id,:]+=home_goals
    goals_per_team[_away_id,:]+=away_goals
    goals_against_per_team[_home_id,:]+=away_goals
    goals_against_per_team[_away_id,:]+=home_goals
    home_won=home_goals>away_goals
    away_won=home_goals<away_goals
    draw=home_goals==away_goals
    # 3 points to the winner, 1 point each for a draw.
    points_per_team[_home_id,home_won]+=3
    points_per_team[_home_id,draw]+=1
    points_per_team[_away_id,away_won]+=3
    points_per_team[_away_id,draw]+=1

# Tie-breakers are folded in as fractions of a point, so they can reorder
# teams level on points but never overturn a one-point gap:
# goal difference (up to 0.1), goals scored (up to 0.01), random (up to 0.001).
# Work on a copy so points_per_team keeps the true integer points.
modified_points=points_per_team.copy()
goal_difference=goals_per_team-goals_against_per_team
for _weight, _tiebreak in ((0.1, goal_difference), (0.01, goals_per_team)):
    b=_tiebreak.max(axis=0)
    a=_tiebreak.min(axis=0)
    # When all teams are level in a scenario the spread is 0; divide by 1
    # instead so the contribution is 0 rather than NaN.
    _spread=np.where(b>a, b-a, 1.0)
    modified_points+=_weight*(_tiebreak-a)/_spread
modified_points+=0.001*np.random.random(modified_points.shape)
# Team ids of the first scenario ordered from fewest to most points.
modified_points.argsort(axis=0)[:,0]

array([ 5,  2, 16, 14,  9, 19,  3,  8, 10, 12, 17, 11,  6,  4, 15,  0,  7,
        1, 18, 13])

In [None]:
# Replay every played match in order and update both teams' rate
# distributions from the goals each side scored.
for _country, _data in all_data.items():
    print(_country)
    for index, row in _data.iterrows():
        home_team = teams[row['HomeTeam']]
        away_team = teams[row['AwayTeam']]
        hg, ag = row['FTHG'], row['FTAG']
        # Rows without a full-time score carry no information; skip them.
        if np.isnan(hg) or np.isnan(ag):
            continue
        home_team.scored_against(away_team, hg)
        away_team.scored_against(home_team, ag)
        # Prune negligible grid points after each match.
        home_team.simplify()
        away_team.simplify()

EN


In [None]:
# Plot the lambda and tau distributions of `home_team`, i.e. the home side of
# the last row visited by the fitting loop above.
home_team.plt()