In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt

In [2]:
season1 = pd.read_csv('match-by-match/leeds-matches-2021.csv')
season1.head()

Unnamed: 0,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,xG,xGA,Poss,Attendance,Captain,Formation,Referee
0,2020-09-12,17:30,Premier League,Matchweek 1,Sat,Away,L,3,4,Liverpool,0.3,2.7,51,,Luke Ayling,4-1-4-1,Michael Oliver
1,2020-09-16,19:45,EFL Cup,Second round,Wed,Home,D,1 (8),1 (9),Hull City,,,65,,Kiko Casilla,4-1-4-1,David Webb
2,2020-09-19,15:00,Premier League,Matchweek 2,Sat,Home,W,4,3,Fulham,1.4,1.7,51,,Liam Cooper,4-1-4-1,Anthony Taylor
3,2020-09-27,12:00,Premier League,Matchweek 3,Sun,Away,W,1,0,Sheffield Utd,1.2,1.4,64,,Liam Cooper,3-1-4-2,Paul Tierney
4,2020-10-03,17:30,Premier League,Matchweek 4,Sat,Home,D,1,1,Manchester City,2.4,1.2,51,,Liam Cooper,4-1-4-1,Mike Dean


In [4]:
season1 = season1.loc[season1['Comp'] == 'Premier League']

In [7]:
season1 = season1[['Round', 'Result', 'Venue', 'GF', 'GA', 'xG', 'xGA']]

In [12]:
season1.head()

Unnamed: 0,Round,Result,Venue,GF,GA,xG,xGA
0,Matchweek 1,L,Away,3,4,0.3,2.7
2,Matchweek 2,W,Home,4,3,1.4,1.7
3,Matchweek 3,W,Away,1,0,1.2,1.4
4,Matchweek 4,D,Home,1,1,2.4,1.2
5,Matchweek 5,L,Home,0,1,1.0,0.4


### Expected Points Calculator
Typically an xP (expected points) calculator is based on each individual shot taken in a game by both teams. We don't have access to this data anywhere so instead we will follow another approach based on the overall team xG that utilizes a Poisson distribution seen [here](https://github.com/kostino/ExpectedPointsCalculator/blob/master/xPoints_Barcelona-RealMadrid_example.ipynb).

In [20]:
from scipy.stats import poisson
max_goals_pl = 9 # max goals scored in PL history is 9, assumed as no higher

xGLeeds = list(season1['xG'])
xGAway = list(season1['xGA'])
LeedsxP = []
AwayxP = []
for j in range(len(xGLeeds)):
    gProbsLeeds = [poisson.pmf(i, xGLeeds[j]) for i in range(max_goals_pl)]
    gProbsAway = [poisson.pmf(i, xGAway[j]) for i in range(max_goals_pl)]

    resultProbs = np.matmul(np.array(gProbsLeeds).reshape(max_goals_pl, 1),np.array(gProbsAway).reshape(1, max_goals_pl))

    LeedsWP = np.tril(resultProbs).sum() - np.trace(resultProbs)
    AwayWP = np.triu(resultProbs).sum() - np.trace(resultProbs)
    DrawP = np.trace(resultProbs)
    
    LeedsxPVals = LeedsWP * 3 + DrawP * 1
    AwayxPVals = AwayWP * 3 + DrawP * 1
    LeedsxP.append(LeedsxPVals)
    AwayxP.append(AwayxPVals)
#print("Barcelona win %: {:.2f}".format(barcelonaWP))
#print("Draw %: {:.2f}".format(drawP))
#print("Real Madrid win %: {:.2f}".format(realWP))

In [21]:
LeedsxP[0]

0.17424477181211412

In [22]:
season1['xPLeeds'] = LeedsxP
season1['xPOpp'] = AwayxP

In [23]:
season1.head()

Unnamed: 0,Round,Result,Venue,GF,GA,xG,xGA,xPLeeds,xPOpp
0,Matchweek 1,L,Away,3,4,0.3,2.7,0.174245,2.72096
2,Matchweek 2,W,Home,4,3,1.4,1.7,1.18774,1.576301
3,Matchweek 3,W,Away,1,0,1.2,1.4,1.228483,1.50911
4,Matchweek 4,D,Home,1,1,2.4,1.2,2.099079,0.713843
5,Matchweek 5,L,Home,0,1,1.0,0.4,1.8526,0.791847
