In [12]:
import pandas as pd
import scipy.stats as sc

In [13]:
# Download the 2015/16 E0 results file from https://www.football-data.co.uk/
# and keep only the columns the model reads: date, both team names and the
# full-time home/away goals.
data = pd.read_csv(
    'https://www.football-data.co.uk/mmz4281/1516/E0.csv'
).loc[:, ['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]

In [14]:
# lg_games = number of distinct home teams minus one, i.e. how many home
# fixtures each side plays if every team hosts every other team once.
# The bare expression on the last line displays the team names so they can be
# copied and pasted into later cells.
lg_games = data['HomeTeam'].unique().size - 1
data['HomeTeam'].unique()


array(['Bournemouth', 'Chelsea', 'Everton', 'Leicester', 'Man United',
       'Norwich', 'Arsenal', 'Newcastle', 'Stoke', 'West Brom',
       'Aston Villa', 'Southampton', 'Sunderland', 'Swansea', 'Tottenham',
       'Watford', 'West Ham', 'Crystal Palace', 'Man City', 'Liverpool'],
      dtype=object)

In [15]:

class Goal_Exp:
    """Poisson expected-goals and match-odds model for a single fixture.

    The home side is rated on its last ``num_games`` home matches and the away
    side on its last ``num_games`` away matches, both relative to league-wide
    averages. "Last" means the final rows of ``df`` (via ``DataFrame.tail``),
    so the frame is assumed to be in chronological order -- TODO confirm for
    other data sources.

    Parameters
    ----------
    home : str
        Home team name, matched against the 'HomeTeam' column.
    away : str
        Away team name, matched against the 'AwayTeam' column.
    num_games : int
        Number of most recent matches per team to use.
    df : pandas.DataFrame
        Results with at least 'HomeTeam', 'AwayTeam', 'FTHG' (full-time home
        goals) and 'FTAG' (full-time away goals) columns.
    """

    # Highest goal count per team included in the score grid (0..MAX_GOALS).
    MAX_GOALS = 6

    def __init__(self, home, away, num_games, df):
        self.home = home
        self.away = away
        self.num_games = num_games
        self.df = df

    def calc_lge_avg(self):
        """Return the league's mean goals per match as ``[home_mean, away_mean]``.

        The means are taken over the last ``num_games * number_of_teams`` rows
        of the frame (the whole frame if it has fewer rows than that).
        """
        team_count = len(self.df['HomeTeam'].unique())
        games = self.df.tail(self.num_games * team_count)
        return [games['FTHG'].mean(), games['FTAG'].mean()]

    def team_last_games(self):
        """Return ``[home_team_home_games, away_team_away_games]``.

        Each element is a DataFrame holding at most ``num_games`` rows: the
        home team's most recent home matches and the away team's most recent
        away matches respectively.
        """
        home_games = self.df[self.df['HomeTeam'] == self.home].tail(self.num_games)
        away_games = self.df[self.df['AwayTeam'] == self.away].tail(self.num_games)
        return [home_games, away_games]

    def team_strengths(self):
        """Return the four strength ratios used by the model.

        Each ratio is a team's mean goals divided by the matching league mean:

        * index 0 -- home team attack  (goals scored at home / league home mean)
        * index 1 -- home team defence (goals conceded at home / league away mean)
        * index 2 -- away team attack  (goals scored away / league away mean)
        * index 3 -- away team defence (goals conceded away / league home mean)

        A team with no matching rows yields NaN, because the mean of an empty
        selection is NaN.
        """
        home_games, away_games = self.team_last_games()
        lg_home_avg, lg_away_avg = self.calc_lge_avg()
        return [
            home_games['FTHG'].mean() / lg_home_avg,
            home_games['FTAG'].mean() / lg_away_avg,
            away_games['FTAG'].mean() / lg_away_avg,
            away_games['FTHG'].mean() / lg_home_avg,
        ]

    def exp_goals(self):
        """Return ``[home_expected_goals, away_expected_goals]`` rounded to 3 dp.

        Expected goals = own attack strength * opponent defence strength *
        league mean for that side (home or away).
        """
        strengths = self.team_strengths()
        lg_avg = self.calc_lge_avg()

        home_exp = round(strengths[0] * strengths[3] * lg_avg[0], 3)
        away_exp = round(strengths[2] * strengths[1] * lg_avg[1], 3)

        return [home_exp, away_exp]

    def goals_prob(self):
        """Return Poisson probabilities of scoring 0..MAX_GOALS goals.

        The result is ``[home_probs, away_probs]``; element ``k`` of each inner
        list is the Poisson pmf at ``k`` with that side's expected goals as the
        mean.
        """
        goals = range(self.MAX_GOALS + 1)
        # Both sides are handled identically, so no per-side branching is needed.
        return [[sc.poisson.pmf(g, mu) for g in goals] for mu in self.exp_goals()]

    def calc_prob(self):
        """Return the probability (in percent) and label of every scoreline.

        The result is a list of six lists:

        * index 0 / 3 -- probabilities / "H-A" labels of home-win scores
        * index 1 / 4 -- probabilities / "H-A" labels of away-win scores
        * index 2 / 5 -- probabilities / "H-A" labels of drawn scores

        Scorelines are visited with home goals as the outer loop and away goals
        as the inner loop. Only scores up to MAX_GOALS per side are included,
        so the probabilities sum to slightly less than 100.
        """
        home_probs, away_probs = self.goals_prob()
        buckets = [[], [], [], [], [], []]

        # The goal counts come from enumerate(), never from looking a
        # probability back up with list.index(): two goal counts can share the
        # same probability (e.g. pmf(0) == pmf(1) when the mean is exactly 1),
        # and index() would then report the wrong scoreline and outcome.
        for home_goals, p_home in enumerate(home_probs):
            for away_goals, p_away in enumerate(away_probs):
                if home_goals > away_goals:
                    slot = 0
                elif home_goals < away_goals:
                    slot = 1
                else:
                    slot = 2
                buckets[slot].append((p_home * p_away) * 100)
                buckets[slot + 3].append(str(home_goals) + "-" + str(away_goals))

        return buckets

    def match_imp_odds(self):
        """Return decimal odds ``[home, draw, away]`` rounded to 3 dp.

        Each price is the reciprocal of the summed scoreline probabilities for
        that outcome. Note the order is home/draw/away, which differs from the
        home/away/draw order of ``calc_prob``.
        """
        odds = self.calc_prob()
        home_odd = 1 / (sum(odds[0]) / 100)
        away_odd = 1 / (sum(odds[1]) / 100)
        draw_odd = 1 / (sum(odds[2]) / 100)
        return [round(home_odd, 3), round(draw_odd, 3), round(away_odd, 3)]

    def data_row(self):
        """Return all model inputs and outputs for the fixture as one flat list.

        Order: num_games; then for the home team its name, goals scored and
        conceded in its sampled home games, attack and defence strength and
        expected goals; then for the away team its name, goals conceded and
        scored in its sampled away games, attack and defence strength and
        expected goals; finally the home, draw and away odds.
        """
        last = self.team_last_games()
        strg = self.team_strengths()
        exp = self.exp_goals()
        odds = self.match_imp_odds()
        return [
            self.num_games,
            self.home,
            last[0]['FTHG'].sum(),
            last[0]['FTAG'].sum(),
            round(strg[0], 3),
            round(strg[1], 3),
            exp[0],
            self.away,
            last[1]['FTHG'].sum(),
            last[1]['FTAG'].sum(),
            round(strg[2], 3),
            round(strg[3], 3),
            exp[1],
            odds[0],
            odds[1],
            odds[2],
        ]
    
    
    
   

In [16]:
# Fixture under study: Tottenham at home to Stoke, rated on each side's last 19 games.
g1 = Goal_Exp(home='Tottenham', away='Stoke', num_games=19, df=data)

In [17]:
# League mean goals per match over the sampled games, as [home, away].
g1.calc_lge_avg()

[1.4921052631578948, 1.2078947368421054]

In [18]:
# Strength ratios relative to the league means, in the order
# [home attack, home defence, away attack, away defence].
g1.team_strengths()

[1.2345679012345678, 0.6535947712418301, 0.8278867102396513, 1.09347442680776]

In [19]:
# Expected goals for the fixture, as [home, away], rounded to 3 decimals.
g1.exp_goals()

[2.014, 0.654]

In [20]:
 # match_imp_odds() rebuilds the whole score grid on every call, so call it once
 # and unpack the [home, draw, away] result into the message.
 print("match odds >>> home : {}, draw : {}, away : {}".format(*g1.match_imp_odds()))
    

match odds >>> home : 1.451, draw : 5.111, away : 9.062


In [21]:
# Flat summary row: num_games, home-team figures, away-team figures, then the
# home/draw/away odds.
g1.data_row()

[19,
 'Tottenham',
 35,
 15,
 1.235,
 0.654,
 2.014,
 'Stoke',
 31,
 19,
 0.828,
 1.093,
 0.654,
 1.451,
 5.111,
 9.062]