# Getting the data

## Imports

In [1]:
import pandas as pd
from os import listdir
from os.path import isfile, join
import matplotlib.pyplot as plt  
plt.style.use('dark_background')

## Loading the Dataframe

In [2]:
pwd

'/home/jonathan/code/rafabertolace/OnThePitch/notebooks'

### Merging the Seasons csv files (2019-2020 untill 2021-2022)

In [3]:
print(listdir('./../raw_data/UK'))

['E0.csv:Zone.Identifier', 'E0 (2).csv', 'E0 (2).csv:Zone.Identifier', 'E0 (1).csv:Zone.Identifier', 'E0.csv', 'E0 (1).csv']


In [4]:
files = [file for file in listdir('./../raw_data/UK')]
data = pd.DataFrame()


for file in files:
    df = pd.read_csv('./../raw_data/UK/'+file)
    data = pd.concat([data, df])

In [5]:
data.head()

Unnamed: 0,[ZoneTransfer],Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
0,ZoneId=3,,,,,,,,,,...,,,,,,,,,,
1,ReferrerUrl=https://www.football-data.co.uk/en...,,,,,,,,,,...,,,,,,,,,,
2,HostUrl=https://www.football-data.co.uk/mmz428...,,,,,,,,,,...,,,,,,,,,,
0,,E0,09/08/2019,20:00,Liverpool,Norwich,4.0,1.0,H,4.0,...,3.43,-2.25,1.91,1.99,1.94,1.98,1.99,2.07,1.9,1.99
1,,E0,10/08/2019,12:30,West Ham,Man City,0.0,5.0,A,0.0,...,2.91,1.75,1.95,1.95,1.96,1.97,2.07,1.98,1.97,1.92


In [6]:
for col_name in data.columns: 
    print(col_name)

[ZoneTransfer]
Div
Date
Time
HomeTeam
AwayTeam
FTHG
FTAG
FTR
HTHG
HTAG
HTR
Referee
HS
AS
HST
AST
HF
AF
HC
AC
HY
AY
HR
AR
B365H
B365D
B365A
BWH
BWD
BWA
IWH
IWD
IWA
PSH
PSD
PSA
WHH
WHD
WHA
VCH
VCD
VCA
MaxH
MaxD
MaxA
AvgH
AvgD
AvgA
B365>2.5
B365<2.5
P>2.5
P<2.5
Max>2.5
Max<2.5
Avg>2.5
Avg<2.5
AHh
B365AHH
B365AHA
PAHH
PAHA
MaxAHH
MaxAHA
AvgAHH
AvgAHA
B365CH
B365CD
B365CA
BWCH
BWCD
BWCA
IWCH
IWCD
IWCA
PSCH
PSCD
PSCA
WHCH
WHCD
WHCA
VCCH
VCCD
VCCA
MaxCH
MaxCD
MaxCA
AvgCH
AvgCD
AvgCA
B365C>2.5
B365C<2.5
PC>2.5
PC<2.5
MaxC>2.5
MaxC<2.5
AvgC>2.5
AvgC<2.5
AHCh
B365CAHH
B365CAHA
PCAHH
PCAHA
MaxCAHH
MaxCAHA
AvgCAHH
AvgCAHA


In [7]:
# data_date_2 = pd.to_datetime(data_date, dayfirst = True)

## Features Engineering

### Number of Goals, Over and Under

In [8]:
# total number of goals = goals from the home team + goals from visiting team
data['nb_goals']=data['FTHG']+data['FTAG']

# boolean: true or false regarding whether they were more than 2.5 goals
data['over_2.5_goals']=data['nb_goals']>2.5

# boolean: true or false regarding whether they were less than 2.5 goals
data['under_2.5_goals']=data['nb_goals']<2.5

### Payout Average

  Payout of betting on over/under 2.5 goals: we get 0 if we lose the bet, we get the Avg if we win the bet (Avg = market average of the odds)


In [9]:
# payout under 2.5 for Average OPENING odds
data['payout_avg_under_2.5'] = data['under_2.5_goals']*data['Avg<2.5']

# payout over 2.5 for Average OPENING odds
data['payout_avg_over_2.5'] = data['over_2.5_goals']*data['Avg>2.5']

# payout under 2.5 for Average CLOSING odds
data['payout_avg_under_closing_2.5'] = data['under_2.5_goals']*data['AvgC<2.5']

# payout over 2.5 for Average CLOSING odds
data['payout_avg_over_closing_2.5'] = data['over_2.5_goals']*data['AvgC>2.5']

#### Payout Pinnacle

In [10]:
#payout UNDER 2.5 for PINACLE specifically
data['payout_under_2.5_pinacle'] = data['under_2.5_goals']*data['P<2.5']

#payout UNDER 2.5 for PINACLE closing ddds specifically
data['payout_under_2.5_pinacle_closing'] = data['under_2.5_goals']*data['PC<2.5']

#payout OVER 2.5 for PINACLE specifically
data['payout_over_2.5_pinacle'] = data['over_2.5_goals']*data['P>2.5']

#payout OVER 2.5 for PINACLE closing odds specifically
data['payout_over_2.5_pinacle_closing'] = data['over_2.5_goals']*data['PC>2.5']

#### Payout bet365

In [11]:
#payout UNDER 2.5 for 365 specifically
data['payout_under_2.5_365'] = data['under_2.5_goals']*data['B365<2.5']

#payout UNDER 2.5 for 365 closing odds specifically
data['payout_under_2.5_365_closing'] = data['under_2.5_goals']*data['B365C<2.5']

#payout OVER 2.5 for 365 specifically
data['payout_over_2.5_365'] = data['over_2.5_goals']*data['B365>2.5']

#payout OVER 2.5 for 365 closing odds specifically
data['payout_over_2.5_365_closing'] = data['over_2.5_goals']*data['B365C>2.5']

### Implied Probability Market Average

In [12]:
#Implied Probability UNDER 2.5 goals for for overall market opening odds (Avg) 
data['Implied Probability <2.5 avg']=1/data['Avg<2.5']*100

#Implied Probability OVER 2.5 goals for for overall market opening odds (Avg) 
data['Implied Probability >2.5 avg']=1/data['Avg>2.5']*100

#Implied Probability UNDER 2.5 goals for overall market closing odds (AvgC)
data['Implied Probability <2.5 avg closing']=1/data['AvgC<2.5']*100

#Implied Probability OVER 2.5 goals for overall market closing odds (AvgC)
data['Implied Probability >2.5 avg closing']=1/data['AvgC>2.5']*100

### Implied Probability Pinnacle

In [13]:
#Implied Probability UNDER 2.5 goals for PINACLE
data['Implied Probability <2.5 pinacle']=1/data['P<2.5']*100

#Implied Probability OVER 2.5 goals for PINACLE
data['Implied Probability >2.5 pinacle']=1/data['P>2.5']*100

#Implied Probability UNDER 2.5 goals for PINACLE closing odds
data['Implied Probability <2.5 pinacle closing']=1/data['PC<2.5']*100

#Implied Probability OVER 2.5 goals for PINACLE closing odds
data['Implied Probability >2.5 pinacle closing']=1/data['PC>2.5']*100

### Implied Probability bet365

In [14]:
#Implied Probability UNDER 2.5 goals for 365
data['Implied Probability <2.5 365']=1/data['B365<2.5']*100

#Implied Probability UNDER 2.5 goals for 365 closing odds
data['Implied Probability <2.5 365 closing']=1/data['B365C<2.5']*100

#Implied Probability OVER 2.5 goals for 365
data['Implied Probability >2.5 365']=1/data['B365>2.5']*100

#Implied Probability OVER 2.5 goals for 365 closing odds
data['Implied Probability >2.5 365 closing']=1/data['B365C>2.5']*100

### Binning the implied probabilities

In [15]:
bins = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100]

#Binning UNDER 2.5 Average Market opening odds
data['binned <2.5 avg'] = pd.cut(data['Implied Probability <2.5 avg'], bins)

#Binning Over 2.5 Average Market opening odds
data['binned >2.5 avg'] = pd.cut(data['Implied Probability >2.5 avg'], bins)

#Binning UNDER 2.5 Average Market closing odds
data['binned <2.5 avg closing'] = pd.cut(data['Implied Probability <2.5 avg closing'], bins)

#Binning OVER 2.5 Average Market closing odds
data['binned >2.5 avg closing'] = pd.cut(data['Implied Probability >2.5 avg closing'], bins)

# Creating (new) features for the feature analysis

## Building new features for Pinnacle

In [16]:
bins = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100]

#Binned UNDER 2.5 Pinnacle opening odds
data['binned <2.5 pinacle'] = pd.cut(data['Implied Probability <2.5 pinacle'], bins)

#Binned UNDER 2.5 Pinnacle closing odds
data['binned <2.5 pinacle closing'] = pd.cut(data['Implied Probability <2.5 pinacle closing'], bins)

#Binned OVER 2.5 Pinnacle
data['binned >2.5 pinacle'] = pd.cut(data['Implied Probability >2.5 pinacle'], bins)

#Binned OVER 2.5 Pinnacle CLOSING odds
data['binned >2.5 pinacle closing'] = pd.cut(data['Implied Probability >2.5 pinacle closing'], bins)

In [17]:
# I am binning the odds of Pinnacle with the bins_odds defined below for feature exploration

bins_odds = [1, 1.5, 1.6, 1.7, 1.8, 1.9, 2, 2.5, 3, 999999]

#Binned UNDER 2.5 Pinnacle opening odds
data['binned PC<2.5'] = pd.cut(data['PC<2.5'], bins_odds)

#Binned OVER 2.5 Pinnacle opening odds
data['binned PC>2.5'] = pd.cut(data['PC>2.5'], bins_odds)

## Does Pinnacle pays better than the market matter?

### Feature Engineering

In [18]:
# Does Pinnacle pays better than the market feature

data['PC<2.5_is_better_than_AvgC'] = data['PC<2.5']>data['AvgC<2.5']

data['PC>2.5_is_better_than_AvgC'] = data['PC>2.5']>data['AvgC>2.5']

### Analysis

In [19]:
# binned OVER 2.5 Pinacle

new_view = data.groupby('PC<2.5_is_better_than_AvgC')['payout_under_2.5_pinacle_closing'].agg(['mean','count'])
new_view

Unnamed: 0_level_0,mean,count
PC<2.5_is_better_than_AvgC,Unnamed: 1_level_1,Unnamed: 2_level_1
False,0.963265,147
True,0.999799,993


In [20]:
# binned OVER 2.5 Pinacle

new_view2 = data.groupby('PC>2.5_is_better_than_AvgC')['payout_over_2.5_pinacle_closing'].agg(['mean','count'])
new_view2

Unnamed: 0_level_0,mean,count
PC>2.5_is_better_than_AvgC,Unnamed: 1_level_1,Unnamed: 2_level_1
False,0.914583,120
True,0.963451,1020


## Does the difference between Pinnacle and the market matters?

### Feature Engineering

In [21]:
# Does Pinnacle pays better than the market feature

data['PC<2.5_AvgC_relative_diff'] = data['PC<2.5']/data['AvgC<2.5']

data['PC>2.5_AvgC_relative_diff'] = data['PC>2.5']/data['AvgC>2.5']

data['PC>2.5_AvgC_relative_diff'] = data['PC>2.5']>data['AvgC>2.5']


buckets_diff = [0, 0.5, 0.7, 0.8, 0.9, 0.95, 1, 1.05, 1.1, 1.2, 1.3, 1.4, 1.5, 999999]

#Binned UNDER 2.5 Pinnacle opening odds
data['binned PC<2.5_AvgC_relative_diff'] = pd.cut(data['PC<2.5_AvgC_relative_diff'], buckets_diff)

#Binned OVER 2.5 Pinnacle opening odds
data['binned PC>2.5_AvgC_relative_diff'] = pd.cut(data['PC>2.5_AvgC_relative_diff'], buckets_diff)

### Analysis

In [22]:
# binned OVER 2.5 Pinacle

new_view = data.groupby('binned PC<2.5_AvgC_relative_diff')['payout_under_2.5_pinacle_closing'].agg(['mean','count'])
new_view

Unnamed: 0_level_0,mean,count
binned PC<2.5_AvgC_relative_diff,Unnamed: 1_level_1,Unnamed: 2_level_1
"(0.0, 0.5]",,0
"(0.5, 0.7]",,0
"(0.7, 0.8]",2.63,1
"(0.8, 0.9]",,0
"(0.9, 0.95]",0.0,3
"(0.95, 1.0]",0.971818,143
"(1.0, 1.05]",0.999761,921
"(1.05, 1.1]",1.023284,67
"(1.1, 1.2]",0.692,5
"(1.2, 1.3]",,0


## Does the VIG matter?

### Feature Engineering

In [36]:
data['%vig_p'].mean()

KeyError: '%vig_p'

In [37]:
data['%vig_365'] = (1 - (1 / (1/data['B365C>2.5'] + 1/data['B365C<2.5'])))*100
data['%vig_p'] = (1 - (1 / (1/data['PC>2.5'] + 1/data['PC<2.5'])))*100

buckets_vig = [0, 2, 3, 3.5, 4, 5, 10, 20, 50, 90, 95, 97.5, 100]

#Binned UNDER 2.5 Pinnacle opening odds
data['binned vig_p'] = pd.cut(data['%vig_p'], buckets_vig)

data['vig_p is low'] = data['%vig_p']<3.3
#fair odds, no VIG
#data['B365IP>2.5'] = ((1/(data['B365C>2.5']))*100 / (data['%vigC_365'] + 100))
#data['B365FOC>2.5'] = 1 / data['B365IPC>2.5']

### Analysis

In [38]:
new_view_3 = data.groupby('vig_p is low')['payout_under_2.5_pinacle_closing'].agg(['mean','count'])
new_view_3

Unnamed: 0_level_0,mean,count
vig_p is low,Unnamed: 1_level_1,Unnamed: 2_level_1
False,1.144167,72
True,0.985037,1068


In [39]:
new_view_3 = data.groupby('vig_p is low')['payout_over_2.5_pinacle_closing'].agg(['mean','count'])
new_view_3

Unnamed: 0_level_0,mean,count
vig_p is low,Unnamed: 1_level_1,Unnamed: 2_level_1
False,0.869028,72
True,0.964326,1068


In [40]:
data2 = data[data['PC<2.5_is_better_than_AvgC']==True]

new_view_3 = data2[data2['binned vig_p']==True].groupby('binned PC<2.5')['payout_under_2.5_pinacle_closing'].agg(['mean','count'])
new_view_3

Unnamed: 0_level_0,mean,count
binned PC<2.5,Unnamed: 1_level_1,Unnamed: 2_level_1
"(1.0, 1.5]",,0
"(1.5, 2.0]",,0
"(2.0, 2.5]",,0
"(2.5, 3.0]",,0
"(3.0, 999999.0]",,0


## Does the seasonality matter?

### Feature Engineering

In [41]:
## Adding the Year Feature
data_date = data['Date']
data_date_2 = pd.to_datetime(data_date, dayfirst = True)
data['month'] = pd.DatetimeIndex(data_date_2).month
data['year'] = pd.DatetimeIndex(data_date_2).year
data['month_year']=str(data['month'])+' '+str(data['year'])

In [42]:
data['year']

0         NaN
1         NaN
2         NaN
0      2019.0
1      2019.0
        ...  
375    2021.0
376    2021.0
377    2021.0
378    2021.0
379    2021.0
Name: year, Length: 1149, dtype: float64

### Analysis

In [43]:
new_view_4 = data.groupby('year')['payout_under_2.5_pinacle_closing'].agg(['mean','count'])
new_view_4

Unnamed: 0_level_0,mean,count
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2019.0,0.999749,199
2020.0,1.050446,336
2021.0,0.970245,408
2022.0,0.947411,197


## Does the spread min-max matters?

In [44]:
for col_name in data.columns: 
    print(col_name)

[ZoneTransfer]
Div
Date
Time
HomeTeam
AwayTeam
FTHG
FTAG
FTR
HTHG
HTAG
HTR
Referee
HS
AS
HST
AST
HF
AF
HC
AC
HY
AY
HR
AR
B365H
B365D
B365A
BWH
BWD
BWA
IWH
IWD
IWA
PSH
PSD
PSA
WHH
WHD
WHA
VCH
VCD
VCA
MaxH
MaxD
MaxA
AvgH
AvgD
AvgA
B365>2.5
B365<2.5
P>2.5
P<2.5
Max>2.5
Max<2.5
Avg>2.5
Avg<2.5
AHh
B365AHH
B365AHA
PAHH
PAHA
MaxAHH
MaxAHA
AvgAHH
AvgAHA
B365CH
B365CD
B365CA
BWCH
BWCD
BWCA
IWCH
IWCD
IWCA
PSCH
PSCD
PSCA
WHCH
WHCD
WHCA
VCCH
VCCD
VCCA
MaxCH
MaxCD
MaxCA
AvgCH
AvgCD
AvgCA
B365C>2.5
B365C<2.5
PC>2.5
PC<2.5
MaxC>2.5
MaxC<2.5
AvgC>2.5
AvgC<2.5
AHCh
B365CAHH
B365CAHA
PCAHH
PCAHA
MaxCAHH
MaxCAHA
AvgCAHH
AvgCAHA
nb_goals
over_2.5_goals
under_2.5_goals
payout_avg_under_2.5
payout_avg_over_2.5
payout_avg_under_closing_2.5
payout_avg_over_closing_2.5
payout_under_2.5_pinacle
payout_under_2.5_pinacle_closing
payout_over_2.5_pinacle
payout_over_2.5_pinacle_closing
payout_under_2.5_365
payout_under_2.5_365_closing
payout_over_2.5_365
payout_over_2.5_365_closing
Implied Probability <2.5 avg
Imp

In [45]:
 ## Min-Max
    
#MaxC<2.5
#AvgC>2.5
#AvgC<2.5

data['MaxC<2.5_AvgC_relative_diff'] = data['MaxC<2.5']/data['AvgC<2.5']
data['MaxC>2.5_AvgC_relative_diff'] = data['MaxC>2.5']/data['AvgC>2.5']

data['Market_consensus'] = data['MaxC>2.5_AvgC_relative_diff']<1.05

data['MaxC>2.5_AvgC_relative_diff'] = data['MaxC<2.5']/data['AvgC>2.5']

buckets_diff = [0, 0.5, 0.7, 0.8, 0.9, 0.95, 1, 1.02, 1.03, 1.04, 1.05, 1.1, 1.2, 1.3, 1.4, 1.5, 999999]

#Binned UNDER 2.5 Pinnacle opening odds
data['binned MaxC<2.5_AvgC_relative_diff'] = pd.cut(data['MaxC<2.5_AvgC_relative_diff'], buckets_diff)

In [46]:
new_view_5 = data.groupby('Market_consensus')['payout_under_2.5_pinacle_closing'].agg(['mean','count'])
new_view_5

#data[data['year']==2022]

Unnamed: 0_level_0,mean,count
Market_consensus,Unnamed: 1_level_1,Unnamed: 2_level_1
False,0.931039,356
True,1.024171,784


In [47]:
## better to bet on ODDS that are consensual (max vs avg close to 1)

## Does the spread P-PC matters?

In [48]:
for col_name in data.columns: 
    print(col_name)

[ZoneTransfer]
Div
Date
Time
HomeTeam
AwayTeam
FTHG
FTAG
FTR
HTHG
HTAG
HTR
Referee
HS
AS
HST
AST
HF
AF
HC
AC
HY
AY
HR
AR
B365H
B365D
B365A
BWH
BWD
BWA
IWH
IWD
IWA
PSH
PSD
PSA
WHH
WHD
WHA
VCH
VCD
VCA
MaxH
MaxD
MaxA
AvgH
AvgD
AvgA
B365>2.5
B365<2.5
P>2.5
P<2.5
Max>2.5
Max<2.5
Avg>2.5
Avg<2.5
AHh
B365AHH
B365AHA
PAHH
PAHA
MaxAHH
MaxAHA
AvgAHH
AvgAHA
B365CH
B365CD
B365CA
BWCH
BWCD
BWCA
IWCH
IWCD
IWCA
PSCH
PSCD
PSCA
WHCH
WHCD
WHCA
VCCH
VCCD
VCCA
MaxCH
MaxCD
MaxCA
AvgCH
AvgCD
AvgCA
B365C>2.5
B365C<2.5
PC>2.5
PC<2.5
MaxC>2.5
MaxC<2.5
AvgC>2.5
AvgC<2.5
AHCh
B365CAHH
B365CAHA
PCAHH
PCAHA
MaxCAHH
MaxCAHA
AvgCAHH
AvgCAHA
nb_goals
over_2.5_goals
under_2.5_goals
payout_avg_under_2.5
payout_avg_over_2.5
payout_avg_under_closing_2.5
payout_avg_over_closing_2.5
payout_under_2.5_pinacle
payout_under_2.5_pinacle_closing
payout_over_2.5_pinacle
payout_over_2.5_pinacle_closing
payout_under_2.5_365
payout_under_2.5_365_closing
payout_over_2.5_365
payout_over_2.5_365_closing
Implied Probability <2.5 avg
Imp

In [49]:
 ## Min-Max
    
#MaxC<2.5
#AvgC>2.5
#AvgC<2.5

data['PC<2.5_P_relative_diff'] = data['PC<2.5']>data['P<2.5']

data['PC>2.5_P_relative_diff'] = data['PC>2.5']>data['P>2.5']

buckets_diff = [0, 0.5, 0.7, 0.8, 0.9, 0.95, 1, 1.025, 1.05, 1.1, 1.2, 1.3, 1.4, 1.5, 999999]

#Binned UNDER 2.5 Pinnacle opening odds
data['binned PC<2.5_P_relative_diff'] = pd.cut(data['PC<2.5_P_relative_diff'], buckets_diff)

In [50]:
new_view_5 = data.groupby('PC<2.5_P_relative_diff')['payout_under_2.5_pinacle_closing'].agg(['mean','count'])
new_view_5

Unnamed: 0_level_0,mean,count
PC<2.5_P_relative_diff,Unnamed: 1_level_1,Unnamed: 2_level_1
False,0.985337,609
True,1.006271,531


## Does the ODDS matter?

In [51]:
# I am binning the odds of Pinnacle with the bins_odds defined below for feature exploration

bins_odds = [1, 1.5, 2, 2.5, 3, 999999]
bins_odds_2 = [1,2,999999]


#Binned UNDER 2.5 Pinnacle opening odds
data['binned PC<2.5'] = pd.cut(data['PC<2.5'], bins_odds)
data['PC<2.5 low odds'] = data['PC<2.5']<2

#Binned OVER 2.5 Pinnacle opening odds
data['binned PC>2.5'] = pd.cut(data['PC>2.5'], bins_odds)

In [52]:
# binned UNDER 2.5 MktAvg

fdf_under_average_closing_gb_mean = data.groupby('binned PC<2.5')['payout_avg_under_2.5'].agg(['mean','count'])
fdf_under_average_closing_gb_mean


Unnamed: 0_level_0,mean,count
binned PC<2.5,Unnamed: 1_level_1,Unnamed: 2_level_1
"(1.0, 1.5]",0.93625,8
"(1.5, 2.0]",0.994883,514
"(2.0, 2.5]",0.943514,424
"(2.5, 3.0]",0.889344,122
"(3.0, 999999.0]",1.17375,72


## Combining everything

In [53]:
data_f = data[
    (data['PC<2.5_is_better_than_AvgC'] == True) 
     # (data['PC<2.5 low odds'] == False)
     & (data['vig_p is low'] == False)
    #& (data['PC<2.5_P_relative_diff'] == False)
    #& (data['Market_consensus'] == True)
             ]

In [54]:
data_f_2 = data_f['payout_avg_under_2.5'].agg(['mean','std','count'])
#data_f_2.sort_values(by=['count'], inplace=True)
data_f_2
#,'PC<2.5_P_relative_diff','Market_consensus'

mean      1.078500
std       1.368625
count    60.000000
Name: payout_avg_under_2.5, dtype: float64

In [55]:
# P pays better than Avg
'PC<2.5_is_better_than_AvgC' == True

# Odds are <2 (low)
'PC<2.5 low odds' == True

# P VIG is <4 (low)
'vig_p is low' == True

# PC is NOT greater than P (odds did not increase)
'PC<2.5_P_relative_diff' == False

# Diff between max and avg is < 1.05
'Market_consensus'== True

False

# Analysing the payouts per feature

### Market Average

In [33]:
# binned UNDER 2.5 MktAvg

fdf_under_average_closing_gb_mean = data.groupby('binned AvgC<2.5')['payout_avg_under_2.5'].agg(['mean','count'])
fdf_under_average_closing_gb_mean



Unnamed: 0_level_0,mean,count
binned AvgC<2.5,Unnamed: 1_level_1,Unnamed: 2_level_1
"(1.0, 1.5]",1.66,3
"(1.5, 1.6]",0.886087,23
"(1.6, 1.7]",0.856818,88
"(1.7, 1.8]",0.962385,130
"(1.8, 1.9]",1.026149,161
"(1.9, 2.0]",0.997208,154
"(2.0, 2.5]",0.936392,474
"(2.5, 3.0]",0.83,59
"(3.0, 999999.0]",0.210714,14


In [34]:
# binned OVER 2.5 MktAvg

fdf_over_average_closing_gb_mean = data.groupby('binned AvgC>2.5')['payout_avg_over_2.5'].agg(['mean','count'])
fdf_over_average_closing_gb_mean

Unnamed: 0_level_0,mean,count
binned AvgC>2.5,Unnamed: 1_level_1,Unnamed: 2_level_1
"(1.0, 1.5]",1.059545,66
"(1.5, 1.6]",1.178727,110
"(1.6, 1.7]",0.868129,171
"(1.7, 1.8]",0.882222,207
"(1.8, 1.9]",0.945355,183
"(1.9, 2.0]",0.821504,133
"(2.0, 2.5]",0.995517,232
"(2.5, 3.0]",0.525,4
"(3.0, 999999.0]",,0


### Pinnacle

In [30]:
# binned OVER 2.5 Pinacle

fdf_under_pinacle_closing_gb_mean = data.groupby('binned PC>2.5')['payout_over_2.5_pinacle_closing'].agg(['mean','count'])
fdf_under_pinacle_closing_gb_mean

Unnamed: 0_level_0,mean,count
binned PC>2.5,Unnamed: 1_level_1,Unnamed: 2_level_1
"(1.0, 1.5]",1.018929,56
"(1.5, 1.6]",1.142903,93
"(1.6, 1.7]",0.908028,142
"(1.7, 1.8]",0.886806,216
"(1.8, 1.9]",0.870612,147
"(1.9, 2.0]",1.011605,162
"(2.0, 2.5]",0.999173,278
"(2.5, 3.0]",1.17,11
"(3.0, 999999.0]",,0


In [None]:
# binned OVER 2.5 Avg

fdf_under_pinacle_closing_gb_mean = data.groupby('binned AvgC>2.5')['payout_over_2.5_market_closing'].agg(['mean','count'])
fdf_under_pinacle_closing_gb_mean

### Binning bet365

In [272]:
bins = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100]

#Binned UNDER 2.5 bet365 OPENING odds
data['binned <2.5 365'] = pd.cut(data['Implied Probability <2.5 365'], bins)

#Binned UNDER 2.5 bet365 CLOSING odds
data['binned <2.5 365 closing'] = pd.cut(data['Implied Probability <2.5 365 closing'], bins)

#Binned OVER 2.5 bet365 OPENING odds
data['binned >2.5 365'] = pd.cut(data['Implied Probability >2.5 365'], bins)

#Binned OVER 2.5 bet365 CLOSING odds
data['binned >2.5 365 closing'] = pd.cut(data['Implied Probability >2.5 365 closing'], bins)

In [170]:
data

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,payout_under_2.5_365_closing,payout_over_2.5_365_closing,Implied Probability >2.5 pinacle closing,Implied Probability <2.5 pinacle closing,Implied Probability <2.5 365 closing,Implied Probability >2.5 365 closing,binned <2.5 pinacle closing,binned >2.5 pinacle closing,binned <2.5 365 closing,binned >2.5 365 closing
0,T1,11/09/2020,18:00,Rizespor,Fenerbahce,1,2,A,0,0,...,0.00,1.70,57.142857,46.082949,47.619048,58.823529,"(45, 50]","(55, 60]","(45, 50]","(55, 60]"
1,T1,12/09/2020,15:00,Karagumruk,Yeni Malatyaspor,3,0,H,1,0,...,0.00,1.90,51.813472,51.020408,51.282051,52.631579,"(50, 55]","(50, 55]","(50, 55]","(50, 55]"
2,T1,12/09/2020,15:00,Sivasspor,Alanyaspor,0,2,A,0,1,...,2.10,0.00,56.818182,46.728972,47.619048,58.823529,"(45, 50]","(55, 60]","(45, 50]","(55, 60]"
3,T1,12/09/2020,18:00,Galatasaray,Gaziantep,3,1,H,3,0,...,0.00,1.60,61.728395,41.666667,43.478261,62.500000,"(40, 45]","(60, 65]","(40, 45]","(60, 65]"
4,T1,12/09/2020,18:00,Goztep,Denizlispor,5,1,H,3,0,...,0.00,2.05,48.309179,54.644809,57.142857,48.780488,"(50, 55]","(45, 50]","(55, 60]","(45, 50]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,T1,21/05/2022,17:00,Yeni Malatyaspor,Fenerbahce,0,5,A,0,1,...,0.00,1.40,69.930070,34.602076,34.843206,71.428571,"(30, 35]","(65, 70]","(30, 35]","(70, 75]"
376,T1,22/05/2022,14:00,Buyuksehyr,Trabzonspor,3,1,H,3,0,...,0.00,1.66,58.139535,45.045045,46.511628,60.240964,"(45, 50]","(55, 60]","(45, 50]","(60, 65]"
377,T1,22/05/2022,14:00,Karagumruk,Alanyaspor,0,1,A,0,1,...,2.35,0.00,62.893082,40.485830,42.553191,63.694268,"(40, 45]","(60, 65]","(40, 45]","(60, 65]"
378,T1,22/05/2022,17:00,Ad. Demirspor,Goztep,7,0,H,5,0,...,0.00,1.40,71.428571,32.467532,34.843206,71.428571,"(30, 35]","(70, 75]","(30, 35]","(70, 75]"


## Final Dataset

### Average Market (AvG)

In [293]:
#Average Market UNDER opening odds
fdf_under_avg = data[['Implied Probability <2.5 avg','under_2.5_goals','binned <2.5 avg','payout_avg_under_2.5']]

#Average Market OVER opening odds
fdf_over_avg = data[['Implied Probability >2.5 avg','over_2.5_goals','binned >2.5 avg','payout_avg_over_2.5']]

#Average Market UNDER closing odds
fdf_under_avg_closing = data[['Implied Probability <2.5 avg closing','under_2.5_goals','binned <2.5 avg closing','payout_avg_under_closing_2.5']]

#Average Market OVER opening odds
fdf_over_avg_closing = data[['Implied Probability >2.5 avg closing','over_2.5_goals','binned >2.5 avg closing','payout_avg_over_closing_2.5']]

### Pinnacle

In [177]:
#pinnacle UNDER opening odds
fdf_under_pinacle = data[['Implied Probability <2.5 pinacle','under_2.5_goals','binned <2.5 pinacle','payout_under_2.5_pinacle']]

#pinnacle OVER opening odds
fdf_over_pinacle = data[['Implied Probability >2.5 pinacle','over_2.5_goals','binned >2.5 pinacle','payout_over_2.5_pinacle']]

#pinnacle UNDER closing odds
fdf_under_pinacle_closing = data[['Implied Probability <2.5 pinacle closing','under_2.5_goals','binned <2.5 pinacle closing','payout_under_2.5_pinacle_closing']]

#pinnacle OVER closing odds
fdf_over_pinacle_closing = data[['Implied Probability >2.5 pinacle closing','over_2.5_goals','binned >2.5 pinacle closing','payout_over_2.5_pinacle_closing']]

### bet365

In [295]:
#bet365 UNDER opening odds
fdf_under_365 = data[['Implied Probability <2.5 365','under_2.5_goals','binned <2.5 365','payout_under_2.5_365']]

#bet365 OVER opening odds
fdf_over_365 = data[['Implied Probability >2.5 365','over_2.5_goals','binned >2.5 365','payout_over_2.5_365']]

#bet365 UNDER closing odds
fdf_under_365_closing = data[['Implied Probability <2.5 365 closing','under_2.5_goals','binned <2.5 365 closing','payout_under_2.5_365_closing']]

#bet365 OVER closing odds
fdf_over_365_closing = data[['Implied Probability >2.5 365 closing','over_2.5_goals','binned >2.5 365 closing','payout_over_2.5_365_closing']]

## Results per implied prob Average Market

### Under

#### Avg opening odds

In [302]:
fdf_under_avg_gb_count = fdf_under_avg.groupby('binned <2.5 avg')['payout_avg_under_2.5'].count()
fdf_under_avg_gb_count

binned <2.5 avg
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       1
(25, 30]       1
(30, 35]      10
(35, 40]      36
(40, 45]     156
(45, 50]     349
(50, 55]     327
(55, 60]     193
(60, 65]      31
(65, 70]       2
(70, 75]       0
(75, 80]       0
(80, 85]       0
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_avg_under_2.5, dtype: int64

In [301]:
fdf_under_avg_gb_mean = fdf_under_avg.groupby('binned <2.5 avg')['payout_avg_under_2.5'].mean()
fdf_under_avg_gb_mean

binned <2.5 avg
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]     0.000000
(25, 30]     0.000000
(30, 35]     0.295000
(35, 40]     0.782500
(40, 45]     0.756154
(45, 50]     1.049083
(50, 55]     0.997492
(55, 60]     0.889223
(60, 65]     0.835161
(65, 70]     0.760000
(70, 75]          NaN
(75, 80]          NaN
(80, 85]          NaN
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_avg_under_2.5, dtype: float64

#### Avg closing odds

In [304]:
fdf_under_avg_closing_gb_count = fdf_under_avg_closing.groupby('binned <2.5 avg closing')['payout_avg_under_closing_2.5'].count()
fdf_under_avg_closing_gb_count

binned <2.5 avg closing
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       1
(25, 30]       3
(30, 35]      14
(35, 40]      56
(40, 45]     167
(45, 50]     315
(50, 55]     304
(55, 60]     176
(60, 65]      62
(65, 70]       8
(70, 75]       0
(75, 80]       0
(80, 85]       0
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_avg_under_closing_2.5, dtype: int64

In [305]:
fdf_under_avg_closing_gb_mean = fdf_under_avg_closing.groupby('binned <2.5 avg closing')['payout_avg_under_closing_2.5'].mean()
fdf_under_avg_closing_gb_mean

binned <2.5 avg closing
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]     0.000000
(25, 30]     0.000000
(30, 35]     0.233571
(35, 40]     0.996964
(40, 45]     0.760898
(45, 50]     1.059079
(50, 55]     0.987401
(55, 60]     0.919659
(60, 65]     0.861129
(65, 70]     0.742500
(70, 75]          NaN
(75, 80]          NaN
(80, 85]          NaN
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_avg_under_closing_2.5, dtype: float64

### Over

#### Opening Odds

In [309]:
fdf_over_avg_gb_count = fdf_over_avg.groupby('binned >2.5 avg')['payout_avg_over_2.5'].count()
fdf_over_avg_gb_count

binned >2.5 avg
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       0
(25, 30]       0
(30, 35]       0
(35, 40]       2
(40, 45]      23
(45, 50]     185
(50, 55]     322
(55, 60]     332
(60, 65]     184
(65, 70]      43
(70, 75]      10
(75, 80]       4
(80, 85]       0
(85, 90]       1
(90, 95]       0
(95, 100]      0
Name: payout_avg_over_2.5, dtype: int64

In [308]:
fdf_over_avg_gb_mean = fdf_over_avg.groupby('binned >2.5 avg')['payout_avg_over_2.5'].mean()
fdf_over_avg_gb_mean

binned >2.5 avg
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]          NaN
(25, 30]          NaN
(30, 35]          NaN
(35, 40]     1.310000
(40, 45]     1.104348
(45, 50]     1.016757
(50, 55]     0.905590
(55, 60]     0.878283
(60, 65]     1.010272
(65, 70]     0.974651
(70, 75]     1.243000
(75, 80]     1.317500
(80, 85]          NaN
(85, 90]     1.170000
(90, 95]          NaN
(95, 100]         NaN
Name: payout_avg_over_2.5, dtype: float64

#### Closing Odds

In [313]:
fdf_over_avg_closing_gb_count = fdf_over_avg_closing.groupby('binned >2.5 avg closing')['payout_avg_over_closing_2.5'].count()
fdf_over_avg_closing_gb_count

binned >2.5 avg closing
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       0
(25, 30]       0
(30, 35]       0
(35, 40]       5
(40, 45]      46
(45, 50]     191
(50, 55]     297
(55, 60]     296
(60, 65]     186
(65, 70]      62
(70, 75]      14
(75, 80]       7
(80, 85]       2
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_avg_over_closing_2.5, dtype: int64

In [312]:
fdf_over_avg_closing_gb_mean = fdf_over_avg_closing.groupby('binned >2.5 avg closing')['payout_avg_over_closing_2.5'].mean()
fdf_over_avg_closing_gb_mean

binned >2.5 avg closing
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]          NaN
(25, 30]          NaN
(30, 35]          NaN
(35, 40]     0.502000
(40, 45]     1.256739
(45, 50]     0.976545
(50, 55]     0.917239
(55, 60]     0.862230
(60, 65]     0.979140
(65, 70]     1.056129
(70, 75]     0.982143
(75, 80]     1.120000
(80, 85]     1.215000
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_avg_over_closing_2.5, dtype: float64

## Results per implied prob Pinnacle

### Under

#### Pinnacle opening odds

In [296]:
fdf_under_pinacle_gb_count = fdf_under_pinacle.groupby('binned <2.5 pinacle')['payout_under_2.5_pinacle'].count()
fdf_under_pinacle_gb_count

binned <2.5 pinacle
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       1
(25, 30]       3
(30, 35]      12
(35, 40]      49
(40, 45]     192
(45, 50]     398
(50, 55]     276
(55, 60]     146
(60, 65]      23
(65, 70]       2
(70, 75]       0
(75, 80]       0
(80, 85]       0
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_under_2.5_pinacle, dtype: int64

In [298]:
fdf_under_pinacle_gb_mean = fdf_under_pinacle.groupby('binned <2.5 pinacle')['payout_under_2.5_pinacle'].mean()
fdf_under_pinacle_gb_mean

binned <2.5 pinacle
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]     0.000000
(25, 30]     0.000000
(30, 35]     0.260000
(35, 40]     0.956939
(40, 45]     0.835833
(45, 50]     1.044196
(50, 55]     1.001304
(55, 60]     0.910068
(60, 65]     0.847826
(65, 70]     0.765000
(70, 75]          NaN
(75, 80]          NaN
(80, 85]          NaN
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_under_2.5_pinacle, dtype: float64

In [207]:
fdf_under_pinacle['payout_under_2.5_pinacle'].mean()

0.9585662431941923

In [208]:
fdf_under_pinacle_above_40 = fdf_Under_pinacle[fdf_Under_pinacle['Implied Probability <2.5 pinacle']>40]

In [209]:
fdf_under_pinacle_above_40['payout_under_2.5_pinacle'].mean()

0.9975848303393213

#### Closing Odds

In [299]:
fdf_under_pinacle_closing_gb_count = fdf_under_pinacle_closing.groupby('binned <2.5 pinacle closing')['payout_under_2.5_pinacle_closing'].count()
fdf_under_pinacle_closing_gb_count

binned <2.5 pinacle closing
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       2
(25, 30]       6
(30, 35]      16
(35, 40]      79
(40, 45]     187
(45, 50]     329
(50, 55]     272
(55, 60]     164
(60, 65]      45
(65, 70]       5
(70, 75]       0
(75, 80]       0
(80, 85]       0
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_under_2.5_pinacle_closing, dtype: int64

In [300]:
fdf_under_pinacle_closing_gb_mean = fdf_under_pinacle_closing.groupby('binned <2.5 pinacle closing')['payout_under_2.5_pinacle_closing'].mean()
fdf_under_pinacle_closing_gb_mean

binned <2.5 pinacle closing
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]     0.000000
(25, 30]     0.566667
(30, 35]     1.102500
(35, 40]     0.573418
(40, 45]     0.990428
(45, 50]     1.064468
(50, 55]     1.010147
(55, 60]     0.891341
(60, 65]     0.861333
(65, 70]     0.892000
(70, 75]          NaN
(75, 80]          NaN
(80, 85]          NaN
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_under_2.5_pinacle_closing, dtype: float64

In [17]:
fdf_under_pinacle_closing_gb_count = fdf_under_pinacle_closing.groupby('binned <2.5 pinacle odds')['payout_under_2.5_pinacle_closing'].count()
fdf_under_pinacle_closing_gb_count

NameError: name 'fdf_under_pinacle_closing' is not defined

In [21]:
fdf_under_pinacle_closing_gb_mean = data.groupby('binned PC<2.5')['payout_under_2.5_pinacle_closing'].agg(['mean','count'])
fdf_under_pinacle_closing_gb_mean

Unnamed: 0_level_0,mean,count
binned PC<2.5,Unnamed: 1_level_1,Unnamed: 2_level_1
"(1.0, 1.5]",1.44,1
"(1.5, 2.0]",0.967505,501
"(2.0, 3.0]",0.976746,587
"(3.0, 999999.0]",0.400625,16


### Over

#### Opening odds

In [258]:
fdf_over_pinacle_gb_count = fdf_over_pinacle.groupby('binned >2.5 pinacle')['payout_over_2.5_pinacle'].count()
fdf_over_pinacle_gb_count

binned >2.5 pinacle
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       0
(25, 30]       0
(30, 35]       0
(35, 40]       2
(40, 45]      46
(45, 50]     224
(50, 55]     297
(55, 60]     321
(60, 65]     158
(65, 70]      43
(70, 75]       7
(75, 80]       3
(80, 85]       1
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_over_2.5_pinacle, dtype: int64

In [217]:
fdf_over_pinacle.groupby('binned >2.5 pinacle')['payout_over_2.5_pinacle'].mean()

binned >2.5 pinacle
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]          NaN
(25, 30]          NaN
(30, 35]          NaN
(35, 40]     1.390000
(40, 45]     0.912391
(45, 50]     1.031205
(50, 55]     0.964141
(55, 60]     0.844486
(60, 65]     1.069367
(65, 70]     1.075116
(70, 75]     1.167143
(75, 80]     1.303333
(80, 85]     1.210000
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_over_2.5_pinacle, dtype: float64

In [257]:
fdf_over_pinacle_gb_mean = fdf_over_pinacle['payout_over_2.5_pinacle'].mean()
fdf_over_pinacle_gb_mean

0.9633847549909257

In [229]:
fdf_over_pinacle_above_60 = fdf_over_pinacle[fdf_over_pinacle['Implied Probability >2.5 pinacle']>60]

In [220]:
fdf_over_pinacle_above_60['payout_over_2.5_pinacle'].mean()

1.0777358490566038

#### Closing odds

In [228]:
fdf_over_pinacle_closing_gb_count = fdf_over_pinacle_closing.groupby('binned >2.5 pinacle closing')['payout_over_2.5_pinacle_closing'].count()
fdf_over_pinacle_closing_gb_count

binned >2.5 pinacle closing
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       0
(25, 30]       0
(30, 35]       1
(35, 40]      12
(40, 45]      82
(45, 50]     203
(50, 55]     281
(55, 60]     293
(60, 65]     158
(65, 70]      56
(70, 75]      12
(75, 80]       5
(80, 85]       2
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_over_2.5_pinacle_closing, dtype: int64

In [226]:
fdf_over_pinacle_closing_gb_mean = fdf_over_pinacle_closing.groupby('binned >2.5 pinacle closing')['payout_over_2.5_pinacle_closing'].mean()
fdf_over_pinacle_closing_gb_mean

binned >2.5 pinacle closing
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]          NaN
(25, 30]          NaN
(30, 35]     0.000000
(35, 40]     1.072500
(40, 45]     1.155732
(45, 50]     0.931034
(50, 55]     0.933559
(55, 60]     0.894915
(60, 65]     1.055696
(65, 70]     1.040000
(70, 75]     0.921667
(75, 80]     1.320000
(80, 85]     1.230000
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_over_2.5_pinacle_closing, dtype: float64

## Results per implied prob bet365

### Under

#### Opening odds

In [240]:
fdf_under_bet365_gb_count = fdf_under_365.groupby('binned <2.5 365')['payout_under_2.5_365'].count()
fdf_under_bet365_gb_count

binned <2.5 365
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       1
(25, 30]       5
(30, 35]       9
(35, 40]      26
(40, 45]     137
(45, 50]     458
(50, 55]     233
(55, 60]     173
(60, 65]      58
(65, 70]       3
(70, 75]       0
(75, 80]       0
(80, 85]       0
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_under_2.5_365, dtype: int64

In [241]:
fdf_under_bet365_gb_mean = fdf_under_365.groupby('binned <2.5 365')['payout_under_2.5_365'].mean()
fdf_under_bet365_gb_mean

binned <2.5 365
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]     0.000000
(25, 30]     0.000000
(30, 35]     0.344444
(35, 40]     0.788462
(40, 45]     0.729197
(45, 50]     1.023166
(50, 55]     1.053391
(55, 60]     0.831561
(60, 65]     0.959310
(65, 70]     0.500000
(70, 75]          NaN
(75, 80]          NaN
(80, 85]          NaN
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_under_2.5_365, dtype: float64

In [237]:
fdf_under_365['payout_under_2.5_365'].mean()

0.9415684496826835

In [231]:
fdf_under_365_above_40 = fdf_under_365[fdf_under_365['Implied Probability <2.5 365']>40]

In [232]:
fdf_under_365_above_40['payout_under_2.5_365'].mean()

0.9556967984934087

#### Closing odds

In [244]:
fdf_under_bet365_closing_gb_count = fdf_under_365_closing.groupby('binned <2.5 365 closing')['payout_under_2.5_365_closing'].count()
fdf_under_bet365_closing_gb_count

binned <2.5 365 closing
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       3
(25, 30]       7
(30, 35]      15
(35, 40]      49
(40, 45]     148
(45, 50]     391
(50, 55]     217
(55, 60]     162
(60, 65]     100
(65, 70]      14
(70, 75]       0
(75, 80]       0
(80, 85]       0
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_under_2.5_365_closing, dtype: int64

In [245]:
fdf_under_bet365_closing_gb_mean = fdf_under_365_closing.groupby('binned <2.5 365 closing')['payout_under_2.5_365_closing'].mean()
fdf_under_bet365_closing_gb_mean

binned <2.5 365 closing
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]     0.000000
(25, 30]     0.500000
(30, 35]     0.956667
(35, 40]     0.744898
(40, 45]     0.800000
(45, 50]     1.031867
(50, 55]     0.980968
(55, 60]     0.959815
(60, 65]     0.798700
(65, 70]     0.966429
(70, 75]          NaN
(75, 80]          NaN
(80, 85]          NaN
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_under_2.5_365_closing, dtype: float64

### Over

#### Opening Odds

In [247]:
fdf_over_bet365_gb_count = fdf_over_365.groupby('binned >2.5 365')['payout_over_2.5_365'].count()
fdf_over_bet365_gb_count

binned >2.5 365
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       0
(25, 30]       0
(30, 35]       0
(35, 40]       2
(40, 45]      23
(45, 50]     246
(50, 55]     236
(55, 60]     322
(60, 65]     205
(65, 70]      54
(70, 75]       9
(75, 80]       5
(80, 85]       1
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_over_2.5_365, dtype: int64

In [248]:
fdf_over_bet365_gb_mean = fdf_over_365.groupby('binned >2.5 365')['payout_over_2.5_365'].mean()
fdf_over_bet365_gb_mean

binned >2.5 365
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]          NaN
(25, 30]          NaN
(30, 35]          NaN
(35, 40]     1.300000
(40, 45]     1.097826
(45, 50]     1.034350
(50, 55]     0.880551
(55, 60]     0.868851
(60, 65]     0.976537
(65, 70]     0.973333
(70, 75]     1.231111
(75, 80]     1.318000
(80, 85]     1.180000
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_over_2.5_365, dtype: float64

In [131]:
fdf_over_365['payout_over_2.5_365'].mean()

0.9448462929475587

#### Closing Odds

In [251]:
fdf_over_bet365_closing_gb_count = fdf_over_365_closing.groupby('binned >2.5 365 closing')['payout_over_2.5_365_closing'].count()
fdf_over_bet365_closing_gb_count

binned >2.5 365 closing
(0, 5]         0
(5, 10]        0
(10, 15]       0
(15, 20]       0
(20, 25]       0
(25, 30]       0
(30, 35]       0
(35, 40]       6
(40, 45]      53
(45, 50]     236
(50, 55]     224
(55, 60]     279
(60, 65]     202
(65, 70]      81
(70, 75]      15
(75, 80]       8
(80, 85]       2
(85, 90]       0
(90, 95]       0
(95, 100]      0
Name: payout_over_2.5_365_closing, dtype: int64

In [252]:
fdf_over_bet365_closing_gb_mean = fdf_over_365_closing.groupby('binned >2.5 365 closing')['payout_over_2.5_365_closing'].mean()
fdf_over_bet365_closing_gb_mean

binned >2.5 365 closing
(0, 5]            NaN
(5, 10]           NaN
(10, 15]          NaN
(15, 20]          NaN
(20, 25]          NaN
(25, 30]          NaN
(30, 35]          NaN
(35, 40]     0.833333
(40, 45]     1.083962
(45, 50]     0.975636
(50, 55]     0.931518
(55, 60]     0.854229
(60, 65]     0.942079
(65, 70]     1.107407
(70, 75]     0.920000
(75, 80]     1.136250
(80, 85]     1.210000
(85, 90]          NaN
(90, 95]          NaN
(95, 100]         NaN
Name: payout_over_2.5_365_closing, dtype: float64

In [256]:
fdf_over_365_closing['payout_over_2.5_365_closing'].mean()

0.9448462929475587