In [2]:
import pandas as pd
import math
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, confusion_matrix

## Read Full Dataset

In [3]:
# Load data
DATA_PATH = '../data/full_data_season.csv'
data_full = pd.read_csv(DATA_PATH, index_col=0)
data_full

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,home_xG,home_xGA,home_npxG,home_npxGA,home_deep,...,FTHG-2,FTAG-2,home_bool-2,FTHG-1,FTAG-1,home_bool-1,result,result-3,result-2,result-1
0,2014-08-29,Getafe,Almeria,1.0,0.0,0.673965,1.917210,0.673965,1.917210,4.0,...,,,,,,,w,,,
1,2014-08-29,Valencia,Malaga,3.0,0.0,1.749030,1.171970,1.749030,1.171970,4.0,...,,,,,,,w,,,
2,2014-08-30,Athletic Club,Levante,3.0,0.0,1.141510,1.321070,1.141510,0.438073,5.0,...,,,,,,,w,,,
3,2014-08-30,Atletico Madrid,Eibar,2.0,1.0,0.901586,0.357337,0.901586,0.357337,3.0,...,,,,,,,w,,,
4,2014-08-30,Cordoba,Celta Vigo,1.0,1.0,0.378410,0.612645,0.378410,0.612645,4.0,...,,,,,,,d,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13205,2021-12-22,Troyes,Brest,1.0,1.0,16.707601,26.101496,16.707601,24.581299,74.0,...,,,,,,,d,,,
13206,2022-01-07,Bordeaux,Marseille,0.0,1.0,23.760819,35.108390,23.000729,31.307914,89.0,...,0.0,0.0,1.0,2.0,2.0,0.0,l,w,d,d
13207,2022-01-08,Lens,Rennes,1.0,0.0,32.300567,23.155727,30.020267,21.635537,96.0,...,0.0,0.0,1.0,1.0,1.0,0.0,w,l,d,d
13208,2022-01-09,Brest,Nice,0.0,3.0,24.123663,26.411888,20.323186,25.651793,96.0,...,3.0,2.0,0.0,2.0,1.0,0.0,l,w,w,w


## Preprocessing

### Drop columns, create dummy variables (one-hot encoding)

In [5]:
## Preprocessing

data = data_full.copy()
# # Drop redundant columns:
# data.drop(columns=['Date','HomeTeam','AwayTeam'], inplace=True)
data.drop(columns=['FTHG','FTAG'], inplace=True)
data.drop(columns=['FTHG-3','FTAG-3','home_bool-3','result-3','FTHG-2','FTAG-2','home_bool-2','result-2'], inplace=True)
data.drop(columns=['FTHG-1','FTAG-1'], inplace=True)

# Drop NAs:
data.dropna(inplace=True)

# one-hot encoding
data = pd.concat([data, pd.get_dummies(data["result-1"], prefix='result-1')], axis = 1)
data = pd.concat([data, pd.get_dummies(data["result"], prefix='result')], axis = 1)
data


Unnamed: 0,Date,HomeTeam,AwayTeam,home_xG,home_xGA,home_npxG,home_npxGA,home_deep,home_deep_allowed,home_xpts,...,away_oppda_ratio,home_bool-1,result,result-1,result-1_d,result-1_l,result-1_w,result_d,result_l,result_w
179,2015-01-24,Atletico Madrid,Rayo Vallecano,31.522310,13.963810,27.805928,12.477254,98.0,46.0,39.2793,...,9.525176,0.0,w,d,1,0,0,0,0,1
180,2015-01-24,Cordoba,Real Madrid,13.869439,21.627751,13.126162,18.655141,43.0,106.0,20.6576,...,13.669672,0.0,l,w,0,0,1,0,1,0
181,2015-01-24,Elche,Barcelona,18.600180,29.337184,15.627195,24.877526,44.0,116.0,19.3153,...,16.976409,0.0,l,w,0,0,1,0,1,0
182,2015-01-24,Real Sociedad,Eibar,18.132464,24.919037,17.389191,23.432481,72.0,73.0,20.5190,...,7.575753,0.0,w,w,0,0,1,0,0,1
183,2015-01-24,Villarreal,Levante,32.543107,16.684369,31.799827,15.941093,124.0,71.0,35.9120,...,8.002077,0.0,w,l,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13204,2021-12-22,Saint-Etienne,Nantes,18.414077,33.056830,16.133782,30.776541,68.0,101.0,16.4375,...,10.459699,1.0,l,d,1,0,0,0,1,0
13206,2022-01-07,Bordeaux,Marseille,23.760819,35.108390,23.000729,31.307914,89.0,117.0,18.4353,...,19.863733,0.0,l,d,1,0,0,0,1,0
13207,2022-01-08,Lens,Rennes,32.300567,23.155727,30.020267,21.635537,96.0,104.0,31.8740,...,17.886594,0.0,w,d,1,0,0,0,0,1
13208,2022-01-09,Brest,Nice,24.123663,26.411888,20.323186,25.651793,96.0,116.0,25.8753,...,16.767501,0.0,l,w,0,0,1,0,1,0


In [6]:
train_set, test_set = train_test_split(data, test_size=0.2, shuffle = False, stratify = None)  #20% test data, no randomness (matches from Feb 2021 onwards are used for testing)
print ('Train set:', train_set.shape)
print ('Test set:', test_set.shape)

Train set: (9080, 40)
Test set: (2271, 40)


### Prepare feature variables (x)

In [7]:
# Create x and y variables, Normalize Dataset

train_set_x = train_set.copy()
train_set_x.drop(columns=['result','result-1','result_l','result_d','result_w'], inplace=True)
train_set_x.drop(columns=['Date','HomeTeam','AwayTeam'], inplace=True)
train_set_x
test_set_x = test_set.copy()
test_set_x.drop(columns=['result','result-1','result_l','result_d','result_w'], inplace=True)
test_set_x.drop(columns=['Date','HomeTeam','AwayTeam'], inplace=True)
test_set_x

Unnamed: 0,home_xG,home_xGA,home_npxG,home_npxGA,home_deep,home_deep_allowed,home_xpts,home_wins,home_draws,home_loses,...,away_draws,away_loses,away_pts,away_npxGD,away_ppda_ratio,away_oppda_ratio,home_bool-1,result-1_d,result-1_l,result-1_w
10763,22.922667,30.153819,18.333154,27.180705,69.0,97.0,24.7698,4.0,8.0,10.0,...,8.0,10.0,20.0,-10.418225,10.582213,8.977892,0.0,0,1,0
10764,48.166869,23.836323,44.450489,22.349892,272.0,91.0,42.7712,13.0,4.0,4.0,...,7.0,10.0,22.0,-9.663945,11.956043,8.633144,0.0,1,0,0
10765,19.984789,25.565793,17.755213,22.592808,64.0,89.0,26.4346,6.0,6.0,10.0,...,8.0,5.0,35.0,15.067627,8.129563,13.390928,0.0,0,0,1
10766,39.728278,23.087432,38.241718,17.884612,184.0,79.0,41.0805,14.0,4.0,4.0,...,9.0,8.0,24.0,-9.524665,10.415837,8.469576,0.0,0,0,1
10767,27.759832,27.760120,24.786726,26.273577,148.0,132.0,28.4508,6.0,9.0,6.0,...,7.0,10.0,22.0,-2.878551,11.449910,8.877896,0.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13204,18.414077,33.056830,16.133782,30.776541,68.0,101.0,16.4375,2.0,6.0,10.0,...,4.0,7.0,25.0,-1.681447,15.413029,10.459699,1.0,1,0,0
13206,23.760819,35.108390,23.000729,31.307914,89.0,117.0,18.4353,3.0,8.0,8.0,...,6.0,3.0,33.0,3.523299,12.572019,19.863733,0.0,1,0,0
13207,32.300567,23.155727,30.020267,21.635537,96.0,104.0,31.8740,7.0,6.0,6.0,...,4.0,6.0,31.0,13.068737,10.130094,17.886594,0.0,1,0,0
13208,24.123663,26.411888,20.323186,25.651793,96.0,116.0,25.8753,6.0,7.0,6.0,...,4.0,5.0,34.0,8.390046,12.496909,16.767501,0.0,0,0,1


### Normalize data, create X and y variables 

In [8]:
# Train set:
X_train = preprocessing.StandardScaler().fit(train_set_x).transform(train_set_x)
y_loss_train = np.asarray(train_set['result_l'])
y_draw_train = np.asarray(train_set['result_d'])
y_win_train = np.asarray(train_set['result_w'])

# Test set:
X_test = preprocessing.StandardScaler().fit(test_set_x).transform(test_set_x)
y_loss_test = np.asarray(test_set['result_l'])
y_draw_test = np.asarray(test_set['result_d'])
y_win_test = np.asarray(test_set['result_w'])


## Logistic Regression

In [9]:
## Loss
LR_loss = LogisticRegression(C=0.01, solver='liblinear').fit(X_train,y_loss_train)
y_loss_test_prob = LR_loss.predict_proba(X_test)
y_loss_test_pred = LR_loss.predict(X_test)
print("LOSS:")
print (classification_report(y_loss_test, y_loss_test_pred))

## Draw
LR_draw = LogisticRegression(C=0.01, solver='liblinear').fit(X_train,y_draw_train)
y_draw_test_prob = LR_draw.predict_proba(X_test)
y_draw_test_pred = LR_draw.predict(X_test)
print("DRAW:")
print (classification_report(y_draw_test, y_draw_test_pred))

## Win
LR_win = LogisticRegression(C=0.01, solver='liblinear').fit(X_train,y_win_train)
y_win_test_prob = LR_win.predict_proba(X_test)
y_win_test_pred = LR_win.predict(X_test)
print("WIN:")
print (classification_report(y_win_test, y_win_test_pred))

LOSS:
              precision    recall  f1-score   support

           0       0.71      0.94      0.81      1512
           1       0.65      0.23      0.34       759

    accuracy                           0.70      2271
   macro avg       0.68      0.59      0.58      2271
weighted avg       0.69      0.70      0.65      2271

DRAW:
              precision    recall  f1-score   support

           0       0.74      1.00      0.85      1676
           1       0.00      0.00      0.00       595

    accuracy                           0.74      2271
   macro avg       0.37      0.50      0.42      2271
weighted avg       0.54      0.74      0.63      2271

WIN:
              precision    recall  f1-score   support

           0       0.71      0.74      0.72      1354
           1       0.59      0.56      0.57       917

    accuracy                           0.66      2271
   macro avg       0.65      0.65      0.65      2271
weighted avg       0.66      0.66      0.66      2271



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
# join probability and prediction column
test_set["prob_draw"] = [x[1] for x in y_draw_test_prob]
test_set["prob_loss"] = [x[1] for x in y_loss_test_prob]
test_set["prob_win"] = [x[1] for x in y_win_test_prob]
test_set["pred_draw"] = y_draw_test_pred
test_set["pred_win"] = y_win_test_pred
test_set["pred_loss"] = y_loss_test_pred
test_set

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_set["prob_draw"] = [x[1] for x in y_draw_test_prob]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_set["prob_loss"] = [x[1] for x in y_loss_test_prob]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_set["prob_win"] = [x[1] for x in y_win_test_prob]
A value is trying to be set on a cop

Unnamed: 0,Date,HomeTeam,AwayTeam,home_xG,home_xGA,home_npxG,home_npxGA,home_deep,home_deep_allowed,home_xpts,...,result-1_w,result_d,result_l,result_w,prob_draw,prob_loss,prob_win,pred_draw,pred_win,pred_loss
10763,2021-02-13,Eibar,Real Valladolid,22.922667,30.153819,18.333154,27.180705,69.0,97.0,24.7698,...,0,1,0,0,0.286526,0.270237,0.475887,0,0,0
10764,2021-02-13,Barcelona,Alaves,48.166869,23.836323,44.450489,22.349892,272.0,91.0,42.7712,...,0,0,0,1,0.215466,0.065929,0.804566,0,1,0
10765,2021-02-14,Getafe,Real Sociedad,19.984789,25.565793,17.755213,22.592808,64.0,89.0,26.4346,...,1,0,1,0,0.287182,0.522249,0.237174,0,0,1
10766,2021-02-14,Real Madrid,Valencia,39.728278,23.087432,38.241718,17.884612,184.0,79.0,41.0805,...,1,0,0,1,0.236061,0.081890,0.763710,0,1,0
10767,2021-02-14,Levante,Osasuna,27.759832,27.760120,24.786726,26.273577,148.0,132.0,28.4508,...,0,0,1,0,0.260542,0.220132,0.536450,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13204,2021-12-22,Saint-Etienne,Nantes,18.414077,33.056830,16.133782,30.776541,68.0,101.0,16.4375,...,0,0,1,0,0.306005,0.326029,0.385143,0,0,0
13206,2022-01-07,Bordeaux,Marseille,23.760819,35.108390,23.000729,31.307914,89.0,117.0,18.4353,...,0,0,1,0,0.255367,0.467437,0.294684,0,0,0
13207,2022-01-08,Lens,Rennes,32.300567,23.155727,30.020267,21.635537,96.0,104.0,31.8740,...,0,0,0,1,0.227886,0.366826,0.371509,0,0,0
13208,2022-01-09,Brest,Nice,24.123663,26.411888,20.323186,25.651793,96.0,116.0,25.8753,...,1,0,1,0,0.279464,0.482824,0.249875,0,0,0


## Compare with Odds


In [11]:
# Read odds CSV
DATA_ODDS_PATH = '../data/full_data_odds.csv'
data_odds = pd.read_csv(DATA_ODDS_PATH, index_col=0)

In [12]:
# Add date column and drop reduntant field
data_odds['date'] = [start_date[0:10] for start_date in data_odds['start_date']]
data_odds.drop(columns=['Unnamed: 0.1'], inplace=True)
data_odds

Unnamed: 0,id,back_decimal,start_date,updated,bet_with_bookie_name,bet_slug,market_slug,bookie_code,best_back_decimal,home_team,away_team,home_team_id,away_team_id,result,date
0,bh/football/england-premier-league/arsenal-v-m...,4.60,2022-01-01T13:30:00Z,2021-12-24T16:31:23Z,Draw (Bet At Home),draw,win-market,bh,4.60,arsenal,manchester-city,Arsenal,Manchester City,draw,2022-01-01
1,bh/football/england-premier-league/arsenal-v-m...,5.80,2022-01-01T13:30:00Z,2021-12-24T16:31:23Z,Arsenal (Bet At Home),arsenal,win-market,bh,5.80,arsenal,manchester-city,Arsenal,Manchester City,win,2022-01-01
2,bh/football/england-premier-league/arsenal-v-m...,1.47,2022-01-01T13:30:00Z,2021-12-24T16:31:23Z,Manchester City (Bet At Home),manchester-city,win-market,bh,1.47,arsenal,manchester-city,Arsenal,Manchester City,loss,2022-01-01
3,bh/football/england-premier-league/arsenal-v-m...,4.50,2022-01-01T13:30:00Z,2021-12-26T03:44:24Z,Draw (Bet At Home),draw,win-market,bh,4.50,arsenal,manchester-city,Arsenal,Manchester City,draw,2022-01-01
4,bh/football/england-premier-league/arsenal-v-m...,5.60,2022-01-01T13:30:00Z,2021-12-26T03:44:24Z,Arsenal (Bet At Home),arsenal,win-market,bh,5.60,arsenal,manchester-city,Arsenal,Manchester City,win,2022-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20294,vc/football/esp-la-liga-primera/villarreal-v-m...,9.00,2022-01-22T15:15:00Z,2022-01-22T10:27:29Z,Mallorca (Betvictor),mallorca,win-market,vc,9.00,villareal,mallorca,Villarreal,Mallorca,loss,2022-01-22
20295,vc/football/esp-la-liga-primera/villarreal-v-m...,1.36,2022-01-22T15:15:00Z,2022-01-22T10:27:29Z,Villareal (Betvictor),villareal,win-market,vc,1.42,villareal,mallorca,Villarreal,Mallorca,win,2022-01-22
20296,nv/soccer/laliga-spain/villarreal-v-mallorca/f...,4.60,2022-01-22T15:15:00Z,2022-01-22T10:27:29Z,Draw (Novibet),draw,win-market,nv,5.25,villareal,mallorca,Villarreal,Mallorca,draw,2022-01-22
20297,nv/soccer/laliga-spain/villarreal-v-mallorca/f...,8.80,2022-01-22T15:15:00Z,2022-01-22T10:27:29Z,Mallorca (Novibet),mallorca,win-market,nv,9.00,villareal,mallorca,Villarreal,Mallorca,loss,2022-01-22


In [13]:
# Find avg odds and best odds for each match
best_odds = data_odds.groupby(["home_team_id","away_team_id","date","result"]).mean().reset_index()
best_odds

Unnamed: 0,home_team_id,away_team_id,date,result,back_decimal,best_back_decimal
0,AC Milan,Juventus,2022-01-23,draw,3.167308,3.320440
1,AC Milan,Juventus,2022-01-23,loss,2.699861,2.825093
2,AC Milan,Juventus,2022-01-23,win,2.664868,2.777982
3,AC Milan,Napoli,2021-12-19,draw,3.402298,3.597282
4,AC Milan,Napoli,2021-12-19,loss,2.978791,3.129951
...,...,...,...,...,...,...
1731,Wolverhampton Wanderers,Southampton,2022-01-15,loss,3.393902,3.537764
1732,Wolverhampton Wanderers,Southampton,2022-01-15,win,2.216872,2.285450
1733,Wolverhampton Wanderers,Watford,2021-12-26,draw,3.473226,3.673226
1734,Wolverhampton Wanderers,Watford,2021-12-26,loss,4.203911,4.532673


In [14]:
test_set_comparison = test_set.copy()
test_set_comparison['odds_min_draw'] = [1]/test_set_comparison['prob_draw']
test_set_comparison['odds_min_win'] = [1]/test_set_comparison['prob_win']
test_set_comparison['odds_min_loss'] = [1]/test_set_comparison['prob_loss']
test_set_comparison

Unnamed: 0,Date,HomeTeam,AwayTeam,home_xG,home_xGA,home_npxG,home_npxGA,home_deep,home_deep_allowed,home_xpts,...,result_w,prob_draw,prob_loss,prob_win,pred_draw,pred_win,pred_loss,odds_min_draw,odds_min_win,odds_min_loss
10763,2021-02-13,Eibar,Real Valladolid,22.922667,30.153819,18.333154,27.180705,69.0,97.0,24.7698,...,0,0.286526,0.270237,0.475887,0,0,0,3.490080,2.101340,3.700458
10764,2021-02-13,Barcelona,Alaves,48.166869,23.836323,44.450489,22.349892,272.0,91.0,42.7712,...,1,0.215466,0.065929,0.804566,0,1,0,4.641108,1.242906,15.167921
10765,2021-02-14,Getafe,Real Sociedad,19.984789,25.565793,17.755213,22.592808,64.0,89.0,26.4346,...,0,0.287182,0.522249,0.237174,0,0,1,3.482108,4.216321,1.914797
10766,2021-02-14,Real Madrid,Valencia,39.728278,23.087432,38.241718,17.884612,184.0,79.0,41.0805,...,1,0.236061,0.081890,0.763710,0,1,0,4.236202,1.309397,12.211535
10767,2021-02-14,Levante,Osasuna,27.759832,27.760120,24.786726,26.273577,148.0,132.0,28.4508,...,0,0.260542,0.220132,0.536450,0,1,0,3.838152,1.864105,4.542735
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13204,2021-12-22,Saint-Etienne,Nantes,18.414077,33.056830,16.133782,30.776541,68.0,101.0,16.4375,...,0,0.306005,0.326029,0.385143,0,0,0,3.267918,2.596441,3.067216
13206,2022-01-07,Bordeaux,Marseille,23.760819,35.108390,23.000729,31.307914,89.0,117.0,18.4353,...,0,0.255367,0.467437,0.294684,0,0,0,3.915937,3.393463,2.139327
13207,2022-01-08,Lens,Rennes,32.300567,23.155727,30.020267,21.635537,96.0,104.0,31.8740,...,1,0.227886,0.366826,0.371509,0,0,0,4.388152,2.691727,2.726089
13208,2022-01-09,Brest,Nice,24.123663,26.411888,20.323186,25.651793,96.0,116.0,25.8753,...,0,0.279464,0.482824,0.249875,0,0,0,3.578278,4.002004,2.071150


In [15]:
# Join odds to test train set
test_set_comparison = pd.merge(test_set_comparison, best_odds, left_on=['HomeTeam','AwayTeam','Date'], right_on=['home_team_id','away_team_id','date'], how = 'inner')

In [28]:
print("DRAW")
test_set_comparison_draw = test_set_comparison[
    (test_set_comparison['result_y']=='draw') 
    & (test_set_comparison['odds_min_draw']<test_set_comparison['best_back_decimal'])
]
test_set_comparison_draw

DRAW


Unnamed: 0,Date,HomeTeam,AwayTeam,home_xG,home_xGA,home_npxG,home_npxGA,home_deep,home_deep_allowed,home_xpts,...,pred_loss,odds_min_draw,odds_min_win,odds_min_loss,home_team_id,away_team_id,date,result_y,back_decimal,best_back_decimal
6,2021-12-01,Real Madrid,Athletic Club,26.734788,16.352680,25.991638,15.609405,174.0,69.0,25.9563,...,0,4.044555,1.840445,4.781873,Real Madrid,Athletic Club,2021-12-01,draw,4.457708,4.693424
15,2021-12-04,Barcelona,Real Betis,26.212996,14.774037,22.496613,14.774037,116.0,62.0,26.4087,...,0,4.418758,1.933707,4.077135,Barcelona,Real Betis,2021-12-04,draw,4.594566,4.799422
18,2021-12-04,Atletico Madrid,Mallorca,23.078592,12.162365,22.335312,9.932531,106.0,56.0,27.0943,...,0,3.558856,1.541181,7.613091,Atletico Madrid,Mallorca,2021-12-04,draw,5.011532,5.364414
21,2021-12-04,Real Sociedad,Real Madrid,18.958675,14.047990,15.238861,12.561430,112.0,49.0,25.1511,...,0,3.567525,2.304689,3.776012,Real Sociedad,Real Madrid,2021-12-04,draw,3.414598,3.570172
24,2021-12-05,Rayo Vallecano,Espanyol,20.565311,21.696941,18.335491,16.494004,63.0,88.0,21.1361,...,0,3.319826,1.924121,4.709484,Rayo Vallecano,Espanyol,2021-12-05,draw,3.219709,3.321337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
666,2021-12-22,Marseille,Reims,24.278110,22.163333,23.518020,19.883046,101.0,81.0,25.7935,...,0,3.796740,1.620514,6.451128,Marseille,Reims,2021-12-22,draw,3.658835,3.833910
672,2021-12-22,Montpellier,Angers,21.660441,25.427261,20.900343,25.427261,80.0,116.0,23.6024,...,0,3.217677,2.567372,3.257024,Montpellier,Angers,2021-12-22,draw,3.310385,3.510979
678,2021-12-22,Saint-Etienne,Nantes,18.414077,33.056830,16.133782,30.776541,68.0,101.0,16.4375,...,0,3.267918,2.596441,3.067216,Saint-Etienne,Nantes,2021-12-22,draw,3.207518,3.433165
681,2022-01-07,Bordeaux,Marseille,23.760819,35.108390,23.000729,31.307914,89.0,117.0,18.4353,...,0,3.915937,3.393463,2.139327,Bordeaux,Marseille,2022-01-07,draw,3.877061,4.168286


In [32]:
# test_set_comparison_draw['best_back_decimal']
test_set_comparison_draw['result_y']

6      draw
15     draw
18     draw
21     draw
24     draw
       ... 
666    draw
672    draw
678    draw
681    draw
687    draw
Name: result_y, Length: 132, dtype: object

In [25]:
print("WIN")
test_set_comparison[
    (test_set_comparison['result_y']=='win') 
    & (test_set_comparison['odds_min_win']<test_set_comparison['best_back_decimal'])
]

WIN


Unnamed: 0,Date,HomeTeam,AwayTeam,home_xG,home_xGA,home_npxG,home_npxGA,home_deep,home_deep_allowed,home_xpts,...,pred_loss,odds_min_draw,odds_min_win,odds_min_loss,home_team_id,away_team_id,date,result_y,back_decimal,best_back_decimal
2,2021-11-28,Real Madrid,Sevilla,25.487188,15.051010,24.744038,14.307735,161.0,68.0,24.6538,...,0,4.102948,2.242918,3.528918,Real Madrid,Sevilla,2021-11-28,win,1.720000,2.950000
5,2021-11-29,Osasuna,Elche,14.916613,15.579794,11.943508,14.093241,77.0,84.0,18.9327,...,0,3.554763,1.843927,4.803230,Osasuna,Elche,2021-11-29,win,1.812473,1.869140
11,2021-12-03,Granada,Alaves,12.624837,23.523517,11.138277,21.293819,49.0,117.0,11.7041,...,0,3.466203,2.156996,3.600998,Granada,Alaves,2021-12-03,win,2.504123,2.578465
14,2021-12-04,Sevilla,Villarreal,21.749505,12.265271,21.006225,10.775291,83.0,49.0,26.0352,...,0,4.333572,1.686335,5.257086,Sevilla,Villarreal,2021-12-04,win,1.988144,2.029072
23,2021-12-04,Real Sociedad,Real Madrid,18.958675,14.047990,15.238861,12.561430,112.0,49.0,25.1511,...,0,3.567525,2.304689,3.776012,Real Sociedad,Real Madrid,2021-12-04,win,3.213523,3.327102
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
662,2021-12-22,Lorient,Paris Saint Germain,15.819841,26.995987,14.299655,23.195511,69.0,93.0,16.7727,...,1,3.785138,6.985266,1.469766,Lorient,Paris Saint Germain,2021-12-22,win,7.538959,8.390694
668,2021-12-22,Marseille,Reims,24.278110,22.163333,23.518020,19.883046,101.0,81.0,25.7935,...,0,3.796740,1.620514,6.451128,Marseille,Reims,2021-12-22,win,1.694949,1.752935
683,2022-01-07,Bordeaux,Marseille,23.760819,35.108390,23.000729,31.307914,89.0,117.0,18.4353,...,0,3.915937,3.393463,2.139327,Bordeaux,Marseille,2022-01-07,win,4.095930,4.433462
686,2022-01-08,Lens,Rennes,32.300567,23.155727,30.020267,21.635537,96.0,104.0,31.8740,...,0,4.388152,2.691727,2.726089,Lens,Rennes,2022-01-08,win,2.714065,2.823145


In [24]:
print("LOSS")
test_set_comparison[
    (test_set_comparison['result_y']=='loss') 
    & (test_set_comparison['odds_min_loss']<test_set_comparison['best_back_decimal'])
]


LOSS


Unnamed: 0,Date,HomeTeam,AwayTeam,home_xG,home_xGA,home_npxG,home_npxGA,home_deep,home_deep_allowed,home_xpts,...,pred_loss,odds_min_draw,odds_min_win,odds_min_loss,home_team_id,away_team_id,date,result_y,back_decimal,best_back_decimal
1,2021-11-28,Real Madrid,Sevilla,25.487188,15.051010,24.744038,14.307735,161.0,68.0,24.6538,...,0,4.102948,2.242918,3.528918,Real Madrid,Sevilla,2021-11-28,loss,4.600000,5.150000
4,2021-11-29,Osasuna,Elche,14.916613,15.579794,11.943508,14.093241,77.0,84.0,18.9327,...,0,3.554763,1.843927,4.803230,Osasuna,Elche,2021-11-29,loss,4.824468,5.132979
7,2021-12-01,Real Madrid,Athletic Club,26.734788,16.352680,25.991638,15.609405,174.0,69.0,25.9563,...,0,4.044555,1.840445,4.781873,Real Madrid,Athletic Club,2021-12-01,loss,6.819079,7.487829
16,2021-12-04,Barcelona,Real Betis,26.212996,14.774037,22.496613,14.774037,116.0,62.0,26.4087,...,0,4.418758,1.933707,4.077135,Barcelona,Real Betis,2021-12-04,loss,6.076923,6.459615
19,2021-12-04,Atletico Madrid,Mallorca,23.078592,12.162365,22.335312,9.932531,106.0,56.0,27.0943,...,0,3.558856,1.541181,7.613091,Atletico Madrid,Mallorca,2021-12-04,loss,9.931707,11.449187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664,2021-12-22,Lyon,Metz,27.613052,29.520880,24.572677,26.480560,116.0,101.0,22.4788,...,0,4.089921,1.491932,7.332566,Lyon,Metz,2021-12-22,loss,8.038797,9.149378
670,2021-12-22,Monaco,Rennes,26.226101,20.409400,22.425614,15.088752,139.0,50.0,29.5757,...,0,4.658997,2.427347,2.967343,Monaco,Rennes,2021-12-22,loss,3.143373,3.279076
673,2021-12-22,Montpellier,Angers,21.660441,25.427261,20.900343,25.427261,80.0,116.0,23.6024,...,0,3.217677,2.567372,3.257024,Montpellier,Angers,2021-12-22,loss,3.214825,3.384615
676,2021-12-22,Nice,Lens,32.719807,20.433332,28.159307,18.913135,86.0,102.0,32.2178,...,0,4.228831,2.399712,3.166797,Nice,Lens,2021-12-22,loss,3.368830,3.546226


In [58]:
test_set_comparison_win = test_set_comparison[
    (test_set_comparison['result_y']=='win') 
    & (test_set_comparison['odds_min_win']<test_set_comparison['best_back_decimal'])
]
x1 = test_set_comparison_win['best_back_decimal'].to_numpy()
x2 = test_set_comparison_win['result_w'].to_numpy()
y = x1 * x2
(sum(y) - len(y))/len(y)

-0.11915107333857947

In [57]:
test_set_comparison_draw = test_set_comparison[
    (test_set_comparison['result_y']=='draw') 
    & (test_set_comparison['odds_min_draw']<test_set_comparison['best_back_decimal'])
]
x1 = test_set_comparison_draw['best_back_decimal'].to_numpy()
x2 = test_set_comparison_draw['result_d'].to_numpy()
y = x1 * x2
(sum(y) - len(y))/len(y)

-0.04208718349624511

In [60]:
test_set_comparison_loss = test_set_comparison[
    (test_set_comparison['result_y']=='loss') 
    & (test_set_comparison['odds_min_loss']<test_set_comparison['best_back_decimal'])
]
x1 = test_set_comparison_loss['best_back_decimal'].to_numpy()
x2 = test_set_comparison_loss['result_l'].to_numpy()
y = x1 * x2
(sum(y) - len(y))/len(y)

0.16049614848774302