In [185]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score

In [186]:
def rolling_averages(group, cols, new_cols):
    group = group.sort_values('date')
    rolling_stats = group[cols].rolling(3, closed='left').mean()
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols) #dropping NaN to avoid passing missing values
    return group

In [187]:
# Importing files
season_1 = pd.read_csv("datasets/matches_2017-2018.csv", index_col=0)
season_2 = pd.read_csv("datasets/matches_2018-2019.csv", index_col=0)
season_3 = pd.read_csv("datasets/matches_2019-2020.csv", index_col=0)
season_4 = pd.read_csv("datasets/matches_2020-2021.csv", index_col=0)
season_5 = pd.read_csv("datasets/matches_2021-2022.csv", index_col=0)


In [188]:
# Making an array with all seasons
all_seasons = [season_1, season_2, season_3, season_4, season_5]

# Combining all the files into one dataframe with all 5 seasons
matches = pd.concat(all_seasons)

In [189]:
matches.head()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,match report,notes,sh,sot,dist,fk,pk,pkatt,season,team
0,2017-08-12,17:30,Premier League,Matchweek 1,Sat,Away,W,2.0,0.0,Brighton,...,Match Report,,14.0,4.0,19.4,2.0,0.0,0.0,2022,ManchesterCity
1,2017-08-21,20:00,Premier League,Matchweek 2,Mon,Home,D,1.0,1.0,Everton,...,Match Report,,20.0,6.0,18.9,1.0,0.0,0.0,2022,ManchesterCity
2,2017-08-26,12:30,Premier League,Matchweek 3,Sat,Away,W,2.0,1.0,Bournemouth,...,Match Report,,18.0,8.0,16.4,1.0,0.0,0.0,2022,ManchesterCity
3,2017-09-09,12:30,Premier League,Matchweek 4,Sat,Home,W,5.0,0.0,Liverpool,...,Match Report,,13.0,10.0,14.2,0.0,0.0,0.0,2022,ManchesterCity
5,2017-09-16,15:00,Premier League,Matchweek 5,Sat,Away,W,6.0,0.0,Watford,...,Match Report,,27.0,9.0,17.3,0.0,1.0,1.0,2022,ManchesterCity


In [190]:
# Removing irrelevant attributes comp= All are EPL games, notes=Unnecessary data

del matches['comp']
del matches['notes']

In [191]:
# Observing data types in the dataset
matches.dtypes

date             object
time             object
round            object
day              object
venue            object
result           object
gf              float64
ga              float64
opponent         object
xg              float64
xga             float64
poss            float64
attendance      float64
captain          object
formation        object
referee          object
match report     object
sh              float64
sot             float64
dist            float64
fk              float64
pk              float64
pkatt           float64
season            int64
team             object
dtype: object

Some of the attributes data types need to be change:

- Venue to category
- Opponent to category
- Time to to interger
- Date to date and extract day of week
- Result to interger

In [192]:
# Creating Predictors attributes

matches['date'] = pd.to_datetime(matches['date'])
matches["day_code"] = matches["date"].dt.dayofweek
matches['hour'] = matches['time'].str.replace(':.+', '', regex=True).astype('int')
matches['venue_code'] = matches['venue'].astype("category").cat.codes
matches['opp_code'] = matches['opponent'].astype('category').cat.codes
matches['target'] = (matches['result'] == 'W').astype('int')

matches.head()

Unnamed: 0,date,time,round,day,venue,result,gf,ga,opponent,xg,...,fk,pk,pkatt,season,team,day_code,hour,venue_code,opp_code,target
0,2017-08-12,17:30,Matchweek 1,Sat,Away,W,2.0,0.0,Brighton,1.8,...,2.0,0.0,0.0,2022,ManchesterCity,5,17,0,4,1
1,2017-08-21,20:00,Matchweek 2,Mon,Home,D,1.0,1.0,Everton,1.2,...,1.0,0.0,0.0,2022,ManchesterCity,0,20,1,9,0
2,2017-08-26,12:30,Matchweek 3,Sat,Away,W,2.0,1.0,Bournemouth,1.6,...,1.0,0.0,0.0,2022,ManchesterCity,5,12,0,2,1
3,2017-09-09,12:30,Matchweek 4,Sat,Home,W,5.0,0.0,Liverpool,2.7,...,0.0,0.0,0.0,2022,ManchesterCity,5,12,1,14,1
5,2017-09-16,15:00,Matchweek 5,Sat,Away,W,6.0,0.0,Watford,3.6,...,0.0,1.0,1.0,2022,ManchesterCity,5,15,0,24,1


In [193]:
matches['team'].value_counts()

ManchesterCity            189
LeicesterCity             189
Southampton               189
ManchesterUnited          189
WestHamUnited             189
CrystalPalace             189
NewcastleUnited           189
BrightonandHoveAlbion     189
Everton                   189
Arsenal                   189
Chelsea                   189
Liverpool                 189
TottenhamHotspur          189
Burnley                   189
Watford                   151
WolverhamptonWanderers    151
Bournemouth               114
AstonVilla                113
HuddersfieldTown           76
WestBromwichAlbion         76
Fulham                     76
SheffieldUnited            76
LeedsUnited                75
NorwichCity                75
StokeCity                  38
CardiffCity                38
SwanseaCity                38
Brentford                  37
Name: team, dtype: int64

Some Teams have not played all of the 5 seasons, each season contains a total of 38 games. In no particular order, here is how many seasons each team have played:

Played 1 Seasons
- Stoke City                  38
- Cardiff City                38
- Swansea City                38
- Brentford                  37

Played 2 Seasons
- Huddersfield Town           76
- West Bromwich Albion         76
- Fulham                     76
- Sheffield United            76
- Leeds United                75
- Norwich City                75

Played 3 Seasons
- Bournemouth               114
- Aston Villa                113

Played 4 Seasons
- Watford                   151
- Wolverhampton Wanderers    151

Played 5 Seasons

- Manchester City            189
- Leicester City             189
- Southampton               189
- Manchester United          189
- WestHam United             189
- Crystal Palace             189
- Newcastle United           189
- Brighton and Hove Albion     189
- Everton                   189
- Arsenal                   189
- Chelsea                   189
- Liverpool                 189
- Tottenham Hotspur          189
- Burnley                   189



## Preperaring Data for Predictions

Random Forest will be use as it pick up none lineal data

In [194]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)

In [195]:
# Any data before 2022
train = matches[matches["date"] < '2022-01-01']

In [196]:
# Data in 2022
test = matches[matches["date"] > '2022-01-01']

In [197]:
# Setting predictors to be use
predictors = ['venue_code', 'opp_code', 'hour', 'day_code']

In [198]:
# Setting the ML model
rf.fit(train[predictors], train['target'])

RandomForestClassifier(min_samples_split=10, n_estimators=50, random_state=1)

In [199]:
# Making predictions
preds = rf.predict(test[predictors])

In [200]:
# Determining accuracy of the model
error = accuracy_score(test['target'], preds)

In [201]:
error

0.5896739130434783

In [202]:
# Seeing where accuracy is high or low
combined = pd.DataFrame(dict(actual=test['target'], prediction=preds))

In [203]:
# Creating a cross tab // try to use crossvalidation

pd.crosstab(index=combined['actual'], columns=combined['prediction'])

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,172,51
1,100,45


In [204]:
# What percentage of time the team actually win
precision_score(test['target'], preds)

0.46875

# Improving model accuracy using rolling averages

In [205]:
# dividing dataset into groups

grouped_matches = matches.groupby('team') # creates a datafram for every team in the data

In [206]:
group = grouped_matches.get_group('ManchesterCity').sort_values('date')

In [207]:
group

Unnamed: 0,date,time,round,day,venue,result,gf,ga,opponent,xg,...,fk,pk,pkatt,season,team,day_code,hour,venue_code,opp_code,target
0,2017-08-12,17:30,Matchweek 1,Sat,Away,W,2.0,0.0,Brighton,1.8,...,2.0,0.0,0.0,2022,ManchesterCity,5,17,0,4,1
1,2017-08-21,20:00,Matchweek 2,Mon,Home,D,1.0,1.0,Everton,1.2,...,1.0,0.0,0.0,2022,ManchesterCity,0,20,1,9,0
2,2017-08-26,12:30,Matchweek 3,Sat,Away,W,2.0,1.0,Bournemouth,1.6,...,1.0,0.0,0.0,2022,ManchesterCity,5,12,0,2,1
3,2017-09-09,12:30,Matchweek 4,Sat,Home,W,5.0,0.0,Liverpool,2.7,...,0.0,0.0,0.0,2022,ManchesterCity,5,12,1,14,1
5,2017-09-16,15:00,Matchweek 5,Sat,Away,W,6.0,0.0,Watford,3.6,...,0.0,1.0,1.0,2022,ManchesterCity,5,15,0,24,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50,2022-04-23,15:00,Matchweek 34,Sat,Home,W,5.0,1.0,Watford,3.0,...,0.0,1.0,1.0,2019,ManchesterCity,5,15,1,24,1
52,2022-04-30,17:30,Matchweek 35,Sat,Away,W,4.0,0.0,Leeds United,2.7,...,0.0,0.0,0.0,2019,ManchesterCity,5,17,0,12,1
54,2022-05-08,16:30,Matchweek 36,Sun,Home,W,5.0,0.0,Newcastle Utd,3.1,...,1.0,0.0,0.0,2019,ManchesterCity,6,16,1,17,1
55,2022-05-11,20:15,Matchweek 33,Wed,Away,W,5.0,1.0,Wolves,3.1,...,0.0,0.0,0.0,2019,ManchesterCity,2,20,0,27,1


In [208]:
cols = ['gf', 'ga', 'sh', 'sot', 'dist', 'fk', 'pk', 'pkatt']
new_cols = [f"{c}_rolling" for c in cols]

In [209]:
new_cols

['gf_rolling',
 'ga_rolling',
 'sh_rolling',
 'sot_rolling',
 'dist_rolling',
 'fk_rolling',
 'pk_rolling',
 'pkatt_rolling']

In [210]:
rolling_averages(group, cols, new_cols)

Unnamed: 0,date,time,round,day,venue,result,gf,ga,opponent,xg,...,opp_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
3,2017-09-09,12:30,Matchweek 4,Sat,Home,W,5.0,0.0,Liverpool,2.7,...,14,1,1.666667,0.666667,17.333333,6.000000,18.233333,1.333333,0.000000,0.000000
5,2017-09-16,15:00,Matchweek 5,Sat,Away,W,6.0,0.0,Watford,3.6,...,24,1,2.666667,0.666667,17.000000,8.000000,16.500000,0.666667,0.000000,0.000000
7,2017-09-23,15:00,Matchweek 6,Sat,Home,W,5.0,0.0,Crystal Palace,4.4,...,8,1,4.333333,0.333333,19.333333,9.000000,15.966667,0.333333,0.333333,0.333333
9,2017-09-30,17:30,Matchweek 7,Sat,Away,W,1.0,0.0,Chelsea,1.1,...,7,1,5.333333,0.000000,21.666667,9.333333,14.866667,0.000000,0.333333,0.333333
10,2017-10-14,15:00,Matchweek 8,Sat,Home,W,7.0,2.0,Stoke City,3.9,...,21,1,4.000000,0.000000,23.000000,7.666667,15.866667,0.333333,0.333333,0.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50,2022-04-23,15:00,Matchweek 34,Sat,Home,W,5.0,1.0,Watford,3.0,...,24,1,2.333333,0.666667,15.333333,5.000000,17.200000,0.333333,0.000000,0.000000
52,2022-04-30,17:30,Matchweek 35,Sat,Away,W,4.0,0.0,Leeds United,2.7,...,12,1,3.333333,1.000000,16.000000,5.333333,16.700000,0.333333,0.333333,0.333333
54,2022-05-08,16:30,Matchweek 36,Sun,Home,W,5.0,0.0,Newcastle Utd,3.1,...,17,1,4.000000,0.333333,18.666667,6.000000,16.333333,0.000000,0.333333,0.333333
55,2022-05-11,20:15,Matchweek 33,Wed,Away,W,5.0,1.0,Wolves,3.1,...,27,1,4.666667,0.333333,20.000000,7.333333,15.166667,0.333333,0.333333,0.333333


In [211]:
matches_rolling = matches.groupby('team').apply(lambda x: rolling_averages(x, cols, new_cols))

In [212]:
matches_rolling

Unnamed: 0_level_0,Unnamed: 1_level_0,date,time,round,day,venue,result,gf,ga,opponent,xg,...,opp_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Arsenal,4,2017-09-09,15:00,Matchweek 4,Sat,Home,W,3.0,0.0,Bournemouth,2.0,...,2,1,1.333333,2.666667,18.000000,5.666667,17.600000,0.000000,0.000000,0.000000
Arsenal,6,2017-09-17,13:30,Matchweek 5,Sun,Away,D,0.0,0.0,Chelsea,1.4,...,7,0,1.000000,1.666667,14.666667,5.333333,16.233333,0.333333,0.000000,0.000000
Arsenal,8,2017-09-25,20:00,Matchweek 6,Mon,Home,W,2.0,0.0,West Brom,2.4,...,25,1,1.000000,1.333333,12.000000,3.666667,16.033333,0.333333,0.000000,0.000000
Arsenal,10,2017-10-01,12:00,Matchweek 7,Sun,Home,W,2.0,0.0,Brighton,3.0,...,4,1,1.666667,0.000000,14.333333,5.333333,16.800000,1.333333,0.333333,0.333333
Arsenal,11,2017-10-14,17:30,Matchweek 8,Sat,Away,L,1.0,2.0,Watford,1.1,...,24,0,1.333333,0.000000,17.333333,5.000000,17.833333,1.666667,0.333333,0.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WolverhamptonWanderers,36,2022-04-24,14:00,Matchweek 34,Sun,Away,L,0.0,1.0,Burnley,0.7,...,5,0,1.333333,1.666667,10.000000,4.666667,17.633333,0.000000,0.000000,0.000000
WolverhamptonWanderers,37,2022-04-30,15:00,Matchweek 35,Sat,Home,L,0.0,3.0,Brighton,0.6,...,4,0,0.666667,1.000000,8.666667,3.333333,16.966667,0.000000,0.000000,0.000000
WolverhamptonWanderers,38,2022-05-07,15:00,Matchweek 36,Sat,Away,D,2.0,2.0,Chelsea,2.1,...,7,0,0.000000,1.666667,8.666667,2.333333,18.233333,0.333333,0.000000,0.000000
WolverhamptonWanderers,39,2022-05-11,20:15,Matchweek 33,Wed,Home,L,1.0,5.0,Manchester City,0.5,...,15,0,0.666667,2.000000,11.666667,3.000000,17.133333,0.333333,0.000000,0.000000


Each team was added to the side of the dataframe as an extra index level but is not needed

In [213]:
matches_rolling = matches_rolling.droplevel('team')

In [214]:
matches_rolling

Unnamed: 0,date,time,round,day,venue,result,gf,ga,opponent,xg,...,opp_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
4,2017-09-09,15:00,Matchweek 4,Sat,Home,W,3.0,0.0,Bournemouth,2.0,...,2,1,1.333333,2.666667,18.000000,5.666667,17.600000,0.000000,0.000000,0.000000
6,2017-09-17,13:30,Matchweek 5,Sun,Away,D,0.0,0.0,Chelsea,1.4,...,7,0,1.000000,1.666667,14.666667,5.333333,16.233333,0.333333,0.000000,0.000000
8,2017-09-25,20:00,Matchweek 6,Mon,Home,W,2.0,0.0,West Brom,2.4,...,25,1,1.000000,1.333333,12.000000,3.666667,16.033333,0.333333,0.000000,0.000000
10,2017-10-01,12:00,Matchweek 7,Sun,Home,W,2.0,0.0,Brighton,3.0,...,4,1,1.666667,0.000000,14.333333,5.333333,16.800000,1.333333,0.333333,0.333333
11,2017-10-14,17:30,Matchweek 8,Sat,Away,L,1.0,2.0,Watford,1.1,...,24,0,1.333333,0.000000,17.333333,5.000000,17.833333,1.666667,0.333333,0.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36,2022-04-24,14:00,Matchweek 34,Sun,Away,L,0.0,1.0,Burnley,0.7,...,5,0,1.333333,1.666667,10.000000,4.666667,17.633333,0.000000,0.000000,0.000000
37,2022-04-30,15:00,Matchweek 35,Sat,Home,L,0.0,3.0,Brighton,0.6,...,4,0,0.666667,1.000000,8.666667,3.333333,16.966667,0.000000,0.000000,0.000000
38,2022-05-07,15:00,Matchweek 36,Sat,Away,D,2.0,2.0,Chelsea,2.1,...,7,0,0.000000,1.666667,8.666667,2.333333,18.233333,0.333333,0.000000,0.000000
39,2022-05-11,20:15,Matchweek 33,Wed,Home,L,1.0,5.0,Manchester City,0.5,...,15,0,0.666667,2.000000,11.666667,3.000000,17.133333,0.333333,0.000000,0.000000


In [215]:
# reseting index to display all indivual values and not as repeated values
matches_rolling.index = range(matches_rolling.shape[0])

In [216]:
matches_rolling

Unnamed: 0,date,time,round,day,venue,result,gf,ga,opponent,xg,...,opp_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
0,2017-09-09,15:00,Matchweek 4,Sat,Home,W,3.0,0.0,Bournemouth,2.0,...,2,1,1.333333,2.666667,18.000000,5.666667,17.600000,0.000000,0.000000,0.000000
1,2017-09-17,13:30,Matchweek 5,Sun,Away,D,0.0,0.0,Chelsea,1.4,...,7,0,1.000000,1.666667,14.666667,5.333333,16.233333,0.333333,0.000000,0.000000
2,2017-09-25,20:00,Matchweek 6,Mon,Home,W,2.0,0.0,West Brom,2.4,...,25,1,1.000000,1.333333,12.000000,3.666667,16.033333,0.333333,0.000000,0.000000
3,2017-10-01,12:00,Matchweek 7,Sun,Home,W,2.0,0.0,Brighton,3.0,...,4,1,1.666667,0.000000,14.333333,5.333333,16.800000,1.333333,0.333333,0.333333
4,2017-10-14,17:30,Matchweek 8,Sat,Away,L,1.0,2.0,Watford,1.1,...,24,0,1.333333,0.000000,17.333333,5.000000,17.833333,1.666667,0.333333,0.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3682,2022-04-24,14:00,Matchweek 34,Sun,Away,L,0.0,1.0,Burnley,0.7,...,5,0,1.333333,1.666667,10.000000,4.666667,17.633333,0.000000,0.000000,0.000000
3683,2022-04-30,15:00,Matchweek 35,Sat,Home,L,0.0,3.0,Brighton,0.6,...,4,0,0.666667,1.000000,8.666667,3.333333,16.966667,0.000000,0.000000,0.000000
3684,2022-05-07,15:00,Matchweek 36,Sat,Away,D,2.0,2.0,Chelsea,2.1,...,7,0,0.000000,1.666667,8.666667,2.333333,18.233333,0.333333,0.000000,0.000000
3685,2022-05-11,20:15,Matchweek 33,Wed,Home,L,1.0,5.0,Manchester City,0.5,...,15,0,0.666667,2.000000,11.666667,3.000000,17.133333,0.333333,0.000000,0.000000


# Retraining model using new set of predictors

In [217]:
def make_predictions(data, predictor):
    train = data[data['date'] < '2022-01-01']
    test = data[data['date'] > '2022-01-01']
    rf.fit(train[predictors], train['target'])
    preds = rf.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test['target'], predicted=preds), index=test.index)
    precision = precision_score(test['target'], preds)
    return combined, precision

In [218]:
combined, error = make_predictions(matches_rolling, predictors + new_cols)

In [226]:
error

0.4742268041237113

In [219]:
#adding Team info to cross validation

combined = combined.merge(matches_rolling[['date', 'team', 'opponent', 'result']], left_index=True, right_index=True)

In [220]:
combined.head(10)

Unnamed: 0,actual,predicted,date,team,opponent,result
169,0,0,2022-01-23,Arsenal,Burnley,D
170,1,0,2022-02-10,Arsenal,Wolves,W
171,1,0,2022-02-19,Arsenal,Brentford,W
172,1,0,2022-02-24,Arsenal,Wolves,W
173,1,0,2022-03-06,Arsenal,Watford,W
174,1,1,2022-03-13,Arsenal,Leicester City,W
175,0,0,2022-03-16,Arsenal,Liverpool,L
176,1,0,2022-03-19,Arsenal,Aston Villa,W
177,0,1,2022-04-04,Arsenal,Crystal Palace,L
178,0,1,2022-04-09,Arsenal,Brighton,L


# Combining Home and Away predictions

In [221]:
class MissingDict(dict):
    __missing__ = lambda self, key: key

map_values = {
    "Brighton and Hove Albion": "Brighton",
    "Manchester United": "Manchester Utd",
    "Newcastle United": "Newcastle Utd",
    "Tottenham Hotspur": "Tottenham",
    "West Ham United": "West Ham",
    "Wolverhampton Wanderers": "Wolves"
} 
mapping = MissingDict(**map_values)

In [222]:
combined['new_team'] = combined['team'].map(mapping)

In [223]:
merged = combined.merge(combined, left_on=['date', 'new_team'], right_on=['date', 'opponent'])

In [227]:
merged.head(10)

Unnamed: 0,actual_x,predicted_x,date,team_x,opponent_x,result_x,new_team_x,actual_y,predicted_y,team_y,opponent_y,result_y,new_team_y
0,0,0,2022-01-23,Arsenal,Burnley,D,Arsenal,0,0,Burnley,Arsenal,D,Burnley
1,1,0,2022-02-10,Arsenal,Wolves,W,Arsenal,0,0,WolverhamptonWanderers,Arsenal,L,WolverhamptonWanderers
2,1,0,2022-02-19,Arsenal,Brentford,W,Arsenal,0,0,Brentford,Arsenal,L,Brentford
3,1,0,2022-02-24,Arsenal,Wolves,W,Arsenal,0,0,WolverhamptonWanderers,Arsenal,L,WolverhamptonWanderers
4,1,0,2022-03-06,Arsenal,Watford,W,Arsenal,0,0,Watford,Arsenal,L,Watford
5,1,1,2022-03-13,Arsenal,Leicester City,W,Arsenal,0,0,LeicesterCity,Arsenal,L,LeicesterCity
6,0,0,2022-03-16,Arsenal,Liverpool,L,Arsenal,1,0,Liverpool,Arsenal,W,Liverpool
7,1,0,2022-03-19,Arsenal,Aston Villa,W,Arsenal,0,0,AstonVilla,Arsenal,L,AstonVilla
8,0,1,2022-04-04,Arsenal,Crystal Palace,L,Arsenal,1,0,CrystalPalace,Arsenal,W,CrystalPalace
9,0,1,2022-04-09,Arsenal,Brighton,L,Arsenal,1,0,BrightonandHoveAlbion,Arsenal,W,BrightonandHoveAlbion


In [225]:
merged[(merged['predicted_x'] == 1) & (merged['predicted_y'] == 0)]["actual_x"].value_counts()

0    13
1    10
Name: actual_x, dtype: int64