# **Reading match data into a pandas DataFrame**

In [None]:
import pandas as pd

In [None]:
matches = pd.read_csv("matches.csv", index_col=0)

# **Discovering missing data**

In [None]:
matches.head()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,match report,notes,sh,sot,dist,fk,pk,pkatt,season,team
1,2023-08-11,20:00,Premier League,Matchweek 1,Fri,Away,W,3.0,0.0,Burnley,...,Match Report,,17.0,8.0,13.9,0.0,0,0,2024,Manchester City
3,2023-08-19,20:00,Premier League,Matchweek 2,Sat,Home,W,1.0,0.0,Newcastle Utd,...,Match Report,,14.0,4.0,17.9,0.0,0,0,2024,Manchester City
4,2023-08-27,14:00,Premier League,Matchweek 3,Sun,Away,W,2.0,1.0,Sheffield Utd,...,Match Report,,29.0,9.0,17.3,2.0,0,1,2024,Manchester City
5,2023-09-02,15:00,Premier League,Matchweek 4,Sat,Home,W,5.0,1.0,Fulham,...,Match Report,,6.0,4.0,14.8,0.0,1,1,2024,Manchester City
6,2023-09-16,15:00,Premier League,Matchweek 5,Sat,Away,W,3.0,1.0,West Ham,...,Match Report,,29.0,13.0,16.4,1.0,0,0,2024,Manchester City


In [None]:
matches.shape

(1500, 27)

### **Number of games each team has played**


> **Relegated and promoted teams appear in only one of the two seasons, so they have just 37–38 matches**



In [None]:
matches["team"].value_counts() #Looking for teams with unusual number of matches

team
Manchester City             75
Brighton and Hove Albion    75
Nottingham Forest           75
Brentford                   75
Everton                     75
Fulham                      75
Wolverhampton Wanderers     75
Arsenal                     75
Bournemouth                 75
Crystal Palace              75
West Ham United             75
Manchester United           75
Newcastle United            75
Chelsea                     75
Tottenham Hotspur           75
Aston Villa                 75
Liverpool                   75
Leicester City              38
Leeds United                38
Southampton                 38
Luton Town                  37
Burnley                     37
Sheffield United            37
Name: count, dtype: int64

### **Number of team entries in each game week**

> **The 2023/24 season is not over yet, so Matchweek 38 has only 20 entries instead of 40**



In [None]:
matches["round"].value_counts() #Looking for unusual game weeks

round
Matchweek 1     40
Matchweek 30    40
Matchweek 23    40
Matchweek 24    40
Matchweek 25    40
Matchweek 18    40
Matchweek 26    40
Matchweek 27    40
Matchweek 28    40
Matchweek 31    40
Matchweek 2     40
Matchweek 32    40
Matchweek 33    40
Matchweek 29    40
Matchweek 35    40
Matchweek 36    40
Matchweek 37    40
Matchweek 34    40
Matchweek 22    40
Matchweek 21    40
Matchweek 20    40
Matchweek 19    40
Matchweek 3     40
Matchweek 4     40
Matchweek 5     40
Matchweek 6     40
Matchweek 7     40
Matchweek 8     40
Matchweek 9     40
Matchweek 10    40
Matchweek 11    40
Matchweek 12    40
Matchweek 13    40
Matchweek 14    40
Matchweek 15    40
Matchweek 16    40
Matchweek 17    40
Matchweek 38    20
Name: count, dtype: int64

# **Cleaning our data for machine learning**

In [None]:
matches.dtypes #Machine Learning uses int and float

date             object
time             object
comp             object
round            object
day              object
venue            object
result           object
gf              float64
ga              float64
opponent         object
xg              float64
xga             float64
poss            float64
attendance      float64
captain          object
formation        object
referee          object
match report     object
notes           float64
sh              float64
sot             float64
dist            float64
fk              float64
pk                int64
pkatt             int64
season            int64
team             object
dtype: object

In [None]:
matches["date"] = pd.to_datetime(matches["date"])  #Change date column to datetime

In [None]:
matches.dtypes

date            datetime64[ns]
time                    object
comp                    object
round                   object
day                     object
venue                   object
result                  object
gf                     float64
ga                     float64
opponent                object
xg                     float64
xga                    float64
poss                   float64
attendance             float64
captain                 object
formation               object
referee                 object
match report            object
notes                  float64
sh                     float64
sot                    float64
dist                   float64
fk                     float64
pk                       int64
pkatt                    int64
season                   int64
team                    object
dtype: object

# **Creating predictors for machine learning**

In [None]:
# Encode the venue string as a small integer via a pandas categorical
# (in the displayed output: Away -> 0, Home -> 1).
matches["venue_code"] = matches["venue"].astype("category").cat.codes

In [None]:
# Encode each opponent name as an integer category code. The codes are arbitrary
# labels with no numeric meaning of their own.
matches["opp_code"] = matches["opponent"].astype("category").cat.codes

In [None]:
# Keep only the kick-off hour: drop everything from the first ":" onward
# ("20:00" -> "20") and convert the remainder to an integer.
matches["hour"] = matches["time"].str.replace(":.+","", regex = True).astype("int")

In [None]:
# Day of the week as a number (Monday = 0 ... Sunday = 6), derived from the parsed date.
matches["day_code"] = matches["date"].dt.weekday

In [None]:
# Binary target: 1 when the result is a win ("W"), 0 otherwise (loss or draw).
# TODO: consider a three-class target (0/1/2) so draws and losses are distinguished.
matches["target"] = (matches["result"]=="W").astype("int")

In [None]:
matches

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,fk,pk,pkatt,season,team,venue_code,opp_code,hour,day_code,target
1,2023-08-11,20:00,Premier League,Matchweek 1,Fri,Away,W,3.0,0.0,Burnley,...,0.0,0,0,2024,Manchester City,0,5,20,4,1
3,2023-08-19,20:00,Premier League,Matchweek 2,Sat,Home,W,1.0,0.0,Newcastle Utd,...,0.0,0,0,2024,Manchester City,1,16,20,5,1
4,2023-08-27,14:00,Premier League,Matchweek 3,Sun,Away,W,2.0,1.0,Sheffield Utd,...,2.0,0,1,2024,Manchester City,0,18,14,6,1
5,2023-09-02,15:00,Premier League,Matchweek 4,Sat,Home,W,5.0,1.0,Fulham,...,0.0,1,1,2024,Manchester City,1,9,15,5,1
6,2023-09-16,15:00,Premier League,Matchweek 5,Sat,Away,W,3.0,1.0,West Ham,...,1.0,0,0,2024,Manchester City,0,21,15,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42,2023-04-30,14:00,Premier League,Matchweek 34,Sun,Away,L,1.0,3.0,Newcastle Utd,...,0.0,0,0,2023,Southampton,0,16,14,6,0
43,2023-05-08,20:00,Premier League,Matchweek 35,Mon,Away,L,3.0,4.0,Nott'ham Forest,...,0.0,1,1,2023,Southampton,0,17,20,0,0
44,2023-05-13,15:00,Premier League,Matchweek 36,Sat,Home,L,0.0,2.0,Fulham,...,0.0,0,0,2023,Southampton,1,9,15,5,0
45,2023-05-21,14:00,Premier League,Matchweek 37,Sun,Away,L,1.0,3.0,Brighton,...,1.0,0,0,2023,Southampton,0,4,14,6,0


# **Creating initial machine learning model**

In [None]:
from sklearn.ensemble import RandomForestClassifier # Non Linear Model (Can recognize that numbers etc. opp code is just a value with no linear relationship)

In [None]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10,random_state=1) #Decision tree

In [None]:
# Training set: every match played strictly before 2024-01-01.
train = matches[matches["date"] < '2024-01-01']

In [None]:
train

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,fk,pk,pkatt,season,team,venue_code,opp_code,hour,day_code,target
1,2023-08-11,20:00,Premier League,Matchweek 1,Fri,Away,W,3.0,0.0,Burnley,...,0.0,0,0,2024,Manchester City,0,5,20,4,1
3,2023-08-19,20:00,Premier League,Matchweek 2,Sat,Home,W,1.0,0.0,Newcastle Utd,...,0.0,0,0,2024,Manchester City,1,16,20,5,1
4,2023-08-27,14:00,Premier League,Matchweek 3,Sun,Away,W,2.0,1.0,Sheffield Utd,...,2.0,0,1,2024,Manchester City,0,18,14,6,1
5,2023-09-02,15:00,Premier League,Matchweek 4,Sat,Home,W,5.0,1.0,Fulham,...,0.0,1,1,2024,Manchester City,1,9,15,5,1
6,2023-09-16,15:00,Premier League,Matchweek 5,Sat,Away,W,3.0,1.0,West Ham,...,1.0,0,0,2024,Manchester City,0,21,15,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42,2023-04-30,14:00,Premier League,Matchweek 34,Sun,Away,L,1.0,3.0,Newcastle Utd,...,0.0,0,0,2023,Southampton,0,16,14,6,0
43,2023-05-08,20:00,Premier League,Matchweek 35,Mon,Away,L,3.0,4.0,Nott'ham Forest,...,0.0,1,1,2023,Southampton,0,17,20,0,0
44,2023-05-13,15:00,Premier League,Matchweek 36,Sat,Home,L,0.0,2.0,Fulham,...,0.0,0,0,2023,Southampton,1,9,15,5,0
45,2023-05-21,14:00,Premier League,Matchweek 37,Sun,Away,L,1.0,3.0,Brighton,...,1.0,0,0,2023,Southampton,0,4,14,6,0


In [None]:
# Test set: every match from 2024-01-01 onward. ">=" (rather than ">") matters here:
# the training split keeps dates strictly before the cutoff, so with ">" any match
# played on 2024-01-01 itself would be silently excluded from both splits.
test = matches[matches["date"] >= '2024-01-01']

In [None]:
predictors = ["venue_code" , "opp_code" , "hour" , "day_code"]

In [None]:
rf.fit(train[predictors], train["target"])

In [None]:
preds = rf.predict(test[predictors])

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
acc = accuracy_score(test["target"],preds)

In [None]:
acc

0.7630057803468208

In [None]:
combined = pd.DataFrame(dict(actual=test["target"],prediction=preds))

In [None]:
pd.crosstab(index=combined["actual"], columns=combined["prediction"])

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,191,28
1,54,73


In [None]:
from sklearn.metrics import precision_score

In [None]:
precision_score(test["target"],preds)

0.7227722772277227

# **Improving precision with rolling averages**

In [None]:
grouped_matches = matches.groupby("team")

In [None]:
group = grouped_matches.get_group("Manchester City")

In [None]:
group

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,fk,pk,pkatt,season,team,venue_code,opp_code,hour,day_code,target
1,2023-08-11,20:00,Premier League,Matchweek 1,Fri,Away,W,3.0,0.0,Burnley,...,0.0,0,0,2024,Manchester City,0,5,20,4,1
3,2023-08-19,20:00,Premier League,Matchweek 2,Sat,Home,W,1.0,0.0,Newcastle Utd,...,0.0,0,0,2024,Manchester City,1,16,20,5,1
4,2023-08-27,14:00,Premier League,Matchweek 3,Sun,Away,W,2.0,1.0,Sheffield Utd,...,2.0,0,1,2024,Manchester City,0,18,14,6,1
5,2023-09-02,15:00,Premier League,Matchweek 4,Sat,Home,W,5.0,1.0,Fulham,...,0.0,1,1,2024,Manchester City,1,9,15,5,1
6,2023-09-16,15:00,Premier League,Matchweek 5,Sat,Away,W,3.0,1.0,West Ham,...,1.0,0,0,2024,Manchester City,0,21,15,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52,2023-05-06,15:00,Premier League,Matchweek 35,Sat,Home,W,2.0,1.0,Leeds United,...,2.0,0,1,2023,Manchester City,1,10,15,5,1
54,2023-05-14,14:00,Premier League,Matchweek 36,Sun,Away,W,3.0,0.0,Everton,...,2.0,0,0,2023,Manchester City,0,8,14,6,1
56,2023-05-21,16:00,Premier League,Matchweek 37,Sun,Home,W,1.0,0.0,Chelsea,...,0.0,0,0,2023,Manchester City,1,6,16,6,1
57,2023-05-24,20:00,Premier League,Matchweek 32,Wed,Away,D,1.0,1.0,Brighton,...,0.0,0,0,2023,Manchester City,0,4,20,2,0


In [None]:
def rolling_averages(group, cols, new_cols, window=3):
    """Add rolling means of previous matches to one team's match rows.

    Parameters
    ----------
    group : pandas.DataFrame
        Match rows containing a ``date`` column and every column in ``cols``.
        The caller's frame is not modified; a sorted copy is returned.
    cols : list of str
        Columns to average.
    new_cols : list of str
        Names for the resulting rolling-average columns, one per entry in
        ``cols`` and in the same order.
    window : int, default 3
        Number of preceding matches to average over.

    Returns
    -------
    pandas.DataFrame
        ``group`` sorted by ``date`` with ``new_cols`` added. Rows where any
        rolling value is missing (e.g. the first ``window`` matches, which do
        not yet have enough history) are dropped.
    """
    group = group.sort_values("date")
    # closed='left' excludes the current row from its own window, so each match
    # only sees statistics from matches that were played before it.
    rolling_stats = group[cols].rolling(window, closed='left').mean()
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols)
    return group

In [None]:
cols = ["gf", "ga", "sh", "sot", "dist", "fk", "pk", "pkatt"]
new_cols = [f"{c}_rolling" for c in cols]

In [None]:
new_cols

['gf_rolling',
 'ga_rolling',
 'sh_rolling',
 'sot_rolling',
 'dist_rolling',
 'fk_rolling',
 'pk_rolling',
 'pkatt_rolling']

In [None]:
rolling_averages( group , cols , new_cols)

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,day_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
4,2022-08-27,15:00,Premier League,Matchweek 4,Sat,Home,W,4.0,2.0,Crystal Palace,...,5,1,3.000000,1.000000,17.666667,6.000000,17.466667,0.666667,0.333333,0.333333
5,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,6.0,0.0,Nott'ham Forest,...,2,1,3.666667,1.666667,19.333333,7.333333,15.933333,0.333333,0.000000,0.000000
6,2022-09-03,17:30,Premier League,Matchweek 6,Sat,Away,D,1.0,1.0,Aston Villa,...,5,0,4.333333,1.666667,18.666667,8.000000,15.033333,0.333333,0.000000,0.000000
9,2022-09-17,12:30,Premier League,Matchweek 8,Sat,Away,W,3.0,0.0,Wolves,...,5,1,3.666667,1.000000,16.000000,6.000000,15.233333,0.333333,0.000000,0.000000
10,2022-10-02,14:00,Premier League,Matchweek 9,Sun,Home,W,6.0,3.0,Manchester Utd,...,6,1,3.333333,0.333333,15.333333,6.666667,17.000000,0.333333,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50,2024-04-25,20:00,Premier League,Matchweek 29,Thu,Away,W,4.0,0.0,Brighton,...,3,1,4.333333,1.333333,26.333333,10.333333,16.300000,0.666667,0.333333,0.333333
51,2024-04-28,16:30,Premier League,Matchweek 35,Sun,Away,W,2.0,0.0,Nott'ham Forest,...,6,1,4.333333,1.000000,22.666667,8.666667,16.666667,0.333333,0.333333,0.333333
52,2024-05-04,17:30,Premier League,Matchweek 36,Sat,Home,W,5.0,1.0,Wolves,...,5,1,3.666667,0.333333,20.333333,7.666667,17.533333,0.666667,0.333333,0.333333
53,2024-05-11,12:30,Premier League,Matchweek 37,Sat,Away,W,4.0,0.0,Fulham,...,5,1,3.666667,0.333333,14.333333,7.000000,17.000000,0.666667,0.666667,0.666667


In [None]:
# Compute the rolling averages team by team and stack the results.
# An explicit concat over the groups is used instead of groupby(...).apply(...):
# apply operating on the grouping column is deprecated in recent pandas, and the
# "team" column must stay in each per-team frame because later cells select it.
# The dict keys become the outer index level, named "team".
matches_rolling = pd.concat(
    {
        team_name: rolling_averages(team_matches, cols, new_cols)
        for team_name, team_matches in matches.groupby("team")
    },
    names=["team"],
)

In [None]:
matches_rolling

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,day_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
3,2022-08-27,17:30,Premier League,Matchweek 4,Sat,Home,W,2.0,1.0,Fulham,...,5,1,3.000000,0.666667,14.333333,5.000000,14.133333,0.333333,0.000000,0.000000
4,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,2.0,1.0,Aston Villa,...,2,1,3.000000,1.000000,18.333333,7.000000,14.433333,0.333333,0.000000,0.000000
5,2022-09-04,16:30,Premier League,Matchweek 6,Sun,Away,L,1.0,3.0,Manchester Utd,...,6,0,2.333333,0.666667,19.333333,7.333333,15.533333,0.666667,0.000000,0.000000
7,2022-09-18,12:00,Premier League,Matchweek 8,Sun,Away,W,3.0,0.0,Brentford,...,6,1,1.666667,1.666667,20.000000,6.333333,16.800000,1.000000,0.000000,0.000000
8,2022-10-01,12:30,Premier League,Matchweek 9,Sat,Home,W,3.0,1.0,Tottenham,...,5,1,2.000000,1.333333,17.000000,6.000000,17.700000,0.666667,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,2024-04-20,19:30,Premier League,Matchweek 34,Sat,Home,L,0.0,2.0,Arsenal,...,5,0,1.333333,1.666667,10.666667,3.666667,17.966667,0.333333,0.333333,0.333333
40,2024-04-24,19:45,Premier League,Matchweek 29,Wed,Home,L,0.0,1.0,Bournemouth,...,2,0,1.000000,2.000000,9.666667,4.000000,20.533333,0.333333,0.333333,0.333333
41,2024-04-27,15:00,Premier League,Matchweek 35,Sat,Home,W,2.0,1.0,Luton Town,...,5,1,0.666667,1.666667,10.333333,3.333333,18.766667,0.000000,0.000000,0.000000
42,2024-05-04,17:30,Premier League,Matchweek 36,Sat,Away,L,1.0,5.0,Manchester City,...,5,0,0.666667,1.333333,11.000000,4.000000,19.666667,0.000000,0.000000,0.000000


In [None]:
matches_rolling = matches_rolling.droplevel('team')

In [None]:
matches_rolling

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,day_code,target,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling
0,2022-08-27,17:30,Premier League,Matchweek 4,Sat,Home,W,2.0,1.0,Fulham,...,5,1,3.000000,0.666667,14.333333,5.000000,14.133333,0.333333,0.000000,0.000000
1,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,2.0,1.0,Aston Villa,...,2,1,3.000000,1.000000,18.333333,7.000000,14.433333,0.333333,0.000000,0.000000
2,2022-09-04,16:30,Premier League,Matchweek 6,Sun,Away,L,1.0,3.0,Manchester Utd,...,6,0,2.333333,0.666667,19.333333,7.333333,15.533333,0.666667,0.000000,0.000000
3,2022-09-18,12:00,Premier League,Matchweek 8,Sun,Away,W,3.0,0.0,Brentford,...,6,1,1.666667,1.666667,20.000000,6.333333,16.800000,1.000000,0.000000,0.000000
4,2022-10-01,12:30,Premier League,Matchweek 9,Sat,Home,W,3.0,1.0,Tottenham,...,5,1,2.000000,1.333333,17.000000,6.000000,17.700000,0.666667,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1426,2024-04-20,19:30,Premier League,Matchweek 34,Sat,Home,L,0.0,2.0,Arsenal,...,5,0,1.333333,1.666667,10.666667,3.666667,17.966667,0.333333,0.333333,0.333333
1427,2024-04-24,19:45,Premier League,Matchweek 29,Wed,Home,L,0.0,1.0,Bournemouth,...,2,0,1.000000,2.000000,9.666667,4.000000,20.533333,0.333333,0.333333,0.333333
1428,2024-04-27,15:00,Premier League,Matchweek 35,Sat,Home,W,2.0,1.0,Luton Town,...,5,1,0.666667,1.666667,10.333333,3.333333,18.766667,0.000000,0.000000,0.000000
1429,2024-05-04,17:30,Premier League,Matchweek 36,Sat,Away,L,1.0,5.0,Manchester City,...,5,0,0.666667,1.333333,11.000000,4.000000,19.666667,0.000000,0.000000,0.000000


In [None]:
matches_rolling.index = range(matches_rolling.shape[0])

In [None]:
def make_predictions(data, predictors, cutoff='2024-01-01'):
    """Fit the notebook-level random forest on earlier matches and score it on later ones.

    Parameters
    ----------
    data : pandas.DataFrame
        Match rows with a ``date`` column, a ``target`` column and every
        column named in ``predictors``.
    predictors : list of str
        Feature columns passed to the model.
    cutoff : str, default '2024-01-01'
        First date of the test period. Rows dated before it are used for
        training; rows dated on or after it are used for testing.

    Returns
    -------
    combined : pandas.DataFrame
        ``actual`` and ``predicted`` labels, indexed like the test rows.
    precision : float
        Precision of the predictions on the test rows.
    """
    train = data[data["date"] < cutoff]
    # ">=" rather than ">": matches played exactly on the cutoff date belong to the
    # test period instead of being silently dropped from both splits.
    test = data[data["date"] >= cutoff]
    # Note: this refits the shared notebook-level `rf` estimator in place.
    rf.fit(train[predictors], train["target"])
    preds = rf.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test["target"], predicted=preds), index=test.index)
    precision = precision_score(test["target"], preds)
    return combined, precision

In [None]:
combined , precision = make_predictions(matches_rolling, predictors + new_cols)

In [None]:
precision

0.5344827586206896

In [None]:
combined

Unnamed: 0,actual,predicted
55,1,1
56,1,1
57,1,1
58,1,0
59,1,0
...,...,...
1426,0,0
1427,0,0
1428,1,0
1429,0,0


In [None]:
combined = combined.merge(matches_rolling[["date", "team", "opponent", "result"]], left_index=True, right_index=True)

In [None]:
combined

Unnamed: 0,actual,predicted,date,team,opponent,result
55,1,1,2024-01-20,Arsenal,Crystal Palace,W
56,1,1,2024-01-30,Arsenal,Nott'ham Forest,W
57,1,1,2024-02-04,Arsenal,Liverpool,W
58,1,0,2024-02-11,Arsenal,West Ham,W
59,1,0,2024-02-17,Arsenal,Burnley,W
...,...,...,...,...,...,...
1426,0,0,2024-04-20,Wolverhampton Wanderers,Arsenal,L
1427,0,0,2024-04-24,Wolverhampton Wanderers,Bournemouth,L
1428,1,0,2024-04-27,Wolverhampton Wanderers,Luton Town,W
1429,0,0,2024-05-04,Wolverhampton Wanderers,Manchester City,L


# **Combining home and away predictions**

In [None]:
class MissingDict(dict):
    """A dict that returns the looked-up key itself when the key is absent.

    Used with ``Series.map`` so that names without an explicit mapping pass
    through unchanged instead of becoming NaN.
    """

    def __missing__(self, key):
        return key

# Full club names (as in the "team" column) -> the shortened spellings used in
# the "opponent" column. Clubs whose name is spelled identically in both
# columns need no entry.
map_values = {
    "Brighton and Hove Albion": "Brighton",
    "Manchester United": "Manchester Utd",
    "Newcastle United": "Newcastle Utd",
    # Without the next two entries these clubs' own rows never find a matching
    # opponent row in the self-merge and are dropped from the merged result.
    "Nottingham Forest": "Nott'ham Forest",
    "Sheffield United": "Sheffield Utd",
    "Tottenham Hotspur": "Tottenham",
    "West Ham United": "West Ham",
    "Wolverhampton Wanderers": "Wolves",
}
mapping = MissingDict(**map_values)

In [None]:
combined["new_team"] = combined["team"].map(mapping)

In [None]:
combined

Unnamed: 0,actual,predicted,date,team,opponent,result,new_team
55,1,1,2024-01-20,Arsenal,Crystal Palace,W,Arsenal
56,1,1,2024-01-30,Arsenal,Nott'ham Forest,W,Arsenal
57,1,1,2024-02-04,Arsenal,Liverpool,W,Arsenal
58,1,0,2024-02-11,Arsenal,West Ham,W,Arsenal
59,1,0,2024-02-17,Arsenal,Burnley,W,Arsenal
...,...,...,...,...,...,...,...
1426,0,0,2024-04-20,Wolverhampton Wanderers,Arsenal,L,Wolves
1427,0,0,2024-04-24,Wolverhampton Wanderers,Bournemouth,L,Wolves
1428,1,0,2024-04-27,Wolverhampton Wanderers,Luton Town,W,Wolves
1429,0,0,2024-05-04,Wolverhampton Wanderers,Manchester City,L,Wolves


In [None]:
merged = combined.merge(combined, left_on=["date", "new_team"], right_on=["date", "opponent"])

In [None]:
merged

Unnamed: 0,actual_x,predicted_x,date,team_x,opponent_x,result_x,new_team_x,actual_y,predicted_y,team_y,opponent_y,result_y,new_team_y
0,1,1,2024-01-20,Arsenal,Crystal Palace,W,Arsenal,0,0,Crystal Palace,Arsenal,L,Crystal Palace
1,1,1,2024-01-30,Arsenal,Nott'ham Forest,W,Arsenal,0,0,Nottingham Forest,Arsenal,L,Nottingham Forest
2,1,1,2024-02-04,Arsenal,Liverpool,W,Arsenal,0,0,Liverpool,Arsenal,L,Liverpool
3,1,0,2024-02-11,Arsenal,West Ham,W,Arsenal,0,0,West Ham United,Arsenal,L,West Ham
4,1,0,2024-02-17,Arsenal,Burnley,W,Arsenal,0,0,Burnley,Arsenal,L,Burnley
...,...,...,...,...,...,...,...,...,...,...,...,...,...
307,0,0,2024-04-20,Wolverhampton Wanderers,Arsenal,L,Wolves,1,0,Arsenal,Wolves,W,Arsenal
308,0,0,2024-04-24,Wolverhampton Wanderers,Bournemouth,L,Wolves,1,0,Bournemouth,Wolves,W,Bournemouth
309,1,0,2024-04-27,Wolverhampton Wanderers,Luton Town,W,Wolves,0,0,Luton Town,Wolves,L,Luton Town
310,0,0,2024-05-04,Wolverhampton Wanderers,Manchester City,L,Wolves,1,1,Manchester City,Wolves,W,Manchester City


In [None]:
# Among matches where the model predicts a win for one side and no win for the
# other, count how often that predicted winner actually won.
merged.query("predicted_x == 1 and predicted_y == 0")["actual_x"].value_counts()

actual_x
1    54
0    39
Name: count, dtype: int64