# Could Brazil have won the World Cup?

In this project, we'll determine if Brazil could have won the 2022 FIFA World Cup in Qatar.

**Project Steps**
- Clean data and prepare it for machine learning using pandas
- Make predictions on the outcome of Brazil's matches using scikit-learn
- Measure error and improve our predictions
- Make predictions using future matches

Data by FBref

### Predicting the Outcome of Matches

In [1]:
import pandas as pd

In [2]:
#Calling our two data files

matches = pd.read_table('brazil_matches.csv', sep = ',' )
future_matches = pd.read_table('brazil_future_matches.csv', sep = ',' )
matches.head(10)

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,GD,KTPM,Opponent,Attendance,Captain,Formation,Referee,Coach
0,2020,10/9/20,21:30 (02:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,5,0,5,,BOL,,Casemiro,4/3/03,Leodán González,Tite
1,2020,10/13/20,19:00 (02:00),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,4,2,2,,PER,,Thiago Silva,4/3/03,Julio Bascuñán,Tite
2,2020,11/13/20,21:30 (01:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,1,0,1,,VEN,,Thiago Silva,4/3/03,Juan Benítez,Tite
3,2020,11/17/20,20:00 (00:00),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,2,0,2,,URU,,Thiago Silva,4/3/03,Roberto Tobar,Tite
4,2021,6/4/21,21:30 (02:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,2,0,2,,ECU,,Casemiro,4/3/03,Alexis Herrera,Tite
5,2021,6/8/21,20:30 (02:30),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,2,0,2,,PAR,,Marquinhos,4-2-2-2,Patricio Loustau,Tite
6,2021,9/2/21,21:00 (03:00),WCQ,WCQ — CONMEBOL (M),Thu,Away,W,1,0,1,,CHI,,Casemiro,4/4/02,Diego Haro,Tite
7,2021,9/5/21,16:00 (21:00),WCQ,WCQ — CONMEBOL (M),Sun,Home,D,0,0,0,,ARG,,Casemiro,4-2-2-2,Jesús Valenzuela,Tite
8,2021,9/9/21,21:30 (02:30),WCQ,WCQ — CONMEBOL (M),Thu,Home,W,2,0,2,,PER,,Casemiro,4/4/02,Wilmar Roldán,Tite
9,2021,10/7/21,19:30 (01:30),WCQ,WCQ — CONMEBOL (M),Thu,Away,W,3,1,2,,VEN,,Thiago Silva,4/4/02,Kevin Ortega,Tite


In [3]:
future_matches

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,GD,KTPM,Opponent,Attendance,Captain,Formation,Referee,Coach
0,2022,,,FWC,Semi-finals,,Neutral,,,,,,ARG,,,,,Tite
1,2022,,,FWC,Final,,Neutral,,,,,,FRA,,,,,Tite
2,2023,,,FRL,Friendly,,Away,,,,,,MAR,,,,,New


In [4]:
#Checking dtype
matches.dtypes

Year           int64
Date          object
Time          object
Comp          object
Round         object
Day           object
Venue         object
Result        object
GF             int64
GA             int64
GD             int64
KTPM          object
Opponent      object
Attendance    object
Captain       object
Formation     object
Referee       object
Coach         object
dtype: object

In [5]:
#Reviewing our Opponent Sample
count_rival = pd.DataFrame(pd.value_counts(matches.Opponent))
count_rival.head()

Unnamed: 0,Opponent
PER,10
ARG,9
COL,8
VEN,7
URU,6


In [6]:
#Reviewing our Number of Matches Sample
count_matches = pd.DataFrame(pd.value_counts(matches.Year))
count_matches.head()

Unnamed: 0,Year
2021,27
2019,16
2018,14
2022,13
2016,8


In [7]:
#Converting Opponent, Year, and Comp as a Catergory Code
matches['opp_code'] = matches['Opponent'].astype('category').cat.codes
matches['year_code'] = matches['Year'].astype('category').cat.codes
matches['kind_code'] = matches['Comp'].astype('category').cat.codes
matches['target'] = (matches['Result'] == 'W').astype('int')

In [8]:
matches.head(5)

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,...,Opponent,Attendance,Captain,Formation,Referee,Coach,opp_code,year_code,kind_code,target
0,2020,10/9/20,21:30 (02:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,5,0,...,BOL,,Casemiro,4/3/03,Leodán González,Tite,3,5,3,1
1,2020,10/13/20,19:00 (02:00),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,4,2,...,PER,,Thiago Silva,4/3/03,Julio Bascuñán,Tite,21,5,3,1
2,2020,11/13/20,21:30 (01:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,1,0,...,VEN,,Thiago Silva,4/3/03,Juan Benítez,Tite,31,5,3,1
3,2020,11/17/20,20:00 (00:00),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,2,0,...,URU,,Thiago Silva,4/3/03,Roberto Tobar,Tite,29,5,3,1
4,2021,6/4/21,21:30 (02:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,2,0,...,ECU,,Casemiro,4/3/03,Alexis Herrera,Tite,10,6,3,1


In [9]:
from sklearn.ensemble import RandomForestClassifier

In [10]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)

In [11]:
#Identifying our Training and Test Data
train = matches[matches['Year'] < 2021]
test = matches[matches['Year'] >= 2022]
predictors = ['kind_code','year_code','opp_code']

In [12]:
#Fitting the Training Data into the Classifier
rf.fit(train[predictors], train['target'])

RandomForestClassifier(min_samples_split=10, n_estimators=50, random_state=1)

In [13]:
#Defining preds
preds = rf.predict(test[predictors])

In [14]:
from sklearn.metrics import accuracy_score

In [15]:
#Checking our accuracy_score
accuracy_score(test['target'], preds)

0.7692307692307693

In [16]:
#Combing the actual and predicted outcomes into a df
combined = pd.DataFrame(dict(actual = test['target'], prediction = preds))
pd. crosstab(index=combined['actual'], columns = combined['prediction'])

prediction,1
actual,Unnamed: 1_level_1
0,3
1,10


In [17]:
from sklearn.metrics import precision_score

In [18]:
#Checking our precision_score
precision_score(test['target'], preds)

0.7692307692307693

In [19]:
#Grouping Matches
grouped_matches = matches.groupby('Coach')
group = grouped_matches.get_group('Tite').sort_values('Date')

In [20]:
#Defining our rolling averages for GF, GA, and GD
def rolling_averages(group, cols, new_cols):
  group = group.sort_values('Date')
  rolling_stats = group[cols].rolling(1, closed='left').mean()
  group[new_cols] = rolling_stats
  group = group.dropna(subset=new_cols)
  return group
  
cols = ['GF','GA','GD']
new_cols = [f'{c}_rolling' for c in cols]

rolling_averages(group, cols, new_cols).head()

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,...,Formation,Referee,Coach,opp_code,year_code,kind_code,target,GF_rolling,GA_rolling,GD_rolling
77,2017,10/10/17,20:30 (01:30),WCQ,WCQ — CONMEBOL (M),Tue,Home,W,3,0,...,,Juan Bravo,Tite,5,2,3,1,1.0,1.0,0.0
56,2019,10/10/19,20:00 (14:00),FRL,Friendlies (M),Thu,Home,D,1,1,...,4-2-2-2,Taqi Aljaafari Jahari,Tite,24,4,1,0,3.0,0.0,3.0
10,2021,10/10/21,16:00 (23:00),WCQ,WCQ — CONMEBOL (M),Sun,Away,D,0,0,...,4-2-3-1,Patricio Loustau,Tite,6,6,3,0,1.0,1.0,0.0
40,2021,10/10/21,16:00 (23:00),WCQ,WCQ — CONMEBOL (M),Sun,Away,D,0,0,...,4-2-3-1,Patricio Loustau,Tite,6,6,3,0,0.0,0.0,0.0
69,2016,10/11/16,20:30 (02:30),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,2,0,...,,Víctor Carrillo,Tite,31,1,3,1,0.0,0.0,0.0


In [21]:
#Grouping our rolling averages 
matches_rolling = matches.groupby('Opponent').apply(lambda x: rolling_averages(x, cols, new_cols))
matches_rolling = matches_rolling.droplevel('Opponent')
matches_rolling.index = range(matches_rolling.shape[0])
matches_rolling.head()

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,...,Formation,Referee,Coach,opp_code,year_code,kind_code,target,GF_rolling,GA_rolling,GD_rolling
0,2015,11/13/15,21:00 (01:00),WCQ,WCQ — CONMEBOL (M),Fri,Away,D,1,1,...,,Antonio Arias,Tite,0,0,3,0,3.0,0.0,3.0
1,2019,11/15/19,20:00 (18:00),FRL,Friendlies (M),Fri,Home,L,0,1,...,4-1-4-1,Matt Conger,Tite,0,4,1,0,1.0,1.0,0.0
2,2021,11/16/21,20:30 (00:30),WCQ,WCQ — CONMEBOL (M),Tue,Away,D,0,0,...,4-2-3-1,Andrés Cunha,Tite,0,6,3,0,0.0,1.0,-1.0
3,2021,11/16/21,20:30 (00:30),WCQ,WCQ — CONMEBOL (M),Tue,Away,D,0,0,...,4-2-3-1,Andrés Cunha,Tite,0,6,3,0,0.0,0.0,0.0
4,2021,7/10/21,21:00 (02:00),CA,Final,Sat,Neutral,L,0,1,...,4/3/03,Esteban Ostojich,Tite,0,6,0,0,0.0,0.0,0.0


In [22]:
def make_prediction (matches, predictors):
  train = matches[matches['Year'] < 2020]
  test = matches[matches['Year'] >= 2021]
  rf.fit(train[predictors],train['target'])
  preds = rf.predict(test[predictors])
  combined = pd.DataFrame(dict(actual = test['target'], prediction = preds), index=test.index)
  precision = precision_score(test['target'], preds)
  return combined, precision

In [23]:
combined, precision = make_prediction(matches_rolling, predictors + new_cols)

In [24]:
precision

0.6818181818181818

In [25]:
combined = combined.merge(matches_rolling[['Year','Date','GF','GA','Opponent','Coach','Result','Comp']], left_index=True, right_index=True)

combined.head()

Unnamed: 0,actual,prediction,Year,Date,GF,GA,Opponent,Coach,Result,Comp
2,0,0,2021,11/16/21,0,0,ARG,Tite,D,WCQ
3,0,1,2021,11/16/21,0,0,ARG,Tite,D,WCQ
4,0,1,2021,7/10/21,0,1,ARG,Tite,L,CA
6,0,1,2021,9/5/21,0,0,ARG,Tite,D,WCQ
7,0,1,2021,9/5/21,0,0,ARG,Tite,D,WCQ


### Predicting the Future Matches

In [26]:
future = future_matches
future.head()

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,GD,KTPM,Opponent,Attendance,Captain,Formation,Referee,Coach
0,2022,,,FWC,Semi-finals,,Neutral,,,,,,ARG,,,,,Tite
1,2022,,,FWC,Final,,Neutral,,,,,,FRA,,,,,Tite
2,2023,,,FRL,Friendly,,Away,,,,,,MAR,,,,,New


In [27]:
future['isFuture'] = True
matches['isFuture'] = False
matches_and_future = pd.concat([matches, future])
matches_and_future

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,...,Attendance,Captain,Formation,Referee,Coach,opp_code,year_code,kind_code,target,isFuture
0,2020,10/9/20,21:30 (02:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,5.0,0.0,...,,Casemiro,4/3/03,Leodán González,Tite,3.0,5.0,3.0,1.0,False
1,2020,10/13/20,19:00 (02:00),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,4.0,2.0,...,,Thiago Silva,4/3/03,Julio Bascuñán,Tite,21.0,5.0,3.0,1.0,False
2,2020,11/13/20,21:30 (01:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,1.0,0.0,...,,Thiago Silva,4/3/03,Juan Benítez,Tite,31.0,5.0,3.0,1.0,False
3,2020,11/17/20,20:00 (00:00),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,2.0,0.0,...,,Thiago Silva,4/3/03,Roberto Tobar,Tite,29.0,5.0,3.0,1.0,False
4,2021,6/4/21,21:30 (02:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,2.0,0.0,...,,Casemiro,4/3/03,Alexis Herrera,Tite,10.0,6.0,3.0,1.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,2018,11/16/18,20:00 (21:00),FRL,Friendlies (M),Fri,Home,W,1.0,0.0,...,,Neymar,4/3/03,Craig Pawson,Tite,29.0,3.0,1.0,1.0,False
91,2018,11/20/18,19:30 (20:30),FRL,Friendlies (M),Tue,Home,W,1.0,0.0,...,,Neymar,4-1-4-1,Michael Oliver,Tite,4.0,3.0,1.0,1.0,False
0,2022,,,FWC,Semi-finals,,Neutral,,,,...,,,,,Tite,,,,,True
1,2022,,,FWC,Final,,Neutral,,,,...,,,,,Tite,,,,,True


In [28]:
matches_and_future['opp_code'] = matches_and_future['Opponent'].astype('category').cat.codes
matches_and_future['year_code'] = matches_and_future['Year'].astype('category').cat.codes
matches_and_future['kind_code'] = matches_and_future['Comp'].astype('category').cat.codes
matches_and_future['target'] = (matches_and_future['Result'] == 'W').astype('int')

In [29]:
matches_and_future.dtypes

Year            int64
Date           object
Time           object
Comp           object
Round          object
Day            object
Venue          object
Result         object
GF            float64
GA            float64
GD            float64
KTPM           object
Opponent       object
Attendance     object
Captain        object
Formation      object
Referee        object
Coach          object
opp_code         int8
year_code        int8
kind_code        int8
target          int64
isFuture         bool
dtype: object

In [30]:
#Checking out DF
matches_and_future.head()

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,...,Attendance,Captain,Formation,Referee,Coach,opp_code,year_code,kind_code,target,isFuture
0,2020,10/9/20,21:30 (02:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,5.0,0.0,...,,Casemiro,4/3/03,Leodán González,Tite,3,5,3,1,False
1,2020,10/13/20,19:00 (02:00),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,4.0,2.0,...,,Thiago Silva,4/3/03,Julio Bascuñán,Tite,23,5,3,1,False
2,2020,11/13/20,21:30 (01:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,1.0,0.0,...,,Thiago Silva,4/3/03,Juan Benítez,Tite,33,5,3,1,False
3,2020,11/17/20,20:00 (00:00),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,2.0,0.0,...,,Thiago Silva,4/3/03,Roberto Tobar,Tite,31,5,3,1,False
4,2021,6/4/21,21:30 (02:30),WCQ,WCQ — CONMEBOL (M),Fri,Home,W,2.0,0.0,...,,Casemiro,4/3/03,Alexis Herrera,Tite,10,6,3,1,False


In [31]:
#Filtering to see only FWC matches
matches_and_future[(matches_and_future['Comp'] == 'FWC')]

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,...,Attendance,Captain,Formation,Referee,Coach,opp_code,year_code,kind_code,target,isFuture
22,2022,11/24/22,22:00 (20:00),FWC,Group stage,Thu,Neutral,W,2.0,0.0,...,88103.0,Thiago Silva,4-2-3-1,Alireza Faghani,Tite,28,7,2,1,False
23,2022,11/28/22,19:00 (17:00),FWC,Group stage,Mon,Neutral,W,1.0,0.0,...,43649.0,Thiago Silva,4/3/03,Iván Barton,Tite,29,7,2,1,False
24,2022,12/2/22,22:00 (20:00),FWC,Group stage,Fri,Neutral,L,0.0,1.0,...,85986.0,Dani Alves,4-2-3-1,Ismail Elfath,Tite,4,7,2,0,False
25,2022,12/5/22,22:00 (20:00),FWC,Round of 16,Mon,Neutral,W,4.0,1.0,...,43847.0,Thiago Silva,4-2-3-1,Clément Turpin,Tite,16,7,2,1,False
26,2022,12/9/22,18:00 (16:00),FWC,Quarter-finals,Fri,Neutral,D,1.0,1.0,...,43893.0,Thiago Silva,4-2-3-1,Michael Oliver,Tite,8,7,2,0,False
82,2018,6/17/18,21:00 (20:00),FWC,Group stage,Sun,Neutral,D,1.0,1.0,...,43109.0,Marcelo,4/3/03,César Ramos,Tite,29,3,2,0,False
83,2018,6/22/18,15:00 (14:00),FWC,Group stage,Fri,Neutral,W,2.0,0.0,...,64468.0,Thiago Silva,4/3/03,Björn Kuipers,Tite,7,3,2,1,False
84,2018,6/27/18,21:00 (20:00),FWC,Group stage,Wed,Neutral,W,2.0,0.0,...,44190.0,Miranda,4/3/03,Alireza Faghani,Tite,28,3,2,1,False
85,2018,7/2/18,18:00 (16:00),FWC,Round of 16,Mon,Neutral,W,2.0,0.0,...,41970.0,Thiago Silva,4/3/03,Gianluca Rocchi,Tite,19,3,2,1,False
86,2018,7/6/18,21:00 (20:00),FWC,Quarter-finals,Fri,Neutral,L,1.0,2.0,...,42873.0,Miranda,4/3/03,Milorad Mažić,Tite,2,3,2,0,False


In [32]:
from sklearn.ensemble import RandomForestClassifier

In [33]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)

In [34]:
train = matches_and_future[matches_and_future['Year'] < 2021]
test = matches_and_future[matches_and_future['Year'] >= 2022]
predictors = ['kind_code','year_code','opp_code']

In [35]:
rf.fit(train[predictors],train['target'])

RandomForestClassifier(min_samples_split=10, n_estimators=50, random_state=1)

In [36]:
preds = rf.predict(test[predictors])

In [37]:
from sklearn.metrics import accuracy_score

In [38]:
accuracy_score(test['target'], preds)

0.625

In [39]:
combined = pd.DataFrame(dict(actual = test['target'], prediction = preds))

pd. crosstab(index=combined['actual'], columns = combined['prediction'])

prediction,1
actual,Unnamed: 1_level_1
0,6
1,10


In [40]:
from sklearn.metrics import precision_score

precision_score(test['target'], preds)

0.625

In [41]:
grouped_matches = matches_and_future.groupby('Coach')
group = grouped_matches.get_group('Tite').sort_values('Date')

In [42]:
def rolling_averages(group, cols, new_cols):
  group = group.sort_values('Date')
  rolling_stats = group[cols].rolling(1, closed='left').mean()
  group[new_cols] = rolling_stats
  group = group.dropna(subset=new_cols)
  return group
  
cols = ['opp_code','kind_code']
new_cols = [f'{c}_rolling' for c in cols]

In [43]:
rolling_averages(group, cols, new_cols).head()

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,...,Formation,Referee,Coach,opp_code,year_code,kind_code,target,isFuture,opp_code_rolling,kind_code_rolling
77,2017,10/10/17,20:30 (01:30),WCQ,WCQ — CONMEBOL (M),Tue,Home,W,3.0,0.0,...,,Juan Bravo,Tite,5,2,3,1,False,10.0,3.0
56,2019,10/10/19,20:00 (14:00),FRL,Friendlies (M),Thu,Home,D,1.0,1.0,...,4-2-2-2,Taqi Aljaafari Jahari,Tite,26,4,1,0,False,5.0,3.0
10,2021,10/10/21,16:00 (23:00),WCQ,WCQ — CONMEBOL (M),Sun,Away,D,0.0,0.0,...,4-2-3-1,Patricio Loustau,Tite,6,6,3,0,False,26.0,1.0
40,2021,10/10/21,16:00 (23:00),WCQ,WCQ — CONMEBOL (M),Sun,Away,D,0.0,0.0,...,4-2-3-1,Patricio Loustau,Tite,6,6,3,0,False,6.0,3.0
69,2016,10/11/16,20:30 (02:30),WCQ,WCQ — CONMEBOL (M),Tue,Away,W,2.0,0.0,...,,Víctor Carrillo,Tite,33,1,3,1,False,6.0,3.0


In [44]:
matches_rolling = matches_and_future.groupby('Result').apply(lambda x: rolling_averages(x, cols, new_cols))
matches_rolling = matches_rolling.droplevel('Result')
matches_rolling.index = range(matches_rolling.shape[0])
matches_rolling.head()

Unnamed: 0,Year,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,...,Formation,Referee,Coach,opp_code,year_code,kind_code,target,isFuture,opp_code_rolling,kind_code_rolling
0,2019,10/10/19,20:00 (14:00),FRL,Friendlies (M),Thu,Home,D,1.0,1.0,...,4-2-2-2,Taqi Aljaafari Jahari,Tite,26,4,1,0,False,10.0,3.0
1,2021,10/10/21,16:00 (23:00),WCQ,WCQ — CONMEBOL (M),Sun,Away,D,0.0,0.0,...,4-2-3-1,Patricio Loustau,Tite,6,6,3,0,False,26.0,1.0
2,2021,10/10/21,16:00 (23:00),WCQ,WCQ — CONMEBOL (M),Sun,Away,D,0.0,0.0,...,4-2-3-1,Patricio Loustau,Tite,6,6,3,0,False,6.0,3.0
3,2019,10/13/19,20:00 (14:00),FRL,Friendlies (M),Sun,Home,D,1.0,1.0,...,4-2-3-1,Jansen Foo,Tite,20,4,1,0,False,6.0,3.0
4,2017,10/5/17,16:00 (22:00),WCQ,WCQ — CONMEBOL (M),Thu,Away,D,0.0,0.0,...,,Fernando Rapallini,Tite,3,2,3,0,False,20.0,1.0


In [45]:
def make_prediction(matches_and_future, predictors):
    train = matches_and_future[matches_and_future['Year'] < 2020]
    test = matches_and_future[matches_and_future['Year'] >= 2021]
    rf.fit(train[predictors], train['target'])
    preds = rf.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test['target'], prediction=preds), index=test.index)
    precision = precision_score(test['target'], preds)
    return combined, precision

In [46]:
combined, precision = make_prediction(matches_rolling, predictors + new_cols)

In [47]:
precision

0.8888888888888888

In [48]:
combined = combined.merge(matches_rolling[['Year','Date','GF','GA','Opponent','Result','Comp']], left_index=True, right_index=True)

opp = combined[(combined['Comp'] == 'FWC')]
opp.sort_values(by=['Year'], ascending = False)

Unnamed: 0,actual,prediction,Year,Date,GF,GA,Opponent,Result,Comp
8,0,0,2022,12/9/22,1.0,1.0,CRO,D,FWC
21,0,0,2022,12/2/22,0.0,1.0,CAM,L,FWC
45,1,1,2022,11/24/22,2.0,0.0,SRB,W,FWC
46,1,1,2022,11/28/22,1.0,0.0,SUI,W,FWC
47,1,1,2022,12/5/22,4.0,1.0,KOR,W,FWC
