### Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier, AdaBoostClassifier, \
    BaggingClassifier, StackingClassifier
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer
from sklearn.neural_network import MLPClassifier

### Import Data

In [2]:
# Raw competition tables. The first CSV column is used as the index of every frame.

# Training features, home side then away side.
train_home_team_statistics_df = pd.read_csv('Train_Data/train_home_team_statistics_df.csv', index_col=0)
train_home_player_statistics_df = pd.read_csv('Train_Data/train_home_player_statistics_df.csv', index_col=0)
train_away_team_statistics_df = pd.read_csv('Train_Data/train_away_team_statistics_df.csv', index_col=0)
train_away_player_statistics_df = pd.read_csv('Train_Data/train_away_player_statistics_df.csv', index_col=0)

# Training targets.
Y_train = pd.read_csv('Y_train_1rknArQ.csv', index_col=0)

# Test features, home side then away side.
test_home_team_statistics_df = pd.read_csv('Test_Data/test_home_team_statistics_df.csv', index_col=0)
test_home_player_statistics_df = pd.read_csv('Test_Data/test_home_player_statistics_df.csv', index_col=0)
test_away_team_statistics_df = pd.read_csv('Test_Data/test_away_team_statistics_df.csv', index_col=0)
test_away_player_statistics_df = pd.read_csv('Test_Data/test_away_player_statistics_df.csv', index_col=0)

### Train Data processing

##### Remove some useless features

In [3]:
# Player-level columns removed from the training feature set: the player's name
# plus the CAPTAIN / PUNCHES / LONG_BALLS / LONG_BALLS_WON / SHOTS_OFF_TARGET
# aggregates. The same list is applied to the home and the away table.
# NOTE(review): 'PLAYER_PUNCHES_season_sum' is the only PUNCHES aggregate that is
# not listed, so it is kept -- confirm whether that is intentional.
train_player_columns_to_drop = [
    'PLAYER_NAME',
    # season sums
    'PLAYER_CAPTAIN_season_sum',
    'PLAYER_LONG_BALLS_season_sum',
    'PLAYER_LONG_BALLS_WON_season_sum',
    'PLAYER_SHOTS_OFF_TARGET_season_sum',
    # season averages
    'PLAYER_CAPTAIN_season_average',
    'PLAYER_PUNCHES_season_average',
    'PLAYER_LONG_BALLS_season_average',
    'PLAYER_LONG_BALLS_WON_season_average',
    'PLAYER_SHOTS_OFF_TARGET_season_average',
    # season standard deviations
    'PLAYER_CAPTAIN_season_std',
    'PLAYER_PUNCHES_season_std',
    'PLAYER_LONG_BALLS_season_std',
    'PLAYER_LONG_BALLS_WON_season_std',
    'PLAYER_SHOTS_OFF_TARGET_season_std',
    # last-5-matches sums
    'PLAYER_CAPTAIN_5_last_match_sum',
    'PLAYER_PUNCHES_5_last_match_sum',
    'PLAYER_LONG_BALLS_5_last_match_sum',
    'PLAYER_LONG_BALLS_WON_5_last_match_sum',
    'PLAYER_SHOTS_OFF_TARGET_5_last_match_sum',
    # last-5-matches averages
    'PLAYER_CAPTAIN_5_last_match_average',
    'PLAYER_PUNCHES_5_last_match_average',
    'PLAYER_LONG_BALLS_5_last_match_average',
    'PLAYER_LONG_BALLS_WON_5_last_match_average',
    'PLAYER_SHOTS_OFF_TARGET_5_last_match_average',
    # last-5-matches standard deviations
    'PLAYER_CAPTAIN_5_last_match_std',
    'PLAYER_PUNCHES_5_last_match_std',
    'PLAYER_LONG_BALLS_5_last_match_std',
    'PLAYER_LONG_BALLS_WON_5_last_match_std',
    'PLAYER_SHOTS_OFF_TARGET_5_last_match_std',
]

train_away_player_statistics_drop_df = train_away_player_statistics_df.drop(columns=train_player_columns_to_drop)

train_home_player_statistics_drop_df = train_home_player_statistics_df.drop(columns=train_player_columns_to_drop)

In [4]:
# Skip the first two columns by position, then tag every remaining column with
# the side it describes.
# NOTE(review): presumably the two skipped columns are descriptive fields such as
# LEAGUE / TEAM_NAME -- confirm against the CSV header.
train_home_team_statistics_drop_df = train_home_team_statistics_df.iloc[:, 2:].add_prefix('HOME_')
train_away_team_statistics_drop_df = train_away_team_statistics_df.iloc[:, 2:].add_prefix('AWAY_')

In [5]:
# Remove the descriptive player columns before the per-ID averaging.
train_player_descriptor_columns = ['POSITION', 'LEAGUE', 'TEAM_NAME']

train_away_player_statistics_drop2_df = train_away_player_statistics_drop_df.drop(
    train_player_descriptor_columns, axis=1
)

train_home_player_statistics_drop2_df = train_home_player_statistics_drop_df.drop(
    train_player_descriptor_columns, axis=1
)

##### Mean of the statistics of the players of each team

In [6]:
# Collapse the player rows to one row per ID by averaging every statistic.
train_away_player_statistics_groupbyID_df = (
    train_away_player_statistics_drop2_df
    .groupby(["ID"])
    .mean()
)

train_home_player_statistics_groupbyID_df = (
    train_home_player_statistics_drop2_df
    .groupby(["ID"])
    .mean()
)

In [7]:
# Tag each averaged player column with the side it describes.
# Labels that already carry the prefix are left untouched, so re-running this
# cell does not produce 'HOME_HOME_...' / 'AWAY_AWAY_...' column names.
train_home_player_statistics_groupbyID_df.columns = [
    name if name.startswith("HOME_") else "HOME_" + name
    for name in train_home_player_statistics_groupbyID_df.columns
]

train_away_player_statistics_groupbyID_df.columns = [
    name if name.startswith("AWAY_") else "AWAY_" + name
    for name in train_away_player_statistics_groupbyID_df.columns
]

##### Join the information about the players and the team together

In [8]:
# Attach the team-level statistics to the per-ID player averages of each side.
# 'ID' is used as the join key and how='outer' keeps IDs that appear in only one
# of the two tables (their missing values become NaN).
X_train_home = train_home_player_statistics_groupbyID_df.join(train_home_team_statistics_drop_df, on=['ID'], how='outer')

X_train_away = train_away_player_statistics_groupbyID_df.join(train_away_team_statistics_drop_df, on=['ID'], how='outer')

In [9]:
# Preview the first rows of the joined home-side feature table.
X_train_home.head()

Unnamed: 0_level_0,HOME_PLAYER_ACCURATE_CROSSES_season_sum,HOME_PLAYER_ACCURATE_PASSES_season_sum,HOME_PLAYER_AERIALS_WON_season_sum,HOME_PLAYER_ASSISTS_season_sum,HOME_PLAYER_BIG_CHANCES_CREATED_season_sum,HOME_PLAYER_BIG_CHANCES_MISSED_season_sum,HOME_PLAYER_BLOCKED_SHOTS_season_sum,HOME_PLAYER_CLEARANCES_season_sum,HOME_PLAYER_CLEARANCE_OFFLINE_season_sum,HOME_PLAYER_DISPOSSESSED_season_sum,...,HOME_TEAM_YELLOWCARDS_5_last_match_std,HOME_TEAM_REDCARDS_5_last_match_std,HOME_TEAM_OFFSIDES_5_last_match_std,HOME_TEAM_ATTACKS_5_last_match_std,HOME_TEAM_PENALTIES_5_last_match_std,HOME_TEAM_SUBSTITUTIONS_5_last_match_std,HOME_TEAM_BALL_SAFE_5_last_match_std,HOME_TEAM_DANGEROUS_ATTACKS_5_last_match_std,HOME_TEAM_INJURIES_5_last_match_std,HOME_TEAM_GOALS_5_last_match_std
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,10.222222,21.333333,15.277778,7.777778,7.222222,6.166667,9.222222,16.722222,2.777778,17.611111,...,3.0,0.0,6.0,0.0,10.0,8.0,7.0,2.0,4.0,3.0
1,9.222222,31.555556,14.555556,16.666667,10.277778,9.0,11.888889,17.166667,2.777778,21.0,...,4.0,0.0,4.0,3.0,10.0,0.0,1.0,2.0,8.0,4.0
2,8.434783,16.608696,15.347826,2.826087,3.652174,3.869565,8.826087,12.826087,0.0,9.869565,...,4.0,5.0,6.0,3.0,6.0,7.0,2.0,3.0,2.0,4.0
3,8.888889,34.055556,15.833333,12.0,14.444444,8.722222,7.444444,19.333333,4.166667,9.388889,...,4.0,0.0,1.0,8.0,8.0,5.0,5.0,5.0,,6.0
4,9.722222,25.888889,15.111111,8.166667,9.5,10.666667,9.611111,17.555556,0.0,11.277778,...,1.0,0.0,2.0,5.0,8.0,7.0,2.0,6.0,4.0,4.0


##### Join the information about home and away team

In [29]:
# Place the home and away feature tables side by side, aligned on their index.
X_train = pd.concat([X_train_home, X_train_away], axis=1, join='outer')

# Put the target rows in the same order as the feature rows.
Y_train = Y_train.loc[X_train.index]

# Treat infinite values as missing.
X_train = X_train.replace([np.inf, -np.inf], np.nan)

In [30]:
# Remember the feature column labels of the merged training table.
columns_name = X_train.columns

In [31]:
# Preview the first rows of the merged (home + away) training features.
X_train.head()

Unnamed: 0_level_0,HOME_PLAYER_ACCURATE_CROSSES_season_sum,HOME_PLAYER_ACCURATE_PASSES_season_sum,HOME_PLAYER_AERIALS_WON_season_sum,HOME_PLAYER_ASSISTS_season_sum,HOME_PLAYER_BIG_CHANCES_CREATED_season_sum,HOME_PLAYER_BIG_CHANCES_MISSED_season_sum,HOME_PLAYER_BLOCKED_SHOTS_season_sum,HOME_PLAYER_CLEARANCES_season_sum,HOME_PLAYER_CLEARANCE_OFFLINE_season_sum,HOME_PLAYER_DISPOSSESSED_season_sum,...,AWAY_TEAM_YELLOWCARDS_5_last_match_std,AWAY_TEAM_REDCARDS_5_last_match_std,AWAY_TEAM_OFFSIDES_5_last_match_std,AWAY_TEAM_ATTACKS_5_last_match_std,AWAY_TEAM_PENALTIES_5_last_match_std,AWAY_TEAM_SUBSTITUTIONS_5_last_match_std,AWAY_TEAM_BALL_SAFE_5_last_match_std,AWAY_TEAM_DANGEROUS_ATTACKS_5_last_match_std,AWAY_TEAM_INJURIES_5_last_match_std,AWAY_TEAM_GOALS_5_last_match_std
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,10.222222,21.333333,15.277778,7.777778,7.222222,6.166667,9.222222,16.722222,2.777778,17.611111,...,5.0,5.0,4.0,0.0,6.0,8.0,4.0,3.0,2.0,3.0
1,9.222222,31.555556,14.555556,16.666667,10.277778,9.0,11.888889,17.166667,2.777778,21.0,...,0.0,0.0,3.0,1.0,8.0,4.0,10.0,0.0,5.0,3.0
2,8.434783,16.608696,15.347826,2.826087,3.652174,3.869565,8.826087,12.826087,0.0,9.869565,...,6.0,10.0,4.0,4.0,0.0,8.0,3.0,0.0,9.0,6.0
3,8.888889,34.055556,15.833333,12.0,14.444444,8.722222,7.444444,19.333333,4.166667,9.388889,...,0.0,0.0,1.0,2.0,0.0,5.0,6.0,3.0,,2.0
4,9.722222,25.888889,15.111111,8.166667,9.5,10.666667,9.611111,17.555556,0.0,11.277778,...,1.0,0.0,4.0,4.0,9.0,4.0,1.0,4.0,6.0,5.0


In [32]:
# Preview the first rows of the targets after re-ordering them to match X_train.
Y_train.head()

Unnamed: 0_level_0,HOME_WINS,DRAW,AWAY_WINS
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,1
1,0,1,0
2,0,0,1
3,1,0,0
4,0,1,0


### Test Data processing

##### Remove some useless features

In [33]:
# Player-level columns removed from the test feature set: the CAPTAIN / PUNCHES /
# LONG_BALLS / LONG_BALLS_WON / SHOTS_OFF_TARGET aggregates. The same list is
# applied to the home and the away table.
# NOTE(review): 'PLAYER_PUNCHES_season_sum' is the only PUNCHES aggregate that is
# not listed, so it is kept -- confirm whether that is intentional.
test_player_columns_to_drop = [
    # season sums
    'PLAYER_CAPTAIN_season_sum',
    'PLAYER_LONG_BALLS_season_sum',
    'PLAYER_LONG_BALLS_WON_season_sum',
    'PLAYER_SHOTS_OFF_TARGET_season_sum',
    # season averages
    'PLAYER_CAPTAIN_season_average',
    'PLAYER_PUNCHES_season_average',
    'PLAYER_LONG_BALLS_season_average',
    'PLAYER_LONG_BALLS_WON_season_average',
    'PLAYER_SHOTS_OFF_TARGET_season_average',
    # season standard deviations
    'PLAYER_CAPTAIN_season_std',
    'PLAYER_PUNCHES_season_std',
    'PLAYER_LONG_BALLS_season_std',
    'PLAYER_LONG_BALLS_WON_season_std',
    'PLAYER_SHOTS_OFF_TARGET_season_std',
    # last-5-matches sums
    'PLAYER_CAPTAIN_5_last_match_sum',
    'PLAYER_PUNCHES_5_last_match_sum',
    'PLAYER_LONG_BALLS_5_last_match_sum',
    'PLAYER_LONG_BALLS_WON_5_last_match_sum',
    'PLAYER_SHOTS_OFF_TARGET_5_last_match_sum',
    # last-5-matches averages
    'PLAYER_CAPTAIN_5_last_match_average',
    'PLAYER_PUNCHES_5_last_match_average',
    'PLAYER_LONG_BALLS_5_last_match_average',
    'PLAYER_LONG_BALLS_WON_5_last_match_average',
    'PLAYER_SHOTS_OFF_TARGET_5_last_match_average',
    # last-5-matches standard deviations
    'PLAYER_CAPTAIN_5_last_match_std',
    'PLAYER_PUNCHES_5_last_match_std',
    'PLAYER_LONG_BALLS_5_last_match_std',
    'PLAYER_LONG_BALLS_WON_5_last_match_std',
    'PLAYER_SHOTS_OFF_TARGET_5_last_match_std',
]

test_away_player_statistics_drop_df = test_away_player_statistics_df.drop(columns=test_player_columns_to_drop)

test_home_player_statistics_drop_df = test_home_player_statistics_df.drop(columns=test_player_columns_to_drop)

In [34]:
# Remove the descriptive POSITION column before the per-ID averaging.
test_player_descriptor_columns = ['POSITION']

test_away_player_statistics_drop2_df = test_away_player_statistics_drop_df.drop(
    test_player_descriptor_columns, axis=1
)

test_home_player_statistics_drop2_df = test_home_player_statistics_drop_df.drop(
    test_player_descriptor_columns, axis=1
)

##### Mean of the statistics of the players of each team

In [35]:
# Collapse the player rows to one row per ID by averaging every statistic.
test_away_player_statistics_groupbyID_df = (
    test_away_player_statistics_drop2_df
    .groupby(["ID"])
    .mean()
)

test_home_player_statistics_groupbyID_df = (
    test_home_player_statistics_drop2_df
    .groupby(["ID"])
    .mean()
)

In [36]:
# Tag each team-level column with the side it describes.
# This relabels the frames loaded from CSV in place, so labels that already carry
# the prefix are skipped: re-running the cell must not yield 'HOME_HOME_...' /
# 'AWAY_AWAY_...' names (which would no longer match the training columns).
test_home_team_statistics_df.columns = [
    name if name.startswith('HOME_') else 'HOME_' + name
    for name in test_home_team_statistics_df.columns
]
test_away_team_statistics_df.columns = [
    name if name.startswith('AWAY_') else 'AWAY_' + name
    for name in test_away_team_statistics_df.columns
]

In [37]:
# Tag each averaged player column with the side it describes.
# Labels that already carry the prefix are left untouched, so re-running this
# cell does not produce 'HOME_HOME_...' / 'AWAY_AWAY_...' column names.
test_home_player_statistics_groupbyID_df.columns = [
    name if name.startswith("HOME_") else "HOME_" + name
    for name in test_home_player_statistics_groupbyID_df.columns
]

test_away_player_statistics_groupbyID_df.columns = [
    name if name.startswith("AWAY_") else "AWAY_" + name
    for name in test_away_player_statistics_groupbyID_df.columns
]

##### Join the information about the players and the team together

In [38]:
# Attach the team-level statistics to the per-ID player averages of each side.
# 'ID' is used as the join key and how='outer' keeps IDs that appear in only one
# of the two tables (their missing values become NaN).
X_test_home = test_home_player_statistics_groupbyID_df.join(test_home_team_statistics_df, on=['ID'], how='outer')

X_test_away = test_away_player_statistics_groupbyID_df.join(test_away_team_statistics_df, on=['ID'], how='outer')

##### Join the information about the home and away team

In [39]:
# Place the home and away feature tables side by side, aligned on their index.
X_test = pd.concat([X_test_home, X_test_away], axis=1, join='outer')

# Treat infinite values as missing.
X_test = X_test.replace([np.inf, -np.inf], np.nan)

In [40]:
# Preview the first rows of the merged (home + away) test features.
X_test.head()

Unnamed: 0_level_0,HOME_PLAYER_ACCURATE_CROSSES_season_sum,HOME_PLAYER_ACCURATE_PASSES_season_sum,HOME_PLAYER_AERIALS_WON_season_sum,HOME_PLAYER_ASSISTS_season_sum,HOME_PLAYER_BIG_CHANCES_CREATED_season_sum,HOME_PLAYER_BIG_CHANCES_MISSED_season_sum,HOME_PLAYER_BLOCKED_SHOTS_season_sum,HOME_PLAYER_CLEARANCES_season_sum,HOME_PLAYER_CLEARANCE_OFFLINE_season_sum,HOME_PLAYER_DISPOSSESSED_season_sum,...,AWAY_TEAM_YELLOWCARDS_5_last_match_std,AWAY_TEAM_REDCARDS_5_last_match_std,AWAY_TEAM_OFFSIDES_5_last_match_std,AWAY_TEAM_ATTACKS_5_last_match_std,AWAY_TEAM_PENALTIES_5_last_match_std,AWAY_TEAM_SUBSTITUTIONS_5_last_match_std,AWAY_TEAM_BALL_SAFE_5_last_match_std,AWAY_TEAM_DANGEROUS_ATTACKS_5_last_match_std,AWAY_TEAM_INJURIES_5_last_match_std,AWAY_TEAM_GOALS_5_last_match_std
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12303,8.818182,20.272727,8.136364,8.181818,6.363636,2.0,8.727273,9.227273,0.0,18.5,...,6.0,0.0,1.0,4.0,0.0,8.0,7.0,2.0,7.0,0.0
12304,6.444444,18.055556,14.333333,6.722222,10.833333,5.222222,10.888889,11.166667,1.833333,17.0,...,0.0,0.0,3.0,6.0,8.0,5.0,1.0,1.0,7.0,9.0
12305,10.235294,27.529412,11.882353,19.647059,17.882353,8.823529,8.882353,11.0,2.941176,19.411765,...,10.0,0.0,0.0,2.0,0.0,4.0,2.0,3.0,,9.0
12306,6.722222,26.055556,19.944444,13.333333,13.944444,9.222222,9.055556,12.0,1.833333,21.611111,...,1.0,5.0,4.0,5.0,10.0,9.0,6.0,9.0,7.0,3.0
12307,9.722222,15.833333,15.277778,6.222222,12.388889,8.222222,11.777778,16.777778,13.888889,12.611111,...,3.0,0.0,6.0,5.0,0.0,4.0,3.0,10.0,3.0,1.0


### Data normalisation 

In [41]:
# Standardise the features. The scaler statistics are learned on the training
# features only and then re-used unchanged for the test features.
scaler = preprocessing.StandardScaler()

X_train_standard = scaler.fit_transform(X_train)
X_test_standard = scaler.transform(X_test)

In [42]:
# Fill the missing values with a k-nearest-neighbours imputer (50 neighbours).
imputer = KNNImputer(n_neighbors=50)

# The imputer is fitted on the training matrix only. The test matrix is then
# imputed with that fitted imputer (transform, not fit_transform) so that test
# rows are filled from training neighbours, exactly as the scaler and the PCA
# are fitted on train and merely applied to test.
X_train_standard_without_NaN = imputer.fit_transform(X_train_standard)
X_test_standard_without_NaN = imputer.transform(X_test_standard)

In [43]:
# Reduce the imputed, standardised features to 400 principal components.
# The projection is fitted on the training matrix and re-used for the test matrix.
pca = PCA(n_components=400, random_state=42)

X_train_standard_without_NaN_PCA = pca.fit_transform(X_train_standard_without_NaN)
X_test_standard_without_NaN_PCA = pca.transform(X_test_standard_without_NaN)

### Model with the 3 outcomes

In [44]:
# Empty frame sharing the target index; it will receive a single class-label column.
Y_train_new = pd.DataFrame(index=Y_train.index)

In [45]:
# Turn the three indicator columns into one class label stored in 'RESULT':
# 0 = HOME_WINS, 1 = DRAW, 2 = AWAY_WINS.
for class_id, outcome_column in enumerate(['HOME_WINS', 'DRAW', 'AWAY_WINS']):
    Y_train_new.loc[Y_train[outcome_column] == 1, 'RESULT'] = class_id
Y_train_new

Unnamed: 0_level_0,RESULT
ID,Unnamed: 1_level_1
0,2.0
1,1.0
2,2.0
3,0.0
4,1.0
...,...
12298,2.0
12299,2.0
12300,2.0
12301,0.0


### Train / validation split


In [46]:
# Hold out 20% of the PCA-projected training data for evaluation.
# NOTE: this rebinds X_train / X_test. From here on X_test is the hold-out part of
# the training data; the real test features remain available as
# X_test_standard_without_NaN_PCA.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X_train_standard_without_NaN_PCA,
    Y_train_new,
    test_size=0.2,
    random_state=42,
)

In [47]:
# Wrap both parts of the split in DataFrames (default integer column labels).
X_train, X_test = pd.DataFrame(X_train), pd.DataFrame(X_test)

In [66]:
# Flatten the training labels to the 1-D array the classifiers expect.
# After the split y_train is a one-column DataFrame, which has no .ravel()
# method, so it is converted with np.asarray first. The conversion also accepts
# an array that is already flat, which keeps the cell safe to re-run.
y_train = np.asarray(y_train).ravel()
y_train

array([1., 1., 0., ..., 0., 0., 1.])

### Gradient Boosting with sklearn

In [79]:
# Histogram-based gradient boosting on the three-class target.
# early_stopping=True makes the estimator set aside a random validation subset,
# so the seed is fixed to make the fitted model and the report reproducible.
GB = HistGradientBoostingClassifier(
    learning_rate=0.025,
    max_depth=8,
    early_stopping=True,
    random_state=42,
)
GB.fit(X_train, y_train)

# X_test / y_test are the hold-out part of the train/validation split.
y_pred2 = GB.predict(X_test)
print(classification_report(y_test, y_pred2, digits=5))

              precision    recall  f1-score   support

         0.0       0.51      0.83      0.63      1083
         1.0       0.26      0.02      0.03       620
         2.0       0.49      0.42      0.45       758

    accuracy                           0.50      2461
   macro avg       0.42      0.42      0.37      2461
weighted avg       0.44      0.50      0.42      2461


### Submission

In [80]:
# Predict on the real test features (not the hold-out split named X_test) and
# store the predicted class labels in a one-column DataFrame.
predictions = pd.DataFrame(GB.predict(X_test_standard_without_NaN_PCA))
predictions.head()

Unnamed: 0,0
0,2.0
1,2.0
2,0.0
3,0.0
4,0.0
...,...
25363,2.0
25364,2.0
25365,0.0
25366,0.0


In [81]:
# One-hot encode the predicted class (0 = HOME_WINS, 1 = DRAW, 2 = AWAY_WINS)
# into the three submission columns.
outcome_columns = ['HOME_WINS', 'DRAW', 'AWAY_WINS']
predicted_class = predictions.iloc[:, 0]

submission = pd.DataFrame(0, index=predictions.index, columns=outcome_columns)
for class_id, outcome_column in enumerate(outcome_columns):
    submission.loc[predicted_class == class_id, outcome_column] = 1

# Re-use the index of the provided example file so the rows carry the expected IDs.
Y = pd.read_csv('Y_test_random_sEE2QeA.csv', index_col=0)

submission.index = Y.index
submission.to_csv('submission.csv', index=True)

### AdaBoost Classifier

In [68]:
# AdaBoost baseline with default hyper-parameters.
# The seed is fixed so that the fitted model and the report are reproducible.
ABC = AdaBoostClassifier(random_state=42)
ABC.fit(X_train, y_train)

# X_test / y_test are the hold-out part of the train/validation split.
y_pred3 = ABC.predict(X_test)
print(classification_report(y_test, y_pred3))



              precision    recall  f1-score   support

         0.0       0.51      0.71      0.59      1083
         1.0       0.32      0.10      0.15       620
         2.0       0.44      0.44      0.44       758

    accuracy                           0.47      2461
   macro avg       0.42      0.42      0.39      2461
weighted avg       0.44      0.47      0.43      2461


### Bagging Classifier

In [69]:
# Bagging baseline with default hyper-parameters.
# Bagging resamples the training set at random, so the seed is fixed to make the
# fitted model and the report reproducible.
Bg = BaggingClassifier(random_state=42)
Bg.fit(X_train, y_train)

# X_test / y_test are the hold-out part of the train/validation split.
y_pred3 = Bg.predict(X_test)
print(classification_report(y_test, y_pred3))

              precision    recall  f1-score   support

         0.0       0.50      0.67      0.57      1083
         1.0       0.27      0.19      0.22       620
         2.0       0.41      0.31      0.35       758

    accuracy                           0.44      2461
   macro avg       0.39      0.39      0.38      2461
weighted avg       0.41      0.44      0.41      2461


### RandomForest Classifier

In [70]:
# Random-forest baseline with default hyper-parameters.
# Tree construction is randomised, so the seed is fixed to make the fitted model
# and the report reproducible.
RF = RandomForestClassifier(random_state=42)
RF.fit(X_train, y_train)

# X_test / y_test are the hold-out part of the train/validation split.
y_pred3 = RF.predict(X_test)
print(classification_report(y_test, y_pred3))

              precision    recall  f1-score   support

         0.0       0.48      0.88      0.62      1083
         1.0       0.27      0.04      0.06       620
         2.0       0.47      0.23      0.31       758

    accuracy                           0.47      2461
   macro avg       0.40      0.38      0.33      2461
weighted avg       0.42      0.47      0.38      2461


### MLP

In [85]:
# Multi-layer perceptron with three hidden layers (200, 100, 20 units),
# logistic activations and the Adam solver.
# Weight initialisation and batch shuffling are random, so the seed is fixed to
# make the fitted model and the report reproducible.
MLP = MLPClassifier(
    hidden_layer_sizes=(200, 100, 20,),
    activation='logistic',
    solver='adam',
    random_state=42,
)
MLP.fit(X_train, y_train)

# X_test / y_test are the hold-out part of the train/validation split.
y_pred = MLP.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.51      0.55      0.53      1083
         1.0       0.26      0.25      0.25       620
         2.0       0.41      0.38      0.40       758

    accuracy                           0.42      2461
   macro avg       0.39      0.39      0.39      2461
weighted avg       0.42      0.42      0.42      2461
