### Libraries

In [419]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier, AdaBoostClassifier, \
    BaggingClassifier, StackingClassifier
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer
from sklearn.neural_network import MLPClassifier

### Import Data

In [420]:
train_away_player_statistics_df = pd.read_csv('Train_Data/train_away_player_statistics_df.csv', index_col=0)
train_away_team_statistics_df = pd.read_csv('Train_Data/train_away_team_statistics_df.csv', index_col=0)
train_home_player_statistics_df = pd.read_csv('Train_Data/train_home_player_statistics_df.csv', index_col=0)
train_home_team_statistics_df = pd.read_csv('Train_Data/train_home_team_statistics_df.csv', index_col=0)

Y_train = pd.read_csv('Y_train_1rknArQ.csv', index_col=0)

test_away_player_statistics_df = pd.read_csv('Test_Data/test_away_player_statistics_df.csv', index_col=0)
test_away_team_statistics_df = pd.read_csv('Test_Data/test_away_team_statistics_df.csv', index_col=0)
test_home_player_statistics_df = pd.read_csv('Test_Data/test_home_player_statistics_df.csv', index_col=0)
test_home_team_statistics_df = pd.read_csv('Test_Data/test_home_team_statistics_df.csv', index_col=0)

### Train Data processing

##### Remove some useless features

In [421]:
train_away_player_statistics_drop_df = train_away_player_statistics_df.drop(labels=['PLAYER_NAME', 'PLAYER_CAPTAIN_season_sum', 'PLAYER_LONG_BALLS_season_sum', 'PLAYER_LONG_BALLS_WON_season_sum', 'PLAYER_SHOTS_OFF_TARGET_season_sum', 'PLAYER_CAPTAIN_season_average', 'PLAYER_PUNCHES_season_average', 'PLAYER_LONG_BALLS_season_average', 'PLAYER_LONG_BALLS_WON_season_average', 'PLAYER_SHOTS_OFF_TARGET_season_average', 'PLAYER_CAPTAIN_season_std', 'PLAYER_PUNCHES_season_std', 'PLAYER_LONG_BALLS_season_std', 'PLAYER_LONG_BALLS_WON_season_std', 'PLAYER_SHOTS_OFF_TARGET_season_std', 'PLAYER_CAPTAIN_5_last_match_sum', 'PLAYER_PUNCHES_5_last_match_sum', 'PLAYER_LONG_BALLS_5_last_match_sum', 'PLAYER_LONG_BALLS_WON_5_last_match_sum', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_sum', 'PLAYER_CAPTAIN_5_last_match_average', 'PLAYER_PUNCHES_5_last_match_average', 'PLAYER_LONG_BALLS_5_last_match_average', 'PLAYER_LONG_BALLS_WON_5_last_match_average', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_average', 'PLAYER_CAPTAIN_5_last_match_std', 'PLAYER_PUNCHES_5_last_match_std', 'PLAYER_LONG_BALLS_5_last_match_std', 'PLAYER_LONG_BALLS_WON_5_last_match_std', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_std'], axis=1)

train_home_player_statistics_drop_df= train_home_player_statistics_df.drop(labels=['PLAYER_NAME', 'PLAYER_CAPTAIN_season_sum', 'PLAYER_LONG_BALLS_season_sum', 'PLAYER_LONG_BALLS_WON_season_sum', 'PLAYER_SHOTS_OFF_TARGET_season_sum', 'PLAYER_CAPTAIN_season_average', 'PLAYER_PUNCHES_season_average', 'PLAYER_LONG_BALLS_season_average', 'PLAYER_LONG_BALLS_WON_season_average', 'PLAYER_SHOTS_OFF_TARGET_season_average', 'PLAYER_CAPTAIN_season_std', 'PLAYER_PUNCHES_season_std', 'PLAYER_LONG_BALLS_season_std', 'PLAYER_LONG_BALLS_WON_season_std', 'PLAYER_SHOTS_OFF_TARGET_season_std', 'PLAYER_CAPTAIN_5_last_match_sum', 'PLAYER_PUNCHES_5_last_match_sum', 'PLAYER_LONG_BALLS_5_last_match_sum', 'PLAYER_LONG_BALLS_WON_5_last_match_sum', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_sum', 'PLAYER_CAPTAIN_5_last_match_average', 'PLAYER_PUNCHES_5_last_match_average', 'PLAYER_LONG_BALLS_5_last_match_average', 'PLAYER_LONG_BALLS_WON_5_last_match_average', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_average', 'PLAYER_CAPTAIN_5_last_match_std', 'PLAYER_PUNCHES_5_last_match_std', 'PLAYER_LONG_BALLS_5_last_match_std', 'PLAYER_LONG_BALLS_WON_5_last_match_std', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_std'], axis=1)

In [422]:
train_home_team_statistics_drop_df = train_home_team_statistics_df.iloc[:,2:]
train_away_team_statistics_drop_df = train_away_team_statistics_df.iloc[:,2:]

train_home_team_statistics_drop_df.columns = 'HOME_' + train_home_team_statistics_drop_df.columns
train_away_team_statistics_drop_df.columns = 'AWAY_' + train_away_team_statistics_drop_df.columns

In [423]:
train_home_player_statistics_drop2_df = train_home_player_statistics_drop_df.drop(columns=['POSITION', 'LEAGUE', 'TEAM_NAME'])

train_away_player_statistics_drop2_df = train_away_player_statistics_drop_df.drop(columns=['POSITION', 'LEAGUE', 'TEAM_NAME'])

##### Mean of the statistics of the players of each team

In [424]:
train_home_player_statistics_groupbyID_df = train_home_player_statistics_drop2_df.groupby(["ID"]).mean()

train_away_player_statistics_groupbyID_df = train_away_player_statistics_drop2_df.groupby(["ID"]).mean()

In [425]:
train_home_player_statistics_groupbyID_df.columns = "HOME_" + train_home_player_statistics_groupbyID_df.columns

train_away_player_statistics_groupbyID_df.columns = "AWAY_" + train_away_player_statistics_groupbyID_df.columns

##### Join the information about the players and the team together

In [426]:
X_train_home = train_home_player_statistics_groupbyID_df.join(train_home_team_statistics_drop_df, on=['ID'], how='outer')

X_train_away = train_away_player_statistics_groupbyID_df.join(train_away_team_statistics_drop_df, on=['ID'], how='outer')

##### Join the information about the home and away team

In [427]:
X_train =  pd.concat([X_train_home,X_train_away],join='outer',axis=1)
Y_train = Y_train.loc[X_train.index]

X_train = X_train.replace({np.inf:np.nan,-np.inf:np.nan})

In [428]:
columns_name = X_train.columns

In [429]:
X_train.head()

Unnamed: 0_level_0,HOME_PLAYER_ACCURATE_CROSSES_season_sum,HOME_PLAYER_ACCURATE_PASSES_season_sum,HOME_PLAYER_AERIALS_WON_season_sum,HOME_PLAYER_ASSISTS_season_sum,HOME_PLAYER_BIG_CHANCES_CREATED_season_sum,HOME_PLAYER_BIG_CHANCES_MISSED_season_sum,HOME_PLAYER_BLOCKED_SHOTS_season_sum,HOME_PLAYER_CLEARANCES_season_sum,HOME_PLAYER_CLEARANCE_OFFLINE_season_sum,HOME_PLAYER_DISPOSSESSED_season_sum,...,AWAY_TEAM_YELLOWCARDS_5_last_match_std,AWAY_TEAM_REDCARDS_5_last_match_std,AWAY_TEAM_OFFSIDES_5_last_match_std,AWAY_TEAM_ATTACKS_5_last_match_std,AWAY_TEAM_PENALTIES_5_last_match_std,AWAY_TEAM_SUBSTITUTIONS_5_last_match_std,AWAY_TEAM_BALL_SAFE_5_last_match_std,AWAY_TEAM_DANGEROUS_ATTACKS_5_last_match_std,AWAY_TEAM_INJURIES_5_last_match_std,AWAY_TEAM_GOALS_5_last_match_std
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,10.222222,21.333333,15.277778,7.777778,7.222222,6.166667,9.222222,16.722222,2.777778,17.611111,...,5.0,5.0,4.0,0.0,6.0,8.0,4.0,3.0,2.0,3.0
1,9.222222,31.555556,14.555556,16.666667,10.277778,9.0,11.888889,17.166667,2.777778,21.0,...,0.0,0.0,3.0,1.0,8.0,4.0,10.0,0.0,5.0,3.0
2,8.434783,16.608696,15.347826,2.826087,3.652174,3.869565,8.826087,12.826087,0.0,9.869565,...,6.0,10.0,4.0,4.0,0.0,8.0,3.0,0.0,9.0,6.0
3,8.888889,34.055556,15.833333,12.0,14.444444,8.722222,7.444444,19.333333,4.166667,9.388889,...,0.0,0.0,1.0,2.0,0.0,5.0,6.0,3.0,,2.0
4,9.722222,25.888889,15.111111,8.166667,9.5,10.666667,9.611111,17.555556,0.0,11.277778,...,1.0,0.0,4.0,4.0,9.0,4.0,1.0,4.0,6.0,5.0


In [430]:
Y_train.head()

Unnamed: 0_level_0,HOME_WINS,DRAW,AWAY_WINS
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,0,1
1,0,1,0
2,0,0,1
3,1,0,0
4,0,1,0


### Test Data processing

##### Remove some useless features

In [431]:
test_away_player_statistics_drop_df = test_away_player_statistics_df.drop(labels=['PLAYER_CAPTAIN_season_sum', 'PLAYER_LONG_BALLS_season_sum', 'PLAYER_LONG_BALLS_WON_season_sum', 'PLAYER_SHOTS_OFF_TARGET_season_sum', 'PLAYER_CAPTAIN_season_average', 'PLAYER_PUNCHES_season_average', 'PLAYER_LONG_BALLS_season_average', 'PLAYER_LONG_BALLS_WON_season_average', 'PLAYER_SHOTS_OFF_TARGET_season_average', 'PLAYER_CAPTAIN_season_std', 'PLAYER_PUNCHES_season_std', 'PLAYER_LONG_BALLS_season_std', 'PLAYER_LONG_BALLS_WON_season_std', 'PLAYER_SHOTS_OFF_TARGET_season_std', 'PLAYER_CAPTAIN_5_last_match_sum', 'PLAYER_PUNCHES_5_last_match_sum', 'PLAYER_LONG_BALLS_5_last_match_sum', 'PLAYER_LONG_BALLS_WON_5_last_match_sum', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_sum', 'PLAYER_CAPTAIN_5_last_match_average', 'PLAYER_PUNCHES_5_last_match_average', 'PLAYER_LONG_BALLS_5_last_match_average', 'PLAYER_LONG_BALLS_WON_5_last_match_average', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_average', 'PLAYER_CAPTAIN_5_last_match_std', 'PLAYER_PUNCHES_5_last_match_std', 'PLAYER_LONG_BALLS_5_last_match_std', 'PLAYER_LONG_BALLS_WON_5_last_match_std', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_std'], axis=1)

test_home_player_statistics_drop_df = test_home_player_statistics_df.drop(labels=['PLAYER_CAPTAIN_season_sum', 'PLAYER_LONG_BALLS_season_sum', 'PLAYER_LONG_BALLS_WON_season_sum', 'PLAYER_SHOTS_OFF_TARGET_season_sum', 'PLAYER_CAPTAIN_season_average', 'PLAYER_PUNCHES_season_average', 'PLAYER_LONG_BALLS_season_average', 'PLAYER_LONG_BALLS_WON_season_average', 'PLAYER_SHOTS_OFF_TARGET_season_average', 'PLAYER_CAPTAIN_season_std', 'PLAYER_PUNCHES_season_std', 'PLAYER_LONG_BALLS_season_std', 'PLAYER_LONG_BALLS_WON_season_std', 'PLAYER_SHOTS_OFF_TARGET_season_std', 'PLAYER_CAPTAIN_5_last_match_sum', 'PLAYER_PUNCHES_5_last_match_sum', 'PLAYER_LONG_BALLS_5_last_match_sum', 'PLAYER_LONG_BALLS_WON_5_last_match_sum', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_sum', 'PLAYER_CAPTAIN_5_last_match_average', 'PLAYER_PUNCHES_5_last_match_average', 'PLAYER_LONG_BALLS_5_last_match_average', 'PLAYER_LONG_BALLS_WON_5_last_match_average', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_average', 'PLAYER_CAPTAIN_5_last_match_std', 'PLAYER_PUNCHES_5_last_match_std', 'PLAYER_LONG_BALLS_5_last_match_std', 'PLAYER_LONG_BALLS_WON_5_last_match_std', 'PLAYER_SHOTS_OFF_TARGET_5_last_match_std'], axis=1)

In [432]:
test_home_player_statistics_drop2_df = test_home_player_statistics_drop_df.drop(columns=['POSITION'])

test_away_player_statistics_drop2_df = test_away_player_statistics_drop_df.drop(columns=['POSITION'])

##### Mean of the statistics of the players of each team

In [433]:
test_home_player_statistics_groupbyID_df = test_home_player_statistics_drop2_df.groupby(["ID"]).mean()

test_away_player_statistics_groupbyID_df = test_away_player_statistics_drop2_df.groupby(["ID"]).mean()

In [434]:
test_home_team_statistics_df.columns = 'HOME_' + test_home_team_statistics_df.columns
test_away_team_statistics_df.columns = 'AWAY_' + test_away_team_statistics_df.columns

In [435]:
test_home_player_statistics_groupbyID_df.columns = "HOME_" + test_home_player_statistics_groupbyID_df.columns

test_away_player_statistics_groupbyID_df.columns = "AWAY_" + test_away_player_statistics_groupbyID_df.columns

##### Join the information about the players and the team together

In [436]:
X_test_home = test_home_player_statistics_groupbyID_df.join(test_home_team_statistics_df, on=['ID'], how='outer')

X_test_away = test_away_player_statistics_groupbyID_df.join(test_away_team_statistics_df, on=['ID'], how='outer')

##### Join the information about the home and away team

In [437]:
X_test =  pd.concat([X_test_home,X_test_away],join='outer',axis=1)

X_test = X_test.replace({np.inf:np.nan,-np.inf:np.nan})

In [438]:
X_test.head()

Unnamed: 0_level_0,HOME_PLAYER_ACCURATE_CROSSES_season_sum,HOME_PLAYER_ACCURATE_PASSES_season_sum,HOME_PLAYER_AERIALS_WON_season_sum,HOME_PLAYER_ASSISTS_season_sum,HOME_PLAYER_BIG_CHANCES_CREATED_season_sum,HOME_PLAYER_BIG_CHANCES_MISSED_season_sum,HOME_PLAYER_BLOCKED_SHOTS_season_sum,HOME_PLAYER_CLEARANCES_season_sum,HOME_PLAYER_CLEARANCE_OFFLINE_season_sum,HOME_PLAYER_DISPOSSESSED_season_sum,...,AWAY_TEAM_YELLOWCARDS_5_last_match_std,AWAY_TEAM_REDCARDS_5_last_match_std,AWAY_TEAM_OFFSIDES_5_last_match_std,AWAY_TEAM_ATTACKS_5_last_match_std,AWAY_TEAM_PENALTIES_5_last_match_std,AWAY_TEAM_SUBSTITUTIONS_5_last_match_std,AWAY_TEAM_BALL_SAFE_5_last_match_std,AWAY_TEAM_DANGEROUS_ATTACKS_5_last_match_std,AWAY_TEAM_INJURIES_5_last_match_std,AWAY_TEAM_GOALS_5_last_match_std
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12303,8.818182,20.272727,8.136364,8.181818,6.363636,2.0,8.727273,9.227273,0.0,18.5,...,6.0,0.0,1.0,4.0,0.0,8.0,7.0,2.0,7.0,0.0
12304,6.444444,18.055556,14.333333,6.722222,10.833333,5.222222,10.888889,11.166667,1.833333,17.0,...,0.0,0.0,3.0,6.0,8.0,5.0,1.0,1.0,7.0,9.0
12305,10.235294,27.529412,11.882353,19.647059,17.882353,8.823529,8.882353,11.0,2.941176,19.411765,...,10.0,0.0,0.0,2.0,0.0,4.0,2.0,3.0,,9.0
12306,6.722222,26.055556,19.944444,13.333333,13.944444,9.222222,9.055556,12.0,1.833333,21.611111,...,1.0,5.0,4.0,5.0,10.0,9.0,6.0,9.0,7.0,3.0
12307,9.722222,15.833333,15.277778,6.222222,12.388889,8.222222,11.777778,16.777778,13.888889,12.611111,...,3.0,0.0,6.0,5.0,0.0,4.0,3.0,10.0,3.0,1.0


### Data normalisation 

In [439]:
scaler = preprocessing.StandardScaler().fit(X_train)

X_train_standard = scaler.transform(X_train)
X_test_standard = scaler.transform(X_test)

In [440]:
imputer = KNNImputer(n_neighbors=50)

X_train_standard_without_NaN = imputer.fit_transform(X_train_standard)
X_test_standard_without_NaN = imputer.fit_transform(X_test_standard)

In [460]:
pca = PCA(n_components=400, random_state=42)

X_train_standard_without_NaN_PCA = pca.fit_transform(X_train_standard_without_NaN)
X_test_standard_without_NaN_PCA = pca.transform(X_test_standard_without_NaN)

### We try to model if AWAY wins

In [442]:
Y_train_new = Y_train['AWAY_WINS'] 

### train, valid test split


In [461]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X_train_standard_without_NaN_PCA, Y_train_new, test_size=0.2, random_state=42)
X_test, X_valid, y_test, y_valid = model_selection.train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [462]:
X_train = pd.DataFrame(X_train)
#X_train.columns = columns_name
X_valid = pd.DataFrame(X_valid)
#X_valid.columns = columns_name
X_test = pd.DataFrame(X_test)
#X_test.columns = columns_name

### xgb model

In [463]:
params_1 = {
    'booster': 'gbtree',
    'tree_method':'hist',
    'max_depth': 8, 
    'learning_rate': 0.025,
    'objective': 'multi:softprob',
    'num_class': 2,
    'eval_metric':'mlogloss'
    }


d_train = xgb.DMatrix(X_train.replace({0:np.nan}), y_train)
d_valid = xgb.DMatrix(X_valid.replace({0:np.nan}), y_valid)

num_round = 10000
evallist = [(d_train, 'train'), (d_valid, 'eval')]

bst = xgb.train(params_1, d_train, num_round, evallist, early_stopping_rounds=100)

[0]	train-mlogloss:0.68346	eval-mlogloss:0.68799




[1]	train-mlogloss:0.67430	eval-mlogloss:0.68275
[2]	train-mlogloss:0.66579	eval-mlogloss:0.67789
[3]	train-mlogloss:0.65741	eval-mlogloss:0.67318
[4]	train-mlogloss:0.64954	eval-mlogloss:0.66838
[5]	train-mlogloss:0.64222	eval-mlogloss:0.66420
[6]	train-mlogloss:0.63456	eval-mlogloss:0.66059
[7]	train-mlogloss:0.62725	eval-mlogloss:0.65665
[8]	train-mlogloss:0.62025	eval-mlogloss:0.65247
[9]	train-mlogloss:0.61322	eval-mlogloss:0.64910
[10]	train-mlogloss:0.60690	eval-mlogloss:0.64578
[11]	train-mlogloss:0.60033	eval-mlogloss:0.64305
[12]	train-mlogloss:0.59433	eval-mlogloss:0.63955
[13]	train-mlogloss:0.58832	eval-mlogloss:0.63675
[14]	train-mlogloss:0.58278	eval-mlogloss:0.63433
[15]	train-mlogloss:0.57705	eval-mlogloss:0.63182
[16]	train-mlogloss:0.57157	eval-mlogloss:0.62912
[17]	train-mlogloss:0.56629	eval-mlogloss:0.62702
[18]	train-mlogloss:0.56113	eval-mlogloss:0.62452
[19]	train-mlogloss:0.55562	eval-mlogloss:0.62243
[20]	train-mlogloss:0.55041	eval-mlogloss:0.62053
[21]	trai

### Score of the model 

In [470]:
train_scores = pd.read_csv('Y_train_1rknArQ.csv', index_col=0)
target = train_scores.loc[X_test.index].copy()

In [471]:
X_test_xgb = xgb.DMatrix(X_test)

predictions = bst.predict(X_test_xgb, iteration_range=(0, bst.best_iteration))
predictions = pd.DataFrame(predictions)

predictions[2] = 0
predictions.columns = [0,2,1]
predictions = (predictions.reindex(columns=[0,1,2]).rank(1,ascending=False)==1).astype(int).values
np.round(accuracy_score(predictions,target),4)

0.4447

### Submission

In [472]:
X_submission = pd.DataFrame(X_test_standard_without_NaN_PCA)
#X_submission.columns = columns_name

In [473]:
X_bench = xgb.DMatrix(X_submission)

predictions = bst.predict(X_bench, iteration_range=(0, bst.best_iteration))
predictions = pd.DataFrame(predictions)

predictions[2] = 0
predictions.columns = [0,2,1]
predictions = (predictions.reindex(columns=[0,1,2]).rank(1,ascending=False)==1).astype(int)

Y = pd.read_csv('Y_test_random_sEE2QeA.csv', index_col=0)

predictions.columns = ['HOME_WINS', 'DRAW', 'AWAY_WINS']
predictions.index = Y.index
submission = predictions
submission.to_csv('submission.csv', index=True)

### Gradient Boosting with sklearn

In [474]:
GB = HistGradientBoostingClassifier()
GB.fit(X_train, y_train)

y_pred2 = GB.predict(X_test)
print(classification_report(y_test, y_pred2))

              precision    recall  f1-score   support

           0       0.71      0.93      0.81       843
           1       0.56      0.19      0.28       387

    accuracy                           0.70      1230
   macro avg       0.64      0.56      0.54      1230
weighted avg       0.66      0.70      0.64      1230


### AdaBoost Classifier

In [475]:
ABC = AdaBoostClassifier()
ABC.fit(X_train, y_train)

y_pred3 = ABC.predict(X_test)
print(classification_report(y_test, y_pred3))



              precision    recall  f1-score   support

           0       0.71      0.89      0.79       843
           1       0.48      0.22      0.30       387

    accuracy                           0.68      1230
   macro avg       0.59      0.55      0.55      1230
weighted avg       0.64      0.68      0.64      1230


### Bagging Classifier

In [476]:
Bg = BaggingClassifier()
Bg.fit(X_train, y_train)

y_pred3 = Bg.predict(X_test)
print(classification_report(y_test, y_pred3))

              precision    recall  f1-score   support

           0       0.70      0.90      0.79       843
           1       0.45      0.17      0.24       387

    accuracy                           0.67      1230
   macro avg       0.57      0.54      0.52      1230
weighted avg       0.62      0.67      0.62      1230


### RandomForest Classifier

In [477]:
RF = RandomForestClassifier()
RF.fit(X_train, y_train)

y_pred3 = RF.predict(X_test)
print(classification_report(y_test, y_pred3))

              precision    recall  f1-score   support

           0       0.69      1.00      0.82       843
           1       0.75      0.02      0.05       387

    accuracy                           0.69      1230
   macro avg       0.72      0.51      0.43      1230
weighted avg       0.71      0.69      0.57      1230
