In [21]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder

from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import StackingClassifier

from sklearn.model_selection import KFold

In [22]:
# Restore pandas' default row-display limit in case an earlier cell changed it.
pd.reset_option("display.max_rows")

In [23]:
# Load the training set and separate the feature matrix from the target column.
df_train = pd.read_csv('train_no_na.csv')
X_train = df_train.drop(columns=['outcome'])

# The target has to be strictly binary. With a missing value get_dummies would
# silently encode that row as 0, and with more than two levels it would return
# several columns that flatten into a label vector of the wrong length.
n_outcome_levels = df_train['outcome'].nunique(dropna=False)
if n_outcome_levels != 2:
    raise ValueError(
        "expected exactly 2 distinct 'outcome' values with no missing entries, "
        f"found {n_outcome_levels}"
    )

# drop_first=True discards the first level (in sorted order), so the remaining
# level is the one encoded as 1. dtype=int keeps the labels integer regardless
# of the pandas version's default dummy dtype.
y_train = (
    pd.get_dummies(df_train['outcome'], drop_first=True, dtype=int)
    .to_numpy()
    .ravel()
)

In [24]:
# Columns with a float or int dtype are treated as numeric features.
numeric_columns = X_train.select_dtypes(include=[float, int]).columns

# Every remaining column is treated as categorical. The original column order
# is kept (instead of going through a set difference, whose iteration order
# depends on string hashing) so the encoded feature layout is identical on
# every kernel restart.
categorical_columns = [col for col in X_train.columns if col not in numeric_columns]

In [25]:
# Numeric features: fill gaps with the column median.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
]
numeric_transformer = Pipeline(numeric_steps)

# Categorical features: fill gaps with the literal 'missing', then one-hot
# encode; categories not seen during fit are ignored at transform time.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('ohe', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(categorical_steps)

# Route each column group through its own transformer.
column_routes = [
    ("num", numeric_transformer, numeric_columns),
    ("cat", categorical_transformer, categorical_columns),
]
preprocessor = ColumnTransformer(transformers=column_routes)

In [26]:
# XGBoost classifier with fixed hyperparameters, chained after the shared
# preprocessing step.
xgb_params = {
    'learning_rate': 0.01,
    'n_estimators': 481,
    'max_depth': 5,
    'min_child_weight': 3,
    'gamma': 0.2,
    'colsample_bytree': 1,
    'subsample': 0.9,
    'reg_alpha': 0.00001,
    'reg_lambda': 0.1,
    'random_state': 1,
    'n_jobs': -1,
}
xgb = XGBClassifier(**xgb_params)

xgb_pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('model', xgb),
    ]
)

# Left as the last expression so the fitted pipeline is shown as cell output.
xgb_pipeline.fit(X_train, y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='median'))]),
                                                  Index(['r_kd', 'r_sub.att', 'r_rev.', 'r_ctrl_s', 'r_sig_str_percent',
       'r_sig_str_att', 'r_total_str_percent', 'r_total_str_att',
       'r_td_percent', 'r_td_att', 'r_head_percent', 'r_head_att',
       'r_body_percent', 'r_body_att', 'r_leg_p...
                               colsample_bytree=1, gamma=0.2, gpu_id=-1,
                               importance_type='gain',
                               interaction_constraints='', learning_rate=0.01,
                               max_delta_step=0, max_depth=5,
                               min_child_weight=3, missing=nan,
                               monotone_constraints='()', n_estimators=481,
              

In [27]:
# Read the event's feature table and the matching fighter-name table.
df_294, fighters_294 = (
    pd.read_csv(csv_name) for csv_name in ('294.csv', '294_fighters.csv')
)

In [28]:
# Score the event once and reuse the result: each predict_proba call re-runs
# the whole preprocessing + model pipeline, so calling it per column is wasted
# work. Widened to float64 so the probability columns have a fixed dtype
# whatever precision the model returns.
proba_xgb_294 = xgb_pipeline.predict_proba(df_294).astype(float)

# Column 0 of the probability matrix is reported for the blue fighter and
# column 1 for the red fighter; 'predicted' is the model's hard class label.
predictions_xgb_294 = pd.DataFrame(
    {
        'predicted': xgb_pipeline.predict(df_294),
        'blue_fighter': fighters_294['b_fighter'],
        'b_prob': proba_xgb_294[:, 0],
        'red_fighter': fighters_294['r_fighter'],
        'r_prob': proba_xgb_294[:, 1],
    }
)

In [29]:
# Bare last expression: renders the prediction table as this cell's output.
predictions_xgb_294

Unnamed: 0,predicted,blue_fighter,b_prob,red_fighter,r_prob
0,1,Alexander Volkanovski,0.227377,Islam Makhachev,0.772623
1,1,Khamzat Chimaev,0.418139,Kamaru Usman,0.581861
2,1,Johnny Walker,0.373566,Magomed Ankalaev,0.626434
3,1,Warlley Alves,0.329638,Ikram Aliskerov,0.670362
4,1,Muin Gafurov,0.453668,Said Nurmagomedov,0.546332
5,0,Muhammad Mokaev,0.682317,Tim Elliot,0.317683
6,0,Trevor Peek,0.635556,Mohammad Yahya,0.364444
7,1,Victor Henry,0.237254,Javid Basharat,0.762746
8,0,Sedriques Dumas,0.608322,Abu Azaitar,0.391678
9,0,Anshul Jubli,0.621732,Mike Breeden,0.378268
