## UFC Fight Model

In [None]:
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_selector as selector
from sklearn import set_config
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV


In [None]:
# Load the cleaned, scraped fight data; the first CSV column becomes the index.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
ufc_df = pd.read_csv(
    "Resources/clean_scraped_data.csv",
    index_col=0,
    low_memory=False,
)
# Last expression of the cell: the notebook renders the DataFrame.
ufc_df

## Select Features

In [None]:
# Column selectors, resolved by the ColumnTransformer against the frame it is
# fitted on.  Numeric columns go to the numeric transformer; every other column
# is treated as categorical.
#
# `pd.read_csv` does not produce "category" dtype columns unless explicitly
# asked to, so selecting on dtype_include="category" matches nothing and the
# ColumnTransformer (remainder="drop" by default) silently discards every
# non-numeric feature.  Excluding "number" instead makes the two selectors
# partition the columns, so each feature reaches exactly one transformer.
numerical_cols = selector(dtype_include="number")
categorical_cols = selector(dtype_exclude="number")

# numerical_cols = [
    # "Max_Rounds",
    # "Ending_Round",
    # "B_Age",
    # "R_Age",
    # "B_Reach",
    # "R_Reach",
    # "B_Height",
    # "R_Height",
    # "B_Weight",
    # "R_Weight",
    # "R_BMI",
    # "B_BMI",
    # "R_BMI_proposed",
    # "B_BMI_proposed",
    # "R_Body_Fat_Percentage",
    # "B_Body_Fat_Percentage",
    # "R_Lean_Body_Mass",
    # "B_Lean_Body_Mass",
    # "B_Career_Significant_Strikes_Landed_PM",
    # "R_Career_Significant_Strikes_Landed_PM",
    # "R_Career_Striking_Accuracy",
    # "R_Career_Significant_Strike_Defence",
    # "R_Career_Takedown_Average",
    # "R_Career_Takedown_Accuracy",
    # "R_Career_Takedown_Defence",
    # "R_Career_Submission_Average",
    # "B_Career_Striking_Accuracy",
    # "B_Career_Significant_Strike_Defence",
    # "B_Career_Takedown_Average",
    # "B_Career_Takedown_Accuracy",
    # "B_Career_Takedown_Defence",
    # "B_Career_Submission_Average",
# ]
# categorical_cols = ["Win_By", "B_Stance", "R_Stance", "Weight_Class", "Gender"]

# target = "Winner"

## Split Train-Test


In [None]:
# Separate the target column ("Winner") from the feature columns.
y = ufc_df["Winner"]
X = ufc_df.drop(columns="Winner")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)


## VotingClassifier Pipeline

### Hard Voting

In [None]:
# Numeric branch: fill missing values with a constant (no fill_value is given,
# so SimpleImputer uses its default), append missing-value indicator columns
# (add_indicator=True), then standardize with `StandardScaler()`.
numeric_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="constant", add_indicator=True)),
        ("scaler", StandardScaler()),
    ]
)

# Categorical branch: one-hot encode; handle_unknown="ignore" keeps prediction
# from failing on categories that were not seen during fit.
categorical_transformer = Pipeline(
    [("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

# Route each group of columns to its transformer.
preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numerical_cols),
        ("cat", categorical_transformer, categorical_cols),
    ]
)

# Base estimators of the ensemble, all seeded for reproducibility.
hard_voting_estimators = [
    ("gbc", GradientBoostingClassifier(random_state=0)),
    ("rf", RandomForestClassifier(random_state=0)),
    ("mlp", MLPClassifier(random_state=0)),
    ("svc", SVC(random_state=0)),
    ("xgb", XGBClassifier(random_state=0)),
]
# voting="hard": the ensemble predicts the majority class label.
hard_voter = VotingClassifier(estimators=hard_voting_estimators, voting="hard")

# Full prediction pipeline: preprocessing followed by the voting ensemble.
clf = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("classifier", hard_voter),
    ]
)

# Fit on the training split, then score accuracy on the held-out split.
y_pred = clf.fit(X_train, y_train).predict(X_test)
score = accuracy_score(y_test, y_pred)
print("Hard Voting Score: %.3f" % score)

### Display Diagram of Pipeline


In [None]:
# Ask scikit-learn to render estimators as a diagram rather than plain text.
set_config(display="diagram")
# Bare expression as the cell's last statement: the notebook displays `clf`.
clf

### Classification Report


In [None]:
# Predict on the held-out split and print the per-class metrics report.
y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

### Confusion Matrix


In [None]:
# Plot the confusion matrix of the test-set predictions, with integer cell
# counts ("d") on the "Blues" colormap.
disp = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred,
    values_format="d",
    cmap="Blues",
)
plt.show()

### Soft Voting

In [None]:
# Numeric branch: fill missing values with a constant (no fill_value is given,
# so SimpleImputer uses its default), append missing-value indicator columns
# (add_indicator=True), then standardize with `StandardScaler()`.
numeric_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="constant", add_indicator=True)),
        ("scaler", StandardScaler()),
    ]
)

# Categorical branch: one-hot encode; handle_unknown="ignore" keeps prediction
# from failing on categories that were not seen during fit.
categorical_transformer = Pipeline(
    [("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

# Route each group of columns to its transformer.
preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numerical_cols),
        ("cat", categorical_transformer, categorical_cols),
    ]
)

# Base estimators of the ensemble, all seeded for reproducibility.
# Soft voting combines predicted class probabilities, so the SVC is built with
# probability=True to make it provide them.
soft_voting_estimators = [
    ("gbc", GradientBoostingClassifier(random_state=0)),
    ("rf", RandomForestClassifier(random_state=0)),
    ("mlp", MLPClassifier(random_state=0)),
    ("svc", SVC(random_state=0, probability=True)),
    ("xgb", XGBClassifier(random_state=0)),
]
soft_voter = VotingClassifier(estimators=soft_voting_estimators, voting="soft")

# Full prediction pipeline: preprocessing followed by the voting ensemble.
clf = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("classifier", soft_voter),
    ]
)

# Fit on the training split, then score accuracy on the held-out split.
y_pred = clf.fit(X_train, y_train).predict(X_test)
score = accuracy_score(y_test, y_pred)
print("Soft Voting Score: %.3f" % score)

### Display Diagram of Pipeline

In [None]:
# Ask scikit-learn to render estimators as a diagram rather than plain text.
set_config(display="diagram")
# Bare expression as the cell's last statement: the notebook displays `clf`.
clf

### Classification Report

In [None]:
# Predict on the held-out split and print the per-class metrics report.
y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

### Confusion Matrix

In [None]:
# Plot the confusion matrix of the test-set predictions, with integer cell
# counts ("d") on the "Blues" colormap.
disp = ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_pred,
    values_format="d",
    cmap="Blues",
)
plt.show()

### Model Persistence


In [None]:
# from joblib import dump, load

# dump(clf, "clf.joblib")

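# !!! WARNING: joblib.load unpickles the file and can execute arbitrary code.
# Only load files you created yourself or obtained from a trusted source. !!!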
# clf = load("clf.joblib")

# # Prediction based on saved pipeline.
# selection = X.iloc[[5]]
# clf.predict(selection)
