# 🔹 UFC Deployment Notebook

<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

## 1. Import Libraries and Setup Environment

In [1]:
# Import necessary libraries
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Let pandas print up to 200 characters per cell before truncating text columns
pd.set_option('display.max_colwidth', 200)

# The parent of the current working directory is treated as the project root
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, '..'))

# Make the modules under <project_root>/src importable.
# Guarded so that re-running this cell does not stack duplicate sys.path entries.
src_dir = os.path.join(project_root, 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)
from metrics import *
from model_factory import model_factory
from model import UFCModel
from data import UFCData
from config import *
from io_model import load_data, load_model
from helpers import *

<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

## 2. Load Data

In [2]:
# Load the UFCData object and print its summary
try:
    ufc_data = load_data()
    ufc_data.summary()
except Exception as e:
    print_header(f"Error loading training data: {e}", color='bright_red')
    # Re-raise after reporting: otherwise `ufc_data` would be left undefined
    # (or stale from a previous run) and execution would silently continue.
    raise

/home/mlioi/ufc-predictor/data/processed/ufc_data.pkl
📦 UFCData object loaded from: /home/mlioi/ufc-predictor/data/processed/ufc_data.pkl
📊 UFC Dataset Summary
----------------------------------------
🧪 Total samples      : 6001
🧪 Train/Test split  : 4800 / 1201
🧪 Total features     : 18

🔢 Numerical features : 16
🔠 Categorical features: 2
    - Binary          : 1
    - Multiclass      : 1

🏷 Label distribution (raw):
   - Class 0: 3484 (58.1%)
   - Class 1: 2517 (41.9%)

✅ No missing values detected

📈 Feature summary statistics (train set):
                           mean      std       min       max
LoseStreakDif             0.059    1.012    -6.000     6.000
WinStreakDif             -0.162    1.921   -18.000    10.000
KODif                    -0.536    2.181   -21.000    14.000
SubDif                   -0.334    1.877   -15.000    10.000
HeightDif                -0.066    6.843  -187.960    30.480
AgeDif                    0.196    5.213   -17.000    17.000
SigStrDif              

In [3]:
# Path to the processed CSV, relative to the project root
file_path = os.path.join(project_root, 'data', 'processed', 'ufc_etl.csv')

# Load the CSV into a DataFrame
try:
    ufc_df = pd.read_csv(file_path)
    print_header(f"Data successfully loaded: {ufc_df.shape[0]} rows, {ufc_df.shape[1]} columns.", color='bright_green')
except Exception as e:
    print_header(f"Error loading data: {e}", color='bright_red')
    # Re-raise after reporting: the following cells need `ufc_df`, so stopping
    # here is clearer than a NameError (or a stale frame) further down.
    raise

[92m╔════════════════════════════════════════════════════╗
║  Data successfully loaded: 6057 rows, 70 columns.  ║
╚════════════════════════════════════════════════════╝[0m


<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

## 3. Load Models

In [4]:
def _load_wrapped(model_key):
    """Load the stored model for `model_key` (verbosely) and wrap it in a UFCModel."""
    return UFCModel(model=load_model(model_key, verbose=True))


# Build the {name: UFCModel} dictionary, one entry per name in `pretty_model_name`
model_dict = {model_key: _load_wrapped(model_key) for model_key in pretty_model_name}

# Print a header line followed by the summary of every loaded model
for name, ufc_model in model_dict.items():
    header = f"\n🛡️ Model: {name}"
    print(header)
    ufc_model.summary()

📦 Model Logistic Regression loaded from: /home/mlioi/ufc-predictor/models/lr_best.pkl
📦 Model Random Forest loaded from: /home/mlioi/ufc-predictor/models/rf_best.pkl
📦 Model Support Vector Machine loaded from: /home/mlioi/ufc-predictor/models/svm_best.pkl
📦 Model K-Nearest Neighbors loaded from: /home/mlioi/ufc-predictor/models/knn_best.pkl
📦 Model AdaBoost loaded from: /home/mlioi/ufc-predictor/models/ab_best.pkl
📦 Model Naive Bayes loaded from: /home/mlioi/ufc-predictor/models/nb_best.pkl
📦 Model Extra Trees loaded from: /home/mlioi/ufc-predictor/models/et_best.pkl
📦 Model Gradient Boosting loaded from: /home/mlioi/ufc-predictor/models/gb_best.pkl
📦 Model Quadratic Discriminant Analysis loaded from: /home/mlioi/ufc-predictor/models/qda_best.pkl
📦 Model Neural Network loaded from: /home/mlioi/ufc-predictor/models/nn_best.pkl
📦 Model XGBoost loaded from: /home/mlioi/ufc-predictor/models/xgb_best.pkl

🛡️ Model: lr_best
[94m╔══════════════════════════════╗
║  Model: Logistic Regression 

## 4. Cleaning

In [5]:
# Drop catch-weight bouts. `.copy()` yields an independent frame, so the column
# assignments made on `ufc_df` further down do not write into a slice of the
# loaded data (which triggers pandas' SettingWithCopyWarning).
ufc_df = ufc_df[ufc_df['WeightClass'] != 'Catch Weight'].copy()

In [6]:
# List the distinct values of the WeightClass column
ufc_df['WeightClass'].unique()

array(['Welterweight', 'Featherweight', 'Flyweight', 'Light Heavyweight',
       'Bantamweight', 'Lightweight', "Women's Flyweight",
       "Women's Strawweight", 'Heavyweight', 'Middleweight',
       "Women's Bantamweight", "Women's Featherweight"], dtype=object)

In [7]:
# Weight groups and the weight classes assigned to each of them
_classes_by_group = {
    'Light': ['Flyweight', 'Bantamweight', 'Featherweight', 'Lightweight'],
    'Medium': ['Welterweight', 'Middleweight'],
    'Heavy': ['Light Heavyweight', 'Heavyweight'],
    'Women': [
        "Women's Flyweight",
        "Women's Strawweight",
        "Women's Bantamweight",
        "Women's Featherweight",
    ],
}

# Invert the table into the WeightClass -> WeightGroup lookup
weight_class_map = {
    weight_class: group
    for group, classes in _classes_by_group.items()
    for weight_class in classes
}

# Add the group as a new column (a class missing from the lookup maps to NaN)
ufc_df['WeightGroupMap'] = ufc_df['WeightClass'].map(weight_class_map)

In [8]:
# Show the column names of ufc_df
ufc_df.columns

Index(['RedFighter', 'BlueFighter', 'RedOdds', 'BlueOdds', 'RedExpectedValue',
       'BlueExpectedValue', 'Date', 'Location', 'Country', 'TitleBout',
       'WeightClass', 'Gender', 'NumberOfRounds', 'BlueCurrentLoseStreak',
       'BlueCurrentWinStreak', 'BlueDraws', 'BlueLongestWinStreak',
       'BlueLosses', 'BlueTotalRoundsFought', 'BlueTotalTitleBouts',
       'BlueWinsByDecisionMajority', 'BlueWinsByDecisionSplit',
       'BlueWinsByDecisionUnanimous', 'BlueWinsByKO', 'BlueWinsBySubmission',
       'BlueWinsByTKODoctorStoppage', 'BlueWins', 'BlueStance',
       'BlueHeightCms', 'BlueReachCms', 'BlueWeightLbs',
       'RedCurrentLoseStreak', 'RedCurrentWinStreak', 'RedDraws',
       'RedLongestWinStreak', 'RedLosses', 'RedTotalRoundsFought',
       'RedTotalTitleBouts', 'RedWinsByDecisionMajority',
       'RedWinsByDecisionSplit', 'RedWinsByDecisionUnanimous', 'RedWinsByKO',
       'RedWinsBySubmission', 'RedWinsByTKODoctorStoppage', 'RedWins',
       'RedStance', 'RedHeightCms'

In [9]:
# Display ufc_df as the cell output
ufc_df

Unnamed: 0,RedFighter,BlueFighter,RedOdds,BlueOdds,RedExpectedValue,BlueExpectedValue,Date,Location,Country,TitleBout,...,ReachDif,AgeDif,SigStrDif,AvgSubAttDif,AvgTDDif,BetterRank,Finish,FightStance,label,WeightGroupMap
0,Colby Covington,Joaquin Buckley,205.0,-250.0,205.0000,40.0000,2024-12-14,"Tampa, Florida, USA",USA,False,...,10.16,-6,0.2500,-0.2000,-1.8300,Red,KO/TKO,Open Stance,1,Medium
1,Cub Swanson,Billy Quarantillo,124.0,-148.0,124.0000,67.5676,2024-12-14,"Tampa, Florida, USA",USA,False,...,0.00,-5,2.6900,0.7000,0.2000,neither,KO/TKO,Closed Stance,0,Light
2,Manel Kape,Bruno Silva,-395.0,310.0,25.3165,310.0000,2024-12-14,"Tampa, Florida, USA",USA,False,...,-7.62,3,-1.1200,-0.2000,1.7200,Red,KO/TKO,Open Stance,0,Light
3,Vitor Petrino,Dustin Jacoby,-340.0,270.0,29.4118,270.0000,2024-12-14,"Tampa, Florida, USA",USA,False,...,-2.54,9,2.6800,-0.8000,-3.6200,neither,KO/TKO,Closed Stance,1,Heavy
4,Adrian Yanez,Daniel Marcos,185.0,-225.0,185.0000,44.4444,2024-12-14,"Tampa, Florida, USA",USA,False,...,-2.54,0,-0.5700,0.0000,0.2500,neither,S-DEC,Closed Stance,1,Light
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6052,Duane Ludwig,Darren Elkins,-155.0,135.0,64.5161,135.0000,2010-03-21,"Broomfield, Colorado, USA",USA,False,...,2.54,6,-13.6667,0.0000,0.0000,neither,KO/TKO,Closed Stance,1,Light
6053,John Howard,Daniel Roberts,-210.0,175.0,47.6190,175.0000,2010-03-21,"Broomfield, Colorado, USA",USA,False,...,7.62,-2,-18.0000,-1.0000,-4.6667,neither,KO/TKO,Open Stance,0,Medium
6054,Brendan Schaub,Chase Gormley,-260.0,220.0,38.4615,220.0000,2010-03-21,"Broomfield, Colorado, USA",USA,False,...,-2.12,0,-4.0000,1.0000,1.0000,neither,KO/TKO,Closed Stance,0,Heavy
6055,Mike Pierce,Julio Paulino,-420.0,335.0,23.8095,335.0000,2010-03-21,"Broomfield, Colorado, USA",USA,False,...,7.62,-5,-40.5000,0.0000,-3.5000,neither,U-DEC,Closed Stance,0,Medium


## 5. Create Fighters Database

In [34]:
# Ensure 'Date' is datetime (needed for `.dt.year` and for chronological sorting)
ufc_df['Date'] = pd.to_datetime(ufc_df['Date'])

# Corner-specific columns: everything prefixed 'Blue' / 'Red' except the fighter name
blue_columns = [col for col in ufc_df.columns if col.startswith('Blue') and col != 'BlueFighter']
red_columns  = [col for col in ufc_df.columns if col.startswith('Red') and col != 'RedFighter']

# Bout-level columns copied onto both fighters' rows (+ label)
shared_columns = ['Date', 'Gender', 'WeightGroupMap', 'WeightClass', 'Location', 'label']


def _corner_frame(fights, corner, corner_columns):
    """Return one row per bout for the fighter in `corner` ('Blue' or 'Red').

    The result holds `shared_columns`, the fighter name as 'Fighter', the
    corner's columns with the corner prefix removed, plus 'Year' and 'Corner'.
    """
    frame = fights[shared_columns + [f'{corner}Fighter'] + corner_columns].copy()
    # Strip only the leading prefix; str.replace would also remove any later
    # occurrence of 'Blue' / 'Red' inside a column name.
    frame.columns = shared_columns + ['Fighter'] + [col[len(corner):] for col in corner_columns]
    frame['Year'] = frame['Date'].dt.year
    frame['Corner'] = corner
    return frame


ufc_blue = _corner_frame(ufc_df, 'Blue', blue_columns)
ufc_red  = _corner_frame(ufc_df, 'Red', red_columns)

# Combine and sort newest first. Within a date, 'Corner' is sorted ascending
# (alphabetically), so Blue rows come before Red rows.
fighters_df = pd.concat([ufc_red, ufc_blue], ignore_index=True)
fighters_df = fighters_df.sort_values(by=['Date', 'Corner'], ascending=[False, True]).reset_index(drop=True)

# 'Win' is 'Yes' when the row's corner won: label 0 -> Red won, label 1 -> Blue won
red_won  = (fighters_df['Corner'] == 'Red') & (fighters_df['label'] == 0)
blue_won = (fighters_df['Corner'] == 'Blue') & (fighters_df['label'] == 1)
fighters_df['Win'] = (red_won | blue_won).map({True: 'Yes', False: 'No'})

# Missing record counts are treated as 0 so they can be stored as integers
fighters_df[['Wins', 'Losses', 'Draws']] = fighters_df[['Wins', 'Losses', 'Draws']].fillna(0).astype(int)

# Create 'Record' column as string 'Wins-Losses-Draws'
fighters_df['Record'] = (
    fighters_df['Wins'].astype(str) + '-' +
    fighters_df['Losses'].astype(str) + '-' +
    fighters_df['Draws'].astype(str)
)

# Create 'WinRate' column as a percentage string. A fighter with no recorded
# bouts (0-0-0) has an undefined rate; show 'N/A' instead of the 'nan%' that
# the 0/0 division would otherwise produce.
total_bouts = fighters_df['Wins'] + fighters_df['Losses'] + fighters_df['Draws']
win_rate_pct = (fighters_df['Wins'] / total_bouts * 100).round(1)
fighters_df['WinRate'] = (win_rate_pct.astype(str) + '%').where(total_bouts > 0, 'N/A')

# Preview result
fighters_df[['Date', 'Fighter', 'Corner', 'Gender', 'WeightGroupMap', 'WeightClass', 'Year', 'Win', 'Record', 'WinRate']]

Unnamed: 0,Date,Fighter,Corner,Gender,WeightGroupMap,WeightClass,Year,Win,Record,WinRate
0,2024-12-14,Joaquin Buckley,Blue,MALE,Medium,Welterweight,2024,Yes,10-4-0,71.4%
1,2024-12-14,Billy Quarantillo,Blue,MALE,Light,Featherweight,2024,No,7-4-0,63.6%
2,2024-12-14,Bruno Silva,Blue,MALE,Light,Flyweight,2024,No,4-2-0,66.7%
3,2024-12-14,Dustin Jacoby,Blue,MALE,Heavy,Light Heavyweight,2024,Yes,8-6-1,53.3%
4,2024-12-14,Daniel Marcos,Blue,MALE,Light,Bantamweight,2024,Yes,4-0-0,100.0%
...,...,...,...,...,...,...,...,...,...,...
11997,2010-03-21,Duane Ludwig,Red,MALE,Light,Lightweight,2010,No,2-1-0,66.7%
11998,2010-03-21,John Howard,Red,MALE,Medium,Welterweight,2010,Yes,3-0-0,100.0%
11999,2010-03-21,Brendan Schaub,Red,MALE,Heavy,Heavyweight,2010,Yes,0-1-0,0.0%
12000,2010-03-21,Mike Pierce,Red,MALE,Medium,Welterweight,2010,Yes,1-1-0,50.0%


In [32]:
# Show the column names of fighters_df
fighters_df.columns

Index(['Date', 'Gender', 'WeightGroupMap', 'WeightClass', 'Location', 'label',
       'Fighter', 'Odds', 'ExpectedValue', 'CurrentLoseStreak',
       'CurrentWinStreak', 'Draws', 'LongestWinStreak', 'Losses',
       'TotalRoundsFought', 'TotalTitleBouts', 'WinsByDecisionMajority',
       'WinsByDecisionSplit', 'WinsByDecisionUnanimous', 'WinsByKO',
       'WinsBySubmission', 'WinsByTKODoctorStoppage', 'Wins', 'Stance',
       'HeightCms', 'ReachCms', 'WeightLbs', 'Age', 'Year', 'Corner', 'Win',
       'Record'],
      dtype='object')

In [36]:
# List the distinct fighter names in fighters_df
fighters_df['Fighter'].unique()

array(['Joaquin Buckley', 'Billy Quarantillo', 'Bruno Silva', ...,
       'Paul Buentello', 'Shannon Gugerty', 'Chase Gormley'],
      shape=(2007,), dtype=object)