# 🔹UFC Feature Engineering

## 1. Import Libraries and Setup Environment

In [35]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Resolve the project root as the parent of the kernel's working directory.
# NOTE(review): this assumes the notebook is run from a folder located
# directly under the project root — confirm if the notebook is ever moved.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, '..'))

# Make modules under <project_root>/src importable. The membership check keeps
# repeated runs of this cell from appending duplicate entries to sys.path.
src_dir = os.path.join(project_root, 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)
from utils.helpers import get_predictions

## 2. Load Data

In [43]:
# Locate the ETL output inside <project_root>/data/processed
processed_dir = os.path.join(project_root, 'data', 'processed')
file_path = os.path.join(processed_dir, 'ufc_etl.csv')

# Read the CSV into a DataFrame and report its dimensions
ufc_data = pd.read_csv(file_path)
print(f"Data successfully loaded: {ufc_data.shape[0]} rows, {ufc_data.shape[1]} columns.")

Data successfully loaded: 6541 rows, 60 columns.


In [44]:
# Binary target: 1 when 'Winner' equals 'Blue', 0 for every other value.
# A vectorised comparison replaces the per-row Python lambda.
ufc_data['label'] = ufc_data['Winner'].eq('Blue').astype('int64')

# The raw outcome column is no longer needed once the label exists
ufc_data = ufc_data.drop(columns='Winner')

# One-hot encode the two flag columns; drop_first=True omits the first level of each
ufc_data = pd.get_dummies(ufc_data, columns=['TitleBout', 'Gender'], drop_first=True)


In [45]:
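# NOTE: disabled dtype-casting step, kept for reference only. It targets `ufc_raw`,
# which is not defined in the cells shown here (the working frame is `ufc_data`).
#categorical_cols = ['RedFighter', 'BlueFighter', 'WeightClass', 'BetterRank']
#binary_cols = ['TitleBout_True', 'Gender_MALE', 'label']

#for col in categorical_cols:
    #ufc_raw[col] = ufc_raw[col].astype('category')

#for col in binary_cols:
    #ufc_raw[col] = ufc_raw[col].astype(int)  # make sure these are 0/1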

In [46]:
# Show, in order: the first few records, the column index,
# and the data type of each column
display(ufc_data.head(), ufc_data.columns, ufc_data.dtypes)

Unnamed: 0,RedFighter,BlueFighter,NumberOfRounds,BlueCurrentLoseStreak,BlueCurrentWinStreak,BlueDraws,BlueLongestWinStreak,BlueLosses,BlueTotalRoundsFought,BlueTotalTitleBouts,...,HeightDif,ReachDif,AgeDif,SigStrDif,AvgSubAttDif,AvgTDDif,BetterRank,label,TitleBout_True,Gender_MALE
0,Colby Covington,Joaquin Buckley,5,0,5,0,5,4,34,0,...,-2.54,10.16,-6,0.25,-0.2,-1.83,Red,1,False,True
1,Cub Swanson,Billy Quarantillo,3,1,0,0,4,4,28,0,...,5.08,0.0,-5,2.69,0.7,0.2,neither,0,False,True
2,Manel Kape,Bruno Silva,3,0,4,0,4,2,16,0,...,-2.54,-7.62,3,-1.12,-0.2,1.72,Red,0,False,True
3,Vitor Petrino,Dustin Jacoby,3,2,0,1,4,6,35,0,...,2.54,-2.54,9,2.68,-0.8,-3.62,neither,1,False,True
4,Adrian Yanez,Daniel Marcos,3,0,4,0,4,0,13,0,...,0.0,-2.54,0,-0.57,0.0,0.25,neither,1,False,True


Index(['RedFighter', 'BlueFighter', 'NumberOfRounds', 'BlueCurrentLoseStreak',
       'BlueCurrentWinStreak', 'BlueDraws', 'BlueLongestWinStreak',
       'BlueLosses', 'BlueTotalRoundsFought', 'BlueTotalTitleBouts',
       'BlueWinsByDecisionMajority', 'BlueWinsByDecisionSplit',
       'BlueWinsByDecisionUnanimous', 'BlueWinsByKO', 'BlueWinsBySubmission',
       'BlueWinsByTKODoctorStoppage', 'BlueWins', 'BlueStance',
       'BlueHeightCms', 'BlueReachCms', 'BlueWeightLbs',
       'RedCurrentLoseStreak', 'RedCurrentWinStreak', 'RedDraws',
       'RedLongestWinStreak', 'RedLosses', 'RedTotalRoundsFought',
       'RedTotalTitleBouts', 'RedWinsByDecisionMajority',
       'RedWinsByDecisionSplit', 'RedWinsByDecisionUnanimous', 'RedWinsByKO',
       'RedWinsBySubmission', 'RedWinsByTKODoctorStoppage', 'RedWins',
       'RedStance', 'RedHeightCms', 'RedReachCms', 'RedWeightLbs', 'RedAge',
       'BlueAge', 'LoseStreakDif', 'WinStreakDif', 'LongestWinStreakDif',
       'WinDif', 'LossDif', 'T

RedFighter                      object
BlueFighter                     object
NumberOfRounds                   int64
BlueCurrentLoseStreak            int64
BlueCurrentWinStreak             int64
BlueDraws                        int64
BlueLongestWinStreak             int64
BlueLosses                       int64
BlueTotalRoundsFought            int64
BlueTotalTitleBouts              int64
BlueWinsByDecisionMajority       int64
BlueWinsByDecisionSplit          int64
BlueWinsByDecisionUnanimous      int64
BlueWinsByKO                     int64
BlueWinsBySubmission             int64
BlueWinsByTKODoctorStoppage      int64
BlueWins                         int64
BlueStance                      object
BlueHeightCms                  float64
BlueReachCms                   float64
BlueWeightLbs                    int64
RedCurrentLoseStreak             int64
RedCurrentWinStreak              int64
RedDraws                         int64
RedLongestWinStreak              int64
RedLosses                

In [49]:
# Columns retained for the processed dataset: the listed difference features plus the target
selected_columns = [
    'LossDif', 'TotalRoundDif', 'TotalTitleBoutDif', 'KODif', 'SubDif',
    'HeightDif', 'ReachDif', 'AgeDif', 'SigStrDif', 'AvgSubAttDif',
    'AvgTDDif', 'label',
]
ufc_data = ufc_data[selected_columns]

# Preview the reduced frame
display(ufc_data.head())

Unnamed: 0,LossDif,TotalRoundDif,TotalTitleBoutDif,KODif,SubDif,HeightDif,ReachDif,AgeDif,SigStrDif,AvgSubAttDif,AvgTDDif,label
0,0,-24,-4,4,-2,-2.54,10.16,-6,0.25,-0.2,-1.83,1
1,-9,-54,0,-2,-1,5.08,0.0,-5,2.69,0.7,0.2,0
2,-1,-1,0,1,1,-2.54,-7.62,3,-1.12,-0.2,1.72,0
3,5,21,0,2,-1,2.54,-2.54,9,2.68,-0.8,-3.62,1
4,-2,-2,0,-5,0,0.0,-2.54,0,-0.57,0.0,0.25,1


In [48]:
# Write the processed frame to <project_root>/data/processed/ufc_processed.csv.
# The path is built with os.path.join (as in the load cell) instead of a
# hard-coded '/' separator, so it stays portable across platforms.
output_path = os.path.join(project_root, 'data', 'processed', 'ufc_processed.csv')
ufc_data.to_csv(output_path, index=False)
print("\nFeature Engineering file saved as 'ufc_processed.csv'.")


Feature Engineering file saved as 'ufc_processed.csv'.
