# 🔹 UFC Model Experiments

<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

## 1. Import Libraries and Setup Environment

In [1]:
# Import necessary libraries
import os
import sys
import pandas as pd
import numpy as np

# Resolve the project root as the parent of the current working directory.
# NOTE(review): this assumes the kernel is started from a direct sub-folder of
# the project (the "../img/..." links in the markdown suggest so) — confirm.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, '..'))

# Make the packages under <project_root>/src importable. The membership check
# keeps this cell idempotent: re-running it does not append a duplicate entry
# to sys.path.
src_dir = os.path.join(project_root, 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

# NOTE(review): the wildcard imports hide which module provides the helpers
# used later in this notebook (print_header, load_model, pretty_model_names,
# parameters_dict, compare_metrics, best_model_per_metric); consider importing
# those names explicitly.
from utils.metrics import *
from models.model_factory import *
from models.config import *
from utils.io_model import *
from utils.helpers import *

# Paths to the processed train / test CSV files
processed_dir = os.path.join(project_root, 'data', 'processed')
train_path = os.path.join(processed_dir, 'ufc_train.csv')
test_path = os.path.join(processed_dir, 'ufc_test.csv')

<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

## 2. Load Data

In [2]:
# Load the training split into a DataFrame and report its size.
try:
    ufc_train = pd.read_csv(train_path)
    print_header(f"Data Training successfully loaded: {ufc_train.shape[0]} rows, {ufc_train.shape[1]} columns.", color='bright_green')
except Exception as e:
    print_header(f"Error loading training data: {e}", color='bright_red')
    # Re-raise after reporting: swallowing the error would let the notebook
    # keep running with `ufc_train` undefined, or silently reuse a stale value
    # left in the kernel by an earlier run.
    raise

[92m╔═════════════════════════════════════════════════════════════╗
║  Data Training successfully loaded: 5232 rows, 47 columns.  ║
╚═════════════════════════════════════════════════════════════╝[0m


In [3]:
# Load the testing split into a DataFrame and report its size.
try:
    ufc_test = pd.read_csv(test_path)
    print_header(f"Data Testing successfully loaded: {ufc_test.shape[0]} rows, {ufc_test.shape[1]} columns.", color='bright_green')
except Exception as e:
    print_header(f"Error loading testing data: {e}", color='bright_red')
    # Re-raise after reporting: `ufc_test` is required by the metrics
    # comparison further down, so continuing would only fail later with a
    # NameError or silently evaluate on a stale DataFrame from an earlier run.
    raise

[92m╔════════════════════════════════════════════════════════════╗
║  Data Testing successfully loaded: 1309 rows, 47 columns.  ║
╚════════════════════════════════════════════════════════════╝[0m


<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

## 3. Load Models

In [4]:
# Load every persisted model and key it by its display name.
# `pretty_model_names` maps each key accepted by `load_model` to the
# human-readable name used as the dictionary key here.
models_dict = {
    display_name: load_model(model_key)
    for model_key, display_name in pretty_model_names.items()
}

📦 Model Logistic Regression loaded from: /home/mfourier/ufc-predictor/models/lr_best.pkl
📦 Model Random Forest loaded from: /home/mfourier/ufc-predictor/models/rf_best.pkl
📦 Model Support Vector Machine loaded from: /home/mfourier/ufc-predictor/models/svm_best.pkl
📦 Model K-Nearest Neighbors loaded from: /home/mfourier/ufc-predictor/models/knn_best.pkl
📦 Model AdaBoost loaded from: /home/mfourier/ufc-predictor/models/ab_best.pkl
📦 Model Naive Bayes loaded from: /home/mfourier/ufc-predictor/models/nb_best.pkl
📦 Model Extra Trees loaded from: /home/mfourier/ufc-predictor/models/et_best.pkl
📦 Model Gradient Boosting loaded from: /home/mfourier/ufc-predictor/models/gb_best.pkl
📦 Model Quadratic Discriminant Analysis loaded from: /home/mfourier/ufc-predictor/models/qda_best.pkl


<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

## 4. Evaluate Models

### Best Parameters Dictionary

In [6]:
# Gather the hyperparameters of every loaded model. As the output of the next
# cell shows, the result is a dict keyed by model display name whose values
# are {parameter name: value} dicts.
parameters = parameters_dict(models_dict)

In [7]:
# Display the per-model hyperparameter dictionary.
parameters

{'Logistic Regression': {'C': 1, 'solver': 'liblinear'},
 'Random Forest': {'max_depth': 10, 'n_estimators': 100},
 'Support Vector Machine': {'C': 1, 'gamma': 'auto', 'kernel': 'rbf'},
 'K-Nearest Neighbors': {'metric': 'euclidean',
  'n_neighbors': 7,
  'weights': 'distance'},
 'AdaBoost': {'learning_rate': 1.0, 'n_estimators': 100},
 'Naive Bayes': {'var_smoothing': 1e-06},
 'Extra Trees': {'max_depth': 10, 'n_estimators': 100},
 'Gradient Boosting': {'learning_rate': 0.1,
  'max_depth': 3,
  'n_estimators': 50},
 'Quadratic Discriminant Analysis': {'reg_param': 1}}

### Metrics Comparison

In [8]:
# Score every loaded model against the test DataFrame. The displayed result
# has one row per model with Accuracy, Precision, Recall and F1 Score columns.
# NOTE(review): how the target column is selected from `ufc_test` is decided
# inside compare_metrics and is not visible here.
model_metrics = compare_metrics(models_dict, ufc_test)

[92m╔══════════════════════════════════════╗
║  Model Performance Metrics Computed  ║
╚══════════════════════════════════════╝[0m


In [9]:
# Display the per-model metrics table.
model_metrics

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Logistic Regression,0.62796,0.611307,0.314545,0.415366
Random Forest,0.608862,0.589623,0.227273,0.328084
Support Vector Machine,0.61039,0.596154,0.225455,0.327177
K-Nearest Neighbors,0.546218,0.452174,0.378182,0.411881
AdaBoost,0.607334,0.555901,0.325455,0.41055
Naive Bayes,0.598167,0.52765,0.416364,0.465447
Extra Trees,0.595111,0.635135,0.085455,0.150641
Gradient Boosting,0.617265,0.613953,0.24,0.345098
Quadratic Discriminant Analysis,0.591291,0.517007,0.414545,0.460141


### Best Model per Metric

In [10]:
# Select the best model for each metric. The result is consumed through
# `.items()` in the next cell as (metric, model name) pairs.
best_models_by_metric = best_model_per_metric(model_metrics)

In [22]:
# Tabulate the winners: one row per metric, numbered from 1 for readability.
# The index is derived from the number of metrics so the cell keeps working if
# metrics are added or removed (a hard-coded [1, 2, 3, 4] makes the DataFrame
# constructor raise a ValueError on any length mismatch).
best_models_df = pd.DataFrame(
    list(best_models_by_metric.items()),
    columns=['Metric', 'Best Model'],
    index=pd.RangeIndex(start=1, stop=len(best_models_by_metric) + 1),
)
best_models_df

Unnamed: 0,Metric,Best Model
1,Accuracy,Logistic Regression
2,Precision,Extra Trees
3,Recall,Naive Bayes
4,F1 Score,Naive Bayes


<div style="text-align: center;">
     <img src="../img/ufc_logo.png" width="800" /> 
</div>