# 🔹 UFC Fight Predictor Model Training

<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

#  Import Libraries and Setup Environment

In [1]:
# Import necessary libraries
import os
import sys
import pandas as pd

pd.set_option('display.max_colwidth', 200) 

# Get the current working directory
current_dir = os.getcwd()

# Navigate to the project root
project_root = os.path.abspath(os.path.join(current_dir, '..'))

# Import from /src
sys.path.append(os.path.join(project_root))
from src.metrics import *
from src.model_factory import *
from src.io_model import *
from src.helpers import *
from src.config import *

<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

#  Load Data

In [2]:
# Load UFCData
try:
    ufc_data = load_data(name='ufc_data')
    ufc_data_no_odds = load_data(name='ufc_data_no_odds')
    logger.info("✅ UFCData objects loaded successfully.")
except Exception as e:
    logger.error(f"❌ Error loading training data: {e}")

INFO:src.io_model:📦 UFCData object loaded from: /home/mlioi/ufc-predictor/data/processed/ufc_data.pkl
INFO:src.io_model:📦 UFCData object loaded from: /home/mlioi/ufc-predictor/data/processed/ufc_data_no_odds.pkl
INFO:src.helpers:✅ UFCData objects loaded successfully.


In [3]:
ufc_data

📊 UFC Dataset Summary
----------------------------------------
🧪 Total samples      : 6001
🧪 Train/Test split  : 4800 / 1201
🧪 Total features     : 28

🔢 Numerical features : 25
🔠 Categorical features: 3
    - Binary          : 2
    - Multiclass      : 1

🏷 Label distribution (raw):
   - Class 0: 3484 (58.1%)
   - Class 1: 2517 (41.9%)

✅ No missing values detected

📈 Feature summary statistics (train set):
                        mean      std      min       max
BlueTotalTitleBouts    0.260    1.111     0.00    16.000
RedTotalTitleBouts     0.572    1.574     0.00    16.000
LoseStreakDif          0.059    1.012    -6.00     6.000
WinStreakDif          -0.162    1.921   -18.00    10.000
LongestWinStreakDif   -0.772    2.053   -12.00    14.000
KODif                 -0.536    2.181   -21.00    14.000
SubDif                -0.334    1.877   -15.00    10.000
HeightDif             -0.030    6.286   -33.02    30.480
ReachDif              -0.260    8.266   -33.02    30.480
AgeDif            

In [4]:
ufc_data_no_odds

📊 UFC Dataset Summary
----------------------------------------
🧪 Total samples      : 6001
🧪 Train/Test split  : 4800 / 1201
🧪 Total features     : 27

🔢 Numerical features : 24
🔠 Categorical features: 3
    - Binary          : 2
    - Multiclass      : 1

🏷 Label distribution (raw):
   - Class 0: 3484 (58.1%)
   - Class 1: 2517 (41.9%)

✅ No missing values detected

📈 Feature summary statistics (train set):
                      mean     std     min      max
BlueTotalTitleBouts  0.260   1.111    0.00   16.000
RedTotalTitleBouts   0.572   1.574    0.00   16.000
LoseStreakDif        0.059   1.012   -6.00    6.000
WinStreakDif        -0.162   1.921  -18.00   10.000
LongestWinStreakDif -0.772   2.053  -12.00   14.000
KODif               -0.536   2.181  -21.00   14.000
SubDif              -0.334   1.877  -15.00   10.000
HeightDif           -0.030   6.286  -33.02   30.480
ReachDif            -0.260   8.266  -33.02   30.480
AgeDif              -0.611   5.181  -17.00   16.000
SigStrDif       

<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

# 🔧 Hyperparameters Tuning 🔧

In [5]:
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score

hyperparameter_tuning = True

if hyperparameter_tuning:
    logger.info("💡 Hyperparameters Tuning is Activated")

    model_params = {
    "Support Vector Machine": (
        SVC(probability=True),
        {'C': [0.01, 0.1, 1], 'kernel': ['linear'], 'gamma': ['auto']}
    ),
    "Random Forest": (
        RandomForestClassifier(),
        {'n_estimators': [10, 50, 100], 'max_depth': [5, 10, 100]}
    ),
    "Logistic Regression": (
        LogisticRegression(),
        {'C': [0.001, 0.001, 0.01, 0.05, 0.1, 1, 2, 5, 7, 8, 8.5, 9, 10, 20, 40, 80], 'solver': ['liblinear'], 'penalty': ['l1','l2']}
    ),
    "K-Nearest Neighbors": (
        KNeighborsClassifier(),
        {'n_neighbors': [3, 6, 9, 12, 15], 'weights': ['uniform', 'distance'], 'metric': ['euclidean', 'manhattan']}
    ),
    "AdaBoost": (
        AdaBoostClassifier(),
        {'n_estimators': [10, 50, 100], 'learning_rate': [0.01, 0.1, 1.0]}
    ),
    "Naive Bayes": (
        GaussianNB(),
        {'var_smoothing': [1e-8, 1e-7, 1e-6, 1e-5]}
    ),
    "Gradient Boosting": (
        GradientBoostingClassifier(),
        {'n_estimators': [100, 200], 'learning_rate': [0.01, 0.1, 1.0], 'max_depth': [3, 5, 7]}
    ),
    "Extra Trees": (
        ExtraTreesClassifier(),
        {'n_estimators': [50, 100], 'max_depth': [None, 10, 20]}
    ),
    "Quadratic Discriminant Analysis": (
        QuadraticDiscriminantAnalysis(),
        {'reg_param': [0.1, 0.3, 0.5, 0.9]}
    ),
    "Neural Network": (
        MLPClassifier(max_iter=200, random_state=42),
        {
        # Architecture: number and size of hidden layers
        'hidden_layer_sizes': [
            (200,), (50, 50),
        ],
        # Activation function for hidden layers
        'activation': ['relu', 'logistic'],
        # Optimizer for gradient descent
        'solver': ['adam', 'sgd'],
        # L2 regularization strength (higher alpha reduces overfitting but can cause underfitting)
        'alpha': [0.001, 0.01],
        # Learning rate schedule
        'learning_rate': ['adaptive'],
        # Initial learning rate (for adam and sgd solvers)
        'learning_rate_init': [0.01],
        # Early stopping based on validation performance to prevent overfitting
        'early_stopping': [True],
        # Mini-batch size for training with adam/sgd
        'batch_size': [32, 64],
        # Momentum for SGD (controls contribution of previous updates)
        'momentum': [0.2, 0.8],
        # Proportion of training set used as validation for early stopping
        'validation_fraction': [0.2]
        }
    ),
    "XGBoost": (
            XGBClassifier(eval_metric='logloss'),
            {
                'n_estimators': [50, 150],
                'learning_rate': [0.01, 0.1],
                'max_depth': [3, 5, 7],
                'subsample': [0.8, 1.0],
                'colsample_bytree': [0.8, 1.0]
            }
        )
}
    display_model_params_table(model_params)
else:
    logger.info("💡 Hyperparameters Tuning is Deactivated")
    model_params = None # Default Settings

INFO:src.helpers:💡 Hyperparameters Tuning is Activated


Unnamed: 0,Model,Hyperparameters
0,Support Vector Machine,"C: [0.01, 0.1, 1]; kernel: ['linear']; gamma: ['auto']"
1,Random Forest,"n_estimators: [10, 50, 100]; max_depth: [5, 10, 100]"
2,Logistic Regression,"C: [0.001, 0.001, 0.01, 0.05, 0.1, 1, 2, 5, 7, 8, 8.5, 9, 10, 20, 40, 80]; solver: ['liblinear']; penalty: ['l1', 'l2']"
3,K-Nearest Neighbors,"n_neighbors: [3, 6, 9, 12, 15]; weights: ['uniform', 'distance']; metric: ['euclidean', 'manhattan']"
4,AdaBoost,"n_estimators: [10, 50, 100]; learning_rate: [0.01, 0.1, 1.0]"
5,Naive Bayes,"var_smoothing: [1e-08, 1e-07, 1e-06, 1e-05]"
6,Gradient Boosting,"n_estimators: [100, 200]; learning_rate: [0.01, 0.1, 1.0]; max_depth: [3, 5, 7]"
7,Extra Trees,"n_estimators: [50, 100]; max_depth: [None, 10, 20]"
8,Quadratic Discriminant Analysis,"reg_param: [0.1, 0.3, 0.5, 0.9]"
9,Neural Network,"hidden_layer_sizes: [(200,), (50, 50)]; activation: ['relu', 'logistic']; solver: ['adam', 'sgd']; alpha: [0.001, 0.01]; learning_rate: ['adaptive']; learning_rate_init: [0.01]; early_stopping: [T..."


# Notes
- For SVM, kernel='linear' has better performance.
- For Logistic Regression, performs well for lower values of c
- Neural network with shape (50x50 works great)
- SVM has a more rich feature importance results with odds
- SVM Linear without odds turns out to be a dummy model
- SVM RBF is better for no odds model

<div style="text-align: center;">
  🔹 <img src="../img/ufc_logo.png" width="50" /> 🔹
</div>

# 🔹 UFC Machine Learning Training (with Odds)

##  KNN Model Training

### 🚀 KNN Training 

In [88]:
model_name = 'K-Nearest Neighbors'
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[K-Nearest Neighbors] UFC GridSearchCV Training (F1_macro)
INFO:root:[K-Nearest Neighbors] 🤖 Training...


Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.549 total time=   0.1s
[CV 2/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.543 total time=   0.1s
[CV 3/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.587 total time=   0.1s
[CV 4/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.556 total time=   0.1s
[CV 5/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.576 total time=   0.1s
[CV 1/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.548 total time=   0.0s
[CV 2/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.545 total time=   0.0s
[CV 3/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.588 total time=   0.0s
[CV 4/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.555 total time=   0.0s
[CV 5/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.575 to

INFO:root:[K-Nearest Neighbors] 🔍 Best F1_macro: 0.5844
[K-Nearest Neighbors] 🔍 Best Params: {'metric': 'manhattan', 'n_neighbors': 12, 'weights': 'distance'}


### 🔍 KNN Metrics

In [89]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [90]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [91]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'K-Nearest Neighbors' saved to: /home/mlioi/ufc-predictor/models/knn_best.pkl


## Support Vector Machine Model 

### 🚀 Support Vector Machine Training 

In [14]:
model_name = 'Support Vector Machine'
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Support Vector Machine] UFC GridSearchCV Training (F1_macro)
INFO:root:[Support Vector Machine] 🤖 Training...


Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV 1/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.661 total time=   3.0s
[CV 2/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.647 total time=   2.8s
[CV 3/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.644 total time=   2.8s
[CV 4/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.652 total time=   2.8s
[CV 5/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.635 total time=   2.8s
[CV 1/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.664 total time=   3.4s
[CV 2/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.649 total time=   3.3s
[CV 3/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.645 total time=   3.3s
[CV 4/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.656 total time=   3.3s
[CV 5/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.636 total time=   3.2s
[CV 1/5] END ....C=1, gamma=auto, kernel=linear;, score=0.664 total time=   6.1s
[CV 2/5] END ....C=1, gamma=auto, kernel=linear;,

INFO:root:[Support Vector Machine] 🔍 Best F1_macro: 0.6506
[Support Vector Machine] 🔍 Best Params: {'C': 1, 'gamma': 'auto', 'kernel': 'linear'}


### 🔍 Support Vector Machine Metrics

In [15]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [16]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [17]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'Support Vector Machine' saved to: /home/mlioi/ufc-predictor/models/svm_best.pkl


## Logistic Regression Model

### 🚀 Logistic Regression Training

In [8]:
model_name = 'Logistic Regression'
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Logistic Regression] UFC GridSearchCV Training (F1_macro)
INFO:root:[Logistic Regression] 🤖 Training...


Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV 1/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.368 total time=   0.0s
[CV 2/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.368 total time=   0.0s
[CV 3/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.367 total time=   0.0s
[CV 4/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.367 total time=   0.0s
[CV 5/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.367 total time=   0.0s
[CV 1/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.625 total time=   0.0s
[CV 2/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.618 total time=   0.0s
[CV 3/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.655 total time=   0.0s
[CV 4/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.643 total time=   0.0s
[CV 5/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.641 total time=   0.0s
[CV 1/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.368 total time=   0.0s
[CV 2/5] END C

INFO:root:[Logistic Regression] 🔍 Best F1_macro: 0.6440
[Logistic Regression] 🔍 Best Params: {'C': 0.01, 'penalty': 'l1', 'solver': 'liblinear'}


### 🔍 Logistic Regression Metrics

In [9]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [10]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [11]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'Logistic Regression' saved to: /home/mlioi/ufc-predictor/models/lr_best.pkl


## Random Forest Model

### 🚀 Random Forest Training

In [12]:
model_name = 'Random Forest'
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Random Forest] UFC GridSearchCV Training (F1_macro)
INFO:root:[Random Forest] 🤖 Training...


Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ......max_depth=5, n_estimators=10;, score=0.580 total time=   0.0s
[CV 2/5] END ......max_depth=5, n_estimators=10;, score=0.608 total time=   0.0s
[CV 3/5] END ......max_depth=5, n_estimators=10;, score=0.592 total time=   0.0s
[CV 4/5] END ......max_depth=5, n_estimators=10;, score=0.583 total time=   0.0s
[CV 5/5] END ......max_depth=5, n_estimators=10;, score=0.614 total time=   0.0s
[CV 1/5] END ......max_depth=5, n_estimators=50;, score=0.617 total time=   0.2s
[CV 2/5] END ......max_depth=5, n_estimators=50;, score=0.622 total time=   0.2s
[CV 3/5] END ......max_depth=5, n_estimators=50;, score=0.657 total time=   0.2s
[CV 4/5] END ......max_depth=5, n_estimators=50;, score=0.638 total time=   0.2s
[CV 5/5] END ......max_depth=5, n_estimators=50;, score=0.616 total time=   0.2s
[CV 1/5] END .....max_depth=5, n_estimators=100;, score=0.601 total time=   0.4s
[CV 2/5] END .....max_depth=5, n_estimators=100;,

INFO:root:[Random Forest] 🔍 Best F1_macro: 0.6374
[Random Forest] 🔍 Best Params: {'max_depth': 10, 'n_estimators': 50}


### 🔍 Random Forest Metrics

In [13]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [14]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [15]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'Random Forest' saved to: /home/mlioi/ufc-predictor/models/rf_best.pkl


##  AdaBoost Model

### 🚀 AdaBoost Training

In [16]:
model_name = 'AdaBoost'
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[AdaBoost] UFC GridSearchCV Training (F1_macro)
INFO:root:[AdaBoost] 🤖 Training...


Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END learning_rate=0.01, n_estimators=10;, score=0.619 total time=   0.1s
[CV 2/5] END learning_rate=0.01, n_estimators=10;, score=0.619 total time=   0.1s
[CV 3/5] END learning_rate=0.01, n_estimators=10;, score=0.648 total time=   0.1s
[CV 4/5] END learning_rate=0.01, n_estimators=10;, score=0.637 total time=   0.1s
[CV 5/5] END learning_rate=0.01, n_estimators=10;, score=0.610 total time=   0.1s
[CV 1/5] END learning_rate=0.01, n_estimators=50;, score=0.619 total time=   0.3s
[CV 2/5] END learning_rate=0.01, n_estimators=50;, score=0.619 total time=   0.3s
[CV 3/5] END learning_rate=0.01, n_estimators=50;, score=0.648 total time=   0.3s
[CV 4/5] END learning_rate=0.01, n_estimators=50;, score=0.637 total time=   0.3s
[CV 5/5] END learning_rate=0.01, n_estimators=50;, score=0.610 total time=   0.3s
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.619 total time=   0.6s
[CV 2/5] END learning_rate=0.01, n_es

INFO:root:[AdaBoost] 🔍 Best F1_macro: 0.6356
[AdaBoost] 🔍 Best Params: {'learning_rate': 0.01, 'n_estimators': 100}


### 🔍 AdaBoost Metrics

In [17]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [18]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [19]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'AdaBoost' saved to: /home/mlioi/ufc-predictor/models/ab_best.pkl


## Naive Bayes Model

### 🚀 Naive Bayes Training

In [20]:
model_name = 'Naive Bayes'
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Naive Bayes] UFC GridSearchCV Training (F1_macro)
INFO:root:[Naive Bayes] 🤖 Training...


Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END ...............var_smoothing=1e-08;, score=0.630 total time=   0.0s
[CV 2/5] END ...............var_smoothing=1e-08;, score=0.630 total time=   0.0s
[CV 3/5] END ...............var_smoothing=1e-08;, score=0.629 total time=   0.0s
[CV 4/5] END ...............var_smoothing=1e-08;, score=0.593 total time=   0.0s
[CV 5/5] END ...............var_smoothing=1e-08;, score=0.608 total time=   0.0s
[CV 1/5] END ...............var_smoothing=1e-07;, score=0.630 total time=   0.0s
[CV 2/5] END ...............var_smoothing=1e-07;, score=0.630 total time=   0.0s
[CV 3/5] END ...............var_smoothing=1e-07;, score=0.629 total time=   0.0s
[CV 4/5] END ...............var_smoothing=1e-07;, score=0.593 total time=   0.0s
[CV 5/5] END ...............var_smoothing=1e-07;, score=0.608 total time=   0.0s
[CV 1/5] END ...............var_smoothing=1e-06;, score=0.630 total time=   0.0s
[CV 2/5] END ...............var_smoothing=1e-06;,

INFO:root:[Naive Bayes] 🔍 Best F1_macro: 0.6183
[Naive Bayes] 🔍 Best Params: {'var_smoothing': 1e-05}


### 🔍 Naive Bayes Metrics

In [21]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [22]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [23]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'Naive Bayes' saved to: /home/mlioi/ufc-predictor/models/nb_best.pkl


## Gradient Boosting Model

### 🚀 Gradient Boosting Training

In [24]:
model_name = "Gradient Boosting"
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Gradient Boosting] UFC GridSearchCV Training (F1_macro)
INFO:root:[Gradient Boosting] 🤖 Training...


Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.603 total time=   1.2s
[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.594 total time=   1.1s
[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.655 total time=   1.1s
[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.623 total time=   1.1s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.623 total time=   1.1s
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.611 total time=   2.3s
[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.617 total time=   2.3s
[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.679 total time=   2.3s
[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.644 total time=   2.3s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.62

INFO:root:[Gradient Boosting] 🔍 Best F1_macro: 0.6385
[Gradient Boosting] 🔍 Best Params: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}


### 🔍 Gradient Boosting Metrics

In [25]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [26]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [27]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'Gradient Boosting' saved to: /home/mlioi/ufc-predictor/models/gb_best.pkl


## Extra Trees Model

### 🚀 Extra Trees Training

In [28]:
model_name = "Extra Trees"
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Extra Trees] UFC GridSearchCV Training (F1_macro)
INFO:root:[Extra Trees] 🤖 Training...


Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV 1/5] END ...max_depth=None, n_estimators=50;, score=0.587 total time=   0.3s
[CV 2/5] END ...max_depth=None, n_estimators=50;, score=0.571 total time=   0.3s
[CV 3/5] END ...max_depth=None, n_estimators=50;, score=0.644 total time=   0.3s
[CV 4/5] END ...max_depth=None, n_estimators=50;, score=0.594 total time=   0.3s
[CV 5/5] END ...max_depth=None, n_estimators=50;, score=0.588 total time=   0.3s
[CV 1/5] END ..max_depth=None, n_estimators=100;, score=0.602 total time=   0.6s
[CV 2/5] END ..max_depth=None, n_estimators=100;, score=0.586 total time=   0.6s
[CV 3/5] END ..max_depth=None, n_estimators=100;, score=0.634 total time=   0.6s
[CV 4/5] END ..max_depth=None, n_estimators=100;, score=0.616 total time=   0.6s
[CV 5/5] END ..max_depth=None, n_estimators=100;, score=0.610 total time=   0.6s
[CV 1/5] END .....max_depth=10, n_estimators=50;, score=0.570 total time=   0.1s
[CV 2/5] END .....max_depth=10, n_estimators=50;,

INFO:root:[Extra Trees] 🔍 Best F1_macro: 0.6097
[Extra Trees] 🔍 Best Params: {'max_depth': None, 'n_estimators': 100}


### 🔍 Extra Trees Metrics

In [29]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [30]:
log_training_result(model_name, model.best_params_, metrics, duration)

  df = pd.concat([df, pd.DataFrame([log_entry])], ignore_index=True)
INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [31]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'Extra Trees' saved to: /home/mlioi/ufc-predictor/models/et_best.pkl


## Quadratic Discriminant Analysis Model

### 🚀 Quadratic Discriminant Analysis Training

In [32]:
model_name = "Quadratic Discriminant Analysis"
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Quadratic Discriminant Analysis] UFC GridSearchCV Training (F1_macro)
INFO:root:[Quadratic Discriminant Analysis] 🤖 Training...


Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END .....................reg_param=0.1;, score=0.621 total time=   0.0s
[CV 2/5] END .....................reg_param=0.1;, score=0.612 total time=   0.0s
[CV 3/5] END .....................reg_param=0.1;, score=0.638 total time=   0.0s
[CV 4/5] END .....................reg_param=0.1;, score=0.613 total time=   0.0s
[CV 5/5] END .....................reg_param=0.1;, score=0.612 total time=   0.0s
[CV 1/5] END .....................reg_param=0.3;, score=0.621 total time=   0.0s
[CV 2/5] END .....................reg_param=0.3;, score=0.628 total time=   0.0s
[CV 3/5] END .....................reg_param=0.3;, score=0.619 total time=   0.0s
[CV 4/5] END .....................reg_param=0.3;, score=0.610 total time=   0.0s
[CV 5/5] END .....................reg_param=0.3;, score=0.631 total time=   0.0s
[CV 1/5] END .....................reg_param=0.5;, score=0.629 total time=   0.0s
[CV 2/5] END .....................reg_param=0.5;,

INFO:root:[Quadratic Discriminant Analysis] 🔍 Best F1_macro: 0.6300
[Quadratic Discriminant Analysis] 🔍 Best Params: {'reg_param': 0.9}


### 🔍 Quadratic Discriminant Analysis Metrics

In [33]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [34]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [35]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'Quadratic Discriminant Analysis' saved to: /home/mlioi/ufc-predictor/models/qda_best.pkl


## Neural Network

### 🚀 Neural Network Training 

In [36]:
model_name = 'Neural Network'
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Neural Network] UFC GridSearchCV Training (F1_macro)
INFO:root:[Neural Network] 🤖 Training...


Fitting 5 folds for each of 64 candidates, totalling 320 fits
[CV 1/5] END activation=relu, alpha=0.001, batch_size=32, early_stopping=True, hidden_layer_sizes=(200,), learning_rate=adaptive, learning_rate_init=0.01, momentum=0.2, solver=adam, validation_fraction=0.2;, score=0.633 total time=   1.0s
[CV 2/5] END activation=relu, alpha=0.001, batch_size=32, early_stopping=True, hidden_layer_sizes=(200,), learning_rate=adaptive, learning_rate_init=0.01, momentum=0.2, solver=adam, validation_fraction=0.2;, score=0.630 total time=   0.6s
[CV 3/5] END activation=relu, alpha=0.001, batch_size=32, early_stopping=True, hidden_layer_sizes=(200,), learning_rate=adaptive, learning_rate_init=0.01, momentum=0.2, solver=adam, validation_fraction=0.2;, score=0.635 total time=   0.8s
[CV 4/5] END activation=relu, alpha=0.001, batch_size=32, early_stopping=True, hidden_layer_sizes=(200,), learning_rate=adaptive, learning_rate_init=0.01, momentum=0.2, solver=adam, validation_fraction=0.2;, score=0.638 t

INFO:root:[Neural Network] 🔍 Best F1_macro: 0.6446
[Neural Network] 🔍 Best Params: {'activation': 'logistic', 'alpha': 0.001, 'batch_size': 32, 'early_stopping': True, 'hidden_layer_sizes': (50, 50), 'learning_rate': 'adaptive', 'learning_rate_init': 0.01, 'momentum': 0.2, 'solver': 'adam', 'validation_fraction': 0.2}


### 🔍 Neural Network Metrics

In [37]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [38]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [39]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'Neural Network' saved to: /home/mlioi/ufc-predictor/models/nn_best.pkl


## XGBoost Model

### 🚀 XGBoost Training 

In [40]:
model_name = 'XGBoost'
start = time.time()
model = model_factory(model_name, ufc_data, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[XGBoost] UFC GridSearchCV Training (F1_macro)
INFO:root:[XGBoost] 🤖 Training...


Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 1/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.563 total time=   0.1s
[CV 2/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.560 total time=   0.1s
[CV 3/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.518 total time=   0.1s
[CV 4/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.524 total time=   0.1s
[CV 5/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.563 total time=   0.1s
[CV 1/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=1.0;, score=0.562 total time=   0.1s
[CV 2/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=1.0;, score=0.562 total time=   0.1s
[CV 3/5] END c

INFO:root:[XGBoost] 🔍 Best F1_macro: 0.6438
[XGBoost] 🔍 Best Params: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50, 'subsample': 1.0}


### 🔍 XGBoost Metrics

In [41]:
metrics = evaluate_metrics(model, ufc_data)
print(metrics)

### 📝 Log Training Results

In [42]:
log_training_result(model_name, model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [43]:
save_model(model, name=file_model_name[model_name])

INFO:src.io_model:✅ Model 'XGBoost' saved to: /home/mlioi/ufc-predictor/models/xgb_best.pkl


# 🔹 UFC Machine Learning Training (without Odds)

##  KNN Model Training

### 🚀 KNN Training 

In [44]:
model_name = 'K-Nearest Neighbors'
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[K-Nearest Neighbors] UFC GridSearchCV Training (F1_macro)
INFO:root:[K-Nearest Neighbors] 🤖 Training...


Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV 1/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.531 total time=   0.1s
[CV 2/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.498 total time=   0.1s
[CV 3/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.549 total time=   0.1s
[CV 4/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.541 total time=   0.1s
[CV 5/5] END metric=euclidean, n_neighbors=3, weights=uniform;, score=0.544 total time=   0.1s
[CV 1/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.530 total time=   0.0s
[CV 2/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.501 total time=   0.0s
[CV 3/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.551 total time=   0.0s
[CV 4/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.540 total time=   0.0s
[CV 5/5] END metric=euclidean, n_neighbors=3, weights=distance;, score=0.541 to

INFO:root:[K-Nearest Neighbors] 🔍 Best F1_macro: 0.5336
[K-Nearest Neighbors] 🔍 Best Params: {'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}


### 🔍 KNN Metrics

In [45]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [46]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [47]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'K-Nearest Neighbors' saved to: /home/mlioi/ufc-predictor/models/knn_best_no_odds.pkl


## Support Vector Machine Model 

### 🚀 Support Vector Machine Training 

In [48]:
model_name = 'Support Vector Machine'
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Support Vector Machine] UFC GridSearchCV Training (F1_macro)
INFO:root:[Support Vector Machine] 🤖 Training...


Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV 1/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.505 total time=   2.8s
[CV 2/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.505 total time=   2.9s
[CV 3/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.520 total time=   2.9s
[CV 4/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.483 total time=   2.9s
[CV 5/5] END .C=0.01, gamma=auto, kernel=linear;, score=0.519 total time=   2.8s
[CV 1/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.522 total time=   3.4s
[CV 2/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.503 total time=   3.4s
[CV 3/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.543 total time=   3.4s
[CV 4/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.507 total time=   3.4s
[CV 5/5] END ..C=0.1, gamma=auto, kernel=linear;, score=0.540 total time=   3.4s
[CV 1/5] END ....C=1, gamma=auto, kernel=linear;, score=0.526 total time=   5.8s
[CV 2/5] END ....C=1, gamma=auto, kernel=linear;,

INFO:root:[Support Vector Machine] 🔍 Best F1_macro: 0.5269
[Support Vector Machine] 🔍 Best Params: {'C': 1, 'gamma': 'auto', 'kernel': 'linear'}


### 🔍 Support Vector Machine Metrics

In [49]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [50]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [51]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'Support Vector Machine' saved to: /home/mlioi/ufc-predictor/models/svm_best_no_odds.pkl


## Logistic Regression Model

### 🚀 Logistic Regression Training

In [52]:
model_name = 'Logistic Regression'
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Logistic Regression] UFC GridSearchCV Training (F1_macro)
INFO:root:[Logistic Regression] 🤖 Training...


Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV 1/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.368 total time=   0.0s
[CV 2/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.368 total time=   0.0s
[CV 3/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.367 total time=   0.0s
[CV 4/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.367 total time=   0.0s
[CV 5/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.367 total time=   0.0s
[CV 1/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.560 total time=   0.0s
[CV 2/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.539 total time=   0.0s
[CV 3/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.580 total time=   0.0s
[CV 4/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.568 total time=   0.0s
[CV 5/5] END C=0.001, penalty=l2, solver=liblinear;, score=0.568 total time=   0.0s
[CV 1/5] END C=0.001, penalty=l1, solver=liblinear;, score=0.368 total time=   0.0s
[CV 2/5] END C

INFO:root:[Logistic Regression] 🔍 Best F1_macro: 0.5650
[Logistic Regression] 🔍 Best Params: {'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}


### 🔍 Logistic Regression Metrics

In [53]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [54]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [55]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'Logistic Regression' saved to: /home/mlioi/ufc-predictor/models/lr_best_no_odds.pkl


## Random Forest Model

### 🚀 Random Forest Training

In [56]:
model_name = 'Random Forest'
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Random Forest] UFC GridSearchCV Training (F1_macro)
INFO:root:[Random Forest] 🤖 Training...


Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ......max_depth=5, n_estimators=10;, score=0.527 total time=   0.0s
[CV 2/5] END ......max_depth=5, n_estimators=10;, score=0.519 total time=   0.0s
[CV 3/5] END ......max_depth=5, n_estimators=10;, score=0.510 total time=   0.0s
[CV 4/5] END ......max_depth=5, n_estimators=10;, score=0.511 total time=   0.0s
[CV 5/5] END ......max_depth=5, n_estimators=10;, score=0.534 total time=   0.0s
[CV 1/5] END ......max_depth=5, n_estimators=50;, score=0.495 total time=   0.2s
[CV 2/5] END ......max_depth=5, n_estimators=50;, score=0.493 total time=   0.2s
[CV 3/5] END ......max_depth=5, n_estimators=50;, score=0.528 total time=   0.2s
[CV 4/5] END ......max_depth=5, n_estimators=50;, score=0.506 total time=   0.2s
[CV 5/5] END ......max_depth=5, n_estimators=50;, score=0.526 total time=   0.2s
[CV 1/5] END .....max_depth=5, n_estimators=100;, score=0.510 total time=   0.4s
[CV 2/5] END .....max_depth=5, n_estimators=100;,

INFO:root:[Random Forest] 🔍 Best F1_macro: 0.5597
[Random Forest] 🔍 Best Params: {'max_depth': 100, 'n_estimators': 100}


### 🔍 Random Forest Metrics

In [57]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [58]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [59]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'Random Forest' saved to: /home/mlioi/ufc-predictor/models/rf_best_no_odds.pkl


##  AdaBoost Model

### 🚀 AdaBoost Training

In [60]:
model_name = 'AdaBoost'
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[AdaBoost] UFC GridSearchCV Training (F1_macro)
INFO:root:[AdaBoost] 🤖 Training...


Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END learning_rate=0.01, n_estimators=10;, score=0.368 total time=   0.1s
[CV 2/5] END learning_rate=0.01, n_estimators=10;, score=0.368 total time=   0.1s
[CV 3/5] END learning_rate=0.01, n_estimators=10;, score=0.367 total time=   0.1s
[CV 4/5] END learning_rate=0.01, n_estimators=10;, score=0.367 total time=   0.1s
[CV 5/5] END learning_rate=0.01, n_estimators=10;, score=0.367 total time=   0.1s
[CV 1/5] END learning_rate=0.01, n_estimators=50;, score=0.368 total time=   0.3s
[CV 2/5] END learning_rate=0.01, n_estimators=50;, score=0.368 total time=   0.3s
[CV 3/5] END learning_rate=0.01, n_estimators=50;, score=0.367 total time=   0.3s
[CV 4/5] END learning_rate=0.01, n_estimators=50;, score=0.367 total time=   0.3s
[CV 5/5] END learning_rate=0.01, n_estimators=50;, score=0.367 total time=   0.3s
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.368 total time=   0.6s
[CV 2/5] END learning_rate=0.01, n_es

INFO:root:[AdaBoost] 🔍 Best F1_macro: 0.5616
[AdaBoost] 🔍 Best Params: {'learning_rate': 1.0, 'n_estimators': 100}


### 🔍 AdaBoost Metrics

In [61]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [62]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [63]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'AdaBoost' saved to: /home/mlioi/ufc-predictor/models/ab_best_no_odds.pkl


## Naive Bayes Model

### 🚀 Naive Bayes Training

In [64]:
model_name = 'Naive Bayes'
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Naive Bayes] UFC GridSearchCV Training (F1_macro)
INFO:root:[Naive Bayes] 🤖 Training...


Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END ...............var_smoothing=1e-08;, score=0.582 total time=   0.0s
[CV 2/5] END ...............var_smoothing=1e-08;, score=0.552 total time=   0.0s
[CV 3/5] END ...............var_smoothing=1e-08;, score=0.575 total time=   0.0s
[CV 4/5] END ...............var_smoothing=1e-08;, score=0.546 total time=   0.0s
[CV 5/5] END ...............var_smoothing=1e-08;, score=0.550 total time=   0.0s
[CV 1/5] END ...............var_smoothing=1e-07;, score=0.582 total time=   0.0s
[CV 2/5] END ...............var_smoothing=1e-07;, score=0.552 total time=   0.0s
[CV 3/5] END ...............var_smoothing=1e-07;, score=0.575 total time=   0.0s
[CV 4/5] END ...............var_smoothing=1e-07;, score=0.546 total time=   0.0s
[CV 5/5] END ...............var_smoothing=1e-07;, score=0.550 total time=   0.0s
[CV 1/5] END ...............var_smoothing=1e-06;, score=0.582 total time=   0.0s
[CV 2/5] END ...............var_smoothing=1e-06;,

INFO:root:[Naive Bayes] 🔍 Best F1_macro: 0.5609
[Naive Bayes] 🔍 Best Params: {'var_smoothing': 1e-08}


### 🔍 Naive Bayes Metrics

In [65]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [66]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [67]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'Naive Bayes' saved to: /home/mlioi/ufc-predictor/models/nb_best_no_odds.pkl


## Gradient Boosting Model

### 🚀 Gradient Boosting Training

In [68]:
model_name = "Gradient Boosting"
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Gradient Boosting] UFC GridSearchCV Training (F1_macro)
INFO:root:[Gradient Boosting] 🤖 Training...


Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.496 total time=   1.1s
[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.487 total time=   1.1s
[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.508 total time=   1.1s
[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.474 total time=   1.1s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=100;, score=0.501 total time=   1.1s
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.521 total time=   2.2s
[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.543 total time=   2.1s
[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.546 total time=   2.1s
[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.531 total time=   2.1s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=200;, score=0.56

INFO:root:[Gradient Boosting] 🔍 Best F1_macro: 0.5620
[Gradient Boosting] 🔍 Best Params: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}


### 🔍 Gradient Boosting Metrics

In [69]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [70]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [71]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'Gradient Boosting' saved to: /home/mlioi/ufc-predictor/models/gb_best_no_odds.pkl


## Extra Trees Model

### 🚀 Extra Trees Training

In [72]:
model_name = "Extra Trees"
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Extra Trees] UFC GridSearchCV Training (F1_macro)
INFO:root:[Extra Trees] 🤖 Training...


Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV 1/5] END ...max_depth=None, n_estimators=50;, score=0.533 total time=   0.3s
[CV 2/5] END ...max_depth=None, n_estimators=50;, score=0.537 total time=   0.3s
[CV 3/5] END ...max_depth=None, n_estimators=50;, score=0.570 total time=   0.3s
[CV 4/5] END ...max_depth=None, n_estimators=50;, score=0.556 total time=   0.3s
[CV 5/5] END ...max_depth=None, n_estimators=50;, score=0.530 total time=   0.3s
[CV 1/5] END ..max_depth=None, n_estimators=100;, score=0.553 total time=   0.6s
[CV 2/5] END ..max_depth=None, n_estimators=100;, score=0.553 total time=   0.6s
[CV 3/5] END ..max_depth=None, n_estimators=100;, score=0.581 total time=   0.6s
[CV 4/5] END ..max_depth=None, n_estimators=100;, score=0.552 total time=   0.6s
[CV 5/5] END ..max_depth=None, n_estimators=100;, score=0.515 total time=   0.6s
[CV 1/5] END .....max_depth=10, n_estimators=50;, score=0.469 total time=   0.1s
[CV 2/5] END .....max_depth=10, n_estimators=50;,

INFO:root:[Extra Trees] 🔍 Best F1_macro: 0.5508
[Extra Trees] 🔍 Best Params: {'max_depth': None, 'n_estimators': 100}


### 🔍 Extra Trees Metrics

In [73]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [74]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

  df = pd.concat([df, pd.DataFrame([log_entry])], ignore_index=True)
INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [75]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'Extra Trees' saved to: /home/mlioi/ufc-predictor/models/et_best_no_odds.pkl


## Quadratic Discriminant Analysis Model

### 🚀 Quadratic Discriminant Analysis Training

In [76]:
model_name = "Quadratic Discriminant Analysis"
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Quadratic Discriminant Analysis] UFC GridSearchCV Training (F1_macro)
INFO:root:[Quadratic Discriminant Analysis] 🤖 Training...


Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END .....................reg_param=0.1;, score=0.563 total time=   0.0s
[CV 2/5] END .....................reg_param=0.1;, score=0.557 total time=   0.0s
[CV 3/5] END .....................reg_param=0.1;, score=0.571 total time=   0.0s
[CV 4/5] END .....................reg_param=0.1;, score=0.549 total time=   0.0s
[CV 5/5] END .....................reg_param=0.1;, score=0.551 total time=   0.0s
[CV 1/5] END .....................reg_param=0.3;, score=0.570 total time=   0.0s
[CV 2/5] END .....................reg_param=0.3;, score=0.557 total time=   0.0s
[CV 3/5] END .....................reg_param=0.3;, score=0.573 total time=   0.0s
[CV 4/5] END .....................reg_param=0.3;, score=0.567 total time=   0.0s
[CV 5/5] END .....................reg_param=0.3;, score=0.573 total time=   0.0s
[CV 1/5] END .....................reg_param=0.5;, score=0.569 total time=   0.0s
[CV 2/5] END .....................reg_param=0.5;,

INFO:root:[Quadratic Discriminant Analysis] 🔍 Best F1_macro: 0.5778
[Quadratic Discriminant Analysis] 🔍 Best Params: {'reg_param': 0.9}


### 🔍 Quadratic Discriminant Analysis Metrics

In [77]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [78]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [79]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'Quadratic Discriminant Analysis' saved to: /home/mlioi/ufc-predictor/models/qda_best_no_odds.pkl


## Neural Network

### 🚀 Neural Network Training 

In [80]:
model_name = 'Neural Network'
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[Neural Network] UFC GridSearchCV Training (F1_macro)
INFO:root:[Neural Network] 🤖 Training...


Fitting 5 folds for each of 64 candidates, totalling 320 fits
[CV 1/5] END activation=relu, alpha=0.001, batch_size=32, early_stopping=True, hidden_layer_sizes=(200,), learning_rate=adaptive, learning_rate_init=0.01, momentum=0.2, solver=adam, validation_fraction=0.2;, score=0.561 total time=   0.6s
[CV 2/5] END activation=relu, alpha=0.001, batch_size=32, early_stopping=True, hidden_layer_sizes=(200,), learning_rate=adaptive, learning_rate_init=0.01, momentum=0.2, solver=adam, validation_fraction=0.2;, score=0.549 total time=   0.7s
[CV 3/5] END activation=relu, alpha=0.001, batch_size=32, early_stopping=True, hidden_layer_sizes=(200,), learning_rate=adaptive, learning_rate_init=0.01, momentum=0.2, solver=adam, validation_fraction=0.2;, score=0.564 total time=   0.7s
[CV 4/5] END activation=relu, alpha=0.001, batch_size=32, early_stopping=True, hidden_layer_sizes=(200,), learning_rate=adaptive, learning_rate_init=0.01, momentum=0.2, solver=adam, validation_fraction=0.2;, score=0.543 t

INFO:root:[Neural Network] 🔍 Best F1_macro: 0.5610
[Neural Network] 🔍 Best Params: {'activation': 'logistic', 'alpha': 0.001, 'batch_size': 32, 'early_stopping': True, 'hidden_layer_sizes': (50, 50), 'learning_rate': 'adaptive', 'learning_rate_init': 0.01, 'momentum': 0.2, 'solver': 'adam', 'validation_fraction': 0.2}


### 🔍 Neural Network Metrics

In [81]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [82]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [83]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'Neural Network' saved to: /home/mlioi/ufc-predictor/models/nn_best_no_odds.pkl


## XGBoost Model

### 🚀 XGBoost Training 

In [84]:
model_name = 'XGBoost'
start = time.time()
model = model_factory(model_name, ufc_data_no_odds, model_params, scoring="f1_macro")
end = time.time()
duration = end - start

INFO:src.model_factory:[XGBoost] UFC GridSearchCV Training (F1_macro)
INFO:root:[XGBoost] 🤖 Training...


Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 1/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.372 total time=   0.2s
[CV 2/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.370 total time=   0.2s
[CV 3/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.384 total time=   0.2s
[CV 4/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.378 total time=   0.2s
[CV 5/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.381 total time=   0.2s
[CV 1/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=1.0;, score=0.374 total time=   0.1s
[CV 2/5] END colsample_bytree=0.8, learning_rate=0.01, max_depth=3, n_estimators=50, subsample=1.0;, score=0.379 total time=   0.1s
[CV 3/5] END c

INFO:root:[XGBoost] 🔍 Best F1_macro: 0.5673
[XGBoost] 🔍 Best Params: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 150, 'subsample': 0.8}


### 🔍 XGBoost Metrics

In [85]:
metrics = evaluate_metrics(model, ufc_data_no_odds)
print(metrics)

### 📝 Log Training Results

In [86]:
log_training_result(f"{model_name} (without odds)", model.best_params_, metrics, duration)

INFO:src.helpers:✅ Training logged to ../data/results/training_log_v2.csv


### 💾 Save Model 

In [87]:
save_model(model, name=f"{file_model_name[model_name]}_no_odds")

INFO:src.io_model:✅ Model 'XGBoost' saved to: /home/mlioi/ufc-predictor/models/xgb_best_no_odds.pkl


<div style="text-align: center;">
     <img src="../img/ufc_logo.png" width="800" /> 
</div>