In [1]:
# Path to a pip requirements.txt file or a conda environment.yml file.
# You can remove this variable or leave it blank to have nb2app attempt to determine the dependencies automatically.
# Examples:
# requirements_file = 'requirements.txt'
# requirements_file = 'my/path/environment.yml'
# Written relative to the notebook's working directory (the same base the
# '../src' import path in the setup cell relies on) so the notebook is not
# tied to one developer's home directory.
requirements_file: str = '../src/environment.yml'

# Paths to any files that the notebook will need to function when it is
# containerized.
# You can delete/leave this empty if there are no external files to include.
# Example:
# resource_files = ['/absolute/path/encoders/my_encoder.h5', '/absolute/path/decoders/my_decoder.h5']
resource_files: list = [
]

from pathlib import Path
# Pickled dataset loaded by the data-loading cell; relative for the same reason as above.
data_path: Path = Path('../tests/cares_test_data.pkl')
# Experiment selection parameters.
objective_type: str = 'Classification'
model_type: str = 'Logistic'
# Name handed to LossFunctionFactory.create_loss_function as `loss_type`.
loss_type_input: str = 'binary_cross_entropy'

# Loss-function settings: false-positive weight, false-negative weight, threshold.
weight_fp: int = 1
weight_fn: int = 1
threshold: float = 0.5
# File the serialized loss function is written to and read back from.
serialized_loss_name: str = 'binary_cross_entropy_loss.json'

In [2]:
# Adjust the Python path to include the 'src' directory
import sys
import os
import ast
import warnings
# Install a blanket "ignore" filter so no warning category is reported
warnings.filterwarnings("ignore")
# Resolve the sibling 'src' directory against the current working directory
src_path = os.path.abspath('../src')
# Register it for imports; the membership check keeps re-runs of this cell
# from appending the same entry again
if src_path not in sys.path:
    sys.path.append(src_path)
# Import custom modules
from models.knn_models import KNNClassifierModel
from optimizers.grid_search import GridSearchOptimizer
from loss_functions.factory import LossFunctionFactory
from models.bayesian_models import GaussianNBModel
from optimizers.random_search import RandomSearchOptimizer
from models.ensemble_models import RandomForestModel
from models.linear_models import LogisticRegression

from models.ensemble_models import RandomForestModel, GradientBoostingModel
from models.bayesian_models import GaussianNBModel, BernoulliNBModel
from models.knn_models import KNNClassifierModel
from models.svm_models import SVCModel

# ---------- Regression Models --------------
from models.bayesian_models import BayesianRidgeModel
from models.linear_models import LogisticModel, RidgeModel, LassoModel
from models.knn_models import KNNRegressorModel
from models.svm_models import SVRModel

# ---------- Unsupervised Models -----------
from models.clustering_models import (
    KMeansModel,
    DBSCANModel,
    AgglomerativeClusteringModel,
    SpectralClusteringModel
)


# Import additional libraries
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import numpy as np
import pickle
import pandas as pd
import json

%load_ext autoreload
%autoreload 2


In [3]:
# Build the loss function selected by `loss_type_input`, forwarding the
# false-positive weight, false-negative weight and threshold exactly as set
# in the parameters cell (no values are hard-coded here).
loss_fn = LossFunctionFactory.create_loss_function(
    loss_type=loss_type_input,
    weight_fp=weight_fp,
    weight_fn=weight_fn,
    threshold=threshold,
)

# Echo the parameters stored on the created loss object as indented JSON
print("Custom Loss Function Parameters:")
print(json.dumps(loss_fn.parameters, indent=4))

Custom Loss Function Parameters:
{
    "weight_fp": 1,
    "weight_fn": 1,
    "threshold": 0.5
}


In [4]:
# Read the pickled dataset object from `data_path`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only point data_path at files you trust.
with open(data_path, mode='rb') as f:
    data = pickle.load(f)

# The loaded object exposes the features and labels as attributes
X, y = data.data, data.target

# Hold out 20% of the samples for validation, stratified on y, with a fixed seed
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

print(f"Training samples: {X_train.shape[0]}")
print(f"Validation samples: {X_val.shape[0]}")

Training samples: 455
Validation samples: 114


In [5]:
# src/experiment_controller.py

import numpy as np


def _grid_search_spec(model_cls, param_grid, scoring, cv=5, forward_loss=True):
    """
    Describe one GridSearchOptimizer experiment.

    forward_loss=False makes the optimizer receive loss_function=None even when
    the caller supplied a loss function (the model wrapper still receives it).
    """
    return {
        'model_cls': model_cls,
        'optimizer_cls': GridSearchOptimizer,
        'optimizer_kwargs': {'param_grid': param_grid, 'cv': cv, 'scoring': scoring},
        'forward_loss': forward_loss,
    }


def _random_search_spec(model_cls, param_distributions, scoring, n_iter, random_state=42):
    """Describe one RandomSearchOptimizer experiment."""
    return {
        'model_cls': model_cls,
        'optimizer_cls': RandomSearchOptimizer,
        'optimizer_kwargs': {
            'param_distributions': param_distributions,
            'n_iter': n_iter,
            'scoring': scoring,
            'random_state': random_state,
        },
        'forward_loss': True,
    }


def _experiment_catalog():
    """
    Build the table of supported experiments.

    Returns a dict mapping objective_type to a tuple
    (label used in error messages, whether y is passed to the optimizer,
    {model_type: spec}). The table is built inside a function so the model and
    optimizer classes are looked up when an experiment runs, not when this
    cell is defined.
    """
    knn_choices = {
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree'],
    }

    # NOTE(review): the original code passed loss_function=None to the
    # optimizer for Logistic only (every other model forwards loss_fn).
    # That behavior is preserved here — confirm whether it is intentional.
    logistic = _grid_search_spec(
        LogisticModel,
        [
            {
                'penalty': ['l1', 'l2'],
                'C': [0.01, 0.1, 1.0, 10],
                'max_iter': [2500, 5000, 10000]
            },
            {
                'penalty': [None],
                'max_iter': [1000, 10000]
            }
        ],
        'accuracy',
        forward_loss=False,
    )

    classification = {
        'RandomForest': _grid_search_spec(RandomForestModel, {
            'n_estimators': [100, 250, 500],
            'max_depth': [None, 10, 25, 50],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }, 'accuracy'),
        'GaussianNB': _random_search_spec(GaussianNBModel, {
            'var_smoothing': np.logspace(0, -9, num=100)
        }, 'accuracy', n_iter=20),
        'BernoulliNB': _grid_search_spec(BernoulliNBModel, {
            'alpha': [0.1, 1.0, 10.0],
            'binarize': [0.0, 0.5, 1.0],
            'fit_prior': [True, False]
        }, 'accuracy'),
        'GradientBoosting': _grid_search_spec(GradientBoostingModel, {
            'n_estimators': [50, 100],
            'learning_rate': [0.01, 0.1],
            'max_depth': [3, 5]
        }, 'accuracy'),
        'KNNClassifier': _grid_search_spec(KNNClassifierModel, {
            'n_neighbors': [3, 5, 7],
            **knn_choices
        }, 'accuracy'),
        'SVC': _grid_search_spec(SVCModel, {
            'C': [0.1, 1.0, 10.0],
            'kernel': ['linear', 'rbf'],
            'gamma': ['scale', 'auto']
        }, 'accuracy'),
        # Logistic is scored with accuracy and is the model_type the notebook's
        # parameters cell pairs with 'Classification', so it is reachable here
        # as well as under 'Regression' (where it was originally registered).
        'Logistic': logistic,
    }

    regression = {
        'Logistic': logistic,
        'BayesianRidge': _grid_search_spec(BayesianRidgeModel, {
            'max_iter': [100, 300],
            'alpha_1': [1e-6, 1e-5],
            'alpha_2': [1e-6, 1e-5],
        }, 'r2'),
        'Ridge': _grid_search_spec(RidgeModel, {
            'alpha': [0.1, 1.0, 10.0]
        }, 'r2'),
        'Lasso': _grid_search_spec(LassoModel, {
            'alpha': [0.01, 0.1, 1.0, 10.0]
        }, 'r2'),
        'KNNRegressor': _random_search_spec(KNNRegressorModel, {
            'n_neighbors': [2, 3, 5, 7, 10],
            **knn_choices
        }, 'r2', n_iter=10),
        'SVR': _grid_search_spec(SVRModel, {
            'C': [0.1, 1.0],
            'kernel': ['linear', 'rbf'],
            'gamma': ['scale', 'auto']
        }, 'r2'),
    }

    unsupervised = {
        'KMeans': _grid_search_spec(KMeansModel, {
            'n_clusters': [2, 3, 5, 8],
            'init': ['k-means++', 'random'],
            'max_iter': [100, 300],
        }, None, cv=None),
        'DBSCAN': _random_search_spec(DBSCANModel, {
            'eps': [0.1, 0.2, 0.5, 1.0],
            'min_samples': [3, 5, 10]
        }, None, n_iter=5),
        # NOTE(review): scikit-learn renamed AgglomerativeClustering's
        # `affinity` to `metric` (deprecated in 1.2, removed in 1.4) and
        # 'ward' linkage only accepts euclidean distances — confirm how
        # AgglomerativeClusteringModel / GridSearchOptimizer handle this grid.
        'AgglomerativeClustering': _grid_search_spec(AgglomerativeClusteringModel, {
            'n_clusters': [2, 3, 5, 8],
            'affinity': ['euclidean', 'manhattan'],
            'linkage': ['ward', 'complete', 'average']
        }, None, cv=None),
        'SpectralClustering': _grid_search_spec(SpectralClusteringModel, {
            'n_clusters': [2, 3, 5, 8],
            'n_init': [5, 10],
            'gamma': [0.1, 1.0, 10.0]
        }, None, cv=None),
    }

    return {
        'Classification': ('classification', True, classification),
        'Regression': ('regression', True, regression),
        'Unsupervised': ('unsupervised', False, unsupervised),
    }


def run_model_experiment(objective_type,
                         model_type,
                         X_train,
                         y_train,
                         X_val,
                         y_val,
                         loss_fn=None):
    """
    Build a model wrapper, run its hyperparameter search, and score the result.

    Parameters
    ----------
    objective_type : 'Classification', 'Regression' or 'Unsupervised'.
    model_type : name of a model registered under that objective
        (e.g. 'RandomForest', 'Ridge', 'KMeans').
    X_train, y_train : data passed to the optimizer's ``optimize`` call.
    X_val, y_val : data passed to the best estimator's ``score`` call.
        For 'Unsupervised', None is passed in place of y_train and y_val.
    loss_fn : optional loss function handed to the model wrapper and, except
        for 'Logistic', to the optimizer.

    Returns
    -------
    dict with keys 'model' (the estimator returned by the optimizer),
    'best_params', 'best_score' and 'validation_score'.

    Raises
    ------
    ValueError
        If objective_type is not supported, or model_type is not registered
        under the given objective_type.
    """
    catalog = _experiment_catalog()

    if objective_type not in catalog:
        raise ValueError(f"Unknown combination of objective_type={objective_type} "
                         f"and model_type={model_type}")

    family_label, uses_targets, family_specs = catalog[objective_type]
    if model_type not in family_specs:
        raise ValueError(f"Unknown {family_label} model_type={model_type}")
    spec = family_specs[model_type]

    # The model wrapper always receives the caller's loss function.
    model = spec['model_cls'](loss_function=loss_fn)
    optimizer = spec['optimizer_cls'](
        model_instance=model,
        loss_function=loss_fn if spec['forward_loss'] else None,
        **spec['optimizer_kwargs']
    )

    # Unsupervised experiments are fitted and scored without targets.
    fit_targets = y_train if uses_targets else None
    eval_targets = y_val if uses_targets else None

    # optimize() yields the fitted best estimator as its third value.
    best_params, best_score, best_estimator = optimizer.optimize(X_train, fit_targets)
    val_score = best_estimator.score(X_val, eval_targets)

    return {
        'model': best_estimator,
        'best_params': best_params,
        'best_score': best_score,
        'validation_score': val_score
    }


In [6]:
print("--- Random Forest Classification ---")
# Grid-searched random forest; no loss_fn is supplied, so it defaults to None
rf_results = run_model_experiment(
    objective_type='Classification',
    model_type='RandomForest',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(rf_results)

--- Random Forest Classification ---
Before GridSearchCV: {'model_cls': <class 'sklearn.ensemble._forest.RandomForestClassifier'>, 'hyperparameters': {'n_estimators': 100, 'max_depth': None}, 'loss_function': None}
After GridSearchCV: {'model_cls': <class 'sklearn.ensemble._forest.RandomForestClassifier'>, 'hyperparameters': {'n_estimators': 100, 'max_depth': None}, 'loss_function': None}
{'model': RandomForestModel(hyperparameters={'max_depth': None, 'n_estimators': 100}), 'best_params': {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 500}, 'best_score': 0.9670329670329672, 'validation_score': 0.9473684210526315}


In [7]:
print("\n--- Gaussian Naive Bayes Classification ---")
# Random-searched Gaussian naive Bayes on the same train/validation split
gnb_results = run_model_experiment(
    objective_type='Classification',
    model_type='GaussianNB',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(gnb_results)


--- Gaussian Naive Bayes Classification ---
{'model': GaussianNB(var_smoothing=5.336699231206302e-08), 'best_params': {'var_smoothing': 5.336699231206302e-08}, 'best_score': 0.9362637362637363, 'validation_score': 0.9385964912280702}


In [8]:

print("\n--- Bernoulli Naive Bayes Classification ---")
# Grid-searched Bernoulli naive Bayes on the same train/validation split
bnb_results = run_model_experiment(
    objective_type='Classification',
    model_type='BernoulliNB',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(bnb_results)


--- Bernoulli Naive Bayes Classification ---
Before GridSearchCV: {'model_cls': <class 'sklearn.naive_bayes.BernoulliNB'>, 'hyperparameters': {'alpha': 1.0, 'binarize': 0.0, 'fit_prior': True}, 'loss_function': None}
After GridSearchCV: {'model_cls': <class 'sklearn.naive_bayes.BernoulliNB'>, 'hyperparameters': {'alpha': 1.0, 'binarize': 0.0, 'fit_prior': True}, 'loss_function': None}
{'model': BernoulliNBModel(hyperparameters={'alpha': 1.0, 'binarize': 0.0,
                                  'fit_prior': True}), 'best_params': {'alpha': 0.1, 'binarize': 0.0, 'fit_prior': True}, 'best_score': 0.6263736263736264, 'validation_score': 0.631578947368421}


In [9]:
print("\n--- Gradient Boosting Classification ---")
# Grid-searched gradient boosting on the same train/validation split
gb_results = run_model_experiment(
    objective_type='Classification',
    model_type='GradientBoosting',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(gb_results)


--- Gradient Boosting Classification ---
Before GridSearchCV: {'model_cls': <class 'sklearn.ensemble._gb.GradientBoostingClassifier'>, 'hyperparameters': {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 3}, 'loss_function': None}


KeyboardInterrupt: 

In [None]:
print("\n--- KNN Classifier ---")
# Grid-searched k-nearest-neighbours classifier
knn_clf_results = run_model_experiment(
    objective_type='Classification',
    model_type='KNNClassifier',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(knn_clf_results)


--- KNN Classifier ---
{'model': KNNClassifierModel(hyperparameters={'algorithm': 'auto', 'n_neighbors': 5,
                                    'weights': 'uniform'}), 'best_params': {'algorithm': 'auto', 'n_neighbors': 3, 'weights': 'uniform'}, 'best_score': 0.9362637362637362, 'validation_score': 0.9122807017543859}


In [None]:
print("\n--- SVC Classification ---")
# Grid-searched support vector classifier
svc_results = run_model_experiment(
    objective_type='Classification',
    model_type='SVC',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(svc_results)


--- SVC Classification ---
{'model': SVCModel(hyperparameters={'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}), 'best_params': {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}, 'best_score': 0.9142857142857143, 'validation_score': 0.9298245614035088}


In [None]:
# Regression-family experiments start here; 'Logistic' is registered under
# the 'Regression' objective in run_model_experiment, where it is scored
# with accuracy.
print("\n--- Logistic Regression ---")
logistic_results = run_model_experiment(
    objective_type='Regression',
    model_type='Logistic',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(logistic_results)


--- Logistic Regression ---
{'model': LogisticModel(hyperparameters={'C': 1.0, 'max_iter': 200, 'penalty': 'l2'}), 'best_params': {'C': 0.01, 'max_iter': 2500, 'penalty': 'l1'}, 'best_score': 0.9494505494505494, 'validation_score': 0.9649122807017544}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [None]:
print("\n--- Bayesian Ridge Regression ---")
# Grid-searched Bayesian ridge, scored with r2
br_results = run_model_experiment(
    objective_type='Regression',
    model_type='BayesianRidge',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(br_results)


--- Bayesian Ridge Regression ---
{'model': BayesianRidgeModel(hyperparameters={'alpha_1': 1e-06, 'alpha_2': 1e-06,
                                    'lambda_1': 1e-06, 'lambda_2': 1e-06,
                                    'max_iter': 300, 'tol': 0.001}), 'best_params': {'alpha_1': 1e-06, 'alpha_2': 1e-06, 'max_iter': 100}, 'best_score': 0.728009614661208, 'validation_score': 0.7245630058416277}


In [None]:
print("\n--- Ridge Regression ---")
# Grid-searched ridge regression, scored with r2
ridge_results = run_model_experiment(
    objective_type='Regression',
    model_type='Ridge',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(ridge_results)


--- Ridge Regression ---
{'model': RidgeModel(hyperparameters={'alpha': 1.0}), 'best_params': {'alpha': 0.1}, 'best_score': 0.729443107467395, 'validation_score': 0.720884424460905}


In [None]:
print("\n--- Lasso Regression ---")
# Grid-searched lasso regression, scored with r2
lasso_results = run_model_experiment(
    objective_type='Regression',
    model_type='Lasso',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(lasso_results)



--- Lasso Regression ---
{'model': LassoModel(hyperparameters={'alpha': 1.0}), 'best_params': {'alpha': 0.01}, 'best_score': 0.5397134395825278, 'validation_score': 0.5152328225099112}


In [None]:
print("\n--- KNN Regressor ---")
# Random-searched k-nearest-neighbours regressor, scored with r2
knn_reg_results = run_model_experiment(
    objective_type='Regression',
    model_type='KNNRegressor',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(knn_reg_results)


--- KNN Regressor ---
{'model': KNeighborsRegressor(n_neighbors=10), 'best_params': {'weights': 'uniform', 'n_neighbors': 10, 'algorithm': 'auto'}, 'best_score': 0.7720441855064196, 'validation_score': 0.7587301587301587}


In [None]:
print("\n--- SVR Regression ---")
# Grid-searched support vector regressor, scored with r2
svr_results = run_model_experiment(
    objective_type='Regression',
    model_type='SVR',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(svr_results)



--- SVR Regression ---
{'model': SVRModel(hyperparameters={'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}), 'best_params': {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}, 'best_score': 0.72894485326802, 'validation_score': 0.7569349754978032}


In [None]:
# Unsupervised experiments: y_train / y_val are still supplied to satisfy the
# signature, but run_model_experiment passes None as the target to both the
# optimizer and the scorer for this objective.
print("\n--- KMeans Clustering ---")
kmeans_results = run_model_experiment(
    objective_type='Unsupervised',
    model_type='KMeans',
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
)
print(kmeans_results)


--- KMeans Clustering ---


{'model': KMeansModel(hyperparameters={'init': 'k-means++', 'max_iter': 300,
                             'n_clusters': 8, 'n_init': 10,
                             'random_state': 42}), 'best_params': {'init': 'k-means++', 'max_iter': 100, 'n_clusters': 2}, 'best_score': -1621368.7728735632, 'validation_score': -4596112.728467412}


In [None]:
# Ask the loss object for its serialized text form and show it
serialized_loss = loss_fn.serialize()
print("Serialized Loss Function:")
print(serialized_loss)

# Persist that text to `serialized_loss_name` so the next cell can read it back
with open(serialized_loss_name, mode='w') as f:
    f.write(serialized_loss)


Serialized Loss Function:
{"name": "BinaryCrossEntropyLoss", "parameters": {"weight_fp": 1, "weight_fn": 1, "threshold": 0.5}}


In [None]:
# Read back the text written by the serialization cell
with open(serialized_loss_name, mode='r') as f:
    loaded_serialized_loss = f.read()

# Rebuild a loss function object from that text via the factory
deserialized_loss_fn = LossFunctionFactory.deserialize_loss_function(
    loaded_serialized_loss
)

# Print the rebuilt object's parameters for comparison with the originals
print("Deserialized Loss Function Parameters:")
print(json.dumps(deserialized_loss_fn.parameters, indent=4))


Deserialized Loss Function Parameters:
{
    "weight_fp": 1,
    "weight_fn": 1,
    "threshold": 0.5
}
