# Hyperparameter Tuning with Katib SDK

Este notebook demonstra como usar o Katib SDK para realizar a otimização de hiperparâmetros do modelo XGBoost.

In [None]:
from kubeflow.katib import KatibClient
from kubernetes import client
import yaml
import datetime
import time
import os

# Resolve the target namespace from the KUBEFLOW_WORKSPACE environment
# variable, falling back to a fixed default when it is not set.
NAMESPACE = os.environ.get('KUBEFLOW_WORKSPACE', 'workspace-francisco')
print(f"Usando namespace: {NAMESPACE}")

# Instantiate the Katib client used by the cells below.
katib_client = KatibClient()

## Definir Experimento

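Vamos definir a especificação do experimento (objetivo, algoritmo, espaço de busca e template dos trials) no formato da API do Katib; a criação no cluster é feita na seção seguinte.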

In [None]:
def create_experiment_spec(namespace=None):
    """Build a uniquely named Katib Experiment manifest for XGBoost tuning.

    The experiment maximizes ``Validation-Accuracy`` (goal 0.90) with the
    ``bayesianoptimization`` algorithm over five XGBoost hyperparameters and
    runs each trial as a ``batch/v1`` Job executing ``/opt/training/train.py``.

    Args:
        namespace: Namespace written into the manifest metadata. When
            ``None`` (the default) the notebook-level ``NAMESPACE`` is used.

    Returns:
        A ``(experiment_name, experiment_spec)`` tuple, where
        ``experiment_name`` is ``xgboost-hpo-<YYYYmmdd-HHMMSS>`` and
        ``experiment_spec`` is the manifest as a plain ``dict``.
    """
    # Only touch the global when the caller did not provide an override.
    target_namespace = NAMESPACE if namespace is None else namespace
    experiment_name = f"xgboost-hpo-{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"

    # Single source of truth for the search space. Each row is:
    # (parameter name / CLI flag, Katib type, min, max,
    #  trial placeholder name, description)
    # Deriving the three manifest sections from one table keeps the
    # parameter names, references and command-line flags in sync.
    search_space = [
        ("learning_rate", "double", "0.01", "0.3",
         "learningRate", "Learning rate for XGBoost"),
        ("max_depth", "int", "3", "10",
         "maxDepth", "Maximum tree depth"),
        ("n_estimators", "int", "100", "1000",
         "numEstimators", "Number of trees"),
        ("min_child_weight", "int", "1", "7",
         "minChildWeight", "Minimum child weight"),
        ("subsample", "double", "0.6", "1.0",
         "subsample", "Subsample ratio"),
    ]

    parameters = [
        {
            "name": name,
            "parameterType": param_type,
            "feasibleSpace": {"min": low, "max": high},
        }
        for name, param_type, low, high, _, _ in search_space
    ]
    trial_parameters = [
        {"name": placeholder, "description": description, "reference": name}
        for name, _, _, _, placeholder, description in search_space
    ]
    # Katib substitutes ${trialParameters.<name>} with the suggested value.
    command = ["python", "/opt/training/train.py"] + [
        f"--{name}=${{trialParameters.{placeholder}}}"
        for name, _, _, _, placeholder, _ in search_space
    ]

    experiment_spec = {
        "apiVersion": "kubeflow.org/v1beta1",
        "kind": "Experiment",
        "metadata": {
            "name": experiment_name,
            "namespace": target_namespace,
        },
        "spec": {
            "objective": {
                "type": "maximize",
                "goal": 0.90,
                "objectiveMetricName": "Validation-Accuracy",
                "additionalMetricNames": [
                    "Training-Accuracy",
                    "Training-AUC",
                    "Validation-AUC",
                ],
            },
            "algorithm": {
                "algorithmName": "bayesianoptimization",
                "algorithmSettings": [
                    {"name": "random_state", "value": "42"},
                ],
            },
            "parameters": parameters,
            "trialTemplate": {
                "primaryContainerName": "training",
                "trialParameters": trial_parameters,
                "trialSpec": {
                    "apiVersion": "batch/v1",
                    "kind": "Job",
                    "spec": {
                        "template": {
                            # The Katib docs require disabling Istio sidecar
                            # injection for trial pods when running on
                            # Kubeflow with Istio. The annotation has no
                            # effect on clusters without Istio.
                            "metadata": {
                                "annotations": {
                                    "sidecar.istio.io/inject": "false",
                                },
                            },
                            "spec": {
                                "containers": [
                                    {
                                        "name": "training",
                                        "image": "xgboost-training:latest",
                                        "command": command,
                                    }
                                ],
                                "restartPolicy": "Never",
                            },
                        }
                    },
                },
            },
            "maxTrialCount": 25,
            "maxFailedTrialCount": 3,
            "parallelTrialCount": 3,
        },
    }

    return experiment_name, experiment_spec

## Criar e Executar o Experimento

In [None]:
# Build the experiment manifest and submit it to the cluster.
experiment_name, experiment_spec = create_experiment_spec()
# Pass the namespace explicitly, taken from the manifest itself, so the API
# request targets the same namespace the manifest declares instead of
# whatever default namespace the client resolves on its own.
katib_client.create_experiment(
    experiment_spec,
    namespace=experiment_spec["metadata"]["namespace"],
)
print(f"Experimento criado: {experiment_name}")

## Monitorar o Progresso

In [None]:
def monitor_experiment(experiment_name, timeout_minutes=60, poll_interval_seconds=30):
    """Poll a Katib experiment until it finishes or the timeout elapses.

    Args:
        experiment_name: Name of the experiment to look up in ``NAMESPACE``.
        timeout_minutes: Maximum time to keep polling, in minutes.
        poll_interval_seconds: Pause between two consecutive polls, in seconds.

    Returns:
        The experiment object from the last poll. Its last condition is
        ``Succeeded`` or ``Failed`` when the experiment finished; on timeout
        it is whatever state the experiment was in at that moment.
    """
    print(f"Monitorando experimento: {experiment_name}")
    # Monotonic clock so wall-clock adjustments cannot distort the timeout.
    deadline = time.monotonic() + timeout_minutes * 60

    while True:
        experiment = katib_client.get_experiment(experiment_name, NAMESPACE)
        status = experiment.status
        # Right after creation the status (or its conditions) may not be
        # populated yet; treat that as "no conditions" instead of crashing.
        conditions = (status.conditions if status is not None else None) or []
        current_state = conditions[-1].type if conditions else 'Running'

        print(f"\nStatus atual: {current_state}")

        if current_state in ('Succeeded', 'Failed'):
            print("Experimento finalizado!")
            return experiment

        if time.monotonic() > deadline:
            print("Timeout atingido!")
            return experiment

        # Counters can be unset before the first trial is scheduled.
        trials_succeeded = getattr(status, 'trials_succeeded', None) or 0
        trials_total = getattr(status, 'trials', None) or 0
        print(f"Trials completados: {trials_succeeded}/{trials_total}")
        time.sleep(poll_interval_seconds)

# Block until the experiment finishes or the monitor gives up.
experiment = monitor_experiment(experiment_name)

# Report the outcome: success is decided by the type of the last condition.
final_conditions = experiment.status.conditions
experiment_succeeded = bool(final_conditions) and final_conditions[-1].type == "Succeeded"

if not experiment_succeeded:
    print("\nExperimento não concluído com sucesso")
else:
    best_trial = experiment.status.current_optimal_trial

    # Hyperparameter values of the best trial.
    print("\nMelhores Hiperparâmetros:")
    for param in best_trial.parameter_assignments:
        print(f"{param.name}: {param.value}")

    # Metrics observed for the best trial.
    print("\nMétricas Obtidas:")
    for metric in best_trial.observation.metrics:
        print(f"{metric.name}: {metric.value}")

## Salvar os Melhores Hiperparâmetros

In [None]:
import json

def save_best_parameters(experiment, output_dir='../models/hyperparameters'):
    """Write the best hyperparameters of a succeeded experiment to JSON.

    The saved dictionary starts from fixed XGBoost settings (``objective``,
    ``eval_metric``, ``random_state``) and adds every parameter assignment of
    the optimal trial. ``max_depth``, ``n_estimators`` and
    ``min_child_weight`` are stored as ``int``; every other assignment is
    stored as ``float``.

    Args:
        experiment: Experiment object exposing ``status.conditions`` and
            ``status.current_optimal_trial``.
        output_dir: Directory that receives ``best_parameters.json``; it is
            created if missing.

    Returns:
        The saved parameter dictionary, or ``None`` when the experiment has
        no status, did not end in ``Succeeded``, or has no optimal trial.
    """
    # Read defensively: status and its fields may be unset on an experiment
    # that never progressed.
    status = getattr(experiment, 'status', None)
    conditions = getattr(status, 'conditions', None)
    best_trial = getattr(status, 'current_optimal_trial', None)

    if not (conditions and conditions[-1].type == "Succeeded" and best_trial):
        print("Não há resultados ótimos disponíveis")
        return None

    best_params = {
        'objective': 'binary:logistic',
        'eval_metric': ['error', 'auc', 'logloss'],
        'random_state': 42
    }

    # Assignment values are parsed via float() first so integer parameters
    # rendered like "6.0" still convert cleanly to int.
    integer_params = {'max_depth', 'n_estimators', 'min_child_weight'}
    for param in best_trial.parameter_assignments or []:
        numeric_value = float(param.value)
        if param.name in integer_params:
            best_params[param.name] = int(numeric_value)
        else:
            best_params[param.name] = numeric_value

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, 'best_parameters.json')
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(best_params, f, indent=2)

    print(f"Melhores hiperparâmetros salvos em '{output_path}'")
    return best_params

# Persist the best hyperparameters; best_params is None when the experiment
# has no optimal result to save.
best_params = save_best_parameters(experiment)