In [1]:
import os

# Directory that holds the project's `src` package. The notebook's imports
# (e.g. `behavior.data...`) and its relative data paths (`../data/...`) are
# resolved from here. Set the BEHAVIOR_DETECTION_SRC environment variable to
# run the notebook on another machine; the default keeps the original location.
path = os.environ.get(
    "BEHAVIOR_DETECTION_SRC",
    "/Users/patricia/Documents/code/python-code/behavior-detection/src",
)
os.chdir(path)  # Make `src` the current working directory
print(os.getcwd())  # Confirm the working directory that is now in effect

/Users/patricia/Documents/code/python-code/behavior-detection/src


# Load data

In [2]:
from behavior.data.behavior_data_loader import BehaviorDataLoader

# Path is relative to the working directory selected in the first cell
data_path = '../data/new_logs_labels.csv'

# The file is read with ';' as the field delimiter
data = BehaviorDataLoader.load_data(data_path, delimiter=';')
print(data.shape)
# Last expression of the cell: displays the first 5 rows
data.head(5)

(5525, 372)


Unnamed: 0,id_log,aluno,grupo,num_dia,num_log,log_type,ultimo_passo_correto,verificado_com_mouse,verificado_com_teclado,idle_time_acumulado,...,comportamento_off_task,comportamento_on_system,comportamento_indefinido,ultimo_comportamento,ultimo_comportamento_on_task,ultimo_comportamento_on_task_conversation,ultimo_comportamento_on_task_out,ultimo_comportamento_off_task,ultimo_comportamento_on_system,ultimo_comportamento_indefinido
0,8224,1,2,1,1,step_verification,0,0,1,0,...,0,0,0,?,0,0,0,0,0,1
1,527786,1,2,1,2,user_idle,1,0,0,2,...,0,0,0,ON TASK,1,0,0,0,0,0
2,527787,1,2,1,3,user_idle,0,0,0,4,...,0,0,0,ON TASK,1,0,0,0,0,0
3,527788,1,2,1,4,user_idle,0,0,0,6,...,0,0,0,ON TASK,1,0,0,0,0,0
4,527789,1,2,1,5,user_idle,0,0,0,8,...,0,0,0,ON TASK,1,0,0,0,0,0


In [3]:
from core.preprocessors.data_cleaner import DataCleaner

# Class distribution of the target column before filtering
print("Valores da coluna 'comportamento' antes da remoção:", data['comportamento'].value_counts())

# Remove instances where 'comportamento' is '?'
# ('?' presumably marks an undefined label — confirm against the dataset docs)
data = DataCleaner.remove_instances_with_value(data, 'comportamento', '?')

# Class distribution after filtering, to confirm the '?' rows are gone
print("\nValores da coluna 'comportamento' depois da remoção:", data['comportamento'].value_counts())

Valores da coluna 'comportamento' antes da remoção: comportamento
ON TASK                 3159
ON SYSTEM                907
OFF TASK                 629
ON TASK CONVERSATION     414
ON TASK OUT              380
?                         36
Name: count, dtype: int64

Valores da coluna 'comportamento' depois da remoção: comportamento
ON TASK                 3159
ON SYSTEM                907
OFF TASK                 629
ON TASK CONVERSATION     414
ON TASK OUT              380
Name: count, dtype: int64


In [4]:
# Preview the first rows after dropping the instances whose 'comportamento' is '?'
data.head(5)

Unnamed: 0,id_log,aluno,grupo,num_dia,num_log,log_type,ultimo_passo_correto,verificado_com_mouse,verificado_com_teclado,idle_time_acumulado,...,comportamento_off_task,comportamento_on_system,comportamento_indefinido,ultimo_comportamento,ultimo_comportamento_on_task,ultimo_comportamento_on_task_conversation,ultimo_comportamento_on_task_out,ultimo_comportamento_off_task,ultimo_comportamento_on_system,ultimo_comportamento_indefinido
0,8224,1,2,1,1,step_verification,0,0,1,0,...,0,0,0,?,0,0,0,0,0,1
1,527786,1,2,1,2,user_idle,1,0,0,2,...,0,0,0,ON TASK,1,0,0,0,0,0
2,527787,1,2,1,3,user_idle,0,0,0,4,...,0,0,0,ON TASK,1,0,0,0,0,0
3,527788,1,2,1,4,user_idle,0,0,0,6,...,0,0,0,ON TASK,1,0,0,0,0,0
4,527789,1,2,1,5,user_idle,0,0,0,8,...,0,0,0,ON TASK,1,0,0,0,0,0


In [5]:
from sklearn.model_selection import train_test_split

# Keep only a stratified 20% sample of the rows (the other 80% is discarded)
# so the rest of the notebook can be exercised quickly. For testing purposes only.
print("Tamanho do dataframe antes:", data.shape)
data = train_test_split(
    data,
    test_size=0.8,
    stratify=data['comportamento'],
    random_state=42,
)[0].reset_index(drop=True)
print("Tamanho do dataframe após:", data.shape)

Tamanho do dataframe antes: (5489, 372)
Tamanho do dataframe após: (1097, 372)


# Pre-processing

## Remove unnecessary columns

In [6]:
# Behaviors are to be classified only from the students' interactions with the
# system, so identifier, emotion, personality and behavior-derived columns are
# dropped before modelling.

# Log / session identifiers
columns_to_remove_ids = ['id_log', 'grupo', 'num_dia', 'num_log']

# Current and previous affective-state columns
columns_to_remove_emotions = [
    'estado_afetivo',
    'estado_engajamento_concentrado',
    'estado_confusao',
    'estado_frustracao',
    'estado_tedio',
    'estado_indefinido',
    'ultimo_estado_afetivo',
    'ultimo_engajamento_concentrado',
    'ultimo_confusao',
    'ultimo_frustracao',
    'ultimo_tedio',
    'ultimo_estado_indefinido',
]

# Personality-trait columns (factor scores and categories)
columns_to_remove_personality = [
    'traco_amabilidade_fator',
    'traco_extrovercao_fator',
    'traco_conscienciosidade_fator',
    'traco_abertura_fator',
    'traco_neuroticismo_fator',
    'traco_amabilidade_cat',
    'traco_extrovercao_cat',
    'traco_conscienciosidade_cat',
    'traco_abertura_cat',
    'traco_neuroticismo_cat',
]

# Behavior indicator columns and the previous-behavior columns
columns_to_remove_behaviors = [
    'comportamento_on_task',
    'comportamento_on_task_conversation',
    'comportamento_on_task_out',
    'comportamento_off_task',
    'comportamento_on_system',
    'comportamento_indefinido',
    'ultimo_comportamento',
    'ultimo_comportamento_on_task',
    'ultimo_comportamento_on_task_conversation',
    'ultimo_comportamento_on_task_out',
    'ultimo_comportamento_off_task',
    'ultimo_comportamento_on_system',
    'ultimo_comportamento_indefinido',
]

# Single flat list, in the order: ids, emotions, personality, behaviors
columns_to_remove = [
    *columns_to_remove_ids,
    *columns_to_remove_emotions,
    *columns_to_remove_personality,
    *columns_to_remove_behaviors,
]

cleaned_data = DataCleaner.remove_columns(data, columns_to_remove)


In [7]:
# Preview the first rows of the frame after the column removal
cleaned_data.head(5)

Unnamed: 0,aluno,log_type,ultimo_passo_correto,verificado_com_mouse,verificado_com_teclado,idle_time_acumulado,num_click_acumulado,num_click_passo,num_click_eq,type_step_verification,...,misc_OI_Dv_Plus_Sb_total,misc_EqSec_Distrib_MtTerm_total,misc_OI_Mt_Minus_Mt_Plus_total,misc_OI_Mt_Minus_Mt_Minus_total,misc_OI_Dv_Plus_Ad_total,misc_EqPrim_Mt_Inc_total,misc_EqPrim_Dv_Inc_total,misc_OI_Dv_Minus_Dv_Minus_total,misc_EqSec_OpFrac_MMC_MtNumerador_total,comportamento
0,18,user_idle,1,0,0,2,0,0,2,0,...,0,0,0,0,0,0,0,0,0,ON TASK OUT
1,1,user_idle,0,0,0,2,0,2,2,0,...,0,0,0,0,0,0,0,0,0,ON TASK
2,27,mouse_stop,0,0,0,0,0,20,23,0,...,0,0,0,0,0,0,0,0,0,ON TASK
3,19,user_idle,0,0,0,14,0,1,1,0,...,0,0,0,0,0,0,0,0,0,ON TASK CONVERSATION
4,4,user_idle,0,0,0,6,0,0,1,0,...,0,0,0,0,0,0,1,0,0,ON TASK OUT


In [8]:
# Fill missing values in `cleaned_data` with the string 'missing'.
# NOTE(review): this is applied to every column, numeric ones included; a
# numeric column that contains NaN would end up holding a string — confirm
# that the downstream encoder expects this.

cleaned_data = cleaned_data.fillna('missing')

## Split data by student level into training and test datasets

In [9]:
from core.preprocessors.data_splitter import DataSplitter

# Split into train/test using the 'aluno' (student) column, with a 20% test share.
# NOTE(review): presumably all rows of a given student land on the same side
# of the split — confirm in DataSplitter.
# NOTE(review): no seed is passed here; check whether the split is reproducible.
train_data, test_data = DataSplitter.split_by_student_level(cleaned_data, test_size=0.2, column_name='aluno')

In [10]:
# After the student-level split: report the training frame's shape and columns
print("\n=== Após split por nível do estudante ===")
print(f"Shape de train_data: {train_data.shape}")
print("Colunas em train_data:", train_data.columns.tolist())


=== Após split por nível do estudante ===
Shape de train_data: (902, 333)
Colunas em train_data: ['aluno', 'log_type', 'ultimo_passo_correto', 'verificado_com_mouse', 'verificado_com_teclado', 'idle_time_acumulado', 'num_click_acumulado', 'num_click_passo', 'num_click_eq', 'type_step_verification', 'type_click', 'type_typing', 'type_mouse_stop', 'type_mouse_fast', 'type_hint_request', 'type_user_idle', 'type_window_lost_focus', 'type_window_gained_focus', 'type_window_resize', 'type_left_window', 'type_entered_window', 'type_start_out_of_sinc_server_time', 'type_user_log_out', 'click_resolucao', 'click_planos', 'click_atalhos', 'click_bugs', 'click_gamificacao', 'click_dicas', 'click_erros', 'click_notificacoes', 'click_sistema', 'click_emocoes', 'click_indefinidos', 'pontuacao', 'cat_muito_facil', 'cat_facil_1', 'cat_facil_2', 'cat_facil_3', 'cat_facil_4', 'cat_medio_1', 'cat_medio_2', 'cat_medio_3', 'cat_dificil_1', 'cat_dificil_2', 'cat_dificil_3', 'cat_dificil_4', 'num_passos_equa

In [11]:
# 'aluno' was only needed to drive the student-level split; drop it from both
# partitions so it is not used as a feature.
train_data, test_data = (
    DataCleaner.remove_columns(subset, ['aluno'])
    for subset in (train_data, test_data)
)

In [12]:
# After removing the 'aluno' column: report the training frame's shape
print("\n1. Após remover 'aluno':")
print(f"Shape de train_data: {train_data.shape}")


1. Após remover 'aluno':
Shape de train_data: (902, 332)


## Split data into Features (X) and Target (y)

In [13]:
from core.preprocessors.data_splitter import DataSplitter

# Training set: separate the features from the 'comportamento' target
X_train, y_train = DataSplitter.split_into_x_y(train_data, 'comportamento')

# Test set: same separation
X_test, y_test = DataSplitter.split_into_x_y(test_data, 'comportamento')

In [14]:
import pandas as pd
import numpy as np

# After the X/y split: confirm X_train is still a DataFrame and show its
# shape and leading column names; otherwise report the type that came back.
print("\n2. Após split X/y:")
if not isinstance(X_train, pd.DataFrame):
    print("X_train não é um DataFrame!")
    print(f"Tipo de X_train: {type(X_train)}")
else:
    print(f"Shape de X_train: {X_train.shape}")
    print("Primeiras colunas de X_train:", list(X_train.columns)[:5])


2. Após split X/y:
Shape de X_train: (902, 331)
Primeiras colunas de X_train: ['log_type', 'ultimo_passo_correto', 'verificado_com_mouse', 'verificado_com_teclado', 'idle_time_acumulado']


In [15]:
# Show the first five target values of each partition (still raw string labels here)
print("Primeiras 5 instâncias de y_train:")
print(y_train[:5])

print("\nPrimeiras 5 instâncias de y_test:")
print(y_test[:5])

Primeiras 5 instâncias de y_train:
0             ON TASK OUT
1                 ON TASK
2                 ON TASK
3    ON TASK CONVERSATION
4             ON TASK OUT
Name: comportamento, dtype: object

Primeiras 5 instâncias de y_test:
8         ON TASK
9     ON TASK OUT
15    ON TASK OUT
16       OFF TASK
24      ON SYSTEM
Name: comportamento, dtype: object


## Encoding variables

### Encoding true labels (y)

In [16]:
import importlib
from core.preprocessors import column_selector, data_encoder
from behavior.data import behavior_data_encoder

# Reload the modules so that edits made to their source files are picked up
# without restarting the kernel (development aid).
importlib.reload(column_selector)
importlib.reload(data_encoder)
# Last expression of the cell: its return value (the module) is displayed as output
importlib.reload(behavior_data_encoder)

<module 'behavior.data.behavior_data_encoder' from '/Users/patricia/Documents/code/python-code/behavior-detection/src/behavior/data/behavior_data_encoder.py'>

In [17]:
# Encoding y_train and y_test
from behavior.data.behavior_data_encoder import BehaviorDataEncoder

# Encode y_train
y_train = BehaviorDataEncoder.encode_y(y_train)

# Encode y_test
# NOTE(review): the two partitions are encoded by separate calls — confirm
# that encode_y uses a fixed label-to-code mapping so both share the same codes.
y_test = BehaviorDataEncoder.encode_y(y_test)



### Encoding features (X)

In [18]:
# Required imports
import pandas as pd
import numpy as np
from behavior.data.behavior_data_encoder import BehaviorDataEncoder

# y_train / y_test are already encoded by the preceding "Encoding true labels"
# cell. This cell previously called encode_y on them a second time, which is
# redundant and unsafe if encode_y is not idempotent, so only the features are
# encoded here.

# Feature (X) encoding: fit on the training features only, then apply the
# fitted encoder to both partitions so the test set is transformed with
# parameters learned from the training data.
print("=== Iniciando encoding das features ===")
X_encoder = BehaviorDataEncoder(num_classes=5)
print("\nRealizando fit do encoder...")
X_encoder.fit(X_train)

print("\nRealizando transform...")
X_train = X_encoder.transform(X_train)

print("\nTransformando dados de teste...")
X_test = X_encoder.transform(X_test)

# Final check: report the shapes of the encoded features and targets
print("\n=== Verificação após encoding ===")
print(f"Shape de X_train: {X_train.shape}")
print(f"Shape de X_test: {X_test.shape}")
print(f"Shape de y_train: {y_train.shape}")
print(f"Shape de y_test: {y_test.shape}")

=== Iniciando encoding das features ===

Realizando fit do encoder...
Entrada fit - Shape: (902, 331)
Fit realizado com sucesso
Colunas numéricas: 330
Colunas nominais: 179

Realizando transform...
Entrada transform - Shape: (902, 331)
Saída transform - Shape: (902, 470)

Transformando dados de teste...
Entrada transform - Shape: (195, 331)
Saída transform - Shape: (195, 470)

=== Verificação após encoding ===
Shape de X_train: (902, 470)
Shape de X_test: (195, 470)
Shape de y_train: (902,)
Shape de y_test: (195,)




In [19]:
# Shape and first ten rows of the encoded test features
print(X_test.shape)
print(X_test.head(10))

(195, 470)
    num_standard__ultimo_passo_correto  num_standard__verificado_com_mouse  \
8                            -0.239675                           -0.033315   
9                            -0.239675                           -0.033315   
15                           -0.239675                           -0.033315   
16                           -0.239675                           -0.033315   
24                           -0.239675                           -0.033315   
25                           -0.239675                           -0.033315   
26                           -0.239675                           -0.033315   
27                           -0.239675                           -0.033315   
28                           -0.239675                           -0.033315   
39                           -0.239675                           -0.033315   

    num_standard__verificado_com_teclado  num_standard__idle_time_acumulado  \
8                              -0.294528           

In [20]:
# Sanity checks on the training features and targets right before SMOTE
print("Verificando X_train antes do SMOTE:")
_checks = (
    ("1. Shape de X_train:", X_train.shape),
    ("2. Tipo de X_train:", type(X_train)),
    ("3. Shape de y_train:", y_train.shape),
    ("4. Tipo de y_train:", type(y_train)),
)
for _label, _value in _checks:
    print(_label, _value)

# The detailed report is only available when X_train is a DataFrame
if isinstance(X_train, pd.DataFrame):
    print("5. Colunas em X_train:")
    print(X_train.columns.tolist())
    print("\n6. Primeiras linhas de X_train:")
    print(X_train.head())
    print("\n7. Tipos de dados das colunas:")
    print(X_train.dtypes)

Verificando X_train antes do SMOTE:
1. Shape de X_train: (902, 470)
2. Tipo de X_train: <class 'pandas.core.frame.DataFrame'>
3. Shape de y_train: (902,)
4. Tipo de y_train: <class 'numpy.ndarray'>
5. Colunas em X_train:
['num_standard__ultimo_passo_correto', 'num_standard__verificado_com_mouse', 'num_standard__verificado_com_teclado', 'num_standard__idle_time_acumulado', 'num_standard__num_click_acumulado', 'num_standard__num_click_passo', 'num_standard__num_click_eq', 'num_standard__type_step_verification', 'num_standard__type_click', 'num_standard__type_typing', 'num_standard__type_mouse_stop', 'num_standard__type_mouse_fast', 'num_standard__type_hint_request', 'num_standard__type_user_idle', 'num_standard__type_window_lost_focus', 'num_standard__type_window_gained_focus', 'num_standard__type_window_resize', 'num_standard__type_left_window', 'num_standard__type_entered_window', 'num_standard__type_start_out_of_sinc_server_time', 'num_standard__type_user_log_out', 'num_standard__clic

# Balanceamento dos dados

In [21]:
from core.preprocessors.data_balancer import DataBalancer

# Oversample the training partition only; the test partition is left untouched.
# NOTE(review): the resampled X_train/y_train are later passed to
# cross-validation. Oversampling before the folds are built can let synthetic
# samples derived from validation rows into the training folds and inflate CV
# scores — consider applying the balancer inside the CV pipeline.
data_balancer = DataBalancer()
X_train, y_train = data_balancer.apply_smote(X_train, y_train)

In [22]:
from collections import Counter

# Per-class sample counts of the resampled training targets
print(f"Resampled dataset shape: {Counter(y_train)}")

Resampled dataset shape: Counter({4: 522, 2: 522, 3: 522, 1: 522, 0: 522})


# Treinamento dos Modelos

## Definindo parâmetros

In [23]:
# Training configuration: model parameter provider, which models and feature
# selectors to run, the cross-validation scheme and the search budget.
from core.models.multiclass.behavior_model_params import BehaviorModelParams

# Parameter provider specific to behavior classification
model_params = BehaviorModelParams()

# To restrict the run to a subset of models, assign an explicit list instead, e.g.:
# selected_models = [
#     # 'Logistic Regression',
#     'Decision Tree',
#     # 'Random Forest',
#     # 'Gradient Boosting',
#     # 'SVM',
#     # 'KNN',
#     # 'XGBoost',
#     'Naive Bayes'
#     # 'MLP'
# ]

# To restrict the run to a subset of feature selectors, assign an explicit list, e.g.:
# selected_selectors = [
#     # 'rfe',      # Recursive Feature Elimination
#     'pca',      # Principal Component Analysis
#     # 'rf',       # Random Forest Feature Selector
#     # 'mi',       # Mutual Information Feature Selector
#     'none'      # No feature selection
# ]


# Use every available model
selected_models = model_params.get_available_models()  # or a specific list

# Use every available selector
selected_selectors = None  # None to use all selectors

# Stratified cross-validation setup
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Optimization parameters
n_iter = 50  # Reduced for initial testing
n_jobs = 6  # Author's note: the MacBook Air M2 has 8 CPU cores and 10 GPU cores; scikit-learn only uses the CPU (PyTorch or TensorFlow would be needed to use the GPU)
scoring_metric = 'balanced_accuracy'



## Usando Otimização Bayesiana (Optuna)

In [None]:
from core.training.optuna_bayesian_optimization_training import OptunaBayesianOptimizationTraining

# Instantiate the trainer and run the search over the selected models and
# feature selectors, using the configuration defined in the parameters cell.
# NOTE: the timestamps in this cell's captured log span more than a day
# (2024-11-28 11:14 to at least 2024-11-29 18:27), so expect a very long run.
training = OptunaBayesianOptimizationTraining()
trained_models = training.train_model(
    X_train=X_train,
    y_train=y_train,
    model_params=model_params,  
    selected_models=selected_models,
    selected_selectors=selected_selectors,
    n_iter=n_iter,
    cv=cv,
    scoring=scoring_metric,
    n_jobs=n_jobs
)

# Example of accessing the trained models: one entry per key of the returned
# mapping, each exposing 'hyperparameters' and 'cv_result'
for model_key, model_info in trained_models.items():
    print(f"Modelo: {model_key}")
    print(f"Melhores Hiperparâmetros: {model_info['hyperparameters']}")
    print(f"Resultado CV: {model_info['cv_result']}\n")


2024-11-28 11:14:47,516 | optuna_training | INFO | Training and evaluating Logistic Regression with Optuna Optimization and rfe
INFO:optuna_training:Training and evaluating Logistic Regression with Optuna Optimization and rfe


Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-28 20:47:36,103 | optuna_training | INFO | Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'lbfgs', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced', 'feature_selection__n_features_to_select': 49}
INFO:optuna_training:Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'lbfgs', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced', 'feature_selection__n_features_to_select': 49}
2024-11-28 20:47:36,108 | optuna_training | INFO | Best cross-validation score: 0.8115529753265603
INFO:optuna_training:Best cross-validation score: 0.8115529753265603
2024-11-28 20:47:36,110 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-28 20:47:36,115 | optuna

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-28 20:48:54,878 | optuna_training | INFO | Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'newton-cg', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced', 'feature_selection__n_components': 0.9741054013902074}
INFO:optuna_training:Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'newton-cg', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced', 'feature_selection__n_components': 0.9741054013902074}
2024-11-28 20:48:54,881 | optuna_training | INFO | Best cross-validation score: 0.8613497822931786
INFO:optuna_training:Best cross-validation score: 0.8613497822931786
2024-11-28 20:48:54,886 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-28 20:52:44,591 | optuna_training | INFO | Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'lbfgs', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced', 'feature_selection__max_features': 204, 'feature_selection__threshold': '0.5*mean'}
INFO:optuna_training:Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'lbfgs', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced', 'feature_selection__max_features': 204, 'feature_selection__threshold': '0.5*mean'}
2024-11-28 20:52:44,593 | optuna_training | INFO | Best cross-validation score: 0.9111030478955009
INFO:optuna_training:Best cross-validation score: 0.9111030478955009
2024-11-28 20:52:44,596 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combina

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-28 21:05:42,418 | optuna_training | INFO | Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'newton-cg', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced', 'feature_selection__k': 'all'}
INFO:optuna_training:Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'newton-cg', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced', 'feature_selection__k': 'all'}
2024-11-28 21:05:42,428 | optuna_training | INFO | Best cross-validation score: 0.9188026124818578
INFO:optuna_training:Best cross-validation score: 0.9188026124818578
2024-11-28 21:05:42,431 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-28 21:05:42,442 | optuna_training | INFO | Param

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-28 21:16:32,351 | optuna_training | INFO | Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'newton-cg', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced'}
INFO:optuna_training:Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'newton-cg', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced'}
2024-11-28 21:16:32,356 | optuna_training | INFO | Best cross-validation score: 0.9188026124818578
INFO:optuna_training:Best cross-validation score: 0.9188026124818578
2024-11-28 21:16:32,362 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-28 21:16:32,367 | optuna_training | INFO | Params: {'classifier__penalty': 'l2', 'classifier__C': 0.01, 'class

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-28 21:27:36,034 | optuna_training | INFO | Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'lbfgs', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced'}
INFO:optuna_training:Best parameters: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__solver': 'lbfgs', 'classifier__max_iter': 5000, 'classifier__multi_class': 'multinomial', 'classifier__class_weight': 'balanced'}
2024-11-28 21:27:36,039 | optuna_training | INFO | Best cross-validation score: 0.9188026124818578
INFO:optuna_training:Best cross-validation score: 0.9188026124818578
2024-11-28 21:27:36,046 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-28 21:27:36,053 | optuna_training | INFO | Params: {'classifier__penalty': 'l2', 'classifier__C': 10.0, 'classifier__s

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 07:18:13,710 | optuna_training | INFO | Best parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'feature_selection__n_features_to_select': 47}
INFO:optuna_training:Best parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'feature_selection__n_features_to_select': 47}
2024-11-29 07:18:13,714 | optuna_training | INFO | Best cross-validation score: 0.8767997097242379
INFO:optuna_training:Best cross-validation score: 0.8767997097242379
2024-11-29 07:18:13,715 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-29 07:18:13,717 | optuna_training | INFO | Params: {'classifier__c

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 07:18:45,844 | optuna_training | INFO | Best parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'log2', 'feature_selection__n_components': 0.8383765484094009}
INFO:optuna_training:Best parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'log2', 'feature_selection__n_components': 0.8383765484094009}
2024-11-29 07:18:45,847 | optuna_training | INFO | Best cross-validation score: 0.8498403483309144
INFO:optuna_training:Best cross-validation score: 0.8498403483309144
2024-11-29 07:18:45,851 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-29 07:18:45,855 | optuna_training | INFO | Params:

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 07:20:05,011 | optuna_training | INFO | Best parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'log2', 'feature_selection__max_features': 275, 'feature_selection__threshold': '0.5*mean'}
INFO:optuna_training:Best parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'log2', 'feature_selection__max_features': 275, 'feature_selection__threshold': '0.5*mean'}
2024-11-29 07:20:05,012 | optuna_training | INFO | Best cross-validation score: 0.8828447024673439
INFO:optuna_training:Best cross-validation score: 0.8828447024673439
2024-11-29 07:20:05,012 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 07:26:26,142 | optuna_training | INFO | Best parameters: {'classifier__criterion': 'gini', 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'feature_selection__k': 'all'}
INFO:optuna_training:Best parameters: {'classifier__criterion': 'gini', 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'feature_selection__k': 'all'}
2024-11-29 07:26:26,147 | optuna_training | INFO | Best cross-validation score: 0.8816545718432511
INFO:optuna_training:Best cross-validation score: 0.8816545718432511
2024-11-29 07:26:26,147 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-29 07:26:26,150 | optuna_training | INFO | Params: {'classifier__criterion': 'gini', 'classifier__max_depth'

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 07:26:41,440 | optuna_training | INFO | Best parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': 30, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'log2'}
INFO:optuna_training:Best parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': 30, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'log2'}
2024-11-29 07:26:41,441 | optuna_training | INFO | Best cross-validation score: 0.8773657474600871
INFO:optuna_training:Best cross-validation score: 0.8773657474600871
2024-11-29 07:26:41,442 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-29 07:26:41,447 | optuna_training | INFO | Params: {'classifier__criterion': 'gini', 'classifier__max_depth': 30, 'classifier__min_samples_split': 2, 'classifier__m

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 07:26:58,155 | optuna_training | INFO | Best parameters: {'classifier__criterion': 'gini', 'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt'}
INFO:optuna_training:Best parameters: {'classifier__criterion': 'gini', 'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt'}
2024-11-29 07:26:58,157 | optuna_training | INFO | Best cross-validation score: 0.8766037735849057
INFO:optuna_training:Best cross-validation score: 0.8766037735849057
2024-11-29 07:26:58,157 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-29 07:26:58,160 | optuna_training | INFO | Params: {'classifier__criterion': 'gini', 'classifier__max_depth': 10, 'classifier__min_samples_split': 2, 'classifier__min

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 18:01:38,166 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 100, 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced_subsample', 'classifier__bootstrap': True, 'classifier__criterion': 'gini', 'feature_selection__n_features_to_select': 24}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 100, 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced_subsample', 'classifier__bootstrap': True, 'classifier__criterion': 'gini', 'feature_selection__n_features_to_select': 24}
2024-11-29 18:01:38,170 | optuna_training | INFO | Best cross-validation score: 0.9394847605224964
INFO:optuna_training:Best cross-validation score: 0.9394847605224964
2024-11-29 18:01:38,170 | optuna_training | INFO | All hyperpara

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 18:08:08,530 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 300, 'classifier__max_depth': 30, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'log2', 'classifier__class_weight': 'balanced_subsample', 'classifier__bootstrap': True, 'classifier__criterion': 'gini', 'feature_selection__n_components': 0.9888019689509284}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 300, 'classifier__max_depth': 30, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'log2', 'classifier__class_weight': 'balanced_subsample', 'classifier__bootstrap': True, 'classifier__criterion': 'gini', 'feature_selection__n_components': 0.9888019689509284}
2024-11-29 18:08:08,534 | optuna_training | INFO | Best cross-validation score: 0.9444702467343976
INFO:optuna_training:Best cross-validation score: 0.9444702467343976
2024-11-29 18:08:08,535 | optuna_training | INFO

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 18:14:16,847 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 200, 'classifier__max_depth': 30, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced', 'classifier__bootstrap': True, 'classifier__criterion': 'entropy', 'feature_selection__max_features': 407, 'feature_selection__threshold': 'median'}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 200, 'classifier__max_depth': 30, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced', 'classifier__bootstrap': True, 'classifier__criterion': 'entropy', 'feature_selection__max_features': 407, 'feature_selection__threshold': 'median'}
2024-11-29 18:14:16,852 | optuna_training | INFO | Best cross-validation score: 0.9421698113207547
INFO:optuna_training:Best cross-validation score: 0.9421698113207547
2024-11-

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 18:24:00,689 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 300, 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced', 'classifier__bootstrap': True, 'classifier__criterion': 'gini', 'feature_selection__k': 'all'}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 300, 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced', 'classifier__bootstrap': True, 'classifier__criterion': 'gini', 'feature_selection__k': 'all'}
2024-11-29 18:24:00,694 | optuna_training | INFO | Best cross-validation score: 0.9391146589259798
INFO:optuna_training:Best cross-validation score: 0.9391146589259798
2024-11-29 18:24:00,694 | optuna_training | INFO | All hyperparameter combinations and their cross-validation result

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 18:27:38,789 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 200, 'classifier__max_depth': 30, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced_subsample', 'classifier__bootstrap': True, 'classifier__criterion': 'entropy'}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 200, 'classifier__max_depth': 30, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced_subsample', 'classifier__bootstrap': True, 'classifier__criterion': 'entropy'}
2024-11-29 18:27:38,796 | optuna_training | INFO | Best cross-validation score: 0.9414296081277215
INFO:optuna_training:Best cross-validation score: 0.9414296081277215
2024-11-29 18:27:38,796 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperpar

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-29 18:31:21,507 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 300, 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced', 'classifier__bootstrap': True, 'classifier__criterion': 'gini'}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 300, 'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__class_weight': 'balanced', 'classifier__bootstrap': True, 'classifier__criterion': 'gini'}
2024-11-29 18:31:21,514 | optuna_training | INFO | Best cross-validation score: 0.9417851959361393
INFO:optuna_training:Best cross-validation score: 0.9417851959361393
2024-11-29 18:31:21,514 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and th

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-30 05:00:08,567 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 200, 'classifier__learning_rate': 0.1, 'classifier__max_depth': 7, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 4, 'classifier__subsample': 0.8, 'feature_selection__n_features_to_select': 22}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 200, 'classifier__learning_rate': 0.1, 'classifier__max_depth': 7, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 4, 'classifier__subsample': 0.8, 'feature_selection__n_features_to_select': 22}
2024-11-30 05:00:08,574 | optuna_training | INFO | Best cross-validation score: 0.944833091436865
INFO:optuna_training:Best cross-validation score: 0.944833091436865
2024-11-30 05:00:08,574 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-30 05:00:08,577 | optuna

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-30 06:13:38,466 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 200, 'classifier__learning_rate': 0.1, 'classifier__max_depth': 7, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.8, 'feature_selection__n_components': 0.9794996286638358}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 200, 'classifier__learning_rate': 0.1, 'classifier__max_depth': 7, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.8, 'feature_selection__n_components': 0.9794996286638358}
2024-11-30 06:13:38,471 | optuna_training | INFO | Best cross-validation score: 0.9525036284470246
INFO:optuna_training:Best cross-validation score: 0.9525036284470246
2024-11-30 06:13:38,472 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-30 06:

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-30 08:30:42,448 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 300, 'classifier__learning_rate': 0.1, 'classifier__max_depth': 7, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.8, 'feature_selection__max_features': 404, 'feature_selection__threshold': '1.5*mean'}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 300, 'classifier__learning_rate': 0.1, 'classifier__max_depth': 7, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.8, 'feature_selection__max_features': 404, 'feature_selection__threshold': '1.5*mean'}
2024-11-30 08:30:42,455 | optuna_training | INFO | Best cross-validation score: 0.9456386066763427
INFO:optuna_training:Best cross-validation score: 0.9456386066763427
2024-11-30 08:30:42,455 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combi

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-30 10:20:23,836 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 300, 'classifier__learning_rate': 0.05, 'classifier__max_depth': 7, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.9, 'feature_selection__k': 'all'}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 300, 'classifier__learning_rate': 0.05, 'classifier__max_depth': 7, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.9, 'feature_selection__k': 'all'}
2024-11-30 10:20:23,847 | optuna_training | INFO | Best cross-validation score: 0.9444847605224964
INFO:optuna_training:Best cross-validation score: 0.9444847605224964
2024-11-30 10:20:23,848 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-30 10:20:23,850 | optuna_training | INFO | Params: {

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-30 13:40:29,447 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 300, 'classifier__learning_rate': 0.05, 'classifier__max_depth': 7, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.8}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 300, 'classifier__learning_rate': 0.05, 'classifier__max_depth': 7, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 2, 'classifier__subsample': 0.8}
2024-11-30 13:40:29,456 | optuna_training | INFO | Best cross-validation score: 0.9437082728592163
INFO:optuna_training:Best cross-validation score: 0.9437082728592163
2024-11-30 13:40:29,457 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-30 13:40:29,459 | optuna_training | INFO | Params: {'classifier__n_estimators': 300, 'classifier__learning_rate': 

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-11-30 16:42:34,138 | optuna_training | INFO | Best parameters: {'classifier__n_estimators': 300, 'classifier__learning_rate': 0.1, 'classifier__max_depth': 5, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 4, 'classifier__subsample': 0.9}
INFO:optuna_training:Best parameters: {'classifier__n_estimators': 300, 'classifier__learning_rate': 0.1, 'classifier__max_depth': 5, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 4, 'classifier__subsample': 0.9}
2024-11-30 16:42:34,148 | optuna_training | INFO | Best cross-validation score: 0.9433309143686504
INFO:optuna_training:Best cross-validation score: 0.9433309143686504
2024-11-30 16:42:34,148 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-11-30 16:42:34,150 | optuna_training | INFO | Params: {'classifier__n_estimators': 300, 'classifier__learning_rate': 0.

Inside OptunaBayesianOptimizationTraining.optimize_model


2024-12-01 20:26:17,129 | optuna_training | INFO | Best parameters: {'classifier__C': 10.0, 'classifier__kernel': 'linear', 'classifier__gamma': 0.01, 'classifier__class_weight': 'balanced', 'feature_selection__n_features_to_select': 49}
INFO:optuna_training:Best parameters: {'classifier__C': 10.0, 'classifier__kernel': 'linear', 'classifier__gamma': 0.01, 'classifier__class_weight': 'balanced', 'feature_selection__n_features_to_select': 49}
2024-12-01 20:26:17,139 | optuna_training | INFO | Best cross-validation score: 0.8563134978229318
INFO:optuna_training:Best cross-validation score: 0.8563134978229318
2024-12-01 20:26:17,140 | optuna_training | INFO | All hyperparameter combinations and their cross-validation results:
INFO:optuna_training:All hyperparameter combinations and their cross-validation results:
2024-12-01 20:26:17,142 | optuna_training | INFO | Params: {'classifier__C': 1.0, 'classifier__kernel': 'linear', 'classifier__gamma': 0.1, 'classifier__class_weight': 'balanced'

Inside OptunaBayesianOptimizationTraining.optimize_model


# Evaluation and logging

In [None]:
from utils import notebook_utils as nb_utils

# Single prefix shared by the generated reports and the saved model files.
OUTPUT_PREFIX = "_Optuna_behavior_"

# Model evaluation on the train and test splits.
class_metrics_results, avg_metrics_results = nb_utils.evaluate_models(
    trained_models,
    X_train,
    y_train,
    X_test,
    y_test,
)

# Report generation from the evaluation results.
nb_utils.generate_reports(
    class_metrics_results,
    avg_metrics_results,
    filename_prefix=OUTPUT_PREFIX,
)

# Save the trained models to files so they can be retrieved later.
nb_utils.save_models(
    trained_models,
    filename_prefix=OUTPUT_PREFIX,
)