In [None]:
import os

# Directory the notebook runs from. The path ends in `src`, i.e. it is the
# source directory rather than the project root (presumably where the
# `behavior` and `core` packages imported by later cells live — confirm).
# The original machine-specific location is kept as the default, so existing
# runs behave exactly as before; set the BEHAVIOR_DETECTION_SRC environment
# variable to run the notebook from another checkout without editing this cell.
path = os.environ.get(
    "BEHAVIOR_DETECTION_SRC",
    "/Users/patricia/Documents/code/python-code/behavior-detection/src",
)
# Later cells use paths such as '../data', '../output' and '../models',
# which are relative to the working directory set here.
os.chdir(path)
print(os.getcwd())  # Confirm the working directory now in effect


# Load data

In [30]:
from behavior.behavior_data_loader import BehaviorDataLoader

# Input CSV, given relative to the current working directory.
data_path = '../data/new_logs_labels.csv'

# The loader is told to use ';' as the field delimiter.
# load_data() presumably populates data_loader.data, which the next cells
# read — TODO confirm in BehaviorDataLoader.
data_loader = BehaviorDataLoader(data_path, delimiter=';')
data_loader.load_data()

In [None]:
# Preview the first five rows held by the loader.
data_loader.data.head(5)

In [None]:
# Value counts of the 'comportamento' column before filtering.
print("Valores da coluna 'comportamento' antes da remoção:", data_loader.data['comportamento'].value_counts())

# Remove instances where 'comportamento' is '?'
data_loader.remove_instances_with_value('comportamento', '?')

# Value counts of the same column after filtering, for comparison.
print("\nValores da coluna 'comportamento' depois da remoção:", data_loader.data['comportamento'].value_counts())

In [None]:
# Fetch the loader's data into `data`, the object the later cells work on,
# and preview its first five rows.
data = data_loader.get_data()
data.head(5)

In [36]:
## Small fixed-seed sample of the data, kept only for testing purposes
# NOTE(review): the later cells visible in this notebook keep reading `data`,
# not `subset_data` — confirm whether the subset is meant to be used.

# Draw 40 rows (seed 42) and renumber them from 0, discarding the old index.
subset_data = (
    data
    .sample(n=40, random_state=42)
    .reset_index(drop=True)
)


# Pre-processing

## Remove unnecessary columns

In [37]:
from core.preprocessors.column_remover import ColumnRemover

# Columns dropped before modelling (presumably identifiers / bookkeeping
# fields, judging by their names — confirm against the dataset description).
columns_to_remove = [
    'id_log',
    'aluno',
    'grupo',
    'num_dia',
    'num_log',
]

# Register the columns with the remover, then produce the reduced data.
column_remover = ColumnRemover()
column_remover.set_columns_to_remove(columns_to_remove, data)
cleaned_data = column_remover.remove_columns(data)


In [None]:
# Preview the first five rows after column removal.
cleaned_data.head(5)

In [39]:
# Replace every missing value in cleaned_data with the string 'missing'.
# NOTE(review): this is applied to all columns, not only categorical ones —
# confirm the later encoding step expects the 'missing' marker everywhere.

cleaned_data = cleaned_data.fillna('missing')

## Split data into X and y

In [40]:
from core.preprocessors.data_preprocessor import DataPreprocessor
# Split cleaned_data using "comportamento" as the target column
# (presumably X = remaining columns, y = target — confirm in DataPreprocessor.split_data).
X, y = DataPreprocessor.split_data(cleaned_data, "comportamento")

In [None]:
# Show the first five entries of the target.
print(y[:5])

## Encoding variables

In [None]:
# Encoding variables
from behavior.behavior_data_preprocessor import BehaviorDataPreprocessor
# The target is encoded through the class-level helper, without an instance.
encoded_y = BehaviorDataPreprocessor.encode_y(y)
# The features go through an instance: build it on X, run preprocess(),
# then read the result back.
preprocessor = BehaviorDataPreprocessor(X)
preprocessor.preprocess()
encoded_X = preprocessor.get_preprocessed_X()

# Balanceamento dos dados

In [44]:
# Balance data
from core.preprocessors.data_balancer import DataBalancer
import pandas as pd
# Wrap the encoded target in a named Series before handing it to the balancer.
encoded_y_series = pd.Series(encoded_y, name="comportamento")
data_balancer = DataBalancer()
# NOTE(review): resampling (presumably SMOTE oversampling, per the method name)
# runs on the full dataset here, before the train/test split in a later cell.
# Synthetic rows derived from future test rows can therefore end up in the
# training set — confirm whether balancing should happen on the training split only.
X_balanced, y_balanced = data_balancer.apply_smote(encoded_X, encoded_y_series)

In [None]:
from collections import Counter

# Per-class counts before and after balancing.
# NOTE(review): the first line counts `y` (labels before encoding) while the
# second counts `y_balanced` (derived from the encoded labels), so the two
# outputs may use different keys — confirm whether Counter(encoded_y_series)
# was intended for a like-for-like comparison.
print(f"Original dataset shape: {Counter(y)}")
print(f"Resampled dataset shape: {Counter(y_balanced)}")

## Split data into training and test datasets

In [46]:
from sklearn.model_selection import train_test_split

test_size = 0.2  # 80% for training, 20% for testing
# Stratify on the labels so each class keeps the same proportion in the
# training and test partitions; a purely random split can leave a class
# under-represented in the smaller test partition.
# NOTE(review): the inputs are the already-resampled X_balanced / y_balanced,
# so the test partition may contain resampled rows — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X_balanced,
    y_balanced,
    test_size=test_size,
    random_state=42,
    stratify=y_balanced,
)


In [None]:
# Counter is already imported in an earlier cell; the import is repeated here.
from collections import Counter

# Number of instances of each class in the training labels.
print("Nro de instancias de cada classe em y_train:\n")
print(Counter(y_train))
# Number of instances of each class in the test labels.
print("\n\nNro de instancias de cada classe em y_test:\n")
print(Counter(y_test))

# Treinamento dos Modelos usando Otimização Bayesiana (BayesSearchCV)

In [None]:
from core.training.bayesian_optimization_training import BayesianOptimizationTraining

# Scoring metric handed to the search.
# Possible values: 'f1_macro', 'balanced_accuracy', 'roc_auc_ovr', etc.
scoring_metric = 'roc_auc_ovr'

# Search budget and parallelism (presumably n_iter = search iterations and
# cv = cross-validation folds — confirm in BayesianOptimizationTraining).
n_iter = 100
cv = 10
n_jobs = 4  # Number of processors to be used in the execution: -1 to use all processors

training = BayesianOptimizationTraining()

# NOTE(review): no seed is passed here; whether the search is reproducible
# depends on the defaults inside train_model.
trained_models = training.train_model(
    X_train,
    y_train,
    n_iter=n_iter,
    cv=cv,
    scoring=scoring_metric,
    n_jobs=n_jobs,
)

# Avaliação dos Modelos

In [None]:
from core.evaluation.evaluation import Evaluation  

# Evaluate every trained model, passing both the training and the test split.
# Two result objects come back: per-class metrics and averaged metrics
# (per the variable names — confirm the exact structure in Evaluation).
feature_names = X_train.columns  # Assumes the feature names are the columns of X_train
class_metrics_results, avg_metrics_results = Evaluation.evaluate_all_models(trained_models, X_train, y_train, X_test, y_test, feature_names)

# Geração dos Relatórios

In [None]:
from core.logging.report_formatter import ReportFormatter
from core.logging.file_manager import FileManager

# Output location passed to every save call in this cell
# (relative to the current working directory).
directory = "../output/"

# Build the text report from the evaluation results
text_report = ReportFormatter.generate_text_report(class_metrics_results, avg_metrics_results)

# Save the text report (nothing is printed in this cell);
# the file name is presumably timestamped, per the method name — confirm in FileManager
FileManager.save_file_with_timestamp(text_report, "bayesian_optimization_report.txt", directory)

# Build the detailed DataFrame of the per-class reports
class_report_df = ReportFormatter.generate_class_report_dataframe(class_metrics_results)

# Build the summary DataFrame of the averaged-metrics reports
avg_metrics_report_df = ReportFormatter.generate_avg_metrics_report_dataframe(avg_metrics_results)

# Save both DataFrames as CSV files
FileManager.save_csv_file_with_timestamp(class_report_df, "class_report.csv", directory)
FileManager.save_csv_file_with_timestamp(avg_metrics_report_df, "avg_metrics_report.csv", directory)


# Salvando os modelos em arquivos para recuperação

In [None]:
from core.logging.model_manager import ModelManager

# Paths: target directory for the saved models (relative to the working directory)
model_dir = "../models/"

# Save all trained models and print whatever save_all_models returns
# (presumably the saved file paths or names — confirm in ModelManager).
saved_models = ModelManager.save_all_models(trained_models, model_dir)
print("Modelos salvos:", saved_models)