### Configuração do Ambiente

Para configurar o ambiente necessário para este notebook, execute os seguintes comandos no terminal:

```bash
conda create -n pytorch_env python=3.9 matplotlib seaborn pandas scikit-learn -y
conda activate pytorch_env
pip install torch torchvision ipykernel tqdm
python -m ipykernel install --user --name=pytorch_env --display-name "Python (pytorch_env)"
```

Após executar os comandos, selecione o kernel **Python (pytorch_env)** no menu **Kernel > Change Kernel** do Jupyter Notebook.

In [None]:
import pandas as pd
import numpy as np
import datetime

import os

# Timestamp (local time, YYYYMMDDHHMMSS) used to give each run its own log directory.
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

# Paths used when running on the LOCAL machine.
path = "/home/pedro/projetoDL/dataset/processado/"
log_path_tmp = "/home/pedro/projetoDL/log/torch/"
# Per-run log directory. os.path.join avoids the doubled "/" that plain string
# concatenation produced, because log_path_tmp already ends with a separator.
log_path = os.path.join(log_path_tmp, "exp_" + timestamp)

In [None]:
import torch

# Report whether CUDA is usable and, if so, list the name of every visible GPU.
if not torch.cuda.is_available():
    print("Não conectado a uma GPU")
else:
    print("GPU está conectada")
    num_gpus = torch.cuda.device_count()
    for gpu_index in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(gpu_index)
        print(f"Nome da GPU: {gpu_name}")

In [None]:
# Load labels (Y) and features (X) from the processed .npz archives.
# np.load returns an NpzFile that keeps the archive open; using it as a context
# manager closes the file handle as soon as the array has been read into memory.
with np.load(path + 'Y_train_NewApproach_Injected_v2.npz') as labels_npz:
    Y = labels_npz['arr_0']

with np.load(path + 'X_train_NewApproach_Injected_v2.npz') as features_npz:
    X = features_npz['arr_0']

In [None]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, average_precision_score
from scipy.stats import ks_2samp
#import scikitplot as skplt
import matplotlib.pyplot as plt
from numpy import interp

from scipy.stats import ks_2samp
from sklearn.metrics import roc_curve, auc


def extract_final_losses(history):
    """Return the training and validation loss at the best validation epoch.

    Args:
        history: Object exposing a ``history`` dict with ``'loss'`` and
            ``'val_loss'`` sequences (e.g. the object returned by Keras' ``fit``).

    Returns:
        A dict ``{'train_loss': ..., 'val_loss': ...}`` taken at the epoch
        whose validation loss is the smallest.
    """
    losses = history.history
    train_curve, val_curve = losses['loss'], losses['val_loss']
    # Epoch index with the lowest validation loss (first one on ties).
    best_epoch = np.argmin(val_curve)
    best = {}
    best['train_loss'] = train_curve[best_epoch]
    best['val_loss'] = val_curve[best_epoch]
    return best

def plot_training_error_curves(history):
    """Plot the training and validation loss curves of a training run.

    Args:
        history: Object exposing a ``history`` dict with ``'loss'`` and
            ``'val_loss'`` sequences (e.g. the object returned by Keras' ``fit``).

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    curves = history.history
    train_curve = curves['loss']
    val_curve = curves['val_loss']

    _, axes = plt.subplots()
    # One line per curve, in the same order as the legend entries.
    for series, legend_label in ((train_curve, 'Train'), (val_curve, 'Validation')):
        axes.plot(series, label=legend_label)
    axes.set_title('Training and Validation Error Curves')
    axes.set_xlabel('Epochs')
    axes.set_ylabel('Loss (MSE)')
    axes.legend()
    plt.show()

def compute_performance_metrics(y, y_pred_class, y_pred_scores=None):
    """Compute classification metrics and, optionally, score-based metrics.

    Args:
        y: Ground-truth labels.
        y_pred_class: Predicted class labels.
        y_pred_scores: Optional 2-D array of per-class scores; column 1 is
            used as the positive-class score for AUROC and AUPR.

    Returns:
        ``(accuracy, recall, precision, f1)`` when ``y_pred_scores`` is None,
        otherwise ``(accuracy, recall, precision, f1, auroc, aupr)``.

    Side effects:
        When scores are given and scikit-plot can be imported, a KS-statistic
        plot is saved to ``ks_plot.png`` and shown.
    """
    performance_metrics = (
        accuracy_score(y, y_pred_class),
        recall_score(y, y_pred_class),
        precision_score(y, y_pred_class),
        f1_score(y, y_pred_class),
    )
    if y_pred_scores is None:
        return performance_metrics

    # The module-level scikit-plot import is commented out, so referencing
    # `skplt` directly raised NameError. Import it lazily and skip only the
    # plot when the package is missing or fails to import.
    try:
        import scikitplot as skplt
    except ImportError:
        skplt = None

    if skplt is not None:
        skplt.metrics.plot_ks_statistic(y, y_pred_scores)
        plt.savefig("ks_plot.png")
        plt.show()
    else:
        print("scikit-plot is not available; skipping the KS plot.")

    positive_scores = y_pred_scores[:, 1]
    auroc = roc_auc_score(y, positive_scores)
    aupr = average_precision_score(y, positive_scores)
    return performance_metrics + (auroc, aupr)

def print_metrics_summary(accuracy, recall, precision, f1, auroc=None, aupr=None):
    """Print an aligned summary of the given metrics, one per line.

    A blank line is printed first. AUROC and AUPR lines are emitted only when
    the corresponding argument is not None.
    """
    row_template = "{metric:<18}{value:.4f}"
    rows = [
        ("Accuracy:", accuracy),
        ("Recall:", recall),
        ("Precision:", precision),
        ("F1:", f1),
    ]
    # Optional metrics are appended only when they were actually supplied.
    rows.extend(
        (label, score)
        for label, score in (("AUROC:", auroc), ("AUPR:", aupr))
        if score is not None
    )

    print()
    for label, score in rows:
        print(row_template.format(metric=label, value=score))

In [None]:
import gc

# Run the garbage collector to free RAM.
gc.collect()

In [None]:
import os

# `!export ...` runs in a throw-away subshell, so the variable never reached
# this kernel process. Set it in the process environment instead.
# NOTE(review): this likely needs to run before CUDA is first initialised to
# have any effect - consider moving it to the first cell; confirm.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
import numpy as np
import os
import datetime
from tqdm import tqdm
import gc

# Initial setup: run on the GPU when CUDA is available, otherwise on the CPU.
_device_name = "cpu"
if torch.cuda.is_available():
    _device_name = "cuda"
device = torch.device(_device_name)
print(f"Using device: {device}")

# Data normalisation
def normalize_data(data):
    """Standardise ``data`` using its global mean and standard deviation.

    Args:
        data: Array-like object exposing ``mean()`` and ``std()`` (e.g. a NumPy
            array); statistics are taken over all elements.

    Returns:
        ``(data - mean) / std``. When the standard deviation is exactly zero
        (constant input) the centred data is returned instead, which avoids
        the NaN/inf values a division by zero would produce.
    """
    mean = data.mean()
    std = data.std()
    if std == 0:
        return data - mean
    return (data - mean) / std

# Weight initialisation (Xavier/Glorot)
def init_weights(m):
    """Apply Xavier-uniform weights and zero biases to Conv2d/Linear modules.

    Intended for use with ``Module.apply``; any other module type is left
    untouched.
    """
    if not isinstance(m, (nn.Conv2d, nn.Linear)):
        return
    nn.init.xavier_uniform_(m.weight)
    # Layers created with bias=False have no bias tensor to reset.
    if m.bias is None:
        return
    nn.init.zeros_(m.bias)

# Network definition
class ConvNet(nn.Module):
    """Two-block CNN producing one raw logit per sample for binary classification.

    Each convolutional block is Conv2d(5x5, padding=2) -> ReLU -> BatchNorm2d ->
    MaxPool2d(2). The classifier head is Flatten -> Dropout -> Linear(64) ->
    ReLU -> Dropout -> Linear(1).

    The first Linear layer is sized for 64 feature maps of 11 x 29 after the
    two 2x poolings, so the input is expected to be (batch, 1, H, W) with
    H // 4 == 11 and W // 4 == 29.

    The output is a logit (no sigmoid); pair it with ``nn.BCEWithLogitsLoss``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.flatten = nn.Flatten()
        self.dropout1 = nn.Dropout(0.3)
        self.fc1 = nn.Linear(64 * 11 * 29, 64)
        self.dropout2 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        """Return logits of shape (batch, 1) for input ``x``."""
        x = self.pool1(self.bn1(torch.relu(self.conv1(x))))
        x = self.pool2(self.bn2(torch.relu(self.conv2(x))))
        x = self.flatten(x)
        # Dropout is applied to the inputs of each Linear layer, in the order
        # the layers are declared. It must never be applied to the final logit:
        # doing so zeroes ~30% of the predictions during training and rescales
        # the rest, which corrupts the loss and the training accuracy.
        x = self.dropout1(x)
        x = torch.relu(self.fc1(x))
        x = self.dropout2(x)
        # No sigmoid here; BCEWithLogitsLoss applies it internally.
        return self.fc2(x)

# Early stopping
class EarlyStopping:
    """Track validation loss and flag when it has stopped improving.

    Call the instance once per epoch with the validation loss. A loss counts
    as an improvement when it is lower than the best loss seen so far by more
    than ``min_delta``. After ``patience`` consecutive non-improving calls,
    ``early_stop`` becomes True.
    """

    def __init__(self, patience=10, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        first_call = self.best_loss is None
        if first_call or val_loss < self.best_loss - self.min_delta:
            # New best: remember it and restart the patience counter.
            self.counter = 0
            self.best_loss = val_loss
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

# Metric computation
def compute_metrics(y_true, y_pred):
    """Return overall accuracy derived from the confusion matrix.

    Accuracy is the sum of the confusion-matrix diagonal (correct
    predictions) divided by the sum of all its entries.
    """
    matrix = confusion_matrix(y_true, y_pred)
    correct = np.diag(matrix).sum()
    total = matrix.sum()
    return correct / total

In [None]:

# K-fold cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for fold_no, (train_idx, val_idx) in enumerate(kf.split(X, Y), start=1):
    # Directory where this fold's artefacts are saved
    save_path = os.path.join(log_path, f"{fold_no}_fold")
    os.makedirs(save_path, exist_ok=True)

    # Training and validation data for this fold
    print("Fold:", fold_no)
    x_train, x_val = X[train_idx], X[val_idx]
    y_train, y_val = Y[train_idx], Y[val_idx]

    # Normalisation is currently disabled.
    # print("Normalizando os dados...")
    # x_train = normalize_data(x_train)
    # x_val = normalize_data(x_val)

    # Convert to PyTorch tensors
    print("Convertendo para tensores do PyTorch...")
    x_train, y_train = torch.tensor(x_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32)
    x_val, y_val = torch.tensor(x_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32)

    # Build the DataLoaders.
    # unsqueeze(1) adds the channel dimension expected by Conv2d
    # (assumes X is (N, H, W) and Y is (N,) - TODO confirm).
    print("Criando DataLoaders...")
    batch_size = 256
    train_dataset = TensorDataset(x_train.unsqueeze(1), y_train)
    val_dataset = TensorDataset(x_val.unsqueeze(1), y_val)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Model, loss and optimiser
    print("Inicializando o modelo, loss e otimizador...")
    model = ConvNet().to(device)
    model.apply(init_weights)  # Xavier initialisation
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

    # LR reduction on plateau and early stopping.
    # The `verbose` argument is deprecated in recent PyTorch releases and is
    # not needed here: the current LR is already printed after every epoch.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, min_lr=1e-5)
    early_stopping = EarlyStopping(patience=10, min_delta=0.001)

    # Training
    print("Treinando o modelo...")
    num_epochs = 30
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        # Training loop with a tqdm progress bar
        train_loop = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs} [Train]")
        for inputs, targets in train_loop:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            # squeeze(1) removes only the logit dimension: (B, 1) -> (B,).
            # A bare squeeze() would also collapse a final batch of size 1 to
            # a 0-d tensor, which no longer matches `targets` in the loss.
            logits = model(inputs).squeeze(1)
            loss = criterion(logits, targets)
            loss.backward()
            optimizer.step()
            batch_loss = loss.item()
            running_loss += batch_loss

            # Training accuracy
            preds = (torch.sigmoid(logits) > 0.5).float()
            correct_train += (preds == targets).sum().item()
            total_train += targets.size(0)

            # Show the batch loss and current learning rate on the progress bar
            train_loop.set_postfix(loss=batch_loss, lr=optimizer.param_groups[0]['lr'])

        train_loss = running_loss / len(train_loader)
        train_losses.append(train_loss)
        train_accuracy = correct_train / total_train
        train_accuracies.append(train_accuracy)

        # Validation
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0
        y_preds, y_trues = [], []

        val_loop = tqdm(val_loader, desc=f"Epoch {epoch + 1}/{num_epochs} [Val]")
        with torch.no_grad():
            for inputs, targets in val_loop:
                inputs, targets = inputs.to(device), targets.to(device)
                logits = model(inputs).squeeze(1)
                val_loss += criterion(logits, targets).item()

                # Validation accuracy
                probs = torch.sigmoid(logits)
                preds = (probs > 0.5).float()
                correct_val += (preds == targets).sum().item()
                total_val += targets.size(0)

                # Store probabilities (not raw logits) so that the 0.5
                # threshold applied after training is meaningful.
                y_preds.extend(probs.cpu().numpy())
                y_trues.extend(targets.cpu().numpy())

        val_loss /= len(val_loader)
        val_losses.append(val_loss)
        val_accuracy = correct_val / total_val
        val_accuracies.append(val_accuracy)

        print(f"Epoch {epoch + 1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - "
              f"Train Acc: {train_accuracy:.4f} - Val Acc: {val_accuracy:.4f} - LR: {optimizer.param_groups[0]['lr']:.6f}")

        # Update the scheduler
        scheduler.step(val_loss)

        # Check early stopping
        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered!")
            break

    # Evaluation on the validation predictions of the last epoch that ran
    y_pred_class = (np.array(y_preds) > 0.5).astype(int)
    accuracy = compute_metrics(y_trues, y_pred_class)
    print(f"Fold {fold_no} - Final Val Acc: {accuracy:.4f}")

    # Save the model (weights as of the last epoch that ran)
    torch.save(model.state_dict(), os.path.join(save_path, f"fold_{fold_no}_model.pth"))

    # Free memory before the next fold
    del model, x_train, x_val, y_train, y_val, train_loader, val_loader
    gc.collect()
    torch.cuda.empty_cache()


In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from torch.utils.data import DataLoader, TensorDataset
# from sklearn.model_selection import KFold
# from sklearn.metrics import confusion_matrix
# import numpy as np
# import os
# import datetime
# import matplotlib.pyplot as plt
# from tqdm import tqdm
# import gc

# # Configurações iniciais
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")


# # Definindo a rede neural
# class ConvNet(nn.Module):
#     def __init__(self):
#         super(ConvNet, self).__init__()
#         self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2)
#         self.bn1 = nn.BatchNorm2d(32)
#         self.pool1 = nn.MaxPool2d(kernel_size=2)
#         self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)
#         self.bn2 = nn.BatchNorm2d(64)
#         self.pool2 = nn.MaxPool2d(kernel_size=2)
#         self.flatten = nn.Flatten()
#         self.dropout1 = nn.Dropout(0.3)
#         self.fc1 = nn.Linear(64 * 11 * 29, 64)
#         self.dropout2 = nn.Dropout(0.3)
#         self.fc2 = nn.Linear(64, 1)

#     def forward(self, x):
#         x = self.pool1(self.bn1(torch.relu(self.conv1(x))))
#         x = self.pool2(self.bn2(torch.relu(self.conv2(x))))
#         x = self.flatten(x)
#         x = self.dropout1(torch.relu(self.fc1(x)))
#         x = self.dropout2(torch.sigmoid(self.fc2(x)))
#         return x

# # Função para calcular métricas
# def compute_metrics(y_true, y_pred):
#     cm = confusion_matrix(y_true, y_pred)
#     accuracy = np.sum(np.diag(cm)) / np.sum(cm)
#     return accuracy

# # Configurações do KFold
# kf = KFold(n_splits=5, shuffle=True, random_state=42)

# fold_no = 0
# for train_idx, val_idx in kf.split(X, Y):
#     fold_no += 1
#     # Criando o diretório para salvar os arquivos
#     save_path = os.path.join(log_path, f"{fold_no}_fold")
    
#     os.makedirs(save_path, exist_ok=True)

#     # Dados de treino e validação
#     x_train, x_val = X[train_idx], X[val_idx]
#     y_train, y_val = Y[train_idx], Y[val_idx]

#     # Convertendo para tensores do PyTorch
#     x_train, y_train = torch.tensor(x_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32)
#     x_val, y_val = torch.tensor(x_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32)

#     # Criando DataLoaders
#     batch_size = 128
#     train_dataset = TensorDataset(x_train.unsqueeze(1), y_train)
#     val_dataset = TensorDataset(x_val.unsqueeze(1), y_val)
#     train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
#     val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

#     # Inicializando o modelo, loss e otimizador
#     model = ConvNet().to(device)
#     criterion = nn.BCEWithLogitsLoss()
#     optimizer = optim.Adam(model.parameters(), lr=0.001)

#     # Treinando o modelo
#     num_epochs = 30
#     train_losses, val_losses = [], []

#     for epoch in range(num_epochs):
#         model.train()
#         running_loss = 0.0

#         # Loop de treinamento com tqdm
#         train_loop = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs} [Train]")
#         for inputs, targets in train_loop:
#             inputs, targets = inputs.to(device), targets.to(device)
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = criterion(outputs.squeeze(), targets)
#             loss.backward()
#             optimizer.step()
#             running_loss += loss.item()
#             train_loop.set_postfix(loss=loss.item())

#         train_loss = running_loss / len(train_loader)
#         train_losses.append(train_loss)

#         # Validação
#         model.eval()
#         val_loss = 0.0
#         y_preds, y_trues = [], []

#         val_loop = tqdm(val_loader, desc=f"Epoch {epoch + 1}/{num_epochs} [Val]")
#         with torch.no_grad():
#             for inputs, targets in val_loop:
#                 inputs, targets = inputs.to(device), targets.to(device)
#                 outputs = model(inputs)
#                 val_loss += criterion(outputs.squeeze(), targets).item()
#                 y_preds.extend(outputs.cpu().numpy())
#                 y_trues.extend(targets.cpu().numpy())

#         val_loss /= len(val_loader)
#         val_losses.append(val_loss)

#         print(f"Epoch {epoch + 1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f}")

#     # Avaliação
#     y_pred_class = (np.array(y_preds) > 0.5).astype(int)
#     accuracy = compute_metrics(y_trues, y_pred_class)

#     # Salvando o modelo
#     torch.save(model.state_dict(), os.path.join(save_path, f"fold_{fold_no}_model.pth"))

#     # Plotando as curvas de perda
#     plt.figure(figsize=(8, 4))
#     plt.plot(train_losses, label='Train Loss')
#     plt.plot(val_losses, label='Validation Loss')
#     plt.title('Loss per Epoch')
#     plt.legend()
#     plt.savefig(os.path.join(save_path, "loss_curve.png"))
#     plt.close()

#     # Liberação de memória
#     del model, x_train, x_val, y_train, y_val, train_loader, val_loader
#     gc.collect()
#     torch.cuda.empty_cache()