In [1]:
import os, glob
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torchvision
from torchvision.models import resnet50
from torchvision.transforms import transforms
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score, precision_score, matthews_corrcoef, balanced_accuracy_score, roc_auc_score
import warnings
import plotly.graph_objects as go
import plotly.io as pio

In [2]:
warnings.filterwarnings('ignore') 

random_seed = 42
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)

In [3]:
device = torch.device("cuda:0")
main_dir = '/s/project/gene2bird/groupA/'
data_dir = main_dir + 'data'
metrics_dir = main_dir + 'cnn_results'
data_files = glob.glob(os.path.join(data_dir, '*.csv'))

### Create Folder

In [4]:
def create_folder(path):
    if not os.path.exists(path):
        os.makedirs(path)
        print('{} successfully created'.format(path))
    else:
        print('{} exists'.format(path))

### Data Splitting

In [5]:
def split_data(file, save_dir): 
    data = pd.read_csv(file)

    train_data, test_data = train_test_split(data, test_size=0.2, random_state=random_seed)
    train_data, valid_data = train_test_split(train_data, test_size=0.2, random_state=random_seed)
    
    train_data.to_csv(save_dir + '/train.csv', index=False)
    valid_data.to_csv(save_dir + '/val.csv', index=False)
    test_data.to_csv(save_dir + '/test.csv', index=False)
    
    return train_data, valid_data, test_data

### Data Loader

In [6]:
class GeneDataset(Dataset):
    def __init__(self, data):
        self.data = pd.read_csv(data)
        # self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        embed = np.array(row[0:512].values)
        embed=np.vstack(embed).astype(np.float32)        
        embeddings = torch.from_numpy(embed)
        labels = torch.tensor(np.array(row[512]), dtype=torch.float32)
        
        return embeddings, labels

### Set Dataloaders

In [7]:
def set_loaders(file, batch_size, save_dir, drop_last=False):
    split_data(file, save_dir) #splits into train-val-test and save as .csv file
    
    # train_file = pd.read_csv(save_dir + '/train.csv')
    # val_file = pd.read_csv(save_dir + '/val.csv')
    # test_file = pd.read_csv(save_dir + '/test.csv')
    
    tr_dataset = GeneDataset(save_dir + '/train.csv')
    val_dataset = GeneDataset(save_dir + '/val.csv')
    test_dataset = GeneDataset(save_dir + '/test.csv')
    
    train_loader = DataLoader(tr_dataset, batch_size=batch_size, shuffle=True, drop_last=drop_last)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, drop_last=drop_last)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, drop_last=drop_last)
    
    return train_loader, val_loader, test_loader

### Training

In [5]:
def train(model, optimizer, criterion, batch_size, num_epochs, model_name, drop_last, file, sub_dir_metrics, sub_dir_data):

    train_loader, val_loader, test_loader = set_loaders(file, batch_size, sub_dir_data, drop_last=drop_last) #file is path of the .csv file

    print('\nThe training is starting using the {} \n'.format(fname))    
    train_losses = []
    valid_losses = []

    train_metrics = []
    valid_metrics = []

    best_loss = float('inf')


    # Training loop
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        train_total = 0
        y_true_train = []
        y_pred_train = []


        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            inputs = inputs.unsqueeze(2)
            inputs = inputs.permute((0, 2, 1, 3))

            # Forward pass
            outputs = model(inputs)
            # print('model output', outputs.shape)
            loss = criterion(outputs.squeeze(), labels.float())

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * inputs.size(0)
            train_total += inputs.size(0)
            y_true_train.extend(labels.tolist())
            y_pred_train.extend((outputs > 0.5).tolist())

        train_loss /= train_total
        train_acc = accuracy_score(y_true_train, y_pred_train)
        train_balanced_acc = balanced_accuracy_score(y_true_train, y_pred_train)
        train_precision = precision_score(y_true_train, y_pred_train)
        train_recall = recall_score(y_true_train, y_pred_train)
        train_f1 = f1_score(y_true_train, y_pred_train)
        train_roc_auc = roc_auc_score(y_true_train, y_pred_train)
        train_mcc = matthews_corrcoef(y_true_train, y_pred_train)

        model.eval()
        valid_loss = 0
        valid_correct = 0
        valid_total = 0
        y_true_valid = []
        y_pred_valid = []

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                inputs = inputs.unsqueeze(2)
                inputs = inputs.permute((0, 2, 1, 3))

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs.squeeze(), labels.float())

                valid_loss += loss.item() * inputs.size(0)
                # valid_correct += torch.sum((outputs > 0.5).squeeze() == labels.byte()).item()
                valid_total += inputs.size(0)
                y_true_valid.extend(labels.tolist())
                y_pred_valid.extend((outputs > 0.5).tolist())

        valid_loss /= valid_total
        valid_acc = accuracy_score(y_true_valid, y_pred_valid)
        valid_balanced_acc = balanced_accuracy_score(y_true_valid, y_pred_valid)
        valid_precision = precision_score(y_true_valid, y_pred_valid)
        valid_recall = recall_score(y_true_valid, y_pred_valid)
        valid_f1 = f1_score(y_true_valid, y_pred_valid)
        valid_roc_auc = roc_auc_score(y_true_valid, y_pred_valid)
        valid_mcc = matthews_corrcoef(y_true_valid, y_pred_valid)


        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        train_metrics.append([train_acc, train_balanced_acc, train_precision, train_recall, train_f1, train_roc_auc, train_mcc])
        valid_metrics.append([valid_acc, valid_balanced_acc, valid_precision, valid_recall, valid_f1, valid_roc_auc, valid_mcc])

        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, "
        f"Valid Loss: {valid_loss:.4f}, Train Acc: {train_acc:.4f}, Train Balanced Acc: {train_balanced_acc:.4f}, Valid Acc: {valid_acc:.4f}, Valid Balanced Acc: {valid_balanced_acc:.4f}")

        # Save the best model based on validation loss
        if valid_loss < best_loss:
            best_loss = valid_loss
            # torch.save(model.state_dict(), best_model_path)   
            torch.save(model, best_model_path)




    tr_metric = sub_dir_metrics + '/' + model_name + '_train_metrics.npy'
    val_metric = sub_dir_metrics + '/' + model_name + '_val_metrics.npy'
    np.save(tr_metric, np.array(train_metrics), allow_pickle=True)
    np.save(val_metric, np.array(valid_metrics), allow_pickle=True)

    tr_loss = sub_dir_metrics + '/' + model_name + '_train_loss.npy'
    val_loss = sub_dir_metrics + '/' + model_name + '_val_loss.npy'         
    np.save(tr_loss, np.array(train_losses), allow_pickle=True)
    np.save(val_loss, np.array(valid_losses), allow_pickle=True)

        


In [9]:
def test(batch_size, model_name, drop_last):
    
    for file in data_files:
        fname = file.split('/')[-1] #CORIN enzyme.csv
        protein_name = fname.split(' ')[0] #CORIN

        sub_dir_data = metrics_dir + '/' + protein_name + '/data'
        
        _, _, test_loader = set_loaders(file, batch_size, sub_dir_data, drop_last) #file is path of the .csv file

        sub_dir_metrics = metrics_dir + '/' + protein_name + '/metrics'
        sub_dir_model = metrics_dir + '/' + protein_name + '/model'
        
        best_model_path = sub_dir_model + '/' + model_name + '_' + 'best_model.pt'
        model = torch.load(best_model_path).to(device)

        print('\nThe testing is starting using the {} \n'.format(fname))    
        
        test_metrics = []
        y_true_test = []
        y_pred_test = []

        test_correct = 0
        test_total = 0        
        
        model.eval()
        
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                inputs = inputs.unsqueeze(2)
                inputs = inputs.permute((0, 2, 1, 3))

                # Forward pass
                outputs = model(inputs)

                test_total += inputs.size(0)
                y_true_test.extend(labels.tolist())
                y_pred_test.extend((outputs > 0.5).tolist())

    
        test_acc = accuracy_score(y_true_test, y_pred_test)
        test_balanced_acc = balanced_accuracy_score(y_true_test, y_pred_test)
        test_precision = precision_score(y_true_test, y_pred_test)
        test_recall = recall_score(y_true_test, y_pred_test)
        test_f1 = f1_score(y_true_test, y_pred_test)
        test_roc_auc = roc_auc_score(y_true_test, y_pred_test)
        test_mcc = matthews_corrcoef(y_true_test, y_pred_test)
                
        test_metrics.append([test_acc, test_balanced_acc, test_precision, test_recall, test_f1, test_roc_auc, test_mcc])
        
        test_metric = sub_dir_metrics + '/' + model_name + '_test_metrics.npy'
        np.save(test_metric, np.array(test_metrics), allow_pickle=True)

        print(f"Test Acc: {test_acc:.4f}, Test Balanced Acc: {test_balanced_acc:.4f}")


### Baseline Model

In [13]:
class BaselineModel(nn.Module):
    def __init__(self):
        super(BaselineModel, self).__init__()
       
        self.conv1 = nn.Conv2d(1, 8, 5, 2, 4)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(8, 64, 5,2,4)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(64, 128, 5,2,4)
        self.fc1 = nn.Linear(2176, 1024)
        self.fc2 = nn.Linear(1024, 128)
        self.fc3 = nn.Linear(128, 1)
        
        

    def forward(self, x):

        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.reshape(8, 2176)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x

### Training - Baseline

In [15]:
learning_rate = 0.001
num_epochs = 75
batch_size = 8
criterion_baseline = nn.BCEWithLogitsLoss()

model_name = 'baseline'

for file in data_files:
    fname = file.split('/')[-1] #CORIN enzyme.csv
    protein_name = fname.split(' ')[0] #CORIN

    sub_dir_data = metrics_dir + '/' + protein_name + '/data'
    create_folder(sub_dir_data)

    sub_dir_metrics = metrics_dir + '/' + protein_name + '/metrics'
    create_folder(sub_dir_metrics)

    sub_dir_model = metrics_dir + '/' + protein_name + '/model'
    create_folder(sub_dir_model)

    best_model_path = sub_dir_model + '/' + model_name + '_' + 'best_model.pt'
    
    model_baseline = BaselineModel().to(device)
    optimizer_baseline = optim.Adam(model_baseline.parameters(), lr=learning_rate)
    # optimizer_baseline = optim.SGD(model_baseline.parameters(), lr=learning_rate, momentum=0.9)
    train(model_baseline, optimizer_baseline, criterion_baseline, batch_size, num_epochs, model_name='baseline', drop_last=True, file=file, sub_dir_metrics=sub_dir_metrics, sub_dir_data=sub_dir_data)

/s/project/gene2bird/groupA/cnn_results/CORIN/data exists
/s/project/gene2bird/groupA/cnn_results/CORIN/metrics exists
/s/project/gene2bird/groupA/cnn_results/CORIN/model exists

The training is starting using the CORIN enzyme.csv 

Epoch [1/75], Train Loss: 0.6758, Valid Loss: 0.6669, Train Acc: 0.4375, Train Balanced Acc: 0.4781, Valid Acc: 0.3250, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.6477, Valid Loss: 0.7115, Train Acc: 0.4625, Train Balanced Acc: 0.5094, Valid Acc: 0.6750, Valid Balanced Acc: 0.5000
Epoch [3/75], Train Loss: 0.6042, Valid Loss: 0.6320, Train Acc: 0.7312, Train Balanced Acc: 0.4958, Valid Acc: 0.6750, Valid Balanced Acc: 0.5000
Epoch [4/75], Train Loss: 0.5975, Valid Loss: 0.6449, Train Acc: 0.7250, Train Balanced Acc: 0.5000, Valid Acc: 0.6750, Valid Balanced Acc: 0.5000
Epoch [5/75], Train Loss: 0.5854, Valid Loss: 0.6306, Train Acc: 0.7312, Train Balanced Acc: 0.5000, Valid Acc: 0.6750, Valid Balanced Acc: 0.5000
Epoch [6/75], Train Loss: 0.5857

In [16]:
test(batch_size=8, model_name='baseline', drop_last=True)


The testing is starting using the CORIN enzyme.csv 

Test Acc: 0.7292, Test Balanced Acc: 0.5000

The testing is starting using the SFRP1 protein.csv 

Test Acc: 0.5625, Test Balanced Acc: 0.5367

The testing is starting using the CLCN7 protein.csv 

Test Acc: 0.6042, Test Balanced Acc: 0.5000

The testing is starting using the ATP7A ATPase.csv 

Test Acc: 0.8333, Test Balanced Acc: 0.5000

The testing is starting using the NR4A3 protein.csv 

Test Acc: 0.8125, Test Balanced Acc: 0.5000

The testing is starting using the HPGDS synthase.csv 

Test Acc: 0.6458, Test Balanced Acc: 0.5000

The testing is starting using the TYRP1 oxidase.csv 

Test Acc: 0.4167, Test Balanced Acc: 0.4421

The testing is starting using the ASIP protein.csv 

Test Acc: 0.5833, Test Balanced Acc: 0.5000

The testing is starting using the GPM6B protein.csv 

Test Acc: 0.8333, Test Balanced Acc: 0.5000

The testing is starting using the ATP7B ATPase.csv 

Test Acc: 0.8125, Test Balanced Acc: 0.5000

The testing 

### Training - ResNet50

In [20]:
learning_rate = 0.001
num_epochs = 75
batch_size = 8
criterion_resnet50 = nn.BCEWithLogitsLoss()

model_name = 'resnet50'

for file in data_files:
    fname = file.split('/')[-1] #CORIN enzyme.csv
    protein_name = fname.split(' ')[0] #CORIN

    sub_dir_data = metrics_dir + '/' + protein_name + '/data'
    create_folder(sub_dir_data)

    sub_dir_metrics = metrics_dir + '/' + protein_name + '/metrics'
    create_folder(sub_dir_metrics)

    sub_dir_model = metrics_dir + '/' + protein_name + '/model'
    create_folder(sub_dir_model)

    best_model_path = sub_dir_model + '/' + model_name + '_' + 'best_model.pt'
    
    model_resnet50 = resnet50(pretrained=True)
    num_features = model_resnet50.fc.in_features
    model_resnet50.conv1 = nn.Conv2d(1, 64, 7, 2, 3, bias=False)
    for param in model_resnet50.parameters():
        param.requires_grad = False

    model_resnet50.fc = nn.Linear(num_features, 1)
    model_resnet50 = model_resnet50.to(device)

    optimizer_resnet50 = optim.SGD(model_resnet50.fc.parameters(), lr=learning_rate, momentum=0.9)
    # optimizer_resnet50 = optim.Adam(model_resnet50.fc.parameters(), lr=learning_rate)
    train(model_resnet50, optimizer_resnet50, criterion_resnet50, batch_size, num_epochs, model_name='resnet50', drop_last=False, file=file, sub_dir_metrics=sub_dir_metrics, sub_dir_data=sub_dir_data)

/s/project/gene2bird/groupA/cnn_results/CORIN/data exists
/s/project/gene2bird/groupA/cnn_results/CORIN/metrics exists
/s/project/gene2bird/groupA/cnn_results/CORIN/model exists

The training is starting using the CORIN enzyme.csv 

Epoch [1/75], Train Loss: 0.6113, Valid Loss: 1.8421, Train Acc: 0.6402, Train Balanced Acc: 0.5239, Valid Acc: 0.6667, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.5940, Valid Loss: 0.8848, Train Acc: 0.6646, Train Balanced Acc: 0.4902, Valid Acc: 0.6667, Valid Balanced Acc: 0.5000
Epoch [3/75], Train Loss: 0.6025, Valid Loss: 0.6631, Train Acc: 0.6402, Train Balanced Acc: 0.5455, Valid Acc: 0.4286, Valid Balanced Acc: 0.4821
Epoch [4/75], Train Loss: 0.6168, Valid Loss: 0.6414, Train Acc: 0.5854, Train Balanced Acc: 0.4864, Valid Acc: 0.6429, Valid Balanced Acc: 0.5179
Epoch [5/75], Train Loss: 0.5415, Valid Loss: 0.6928, Train Acc: 0.7439, Train Balanced Acc: 0.5227, Valid Acc: 0.6667, Valid Balanced Acc: 0.5000
Epoch [6/75], Train Loss: 0.5290

In [21]:
test(batch_size=8, model_name='resnet50', drop_last=False)


The testing is starting using the CORIN enzyme.csv 

Test Acc: 0.6731, Test Balanced Acc: 0.5769

The testing is starting using the SFRP1 protein.csv 

Test Acc: 0.3846, Test Balanced Acc: 0.3869

The testing is starting using the CLCN7 protein.csv 

Test Acc: 0.5962, Test Balanced Acc: 0.5227

The testing is starting using the ATP7A ATPase.csv 

Test Acc: 0.5769, Test Balanced Acc: 0.3488

The testing is starting using the NR4A3 protein.csv 

Test Acc: 0.8269, Test Balanced Acc: 0.5000

The testing is starting using the HPGDS synthase.csv 

Test Acc: 0.6538, Test Balanced Acc: 0.5000

The testing is starting using the TYRP1 oxidase.csv 

Test Acc: 0.5385, Test Balanced Acc: 0.4242

The testing is starting using the ASIP protein.csv 

Test Acc: 0.5962, Test Balanced Acc: 0.5000

The testing is starting using the GPM6B protein.csv 

Test Acc: 0.6538, Test Balanced Acc: 0.4393

The testing is starting using the ATP7B ATPase.csv 

Test Acc: 0.8077, Test Balanced Acc: 0.5000

The testing 

### Training - ResNet18

In [22]:
learning_rate = 0.001
num_epochs = 75
batch_size = 8
criterion_resnet18 = nn.BCEWithLogitsLoss()

model_name = 'resnet18'

for file in data_files:
    fname = file.split('/')[-1] #CORIN enzyme.csv
    protein_name = fname.split(' ')[0] #CORIN

    sub_dir_data = metrics_dir + '/' + protein_name + '/data'
    create_folder(sub_dir_data)

    sub_dir_metrics = metrics_dir + '/' + protein_name + '/metrics'
    create_folder(sub_dir_metrics)

    sub_dir_model = metrics_dir + '/' + protein_name + '/model'
    create_folder(sub_dir_model)

    best_model_path = sub_dir_model + '/' + model_name + '_' + 'best_model.pt'
    
    model_resnet18 = torchvision.models.resnet18(pretrained=True)
    num_features = model_resnet18.fc.in_features
    model_resnet18.conv1 = nn.Conv2d(1, 64, 7, 2, 3, bias=False)
    for param in model_resnet18.parameters():
        param.requires_grad = False

    model_resnet18.fc = nn.Linear(num_features, 1)
    model_resnet18 = model_resnet18.to(device)

    optimizer_resnet18 = optim.SGD(model_resnet18.fc.parameters(), lr=learning_rate, momentum=0.9)
    # optimizer_resnet18 = optim.Adam(model_resnet18.fc.parameters(), lr=learning_rate)
    train(model_resnet18, optimizer_resnet18, criterion_resnet18, batch_size, num_epochs, model_name='resnet18', drop_last=False, file=file, sub_dir_metrics=sub_dir_metrics, sub_dir_data=sub_dir_data)

/s/project/gene2bird/groupA/cnn_results/CORIN/data exists
/s/project/gene2bird/groupA/cnn_results/CORIN/metrics exists
/s/project/gene2bird/groupA/cnn_results/CORIN/model exists

The training is starting using the CORIN enzyme.csv 

Epoch [1/75], Train Loss: 0.6678, Valid Loss: 0.6859, Train Acc: 0.6402, Train Balanced Acc: 0.5239, Valid Acc: 0.3333, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.5973, Valid Loss: 0.6415, Train Acc: 0.6646, Train Balanced Acc: 0.5117, Valid Acc: 0.6667, Valid Balanced Acc: 0.5893
Epoch [3/75], Train Loss: 0.5654, Valid Loss: 0.7468, Train Acc: 0.6585, Train Balanced Acc: 0.5364, Valid Acc: 0.6667, Valid Balanced Acc: 0.5000
Epoch [4/75], Train Loss: 0.5695, Valid Loss: 0.6642, Train Acc: 0.7195, Train Balanced Acc: 0.5564, Valid Acc: 0.6429, Valid Balanced Acc: 0.4821
Epoch [5/75], Train Loss: 0.5293, Valid Loss: 0.6756, Train Acc: 0.7561, Train Balanced Acc: 0.6030, Valid Acc: 0.6429, Valid Balanced Acc: 0.5000
Epoch [6/75], Train Loss: 0.5203

In [23]:
test(batch_size=8, model_name='resnet18', drop_last=False)


The testing is starting using the CORIN enzyme.csv 

Test Acc: 0.5192, Test Balanced Acc: 0.4744

The testing is starting using the SFRP1 protein.csv 

Test Acc: 0.5000, Test Balanced Acc: 0.5238

The testing is starting using the CLCN7 protein.csv 

Test Acc: 0.4231, Test Balanced Acc: 0.5000

The testing is starting using the ATP7A ATPase.csv 

Test Acc: 0.8077, Test Balanced Acc: 0.4884

The testing is starting using the NR4A3 protein.csv 

Test Acc: 0.8077, Test Balanced Acc: 0.4884

The testing is starting using the HPGDS synthase.csv 

Test Acc: 0.6538, Test Balanced Acc: 0.5000

The testing is starting using the TYRP1 oxidase.csv 

Test Acc: 0.6346, Test Balanced Acc: 0.5000

The testing is starting using the ASIP protein.csv 

Test Acc: 0.5577, Test Balanced Acc: 0.5061

The testing is starting using the GPM6B protein.csv 

Test Acc: 0.8462, Test Balanced Acc: 0.5556

The testing is starting using the ATP7B ATPase.csv 

Test Acc: 0.7692, Test Balanced Acc: 0.4762

The testing 

### Training - ResNeSt

In [25]:
learning_rate = 0.001
num_epochs = 75
batch_size = 8
criterion_resnest = nn.BCEWithLogitsLoss()

model_name = 'resnest'

for file in data_files:
    fname = file.split('/')[-1] #CORIN enzyme.csv
    protein_name = fname.split(' ')[0] #CORIN

    sub_dir_data = metrics_dir + '/' + protein_name + '/data'
    create_folder(sub_dir_data)

    sub_dir_metrics = metrics_dir + '/' + protein_name + '/metrics'
    create_folder(sub_dir_metrics)

    sub_dir_model = metrics_dir + '/' + protein_name + '/model'
    create_folder(sub_dir_model)

    best_model_path = sub_dir_model + '/' + model_name + '_' + 'best_model.pt'
    
    # torch.hub.list('zhanghang1989/ResNeSt', force_reload=True)
    model_resnest = torch.hub.load('zhanghang1989/ResNeSt', 'resnest50', pretrained=True)
    num_features = model_resnest.fc.in_features

    model_resnest.conv1 = nn.Conv2d(1, 64, 7, 2, 3, bias=False)
    for param in model_resnest.parameters():
        param.requires_grad = False

    model_resnest.fc = nn.Linear(num_features, 1)
    model_resnest = model_resnest.to(device)

    # optimizer_resnest = optim.SGD(model_resnest.fc.parameters(), lr=learning_rate, momentum=0.9)
    optimizer_resnest = optim.Adam(model_resnest.fc.parameters(), lr=learning_rate)
    train(model_resnest, optimizer_resnest, criterion_resnest, batch_size, num_epochs, model_name='resnest', drop_last=False, file=file, sub_dir_metrics=sub_dir_metrics, sub_dir_data=sub_dir_data)

/s/project/gene2bird/groupA/cnn_results/CORIN/data exists
/s/project/gene2bird/groupA/cnn_results/CORIN/metrics exists
/s/project/gene2bird/groupA/cnn_results/CORIN/model exists


Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the CORIN enzyme.csv 

Epoch [1/75], Train Loss: 0.6382, Valid Loss: 0.6559, Train Acc: 0.6585, Train Balanced Acc: 0.5076, Valid Acc: 0.3333, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.6001, Valid Loss: 0.6645, Train Acc: 0.7317, Train Balanced Acc: 0.5360, Valid Acc: 0.3333, Valid Balanced Acc: 0.5000
Epoch [3/75], Train Loss: 0.5721, Valid Loss: 0.8849, Train Acc: 0.7195, Train Balanced Acc: 0.5708, Valid Acc: 0.6905, Valid Balanced Acc: 0.5357
Epoch [4/75], Train Loss: 0.5583, Valid Loss: 0.6686, Train Acc: 0.7073, Train Balanced Acc: 0.6129, Valid Acc: 0.6429, Valid Balanced Acc: 0.4821
Epoch [5/75], Train Loss: 0.5328, Valid Loss: 0.6333, Train Acc: 0.7134, Train Balanced Acc: 0.6242, Valid Acc: 0.5714, Valid Balanced Acc: 0.4464
Epoch [6/75], Train Loss: 0.5287, Valid Loss: 0.6025, Train Acc: 0.7500, Train Balanced Acc: 0.5485, Valid Acc: 0.6667, Valid Balanced Acc: 0.5179
Epoch [7/75], Train Loss: 0.5147, Valid Loss: 0.5880, Train Acc

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the SFRP1 protein.csv 

Epoch [1/75], Train Loss: 0.7094, Valid Loss: 0.6775, Train Acc: 0.4573, Train Balanced Acc: 0.4938, Valid Acc: 0.3095, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.6638, Valid Loss: 0.7211, Train Acc: 0.5366, Train Balanced Acc: 0.5538, Valid Acc: 0.3095, Valid Balanced Acc: 0.5000
Epoch [3/75], Train Loss: 0.6425, Valid Loss: 1.3567, Train Acc: 0.6707, Train Balanced Acc: 0.6653, Valid Acc: 0.3095, Valid Balanced Acc: 0.5000
Epoch [4/75], Train Loss: 0.6535, Valid Loss: 0.6384, Train Acc: 0.5183, Train Balanced Acc: 0.5707, Valid Acc: 0.5000, Valid Balanced Acc: 0.5743
Epoch [5/75], Train Loss: 0.6230, Valid Loss: 0.8410, Train Acc: 0.7073, Train Balanced Acc: 0.6954, Valid Acc: 0.3095, Valid Balanced Acc: 0.4788
Epoch [6/75], Train Loss: 0.6018, Valid Loss: 0.6938, Train Acc: 0.5915, Train Balanced Acc: 0.6236, Valid Acc: 0.4048, Valid Balanced Acc: 0.3992
Epoch [7/75], Train Loss: 0.5875, Valid Loss: 0.7879, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the CLCN7 protein.csv 

Epoch [1/75], Train Loss: 0.6594, Valid Loss: 0.6936, Train Acc: 0.6341, Train Balanced Acc: 0.5123, Valid Acc: 0.3810, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.6107, Valid Loss: 0.6876, Train Acc: 0.5793, Train Balanced Acc: 0.5739, Valid Acc: 0.4048, Valid Balanced Acc: 0.5192
Epoch [3/75], Train Loss: 0.5839, Valid Loss: 0.7284, Train Acc: 0.7378, Train Balanced Acc: 0.6430, Valid Acc: 0.6429, Valid Balanced Acc: 0.5433
Epoch [4/75], Train Loss: 0.5974, Valid Loss: 0.6867, Train Acc: 0.6768, Train Balanced Acc: 0.5654, Valid Acc: 0.5000, Valid Balanced Acc: 0.4880
Epoch [5/75], Train Loss: 0.5535, Valid Loss: 0.6778, Train Acc: 0.7378, Train Balanced Acc: 0.7047, Valid Acc: 0.5714, Valid Balanced Acc: 0.4976
Epoch [6/75], Train Loss: 0.5815, Valid Loss: 0.7531, Train Acc: 0.6341, Train Balanced Acc: 0.5628, Valid Acc: 0.4048, Valid Balanced Acc: 0.4591
Epoch [7/75], Train Loss: 0.5528, Valid Loss: 0.7556, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the ATP7A ATPase.csv 

Epoch [1/75], Train Loss: 0.6056, Valid Loss: 0.6461, Train Acc: 0.6951, Train Balanced Acc: 0.4681, Valid Acc: 0.3333, Valid Balanced Acc: 0.5191
Epoch [2/75], Train Loss: 0.5773, Valid Loss: 0.6494, Train Acc: 0.6159, Train Balanced Acc: 0.5343, Valid Acc: 0.2857, Valid Balanced Acc: 0.5161
Epoch [3/75], Train Loss: 0.5314, Valid Loss: 0.6038, Train Acc: 0.7683, Train Balanced Acc: 0.5673, Valid Acc: 0.5714, Valid Balanced Acc: 0.6510
Epoch [4/75], Train Loss: 0.5405, Valid Loss: 0.5768, Train Acc: 0.7195, Train Balanced Acc: 0.5520, Valid Acc: 0.6667, Valid Balanced Acc: 0.4809
Epoch [5/75], Train Loss: 0.5107, Valid Loss: 0.6273, Train Acc: 0.7744, Train Balanced Acc: 0.5714, Valid Acc: 0.7143, Valid Balanced Acc: 0.5132
Epoch [6/75], Train Loss: 0.4762, Valid Loss: 0.6601, Train Acc: 0.7988, Train Balanced Acc: 0.6552, Valid Acc: 0.7143, Valid Balanced Acc: 0.4839
Epoch [7/75], Train Loss: 0.4855, Valid Loss: 0.6760, Train Acc

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the NR4A3 protein.csv 

Epoch [1/75], Train Loss: 0.6136, Valid Loss: 0.4996, Train Acc: 0.7073, Train Balanced Acc: 0.4663, Valid Acc: 0.8571, Valid Balanced Acc: 0.6389
Epoch [2/75], Train Loss: 0.5291, Valid Loss: 0.5709, Train Acc: 0.7744, Train Balanced Acc: 0.5383, Valid Acc: 0.3810, Valid Balanced Acc: 0.5694
Epoch [3/75], Train Loss: 0.5142, Valid Loss: 0.4715, Train Acc: 0.7622, Train Balanced Acc: 0.5017, Valid Acc: 0.6667, Valid Balanced Acc: 0.4583
Epoch [4/75], Train Loss: 0.5146, Valid Loss: 0.4726, Train Acc: 0.7622, Train Balanced Acc: 0.5113, Valid Acc: 0.7619, Valid Balanced Acc: 0.4444
Epoch [5/75], Train Loss: 0.4941, Valid Loss: 0.4972, Train Acc: 0.7866, Train Balanced Acc: 0.6036, Valid Acc: 0.8095, Valid Balanced Acc: 0.4722
Epoch [6/75], Train Loss: 0.4890, Valid Loss: 0.4980, Train Acc: 0.7988, Train Balanced Acc: 0.5732, Valid Acc: 0.6905, Valid Balanced Acc: 0.4028
Epoch [7/75], Train Loss: 0.4796, Valid Loss: 0.4494, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the HPGDS synthase.csv 

Epoch [1/75], Train Loss: 0.5882, Valid Loss: 0.5899, Train Acc: 0.6707, Train Balanced Acc: 0.5055, Valid Acc: 0.3571, Valid Balanced Acc: 0.4596
Epoch [2/75], Train Loss: 0.5524, Valid Loss: 0.5981, Train Acc: 0.7195, Train Balanced Acc: 0.5226, Valid Acc: 0.7619, Valid Balanced Acc: 0.5662
Epoch [3/75], Train Loss: 0.5452, Valid Loss: 0.6438, Train Acc: 0.7195, Train Balanced Acc: 0.5539, Valid Acc: 0.2619, Valid Balanced Acc: 0.3529
Epoch [4/75], Train Loss: 0.5269, Valid Loss: 0.5786, Train Acc: 0.7256, Train Balanced Acc: 0.5814, Valid Acc: 0.5714, Valid Balanced Acc: 0.6397
Epoch [5/75], Train Loss: 0.5101, Valid Loss: 0.5587, Train Acc: 0.7317, Train Balanced Acc: 0.5543, Valid Acc: 0.5238, Valid Balanced Acc: 0.4191
Epoch [6/75], Train Loss: 0.4841, Valid Loss: 0.5524, Train Acc: 0.7927, Train Balanced Acc: 0.7123, Valid Acc: 0.6905, Valid Balanced Acc: 0.4265
Epoch [7/75], Train Loss: 0.4696, Valid Loss: 0.5496, Train A

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the TYRP1 oxidase.csv 

Epoch [1/75], Train Loss: 0.6893, Valid Loss: 0.6689, Train Acc: 0.4085, Train Balanced Acc: 0.4503, Valid Acc: 0.3571, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.6648, Valid Loss: 0.6573, Train Acc: 0.5305, Train Balanced Acc: 0.5363, Valid Acc: 0.3810, Valid Balanced Acc: 0.5185
Epoch [3/75], Train Loss: 0.6422, Valid Loss: 0.6394, Train Acc: 0.5671, Train Balanced Acc: 0.5916, Valid Acc: 0.4762, Valid Balanced Acc: 0.5926
Epoch [4/75], Train Loss: 0.6372, Valid Loss: 0.6734, Train Acc: 0.6220, Train Balanced Acc: 0.6309, Valid Acc: 0.4524, Valid Balanced Acc: 0.5444
Epoch [5/75], Train Loss: 0.6083, Valid Loss: 0.6533, Train Acc: 0.6098, Train Balanced Acc: 0.6434, Valid Acc: 0.5238, Valid Balanced Acc: 0.5259
Epoch [6/75], Train Loss: 0.5869, Valid Loss: 0.6436, Train Acc: 0.6890, Train Balanced Acc: 0.6944, Valid Acc: 0.4524, Valid Balanced Acc: 0.4852
Epoch [7/75], Train Loss: 0.5826, Valid Loss: 0.6182, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the ASIP protein.csv 

Epoch [1/75], Train Loss: 0.6367, Valid Loss: 0.6225, Train Acc: 0.6280, Train Balanced Acc: 0.5357, Valid Acc: 0.6667, Valid Balanced Acc: 0.4828
Epoch [2/75], Train Loss: 0.6064, Valid Loss: 0.6241, Train Acc: 0.7012, Train Balanced Acc: 0.5176, Valid Acc: 0.4286, Valid Balanced Acc: 0.5650
Epoch [3/75], Train Loss: 0.5714, Valid Loss: 0.6874, Train Acc: 0.6646, Train Balanced Acc: 0.6730, Valid Acc: 0.6905, Valid Balanced Acc: 0.5000
Epoch [4/75], Train Loss: 0.5776, Valid Loss: 0.6225, Train Acc: 0.6829, Train Balanced Acc: 0.5572, Valid Acc: 0.5000, Valid Balanced Acc: 0.5531
Epoch [5/75], Train Loss: 0.5556, Valid Loss: 0.6425, Train Acc: 0.7012, Train Balanced Acc: 0.5937, Valid Acc: 0.5476, Valid Balanced Acc: 0.5451
Epoch [6/75], Train Loss: 0.5313, Valid Loss: 0.6978, Train Acc: 0.7500, Train Balanced Acc: 0.6753, Valid Acc: 0.4762, Valid Balanced Acc: 0.3873
Epoch [7/75], Train Loss: 0.5178, Valid Loss: 0.7652, Train Acc

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the GPM6B protein.csv 

Epoch [1/75], Train Loss: 0.5876, Valid Loss: 0.6437, Train Acc: 0.7683, Train Balanced Acc: 0.5073, Valid Acc: 0.2857, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.4782, Valid Loss: 0.6155, Train Acc: 0.8171, Train Balanced Acc: 0.5234, Valid Acc: 0.6429, Valid Balanced Acc: 0.6500
Epoch [3/75], Train Loss: 0.4363, Valid Loss: 0.6335, Train Acc: 0.8232, Train Balanced Acc: 0.5000, Valid Acc: 0.4286, Valid Balanced Acc: 0.5250
Epoch [4/75], Train Loss: 0.4635, Valid Loss: 0.7199, Train Acc: 0.8354, Train Balanced Acc: 0.5345, Valid Acc: 0.7143, Valid Balanced Acc: 0.5000
Epoch [5/75], Train Loss: 0.4733, Valid Loss: 0.6234, Train Acc: 0.8171, Train Balanced Acc: 0.4963, Valid Acc: 0.6429, Valid Balanced Acc: 0.4500
Epoch [6/75], Train Loss: 0.4091, Valid Loss: 0.6724, Train Acc: 0.8293, Train Balanced Acc: 0.5172, Valid Acc: 0.7143, Valid Balanced Acc: 0.5250
Epoch [7/75], Train Loss: 0.3967, Valid Loss: 0.7066, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the ATP7B ATPase.csv 

Epoch [1/75], Train Loss: 0.5610, Valid Loss: 0.5450, Train Acc: 0.7256, Train Balanced Acc: 0.4686, Valid Acc: 0.6905, Valid Balanced Acc: 0.4143
Epoch [2/75], Train Loss: 0.5218, Valid Loss: 0.5535, Train Acc: 0.7988, Train Balanced Acc: 0.5364, Valid Acc: 0.5000, Valid Balanced Acc: 0.5286
Epoch [3/75], Train Loss: 0.5441, Valid Loss: 0.4892, Train Acc: 0.7134, Train Balanced Acc: 0.5043, Valid Acc: 0.7857, Valid Balanced Acc: 0.5857
Epoch [4/75], Train Loss: 0.5007, Valid Loss: 0.4774, Train Acc: 0.7805, Train Balanced Acc: 0.4923, Valid Acc: 0.8095, Valid Balanced Acc: 0.4857
Epoch [5/75], Train Loss: 0.4711, Valid Loss: 0.4743, Train Acc: 0.7988, Train Balanced Acc: 0.6124, Valid Acc: 0.7857, Valid Balanced Acc: 0.5286
Epoch [6/75], Train Loss: 0.4903, Valid Loss: 0.5022, Train Acc: 0.7866, Train Balanced Acc: 0.4962, Valid Acc: 0.7381, Valid Balanced Acc: 0.5571
Epoch [7/75], Train Loss: 0.4373, Valid Loss: 0.4849, Train Acc

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the WNT5A protein.csv 

Epoch [1/75], Train Loss: 0.6310, Valid Loss: 0.6331, Train Acc: 0.7073, Train Balanced Acc: 0.5122, Valid Acc: 0.5952, Valid Balanced Acc: 0.4523
Epoch [2/75], Train Loss: 0.5861, Valid Loss: 0.6086, Train Acc: 0.6829, Train Balanced Acc: 0.4878, Valid Acc: 0.4286, Valid Balanced Acc: 0.5862
Epoch [3/75], Train Loss: 0.5433, Valid Loss: 0.6188, Train Acc: 0.7256, Train Balanced Acc: 0.5650, Valid Acc: 0.5714, Valid Balanced Acc: 0.5199
Epoch [4/75], Train Loss: 0.5163, Valid Loss: 0.6355, Train Acc: 0.7683, Train Balanced Acc: 0.5854, Valid Acc: 0.5476, Valid Balanced Acc: 0.4390
Epoch [5/75], Train Loss: 0.4928, Valid Loss: 0.6294, Train Acc: 0.7683, Train Balanced Acc: 0.6341, Valid Acc: 0.6905, Valid Balanced Acc: 0.6061
Epoch [6/75], Train Loss: 0.4865, Valid Loss: 0.6229, Train Acc: 0.7805, Train Balanced Acc: 0.6585, Valid Acc: 0.5476, Valid Balanced Acc: 0.3966
Epoch [7/75], Train Loss: 0.4710, Valid Loss: 0.6807, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the CASP3 protein.csv 

Epoch [1/75], Train Loss: 0.5309, Valid Loss: 0.6434, Train Acc: 0.7439, Train Balanced Acc: 0.4976, Valid Acc: 0.0714, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.5012, Valid Loss: 0.4703, Train Acc: 0.7927, Train Balanced Acc: 0.5043, Valid Acc: 0.6190, Valid Balanced Acc: 0.6410
Epoch [3/75], Train Loss: 0.4807, Valid Loss: 0.4288, Train Acc: 0.7927, Train Balanced Acc: 0.5516, Valid Acc: 0.9048, Valid Balanced Acc: 0.6410
Epoch [4/75], Train Loss: 0.4564, Valid Loss: 0.3618, Train Acc: 0.8110, Train Balanced Acc: 0.5275, Valid Acc: 0.9286, Valid Balanced Acc: 0.6538
Epoch [5/75], Train Loss: 0.4199, Valid Loss: 0.2938, Train Acc: 0.8049, Train Balanced Acc: 0.6065, Valid Acc: 0.9286, Valid Balanced Acc: 0.5000
Epoch [6/75], Train Loss: 0.4197, Valid Loss: 0.3294, Train Acc: 0.8171, Train Balanced Acc: 0.5668, Valid Acc: 0.9048, Valid Balanced Acc: 0.4872
Epoch [7/75], Train Loss: 0.4292, Valid Loss: 0.3963, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the NDP protein.csv 

Epoch [1/75], Train Loss: 0.5315, Valid Loss: 0.4929, Train Acc: 0.7683, Train Balanced Acc: 0.5232, Valid Acc: 0.8095, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.4781, Valid Loss: 0.5019, Train Acc: 0.8110, Train Balanced Acc: 0.5000, Valid Acc: 0.7857, Valid Balanced Acc: 0.4853
Epoch [3/75], Train Loss: 0.4569, Valid Loss: 0.5063, Train Acc: 0.8049, Train Balanced Acc: 0.4962, Valid Acc: 0.8095, Valid Balanced Acc: 0.5000
Epoch [4/75], Train Loss: 0.4681, Valid Loss: 0.4971, Train Acc: 0.8049, Train Balanced Acc: 0.5086, Valid Acc: 0.8095, Valid Balanced Acc: 0.5000
Epoch [5/75], Train Loss: 0.4500, Valid Loss: 0.5335, Train Acc: 0.8171, Train Balanced Acc: 0.5532, Valid Acc: 0.7143, Valid Balanced Acc: 0.4412
Epoch [6/75], Train Loss: 0.4483, Valid Loss: 0.5323, Train Acc: 0.8171, Train Balanced Acc: 0.5409, Valid Acc: 0.6190, Valid Balanced Acc: 0.4301
Epoch [7/75], Train Loss: 0.4145, Valid Loss: 0.5191, Train Acc:

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the RAB38 protein.csv 

Epoch [1/75], Train Loss: 0.6281, Valid Loss: 0.6174, Train Acc: 0.6098, Train Balanced Acc: 0.4699, Valid Acc: 0.4048, Valid Balanced Acc: 0.4208
Epoch [2/75], Train Loss: 0.5976, Valid Loss: 0.6270, Train Acc: 0.6707, Train Balanced Acc: 0.5368, Valid Acc: 0.3810, Valid Balanced Acc: 0.4633
Epoch [3/75], Train Loss: 0.5893, Valid Loss: 0.6079, Train Acc: 0.7073, Train Balanced Acc: 0.5863, Valid Acc: 0.5238, Valid Balanced Acc: 0.6188
Epoch [4/75], Train Loss: 0.5813, Valid Loss: 0.6430, Train Acc: 0.7195, Train Balanced Acc: 0.5833, Valid Acc: 0.3571, Valid Balanced Acc: 0.4472
Epoch [5/75], Train Loss: 0.5721, Valid Loss: 0.5563, Train Acc: 0.6524, Train Balanced Acc: 0.6233, Valid Acc: 0.6905, Valid Balanced Acc: 0.4971
Epoch [6/75], Train Loss: 0.5246, Valid Loss: 0.6175, Train Acc: 0.7805, Train Balanced Acc: 0.6971, Valid Acc: 0.3571, Valid Balanced Acc: 0.4472
Epoch [7/75], Train Loss: 0.5537, Valid Loss: 0.5691, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the TCF7 factor.csv 

Epoch [1/75], Train Loss: 0.3260, Valid Loss: 0.3774, Train Acc: 0.9268, Train Balanced Acc: 0.5000, Valid Acc: 0.9286, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.2757, Valid Loss: 0.4484, Train Acc: 0.9268, Train Balanced Acc: 0.5000, Valid Acc: 0.9286, Valid Balanced Acc: 0.5000
Epoch [3/75], Train Loss: 0.2366, Valid Loss: 0.2898, Train Acc: 0.9268, Train Balanced Acc: 0.5000, Valid Acc: 0.9286, Valid Balanced Acc: 0.5000
Epoch [4/75], Train Loss: 0.2219, Valid Loss: 0.3144, Train Acc: 0.9268, Train Balanced Acc: 0.5000, Valid Acc: 0.9286, Valid Balanced Acc: 0.5000
Epoch [5/75], Train Loss: 0.2266, Valid Loss: 0.2958, Train Acc: 0.9268, Train Balanced Acc: 0.5000, Valid Acc: 0.9286, Valid Balanced Acc: 0.5000
Epoch [6/75], Train Loss: 0.2080, Valid Loss: 0.3283, Train Acc: 0.9268, Train Balanced Acc: 0.5000, Valid Acc: 0.9286, Valid Balanced Acc: 0.5000
Epoch [7/75], Train Loss: 0.1833, Valid Loss: 0.3211, Train Acc:

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the JMJD6 hydroxylase.csv 

Epoch [1/75], Train Loss: 0.6481, Valid Loss: 0.6401, Train Acc: 0.6341, Train Balanced Acc: 0.4788, Valid Acc: 0.2381, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.6014, Valid Loss: 0.5768, Train Acc: 0.5915, Train Balanced Acc: 0.5402, Valid Acc: 0.7381, Valid Balanced Acc: 0.4844
Epoch [3/75], Train Loss: 0.5781, Valid Loss: 0.5434, Train Acc: 0.7073, Train Balanced Acc: 0.5183, Valid Acc: 0.6905, Valid Balanced Acc: 0.5563
Epoch [4/75], Train Loss: 0.5606, Valid Loss: 0.6412, Train Acc: 0.6646, Train Balanced Acc: 0.6286, Valid Acc: 0.6429, Valid Balanced Acc: 0.4219
Epoch [5/75], Train Loss: 0.5587, Valid Loss: 0.5355, Train Acc: 0.7012, Train Balanced Acc: 0.6056, Valid Acc: 0.7381, Valid Balanced Acc: 0.6219
Epoch [6/75], Train Loss: 0.4947, Valid Loss: 0.6235, Train Acc: 0.8110, Train Balanced Acc: 0.6954, Valid Acc: 0.5000, Valid Balanced Acc: 0.4656
Epoch [7/75], Train Loss: 0.4887, Valid Loss: 0.5505, Trai

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the WNT7B protein.csv 

Epoch [1/75], Train Loss: 0.6630, Valid Loss: 0.5930, Train Acc: 0.5976, Train Balanced Acc: 0.4874, Valid Acc: 0.2619, Valid Balanced Acc: 0.4963
Epoch [2/75], Train Loss: 0.6303, Valid Loss: 0.6257, Train Acc: 0.5732, Train Balanced Acc: 0.5450, Valid Acc: 0.2857, Valid Balanced Acc: 0.5110
Epoch [3/75], Train Loss: 0.6061, Valid Loss: 0.5711, Train Acc: 0.5915, Train Balanced Acc: 0.5153, Valid Acc: 0.6667, Valid Balanced Acc: 0.5074
Epoch [4/75], Train Loss: 0.5670, Valid Loss: 0.5825, Train Acc: 0.7073, Train Balanced Acc: 0.5509, Valid Acc: 0.5000, Valid Balanced Acc: 0.6434
Epoch [5/75], Train Loss: 0.5761, Valid Loss: 0.5201, Train Acc: 0.6646, Train Balanced Acc: 0.6652, Valid Acc: 0.6429, Valid Balanced Acc: 0.4449
Epoch [6/75], Train Loss: 0.5513, Valid Loss: 0.6014, Train Acc: 0.7500, Train Balanced Acc: 0.6787, Valid Acc: 0.5000, Valid Balanced Acc: 0.5478
Epoch [7/75], Train Loss: 0.5338, Valid Loss: 0.5560, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the CRHBP protein.csv 

Epoch [1/75], Train Loss: 0.6852, Valid Loss: 0.6479, Train Acc: 0.5183, Train Balanced Acc: 0.4827, Valid Acc: 0.3333, Valid Balanced Acc: 0.5000
Epoch [2/75], Train Loss: 0.6525, Valid Loss: 0.6914, Train Acc: 0.4878, Train Balanced Acc: 0.5362, Valid Acc: 0.3810, Valid Balanced Acc: 0.4464
Epoch [3/75], Train Loss: 0.6457, Valid Loss: 0.7255, Train Acc: 0.5915, Train Balanced Acc: 0.5580, Valid Acc: 0.6667, Valid Balanced Acc: 0.5000
Epoch [4/75], Train Loss: 0.6032, Valid Loss: 0.6894, Train Acc: 0.6524, Train Balanced Acc: 0.6131, Valid Acc: 0.5238, Valid Balanced Acc: 0.3929
Epoch [5/75], Train Loss: 0.6074, Valid Loss: 0.6701, Train Acc: 0.5854, Train Balanced Acc: 0.6308, Valid Acc: 0.5238, Valid Balanced Acc: 0.5000
Epoch [6/75], Train Loss: 0.5764, Valid Loss: 0.6411, Train Acc: 0.7195, Train Balanced Acc: 0.6801, Valid Acc: 0.5714, Valid Balanced Acc: 0.5000
Epoch [7/75], Train Loss: 0.5689, Valid Loss: 0.6779, Train Ac

Using cache found in /data/ouga/home/ag_gagneur/deu/.cache/torch/hub/zhanghang1989_ResNeSt_master



The training is starting using the FABP7 protein.csv 

Epoch [1/75], Train Loss: 0.6000, Valid Loss: 0.6098, Train Acc: 0.6829, Train Balanced Acc: 0.4536, Valid Acc: 0.5476, Valid Balanced Acc: 0.5469
Epoch [2/75], Train Loss: 0.5751, Valid Loss: 0.6656, Train Acc: 0.6768, Train Balanced Acc: 0.4772, Valid Acc: 0.2619, Valid Balanced Acc: 0.4413
Epoch [3/75], Train Loss: 0.5139, Valid Loss: 0.6369, Train Acc: 0.7683, Train Balanced Acc: 0.5184, Valid Acc: 0.3810, Valid Balanced Acc: 0.4047
Epoch [4/75], Train Loss: 0.5144, Valid Loss: 0.5962, Train Acc: 0.7439, Train Balanced Acc: 0.5668, Valid Acc: 0.5000, Valid Balanced Acc: 0.5147
Epoch [5/75], Train Loss: 0.5299, Valid Loss: 0.6334, Train Acc: 0.7744, Train Balanced Acc: 0.5315, Valid Acc: 0.6429, Valid Balanced Acc: 0.4941
Epoch [6/75], Train Loss: 0.5142, Valid Loss: 0.5780, Train Acc: 0.7805, Train Balanced Acc: 0.5539, Valid Acc: 0.6905, Valid Balanced Acc: 0.5850
Epoch [7/75], Train Loss: 0.4706, Valid Loss: 0.6031, Train Ac

In [26]:
test(batch_size=8, model_name='resnest', drop_last=False)


The testing is starting using the CORIN enzyme.csv 

Test Acc: 0.6346, Test Balanced Acc: 0.5513

The testing is starting using the SFRP1 protein.csv 

Test Acc: 0.4615, Test Balanced Acc: 0.4673

The testing is starting using the CLCN7 protein.csv 

Test Acc: 0.5000, Test Balanced Acc: 0.4515

The testing is starting using the ATP7A ATPase.csv 

Test Acc: 0.7500, Test Balanced Acc: 0.4535

The testing is starting using the NR4A3 protein.csv 

Test Acc: 0.7885, Test Balanced Acc: 0.5646

The testing is starting using the HPGDS synthase.csv 

Test Acc: 0.5192, Test Balanced Acc: 0.4101

The testing is starting using the TYRP1 oxidase.csv 

Test Acc: 0.5577, Test Balanced Acc: 0.4952

The testing is starting using the ASIP protein.csv 

Test Acc: 0.6154, Test Balanced Acc: 0.5392

The testing is starting using the GPM6B protein.csv 

Test Acc: 0.5385, Test Balanced Acc: 0.5013

The testing is starting using the ATP7B ATPase.csv 

Test Acc: 0.7885, Test Balanced Acc: 0.5262

The testing 

### Plot

In [2]:
main_dir = '/s/project/gene2bird/groupA/'
metrics_dir = main_dir + 'cnn_results'
prot_names = os.listdir(metrics_dir)
model_names = ['baseline', 'resnet18', 'resnet50', 'resnest']

mcc_scores = {}

    
for prot_name in prot_names:
    metrics_path = metrics_dir + '/' + prot_name + '/metrics/'
    model_mccs = {'baseline':0.0, 'resnet18':0.0, 'resnet50':0.0, 'resnest':0.0 }
    for model_name in model_names:
        train_metrics = np.load(metrics_path + model_name + '_train_metrics.npy', allow_pickle=True)
        val_metrics = np.load(metrics_path +  model_name + '_val_metrics.npy', allow_pickle=True)
        test_metrics = np.load(metrics_path +  model_name + '_test_metrics.npy', allow_pickle=True)
        model_mccs[model_name] = test_metrics[0,6]
    mcc_scores[prot_name] = model_mccs


In [4]:
colors = ['rgb(21, 96, 100)', 'rgb(209, 73, 91)', 'rgb(160, 175, 132)', 'rgb(162, 157, 210)', 'rgb(77, 161, 103)']
gene_folders = os.listdir(metrics_dir)
fig = go.Figure()

for i, model_name in enumerate(model_names):
    mcc_values = [mcc_scores[gene][model_name] for gene in gene_folders]
    fig.add_trace(go.Bar(x=gene_folders, y=mcc_values, name=model_name, marker_color=colors[i]))

fig.update_layout(
    title='Comparison of Matthews Correlation Coefficient (MCC) for Different Genes and Models',
    xaxis_title='Gene',
    yaxis_title='MCC',
    barmode='group',
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0, font=dict(size=10), itemwidth=30),
    width=1500,
    height=700,
    plot_bgcolor='rgb(247, 243, 245)'
)
pio.write_html(fig, file='dl_models_mcc.html', auto_open=True)
fig.show()