# CNN-based cmsiRpred (Module A+B1+C)
2-uni_v2-betaSearch-0327  
UNI-v2_0327

In [1]:
import time
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torchinfo import summary
from torch.utils.data import Dataset, DataLoader, Subset
import torch.optim as optim
#from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import precision_score, recall_score, mean_absolute_error
from sklearn.model_selection import KFold

# Notebook-level batch size; used below as the batch dimension passed to the
# torchinfo summary() calls. cv_train() defines its own local BATCH_SIZE.
BATCH_SIZE = 256

## Load Data

In [2]:
# Load the pre-encoded siRNA table from a pickle file.
# NOTE(review): this is an absolute, machine-specific path; unpickling executes
# arbitrary code, so only load files from a trusted source.
df_structured_encoded = pd.read_pickle('/home/ken/MyStorage/siRNA_2503/Data/df_structured_encoded_0326.pkl')

In [3]:
# Split the encoded table by its 'dataset_usage' column into the train/validation
# pool and the two held-out test sets.
# The train/validation pool is shuffled once here. The shuffle is seeded so the row
# order (and hence the batch composition seen by the unshuffled training
# DataLoader in cv_train) is the same on every Restart & Run All.
TRVL_SHUFFLE_SEED = 0
df_structured_encoded_iid_trvl = (
    df_structured_encoded[df_structured_encoded['dataset_usage']=='IID_trvl']
    .sample(frac=1, random_state=TRVL_SHUFFLE_SEED)
)
df_structured_encoded_iid_test = df_structured_encoded[df_structured_encoded['dataset_usage']=='IID_test']
df_structured_encoded_ood_test = df_structured_encoded[df_structured_encoded['dataset_usage']=='OOD_test']
print(df_structured_encoded_iid_trvl.shape,df_structured_encoded_iid_test.shape,df_structured_encoded_ood_test.shape)

(20626, 165) (2568, 165) (2588, 165)


---

## Model Design

In [4]:
class siRNA_dataset_CNN(Dataset):
    """In-memory dataset built from the structured/encoded siRNA DataFrame.

    Every column is converted to a tensor (or array) once at construction time.
    Indexing returns the 5-tuple
    ``(modiseq, structs, tabular features, label, publication_id)``.
    """

    def __init__(self, df_structured_encoded):
        frame = df_structured_encoded

        # One tensor per row, stacked along a new leading (sample) axis.
        self.modiseq_tensor = torch.stack(list(frame['!!_modiseq_onehot3d'])).to(torch.float32)

        # The sense-strand map is flipped along dim 1 before stacking
        # (original author's note: "convert sense to antisense").
        sense_flipped = [torch.flip(t, dims=[1]) for t in frame['!!_nt_struct_type_sense_mea']]
        sense_stack = torch.stack(sense_flipped)
        antis_stack = torch.stack(list(frame['!!_nt_struct_type_antis_mea']))
        self.structs_tensor = torch.cat([sense_stack, antis_stack], dim=1).to(torch.float32)

        # Tabular features are the columns whose name contains "!<word>!".
        tabular_columns = frame.columns.str.contains(r'!\w+!')
        tabular_values = frame.loc[:, tabular_columns].values
        self.features_tensor = torch.tensor(tabular_values).to(torch.float32)

        # Regression target as an (N, 1) float column.
        labels = torch.tensor(list(frame['mRNA_remaining_pct']))
        self.label_tensor = labels.reshape([len(labels), 1]).to(torch.float32)

        # Environment / domain label used by the REx training loops.
        self.domain_label_A = np.array(frame['publication_id'])

    def __getitem__(self, index):
        return (
            self.modiseq_tensor[index],
            self.structs_tensor[index],
            self.features_tensor[index],
            self.label_tensor[index],
            self.domain_label_A[index],
        )

    def __len__(self):
        return self.modiseq_tensor.size(0)

In [5]:
class OneHot3dConv(nn.Module):
    """3-D convolutional encoder for the modified-sequence one-hot tensor.

    Two Conv3d -> ReLU -> MaxPool3d stages followed by a flatten. For the input
    size used in this notebook, (batch, 2, 28, 6, 7), the output is (batch, 16).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: one kernel spanning the full 28x6x7 volume (padding 2 leaves a 5x5x5 map).
        full_volume_conv = nn.Conv3d(2, 64, kernel_size=(28, 6, 7), stride=1, padding=2, dilation=1)
        self.conv1 = nn.Sequential(
            full_volume_conv,
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=2, stride=2),
        )
        # Stage 2: shape-preserving 3x3x3 convolution, then pooling.
        local_conv = nn.Conv3d(64, 16, kernel_size=3, stride=1, padding=1, dilation=1)
        self.conv2 = nn.Sequential(
            local_conv,
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=2, stride=2),
        )

    def forward(self, x_modiseq):
        pooled = self.conv2(self.conv1(x_modiseq))
        # Flatten everything except the batch axis.
        return pooled.view(pooled.size(0), -1)

#summary(OneHot3dConv(), input_size=(BATCH_SIZE, 2, 28, 6,7))

class Struct2dConv(nn.Module):
    """2-D convolutional encoder for the stacked sense/antisense structure maps.

    Conv2d -> ReLU, then Conv2d -> ReLU -> MaxPool2d, then a flatten. For the
    input size used in this notebook, (batch, 2, 28, 7), the output is (batch, 16).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: one kernel spanning the full 28x7 map (padding 2 leaves a 5x5 map); no pooling.
        full_map_conv = nn.Conv2d(2, 32, kernel_size=(28, 7), stride=1, padding=2, dilation=1)
        self.conv1 = nn.Sequential(full_map_conv, nn.ReLU())
        # Stage 2: shape-preserving 3x3 convolution, then 3x3 pooling.
        local_conv = nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1, dilation=1)
        self.conv2 = nn.Sequential(
            local_conv,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=3),
        )

    def forward(self, x_structs):
        feature_map = self.conv2(self.conv1(x_structs))
        # Flatten everything except the batch axis.
        return feature_map.view(feature_map.size(0), -1)

#summary(Struct2dConv(), input_size=(256, 2, 28, 7))

class TfxMLP(nn.Module):
    """Three-layer ReLU MLP that embeds the 109 tabular features into 16 dimensions."""

    def __init__(self):
        super().__init__()
        # Layers are registered as dense1/actv1 ... dense3/actv3, in that order,
        # so state_dict keys and parameter-initialisation order are unchanged.
        layer_sizes = ((109, 256), (256, 128), (128, 16))
        for idx, (n_in, n_out) in enumerate(layer_sizes, start=1):
            setattr(self, f'dense{idx}', nn.Linear(n_in, n_out))
            setattr(self, f'actv{idx}', nn.ReLU())

    def forward(self, x):
        hidden = self.actv1(self.dense1(x))
        hidden = self.actv2(self.dense2(hidden))
        hidden = self.actv3(self.dense3(hidden))
        return hidden.view(hidden.size(0), -1)

#summary(TfxMLP(), input_size=(BATCH_SIZE, 109))

class CombineMLP(nn.Module):
    """Regression head over the 48-wide concatenated embedding.

    Three ReLU dense layers, dropout (p=0.5, active only in training mode) and
    a final linear layer producing one value per sample.
    """

    def __init__(self):
        super().__init__()
        # Registered as dense1/actv1 ... dense3/actv3, then actv_linear, so
        # state_dict keys and parameter-initialisation order are unchanged.
        hidden_sizes = ((48, 128), (128, 128), (128, 64))
        for idx, (n_in, n_out) in enumerate(hidden_sizes, start=1):
            setattr(self, f'dense{idx}', nn.Linear(n_in, n_out))
            setattr(self, f'actv{idx}', nn.ReLU())
        self.actv_linear = nn.Linear(64, 1)

    def forward(self, x):
        hidden = self.actv1(self.dense1(x))
        hidden = self.actv2(self.dense2(hidden))
        hidden = self.actv3(self.dense3(hidden))
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        out = self.actv_linear(hidden)
        return out.view(out.size(0), -1)

#summary(CombineMLP(), input_size=(BATCH_SIZE, 48))

class modisiR_3dConv(nn.Module):
    """Full three-branch model: sequence CNN + structure CNN + tabular MLP.

    Each branch produces a per-sample embedding; the embeddings are concatenated
    in the order (sequence, tabular, structure) and passed to the regression
    head, which returns a (batch, 1) prediction.
    """

    def __init__(self):
        super().__init__()
        self.modiseq_conv = OneHot3dConv()
        self.struct_conv = Struct2dConv()
        self.tfx_mlp = TfxMLP()
        self.combine_mlp = CombineMLP()

    def forward(self, x_modiseq, x_struct, x_tfx):
        seq_embed = self.modiseq_conv(x_modiseq)
        struct_embed = self.struct_conv(x_struct)
        tab_embed = self.tfx_mlp(x_tfx)
        # Concatenation order differs from computation order: seq, tabular, struct.
        fused = torch.cat((seq_embed, tab_embed, struct_embed), dim=1)
        prediction = self.combine_mlp(fused)
        return prediction.reshape([prediction.shape[0], 1])

# Print a torchinfo layer-by-layer summary of the full model for one batch of the
# three inputs (modified-sequence one-hot, structure maps, tabular features).
summary(modisiR_3dConv(), input_size=((BATCH_SIZE, 2,28,6,7),(BATCH_SIZE,2,28,7),(BATCH_SIZE,109)))

Layer (type:depth-idx)                   Output Shape              Param #
modisiR_3dConv                           [256, 1]                  --
├─OneHot3dConv: 1-1                      [256, 16]                 --
│    └─Sequential: 2-1                   [256, 64, 2, 2, 2]        --
│    │    └─Conv3d: 3-1                  [256, 64, 5, 5, 5]        150,592
│    │    └─ReLU: 3-2                    [256, 64, 5, 5, 5]        --
│    │    └─MaxPool3d: 3-3               [256, 64, 2, 2, 2]        --
│    └─Sequential: 2-2                   [256, 16, 1, 1, 1]        --
│    │    └─Conv3d: 3-4                  [256, 16, 2, 2, 2]        27,664
│    │    └─ReLU: 3-5                    [256, 16, 2, 2, 2]        --
│    │    └─MaxPool3d: 3-6               [256, 16, 1, 1, 1]        --
├─Struct2dConv: 1-2                      [256, 16]                 --
│    └─Sequential: 2-3                   [256, 32, 5, 5]           --
│    │    └─Conv2d: 3-7                  [256, 32, 5, 5]           12,576
│ 

In [16]:
class Ablation_CombineMLP(nn.Module):
    """Regression head with a configurable input width (for ablation runs).

    Same layout as CombineMLP: three ReLU dense layers, dropout (p=0.5, active
    only in training mode) and a final linear layer producing one value per sample.

    Args:
        input_len: width of the concatenated embedding fed to the first layer.
    """

    def __init__(self, input_len):
        super().__init__()
        # Registered as dense1/actv1 ... dense3/actv3, then actv_linear, so
        # state_dict keys and parameter-initialisation order are unchanged.
        hidden_sizes = ((input_len, 128), (128, 128), (128, 64))
        for idx, (n_in, n_out) in enumerate(hidden_sizes, start=1):
            setattr(self, f'dense{idx}', nn.Linear(n_in, n_out))
            setattr(self, f'actv{idx}', nn.ReLU())
        self.actv_linear = nn.Linear(64, 1)

    def forward(self, x):
        hidden = self.actv1(self.dense1(x))
        hidden = self.actv2(self.dense2(hidden))
        hidden = self.actv3(self.dense3(hidden))
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        out = self.actv_linear(hidden)
        return out.view(out.size(0), -1)

class Ablation_modisiR_3dConv(nn.Module):
    """Three-branch model in which each branch can be switched off.

    Args:
        ablation: three flags ``(use_modiseq_conv, use_struct_conv, use_tfx_mlp)``.
            A branch whose flag is false is not instantiated and contributes
            nothing to the concatenated embedding. With all three flags true the
            sub-module names (and therefore the state_dict keys) are the same as
            those of ``modisiR_3dConv``.

    Raises:
        ValueError: if ``ablation`` does not hold exactly three flags, or if
            every branch is disabled (the head would have no input).
    """

    def __init__(self,ablation=(True,True,True)):
        super().__init__()
        ablation = tuple(bool(flag) for flag in ablation)
        if len(ablation) != 3:
            raise ValueError('ablation must hold exactly three flags (modiseq, struct, tfx).')
        if not any(ablation):
            raise ValueError('At least one branch must be enabled in ablation.')
        self.ablation = ablation
        if self.ablation[0]:self.modiseq_conv = OneHot3dConv()
        if self.ablation[1]:self.struct_conv = Struct2dConv()
        if self.ablation[2]:self.tfx_mlp = TfxMLP()
        # Every enabled branch emits a 16-wide embedding.
        self.combine_mlp = Ablation_CombineMLP(16*sum(self.ablation))

    def forward(self,x_modiseq,x_struct,x_tfx):
        # Only the enabled branches are evaluated. No placeholder tensors are
        # created, so the forward pass no longer depends on
        # torch.cuda.is_available() and runs on whatever device the inputs and
        # parameters already share.
        x_modiseq_embed = self.modiseq_conv(x_modiseq) if self.ablation[0] else None
        x_struct_embed = self.struct_conv(x_struct) if self.ablation[1] else None
        x_tfx_embed = self.tfx_mlp(x_tfx) if self.ablation[2] else None

        # Concatenation order is (modiseq, tfx, struct), matching modisiR_3dConv.
        embeds = [e for e in (x_modiseq_embed,x_tfx_embed,x_struct_embed) if e is not None]
        x_combine = torch.cat(embeds,dim=1)
        y_pred = self.combine_mlp(x_combine)
        return y_pred.reshape([len(y_pred),1])

# Print a torchinfo summary of the ablation-capable model with all three branches
# enabled, using the same three input sizes as the full model above.
summary(Ablation_modisiR_3dConv((True,True,True)), input_size=((BATCH_SIZE, 2,28,6,7),(BATCH_SIZE,2,28,7),(BATCH_SIZE,109)))

Layer (type:depth-idx)                   Output Shape              Param #
Ablation_modisiR_3dConv                  [256, 1]                  --
├─OneHot3dConv: 1-1                      [256, 16]                 --
│    └─Sequential: 2-1                   [256, 64, 2, 2, 2]        --
│    │    └─Conv3d: 3-1                  [256, 64, 5, 5, 5]        150,592
│    │    └─ReLU: 3-2                    [256, 64, 5, 5, 5]        --
│    │    └─MaxPool3d: 3-3               [256, 64, 2, 2, 2]        --
│    └─Sequential: 2-2                   [256, 16, 1, 1, 1]        --
│    │    └─Conv3d: 3-4                  [256, 16, 2, 2, 2]        27,664
│    │    └─ReLU: 3-5                    [256, 16, 2, 2, 2]        --
│    │    └─MaxPool3d: 3-6               [256, 16, 1, 1, 1]        --
├─Struct2dConv: 1-2                      [256, 16]                 --
│    └─Sequential: 2-3                   [256, 32, 5, 5]           --
│    │    └─Conv2d: 3-7                  [256, 32, 5, 5]           12,576
│ 

In [7]:
def train_VREX(dataload_TRAIN,env_list,beta,model,optimizer,criterion):
    """Run one training epoch with the V-REx objective.

    For every batch the samples are grouped by environment (the fifth element of
    each batch), a per-environment risk is computed with ``criterion``, and the
    optimised loss is ``mean(risks) + beta * var(risks)``.

    Args:
        dataload_TRAIN: iterable of 5-tuples
            ``(x_modiseq, x_struct, x_tfx, y_label, domain_label)``; must support ``len()``.
        env_list: environment identifiers to look for in ``domain_label``.
        beta: weight of the risk-variance penalty.
        model: module called as ``model(x_modiseq, x_struct, x_tfx)``.
        optimizer: optimizer over ``model``'s parameters.
        criterion: loss returning a scalar risk.

    Returns:
        Sum of the per-batch mean risks (penalty excluded) divided by
        ``len(dataload_TRAIN)``.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(DEVICE)
    model.train()
    loss_train_total = 0

    for x_batch_modiseq, x_batch_struct, x_batch_tfx, y_batch_lbl, x_domain_label_A in dataload_TRAIN:
        risks = []
        x_domain_label_A = np.array(x_domain_label_A)
        for env in env_list:
            env_mask = (x_domain_label_A == env)
            if not np.any(env_mask): continue
            env_mask = torch.as_tensor(env_mask)
            x_env_modiseq = x_batch_modiseq[env_mask].to(DEVICE)
            x_env_struct = x_batch_struct[env_mask].to(DEVICE)
            x_env_tfx = x_batch_tfx[env_mask].to(DEVICE)
            y_env_lbl = y_batch_lbl[env_mask].to(DEVICE)
            y_env_pred = model(x_env_modiseq,x_env_struct,x_env_tfx)
            risks.append(criterion(y_env_pred,y_env_lbl))

        # A batch holding no sample from any listed environment has no risk to
        # optimise (torch.stack([]) would raise), so it is skipped.
        if not risks:
            continue

        risks = torch.stack(risks)
        risks_mean = torch.mean(risks)
        # The variance of a single risk is NaN (division by n-1 == 0) and would
        # turn every weight into NaN on backward. With one environment there is
        # no cross-environment spread, so the penalty is zero.
        if risks.numel() > 1:
            risks_var = torch.var(risks)
        else:
            risks_var = torch.zeros_like(risks_mean)

        loss_batch = risks_mean + beta * risks_var

        optimizer.zero_grad()
        loss_batch.backward()
        optimizer.step()

        loss_train_total += risks_mean.item()
    return loss_train_total/len(dataload_TRAIN)

In [8]:
def train_stdREX(dataload_TRAIN,env_list,beta,model,optimizer,criterion):
    """Run one training epoch with a REx objective penalising the risk std.

    Identical to the V-REx loop except that the penalty is the standard
    deviation of the per-environment risks:
    ``mean(risks) + beta * std(risks)``.

    Args:
        dataload_TRAIN: iterable of 5-tuples
            ``(x_modiseq, x_struct, x_tfx, y_label, domain_label)``; must support ``len()``.
        env_list: environment identifiers to look for in ``domain_label``.
        beta: weight of the risk-std penalty.
        model: module called as ``model(x_modiseq, x_struct, x_tfx)``.
        optimizer: optimizer over ``model``'s parameters.
        criterion: loss returning a scalar risk.

    Returns:
        Sum of the per-batch mean risks (penalty excluded) divided by
        ``len(dataload_TRAIN)``.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(DEVICE)
    model.train()
    loss_train_total = 0

    for x_batch_modiseq, x_batch_struct, x_batch_tfx, y_batch_lbl, x_domain_label_A in dataload_TRAIN:
        risks = []
        x_domain_label_A = np.array(x_domain_label_A)
        for env in env_list:
            env_mask = (x_domain_label_A == env)
            if not np.any(env_mask): continue
            env_mask = torch.as_tensor(env_mask)
            x_env_modiseq = x_batch_modiseq[env_mask].to(DEVICE)
            x_env_struct = x_batch_struct[env_mask].to(DEVICE)
            x_env_tfx = x_batch_tfx[env_mask].to(DEVICE)
            y_env_lbl = y_batch_lbl[env_mask].to(DEVICE)
            y_env_pred = model(x_env_modiseq,x_env_struct,x_env_tfx)
            risks.append(criterion(y_env_pred,y_env_lbl))

        # A batch holding no sample from any listed environment has no risk to
        # optimise (torch.stack([]) would raise), so it is skipped.
        if not risks:
            continue

        risks = torch.stack(risks)
        risks_mean = torch.mean(risks)
        # The std of a single risk is NaN (division by n-1 == 0) and would turn
        # every weight into NaN on backward. With one environment there is no
        # cross-environment spread, so the penalty is zero.
        if risks.numel() > 1:
            risks_std = torch.std(risks)
        else:
            risks_std = torch.zeros_like(risks_mean)

        loss_batch = risks_mean + beta * risks_std

        optimizer.zero_grad()
        loss_batch.backward()
        optimizer.step()

        loss_train_total += risks_mean.item()
    return loss_train_total/len(dataload_TRAIN)

In [9]:
def train_ERM(dataload_TRAIN,model,optimizer,criterion):
    """Run one plain ERM training epoch and return the mean batch loss.

    Each batch is a 5-tuple ``(x_modiseq, x_struct, x_tfx, y_label, domain_label)``;
    the domain label is ignored. The model is moved to CUDA when available and
    put in training mode. The return value is the sum of the batch losses
    divided by ``len(dataload_TRAIN)``.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(DEVICE)
    model.train()

    running_loss = 0
    for batch in dataload_TRAIN:
        seq, struct, tfx, target, _domain = batch
        seq, struct = seq.to(DEVICE), struct.to(DEVICE)
        tfx, target = tfx.to(DEVICE), target.to(DEVICE)

        prediction = model(seq, struct, tfx)
        batch_loss = criterion(prediction, target)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss/len(dataload_TRAIN)

In [10]:
def calculate_metrics(y_pred, y_true, threshold=30):
    """Composite score combining overall MAE with low-range MAE and F1.

    Both inputs are flattened and clipped to [0, 100]. A sample is "positive"
    when its value is below ``threshold``. The score is::

        0.5 * (1 - mae/100) + 0.5 * (1 - range_mae/100) * f1

    where ``mae`` is the overall mean absolute error, ``range_mae`` is the MAE
    restricted to samples whose prediction lies in [0, threshold] (100 when no
    prediction falls in that range) and ``f1`` is the binary F1 of the
    positive class.

    Precision and recall are computed from the confusion counts, which is the
    same formula as scikit-learn's binary ``precision_score`` / ``recall_score``
    with an undefined ratio mapped to 0. F1 is defined as 0 when
    precision + recall == 0, so the score is never NaN. The function does not
    touch the process-wide ``warnings`` filters.

    Args:
        y_pred: predicted values (array-like, any shape).
        y_true: reference values (array-like, same number of elements).
        threshold: cut-off separating positives (< threshold) from negatives.

    Returns:
        The score as a Python float.

    Raises:
        ValueError: if the inputs are empty or differ in number of elements.
    """
    # Flatten so (N,) and (N, 1) inputs cannot silently broadcast to (N, N).
    y_true = np.asarray(y_true).reshape(-1).clip(0,100)
    y_pred = np.asarray(y_pred).reshape(-1).clip(0,100)
    if y_true.shape != y_pred.shape:
        raise ValueError('y_pred and y_true must contain the same number of elements.')
    if y_true.size == 0:
        raise ValueError('y_pred and y_true must not be empty.')

    mae = np.mean(np.abs(y_true - y_pred))

    true_positive_class = y_true < threshold
    pred_positive_class = y_pred < threshold

    # MAE restricted to predictions inside [0, threshold] (upper bound inclusive).
    mask = (y_pred >= 0) & (y_pred <= threshold)
    range_mae = np.mean(np.abs(y_true[mask] - y_pred[mask])) if mask.sum() > 0 else 100

    tp = int(np.sum(true_positive_class & pred_positive_class))
    fp = int(np.sum(~true_positive_class & pred_positive_class))
    fn = int(np.sum(true_positive_class & ~pred_positive_class))

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

    score = (1 - mae / 100) * 0.5 + (1 - range_mae / 100) * f1 * 0.5
    return float(score)

In [11]:
def validate(dataload_VAL,model,criterion,threshold=30):
    """Evaluate ``model`` on a loader; return ``(mean batch loss, metric score)``.

    The model is moved to CUDA when available, put in eval mode and run without
    gradient tracking. Labels and predictions from every batch are collected and
    passed to ``calculate_metrics`` together with ``threshold``. The loss is the
    sum of the batch losses divided by ``len(dataload_VAL)``.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(DEVICE)
    model.eval()

    running_loss = 0
    collected_labels = []
    collected_preds = []
    with torch.no_grad():
        for batch in dataload_VAL:
            seq, struct, tfx, target, _domain = batch
            seq, struct = seq.to(DEVICE), struct.to(DEVICE)
            tfx, target = tfx.to(DEVICE), target.to(DEVICE)

            prediction = model(seq, struct, tfx)
            running_loss += criterion(prediction, target).item()

            collected_labels.extend(target.cpu().numpy())
            collected_preds.extend(prediction.cpu().numpy())

    preds_array = np.array(collected_preds)
    labels_array = np.array(collected_labels)
    model_score = calculate_metrics(preds_array, labels_array, threshold)
    return running_loss/len(dataload_VAL), model_score

---

## Training

In [12]:
# Build the two held-out test datasets once; they are reused by every CV run below.
dataset_TEST_IID = siRNA_dataset_CNN(df_structured_encoded_iid_test)
dataset_TEST_OOD = siRNA_dataset_CNN(df_structured_encoded_ood_test)

# Show whether a CUDA device is visible (the training functions pick 'cuda' when it is).
torch.cuda.is_available()

True

In [43]:
import pickle
import os

def cvmodel_test(model_list,dataset_TEST,model=None):
    """Score every cross-validation checkpoint on a test dataset (on CPU).

    Args:
        model_list: list of state dicts, one per CV fold.
        dataset_TEST: dataset exposing ``modiseq_tensor``, ``structs_tensor``,
            ``features_tensor`` and ``label_tensor``.
        model: optional module into which each state dict is loaded. Defaults to
            a fresh ``modisiR_3dConv()`` (the previous hard-wired behaviour).
            Pass a matching architecture when the checkpoints come from an
            ablated model, whose state_dict keys differ.

    Returns:
        List with one ``calculate_metrics`` score per checkpoint; each score is
        also printed.
    """
    model4test = modisiR_3dConv() if model is None else model
    # The dataset tensors live on the CPU, so the model must too.
    model4test.to('cpu')
    test_score_list = []
    for state_dict in model_list:
        model4test.load_state_dict(state_dict)
        model4test.eval()
        # Inference only: skip building an autograd graph over the whole test set.
        with torch.no_grad():
            y_pred_TEST = model4test(dataset_TEST.modiseq_tensor,dataset_TEST.structs_tensor,dataset_TEST.features_tensor)
        test_score = calculate_metrics(y_pred_TEST.detach().cpu().numpy(),dataset_TEST.label_tensor.detach().cpu().numpy())
        print(test_score)
        test_score_list.append(test_score)
    return test_score_list

def cv_train(rex_beta,model_type:str,ablation,df_structured_encoded_iid_trvl,dataset_TEST_IID,dataset_TEST_OOD,seed=None):
    """Train one model per fold with 10-fold cross-validation.

    For every fold a fresh ``Ablation_modisiR_3dConv(ablation)`` is trained for up
    to 40 epochs with AdamW (initial lr 0.002) and MSE loss. After each epoch the
    model is evaluated on the fold's validation split and on both test sets. The
    state dict with the best validation score is kept; the test scores are only
    logged and displayed, they do not influence model selection.

    After the warm-up epochs the learning rate is halved whenever the validation
    loss has not improved for ``loss_tolerance_epoch_num`` epochs.

    Args:
        rex_beta: penalty weight passed to the REx training loops (unused for 'erm').
        model_type: 'erm', 'vrex' or 'stdrex'.
        ablation: three flags forwarded to ``Ablation_modisiR_3dConv``.
        df_structured_encoded_iid_trvl: train/validation DataFrame.
        dataset_TEST_IID: in-distribution test dataset.
        dataset_TEST_OOD: out-of-distribution test dataset.
        seed: optional integer; when given it seeds the KFold split and
            ``torch.manual_seed`` so a run can be repeated. ``None`` (default)
            keeps the previous unseeded behaviour.

    Returns:
        ``(model_list, df_cv_log)``: the best state dict of each fold, and a
        DataFrame with one column group ``Model_<i>`` per fold holding
        epoch, loss_train, loss_val, val_score, iid_score, ood_score and lr.

    Raises:
        ValueError: if ``model_type`` is not one of the supported values
            (previously this printed a message and then crashed on an unbound
            ``loss_train``).
    """
    if model_type not in ('erm','vrex','stdrex'):
        raise ValueError('No such model type. Type should be erm, vrex or stdrex.')

    lr = 0.002
    EPOCHS = 40
    BETA = rex_beta
    BATCH_SIZE = 256

    early_stop_score = 1
    warm_up_epoch_num = 10
    loss_tolerance_epoch_num = 5
    log_columns = ['epoch','loss_train','loss_val','val_score','iid_score','ood_score','lr']
    env_list = df_structured_encoded_iid_trvl['publication_id'].unique()

    if seed is not None:
        torch.manual_seed(seed)

    dataload_TEST_IID = DataLoader(dataset=dataset_TEST_IID,batch_size=BATCH_SIZE)
    dataload_TEST_OOD = DataLoader(dataset=dataset_TEST_OOD,batch_size=BATCH_SIZE)

    dataset_TRVL = siRNA_dataset_CNN(df_structured_encoded_iid_trvl)

    kfold = KFold(n_splits=10,shuffle=True,random_state=seed)
    splits = kfold.split(dataset_TRVL)

    model_list = []
    cv_log = []

    print('rex_beta:',BETA)
    print('Epoch','val','iid','ood','(val*','ood*)',sep='\t')

    for train_index, val_index in splits:

        log_train = []

        dataset_TRAIN = Subset(dataset_TRVL,train_index)
        dataset_VAL = Subset(dataset_TRVL,val_index)

        # NOTE(review): the training loader is not shuffled, so every epoch sees
        # the same batches in the same order — confirm this is intended.
        dataload_TRAIN = DataLoader(dataset=dataset_TRAIN,batch_size=BATCH_SIZE)
        dataload_VAL = DataLoader(dataset=dataset_VAL,batch_size=BATCH_SIZE)

        lowest_loss_epoch = {'loss_val':float("inf"),'epoch':0}
        best_score = -float('inf')
        best_OOD = -float('inf')
        model = Ablation_modisiR_3dConv(ablation)
        # Fallback so best_model is always bound, even if no epoch ever yields a
        # comparable (non-NaN) validation score.
        best_model = copy.deepcopy(model.state_dict())
        optimizer = optim.AdamW([{'params':model.parameters(),'lr':lr}])
        criterion = nn.MSELoss(reduction='mean')

        for epoch in range(EPOCHS):
            # Learning rate actually used for this epoch; the constant `lr` never
            # changes, so logging it would hide the decay applied below.
            lr_epoch = optimizer.param_groups[0]['lr']

            if model_type == 'vrex':
                loss_train = train_VREX(dataload_TRAIN,env_list,BETA,model,optimizer,criterion)
            elif model_type == 'erm':
                loss_train = train_ERM(dataload_TRAIN,model,optimizer,criterion)
            else:
                loss_train = train_stdREX(dataload_TRAIN,env_list,BETA,model,optimizer,criterion)
            loss_val,model_score = validate(dataload_VAL,model,criterion)

            _,test_score_iid = validate(dataload_TEST_IID,model,criterion)
            _,test_score_ood = validate(dataload_TEST_OOD,model,criterion)

            # Plateau schedule: after warm-up, halve the lr when the validation
            # loss has not improved for `loss_tolerance_epoch_num` epochs.
            if epoch > warm_up_epoch_num:
                if loss_val < lowest_loss_epoch['loss_val']:
                    lowest_loss_epoch['epoch'] = epoch
                    lowest_loss_epoch['loss_val'] = loss_val
                elif (epoch-lowest_loss_epoch['epoch']) >= loss_tolerance_epoch_num:
                    lowest_loss_epoch['epoch'] = epoch
                    for param_group in optimizer.param_groups:
                        param_group['lr'] *= 0.5

            log_train.append((epoch,loss_train,loss_val,model_score,test_score_iid,test_score_ood,lr_epoch))

            # Model selection uses the validation score only; best_OOD is the OOD
            # score at that epoch and is kept for display.
            if model_score > best_score:
                best_score = model_score
                best_OOD = test_score_ood
                best_model = copy.deepcopy(model.state_dict())

            print(f'\r{epoch}\t{model_score:.4f}\t{test_score_iid:.4f}\t{test_score_ood:.4f}\t({best_score:.4f},{best_OOD:.4f})',sep='',end='')

            if best_score >= early_stop_score:
                break
        model_list.append(best_model)
        cv_log.append(log_train)
        print('')

    # Build one frame per fold and concatenate once, instead of growing a
    # DataFrame with pd.concat inside the loop.
    fold_frames = []
    for i, fold_log in enumerate(cv_log):
        df_log = pd.DataFrame(fold_log)
        df_log.columns = pd.MultiIndex.from_product([['Model_'+str(i)],log_columns])
        fold_frames.append(df_log)
    df_cv_log = pd.concat(fold_frames,axis=1) if fold_frames else pd.DataFrame()

    return model_list,df_cv_log

In [25]:
def save_state_dict_2cpu(PATH_SAVE,model_list,model):
    """Save every state dict in ``model_list`` as a CPU checkpoint.

    Each state dict is loaded into ``model``, the model is moved to the CPU and
    its state dict is written to
    ``PATH_SAVE + 'models/state_dict_cpu_<i>.pth'``.

    ``PATH_SAVE`` is concatenated as-is (no separator is inserted), so it must
    end with a path separator for the ``models`` directory to be created inside
    it; otherwise the directory is a sibling named ``<PATH_SAVE>models``.

    Args:
        PATH_SAVE: path prefix for the ``models`` directory.
        model_list: list of state dicts compatible with ``model``.
        model: module used as the container; it is left on the CPU holding the
            last state dict.
    """
    models_dir = PATH_SAVE+'models'
    # exist_ok so that re-running the cell does not fail with FileExistsError.
    os.makedirs(models_dir,exist_ok=True)
    for i in range(len(model_list)):
        print(i,list(model_list[i].values())[0].device,end='\t')
        model.load_state_dict(model_list[i])
        model.to('cpu')
        print('to',list(model.state_dict().values())[0].device)
        torch.save(model.state_dict(), models_dir+'/state_dict_cpu_'+str(i)+'.pth')

### train_ERM

In [44]:
# ERM baseline: 10-fold CV with all three branches enabled (rex_beta is unused for 'erm', hence None).
ERM_model_list,ERM_cv_log = cv_train(None,'erm',(True,True,True),df_structured_encoded_iid_trvl,dataset_TEST_IID,dataset_TEST_OOD)

rex_beta: None
Epoch	val	iid	ood	(val*	ood*)
39	0.8147	0.8240	0.5679	(0.8147,0.5679)
39	0.8324	0.8252	0.5048	(0.8326,0.5280)
39	0.8193	0.8166	0.6059	(0.8273,0.6026)
39	0.8316	0.8291	0.5582	(0.8340,0.5573)
39	0.8136	0.8145	0.6051	(0.8183,0.6110)
39	0.8260	0.8164	0.5836	(0.8260,0.5836)
39	0.8178	0.8171	0.5567	(0.8192,0.5582)
39	0.8265	0.8195	0.5938	(0.8294,0.6027)
39	0.8248	0.8202	0.6199	(0.8268,0.6208)
39	0.8197	0.8236	0.6061	(0.8206,0.6026)


In [45]:
# Score each fold's best checkpoint on the OOD test set and report the mean over folds.
ERM_cv_best_models_test_score = cvmodel_test(ERM_model_list,dataset_TEST_OOD)
print('mean:',np.mean(np.array(ERM_cv_best_models_test_score)))

0.5679281402018763
0.5280390236450506
0.6026027914395735
0.5573469793955484
0.611039572285671
0.5835802169465945
0.5581615777908702
0.6027320146030849
0.6208025716081216
0.6025747556256172
mean: 0.5834807643542008


In [46]:
# Persist the ERM run: per-fold CPU checkpoints plus the CV training log.
# NOTE(review): save_state_dict_2cpu builds its directory as PATH_SAVE+'models'
# with no separator, so with this PATH_SAVE (no trailing '/') the checkpoints go
# to a sibling directory '<PATH_SAVE>models', while df_cv_log.pickle is written
# inside PATH_SAVE — confirm this layout is intended.
PATH_SAVE = '/home/ken/MyStorage/Models_out/CNN-ERM-40epo-TTT-10xCV-5834-250327'
ablxn = (True,True,True)
mdl_list = ERM_model_list
cv_log = ERM_cv_log
# ---- settings above, saving below ----
if not os.path.exists(PATH_SAVE):
    os.makedirs(PATH_SAVE)
# Container model whose architecture must match the saved state dicts.
mdlsave = Ablation_modisiR_3dConv(ablxn)
save_state_dict_2cpu(PATH_SAVE,mdl_list,mdlsave)
# ---- training log ----
cv_log.to_pickle(PATH_SAVE+'/df_cv_log.pickle')

0 cuda:0	to cpu
1 cuda:0	to cpu
2 cuda:0	to cpu
3 cuda:0	to cpu
4 cuda:0	to cpu
5 cuda:0	to cpu
6 cuda:0	to cpu
7 cuda:0	to cpu
8 cuda:0	to cpu
9 cuda:0	to cpu


#### 40epo-5808

In [47]:
# Second ERM run with identical settings; re-binds ERM_model_list / ERM_cv_log,
# replacing the variables from the previous run.
ERM_model_list,ERM_cv_log = cv_train(None,'erm',(True,True,True),df_structured_encoded_iid_trvl,dataset_TEST_IID,dataset_TEST_OOD)

rex_beta: None
Epoch	val	iid	ood	(val*	ood*)
39	0.8173	0.8162	0.5896	(0.8184,0.5997)
39	0.8108	0.8209	0.5533	(0.8121,0.5592)
39	0.8078	0.8214	0.6030	(0.8117,0.6006)
39	0.8269	0.8270	0.6040	(0.8299,0.6049)
39	0.8015	0.8070	0.5129	(0.8201,0.5165)
39	0.7821	0.7989	0.5925	(0.8245,0.6062)
39	0.8180	0.8187	0.6070	(0.8264,0.6064)
39	0.8146	0.8208	0.5652	(0.8201,0.5723)
39	0.8219	0.8195	0.5689	(0.8279,0.5793)
39	0.8306	0.8261	0.5614	(0.8347,0.5634)


In [48]:
# Score the second ERM run's per-fold checkpoints on the OOD test set and report the mean.
ERM_cv_best_models_test_score = cvmodel_test(ERM_model_list,dataset_TEST_OOD)
print('mean:',np.mean(np.array(ERM_cv_best_models_test_score)))

0.5997471199210008
0.5591965475587127
0.6005657944442366
0.604879484621486
0.5165173763711366
0.6061934393669389
0.606446155603132
0.5723382168056584
0.5792832166469117
0.5634480894436626
mean: 0.5808615440782876


In [49]:
# Persist the second ERM run: per-fold CPU checkpoints plus the CV training log.
# NOTE(review): save_state_dict_2cpu builds its directory as PATH_SAVE+'models'
# with no separator, so with this PATH_SAVE (no trailing '/') the checkpoints go
# to a sibling directory '<PATH_SAVE>models', while df_cv_log.pickle is written
# inside PATH_SAVE — confirm this layout is intended.
PATH_SAVE = '/home/ken/MyStorage/Models_out/CNN-ERM-40epo-TTT-10xCV-5808-250327'
ablxn = (True,True,True)
mdl_list = ERM_model_list
cv_log = ERM_cv_log
# ---- settings above, saving below ----
if not os.path.exists(PATH_SAVE):
    os.makedirs(PATH_SAVE)
# Container model whose architecture must match the saved state dicts.
mdlsave = Ablation_modisiR_3dConv(ablxn)
save_state_dict_2cpu(PATH_SAVE,mdl_list,mdlsave)
# ---- training log ----
cv_log.to_pickle(PATH_SAVE+'/df_cv_log.pickle')

0 cuda:0	to cpu
1 cuda:0	to cpu
2 cuda:0	to cpu
3 cuda:0	to cpu
4 cuda:0	to cpu
5 cuda:0	to cpu
6 cuda:0	to cpu
7 cuda:0	to cpu
8 cuda:0	to cpu
9 cuda:0	to cpu


### VREX

#### 40epo-6387

In [52]:
# V-REx: 10-fold CV with penalty weight beta = 1 and all three branches enabled.
VREX_1_model_list,VREX_1_cv_log = cv_train(1,'vrex',(True,True,True),df_structured_encoded_iid_trvl,dataset_TEST_IID,dataset_TEST_OOD)

rex_beta: 1
Epoch	val	iid	ood	(val*	ood*)
39	0.5860	0.5786	0.6135	(0.6382,0.6549)
39	0.5982	0.5928	0.5671	(0.6450,0.6396)
39	0.6513	0.6277	0.6333	(0.6643,0.6566)
39	0.5974	0.6002	0.6180	(0.6607,0.6504)
39	0.5487	0.5803	0.5759	(0.6424,0.6286)
39	0.6528	0.6480	0.6071	(0.6541,0.6177)
39	0.6020	0.6000	0.6079	(0.6295,0.6280)
39	0.5423	0.5511	0.5519	(0.6079,0.6332)
39	0.6320	0.6322	0.6278	(0.6386,0.6363)
39	0.5816	0.5937	0.5900	(0.6319,0.6425)


In [54]:
# Score each V-REx fold's best checkpoint on the OOD test set and report the mean over folds.
VREX_cv_best_models_test_score = cvmodel_test(VREX_1_model_list,dataset_TEST_OOD)
print('mean:',np.mean(np.array(VREX_cv_best_models_test_score)))

0.6548543315475222
0.6395922210036205
0.6566229803476236
0.6503841276557112
0.628586882774321
0.6176733745291099
0.6280494159797089
0.6331854241136192
0.6362768648727204
0.6424905160897494
mean: 0.6387716138913706
