In [49]:
%mkdir output

In [1]:
import numpy as np
import pandas as pd

import torch
from torch import nn


import torch.optim as optim
from utils import count_parameters, classification_scores, mean_sq_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader


from models import SAINT
from augmentations import embed_data_mask
from augmentations import add_noise
from pretraining import SAINT_pretrain

In [2]:
df = pd.read_csv("../data/clinical_and_other_features.csv")
df2 = pd.read_csv('../data/clinical_and_other_features_filtered.csv')

In [34]:
opt_dict = {
    'd_task': 'clf',
    'dtask': 'clf',
    'task': 'multiclass',
    'batchsize': 16,
    'pt_aug': ['mixup', 'cutmix'],
    'pt_aug_lam': 0.1,
    'pretrain_epochs': 250, #50
    'nce_temp': 0.7,
    'lam0': 0.5,
    'lam1': 10,
    'lam2': 1,
    'lam3': 10,
    'pt_projhead_style': 'diff',
    'pt_tasks': ['contrastive','denoising'],
    'mixup_lam': 0.3,
    'ssl_samples': 312,
    'lr':0.0001,
    'train_noise_type':None,
    'train_noise_level':0,
    'save_path':"./output/model.pt"
}

class AttributeDict(dict):
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

opt = AttributeDict(opt_dict)

In [35]:
def data_split(X,y,nan_mask,indices):
    x_d = {
        'data': X.values[indices],
        'mask': nan_mask.values[indices]
    }

    if x_d['data'].shape != x_d['mask'].shape:
        raise'Shape of data not same as that of nan mask!'

    y_d = {
        'data': y[indices].reshape(-1, 1)
    }
    return x_d, y_d

In [36]:
class DataSetCatCon(Dataset):
    def __init__(self, X, Y, cat_cols,task='clf',continuous_mean_std=None):

        cat_cols = list(cat_cols)
        X_mask =  X['mask'].copy()
        X = X['data'].copy()
        con_cols = list(set(np.arange(X.shape[1])) - set(cat_cols))
        self.X1 = X[:,cat_cols].copy().astype(np.int64) #categorical columns
        self.X2 = X[:,con_cols].copy().astype(np.float32) #numerical columns
        self.X1_mask = X_mask[:,cat_cols].copy().astype(np.int64) #categorical columns
        self.X2_mask = X_mask[:,con_cols].copy().astype(np.int64) #numerical columns
        self.y = Y['data']#.astype(np.float32) if regression
        self.cls = np.zeros_like(self.y,dtype=int)
        self.cls_mask = np.ones_like(self.y,dtype=int)
        if continuous_mean_std is not None:
            mean, std = continuous_mean_std
            self.X2 = (self.X2 - mean) / std

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        # X1 has categorical data, X2 has continuous
        return np.concatenate((self.cls[idx], self.X1[idx])), self.X2[idx],self.y[idx], np.concatenate((self.cls_mask[idx], self.X1_mask[idx])), self.X2_mask[idx]

In [37]:
def prepare_dataset(df,p=[.65, .15, .2]):
  df1 = df.drop(['Overall Near-complete Response:  Looser Definition','Near-complete Response (Graded Measure)'],axis=1)
  df1.columns = df1.columns.str.strip()
  pathologic_response_to_neoadjuvant_therapy = ['Pathologic response to Neoadjuvant therapy: Pathologic stage (T) following neoadjuvant therapy',
        'Pathologic response to Neoadjuvant therapy:  Pathologic stage (N) following neoadjuvant therapy',
        'Pathologic response to Neoadjuvant therapy:  Pathologic stage (M) following neoadjuvant therapy']
  # df1.drop(pathologic_response_to_neoadjuvant_therapy, axis=1, inplace=True)
  X = df1.drop('Overall Near-complete Response:  Stricter Definition',axis=1)
  y = df1['Overall Near-complete Response:  Stricter Definition']
  cont_columns = ['Date of Birth (Days)', 'Days to Surgery (from the date of diagnosis)', 'Age at last contact in EMR f/u(days)(from the date of diagnosis) ,last time patient known to be alive, unless age of death is reported(in such case the age of death',
    'Age at mammo (days)', 'Days to distant recurrence(from the date of diagnosis)', 'Days to local recurrence (from the date of diagnosis)',
    'Days to death (from the date of diagnosis)', 'Days to last local recurrence free assessment (from the date of diagnosis)',
    ]
  categorical_columns = list(set(X.columns) - set(cont_columns))

  # convert categorical columns to str type
  X[categorical_columns] = X[categorical_columns].astype(str)

  cat_idxs = [X.columns.get_loc(c) for c in categorical_columns]
  con_idxs = [X.columns.get_loc(c) for c in cont_columns]
  X["Set"] = np.random.choice(["train", "valid", "test"], p = [.65, .15, .2], size=(X.shape[0],))

  train_indices = X[X.Set=="train"].index
  valid_indices = X[X.Set=="valid"].index
  test_indices = X[X.Set=="test"].index

  X = X.drop(columns=['Set'])
  temp = X.fillna("MissingValue")
#   creates a bert style mask for the missing values
  nan_mask = temp.ne("MissingValue").astype(int)

  cat_dims = []
  for col in categorical_columns:
      X[col] = X[col].fillna("MissingValue")
      l_enc = LabelEncoder()
      X[col] = l_enc.fit_transform(X[col].values)
      cat_dims.append(len(l_enc.classes_))

  for col in cont_columns:
      X[col] = pd.to_numeric(X[col], errors='coerce')
      X.fillna(X.loc[train_indices, col].mean(), inplace=True)
  y = y.values
  l_enc = LabelEncoder()
  y = l_enc.fit_transform(y)
  X_train, y_train = data_split(X,y,nan_mask,train_indices)
  X_valid, y_valid = data_split(X,y,nan_mask,valid_indices)
  X_test, y_test = data_split(X,y,nan_mask,test_indices)
  train_mean, train_std = np.array(X_train['data'][:,con_idxs],dtype=np.float32).mean(0), np.array(X_train['data'][:,con_idxs],dtype=np.float32).std(0)
  train_std = np.where(train_std < 1e-6, 1e-6, train_std)
  continuous_mean_std = np.array([train_mean,train_std]).astype(np.float32)
  train_ds = DataSetCatCon(X_train, y_train, cat_idxs,'clf',continuous_mean_std)
  trainloader = DataLoader(train_ds, batch_size=64, shuffle=True,num_workers=1)

  valid_ds = DataSetCatCon(X_valid, y_valid, cat_idxs,'clf', continuous_mean_std)
  validloader = DataLoader(valid_ds, batch_size=64, shuffle=False,num_workers=1)

  test_ds = DataSetCatCon(X_test, y_test, cat_idxs,'clf', continuous_mean_std)
  testloader = DataLoader(test_ds, batch_size=64, shuffle=False,num_workers=1)
  y_dim = len(np.unique(y_train['data'][:,0]))
  print('Number of classes in train:',y_dim)
  # in test
  print('Number of classes in test:',len(np.unique(y_test['data'][:,0]))
  )
  #in valid
  print('Number of classes in valid:',len(np.unique(y_valid['data'][:,0]))
  )


  cat_dims = np.append(np.array([1]),np.array(cat_dims)).astype(int) #Appending 1 for CLS token, this is later used to generate embeddings.

  return trainloader, validloader, testloader, cat_dims, con_idxs , cat_idxs, y_dim , continuous_mean_std , X_train, y_train, X_valid, y_valid, X_test, y_test, train_ds, valid_ds

In [38]:
trainloader, validloader, testloader, cat_dims, con_idxs , cat_idxs, y_dim , continuous_mean_std, X_train, y_train, X_valid, y_valid, X_test, y_test, train_ds, valid_ds = prepare_dataset(df,[.8, .2, 0])

Number of classes in train: 5
Number of classes in test: 5
Number of classes in valid: 5


In [39]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [40]:
criterion = nn.CrossEntropyLoss().to(device)

In [41]:
y_dim = 4

In [47]:
model = SAINT(
categories = tuple(cat_dims),
num_continuous = len(con_idxs),
dim = 8,              # embedding dimension
dim_out = 1,
depth = 1,             # depth of the network (nr. of transformer blocks)
heads = 4,             # number of attention heads 8
attn_dropout = 0.1,
ff_dropout = 0.8,
mlp_hidden_mults = (4, 2),
cont_embeddings = 'MLP', # options: 'MLP', 'linear', 'hybrid' (MLP with continuous embeddings concatenated to the transformer block outputs)
attentiontype = 'row', # options: 'col', 'row', 'colrow', 'colrowv2'
final_mlp_style = 'sep',
y_dim = y_dim
)
model.to(device)


SAINT(
  (norm): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
  (simple_MLP): ModuleList(
    (0-7): 8 x simple_MLP(
      (layers): Sequential(
        (0): Linear(in_features=1, out_features=100, bias=True)
        (1): ReLU()
        (2): Linear(in_features=100, out_features=8, bias=True)
      )
    )
  )
  (transformer): RowColTransformer(
    (embeds): Embedding(1518, 8)
    (layers): ModuleList(
      (0): ModuleList(
        (0): PreNorm(
          (norm): LayerNorm((656,), eps=1e-05, elementwise_affine=True)
          (fn): Residual(
            (fn): Attention(
              (to_qkv): Linear(in_features=656, out_features=768, bias=False)
              (to_out): Linear(in_features=256, out_features=656, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
        (1): PreNorm(
          (norm): LayerNorm((656,), eps=1e-05, elementwise_affine=True)
          (fn): Residual(
            (fn): FeedForward(
              (net

In [50]:
model = SAINT_pretrain(model,train_ds, valid_ds , opt, device=device)

Pretraining begins!
Model Checkpoint Saved!
Epoch: 0, Running Loss: 486394.9138183594 , Val Loss: 22833.375
Model Checkpoint Saved!
Epoch: 1, Running Loss: 407041.81005859375 , Val Loss: 4351.8984375
Early Stopping Counter:  1
Epoch: 2, Running Loss: 368174.34130859375 , Val Loss: 14736.794921875
Early Stopping Counter:  2
Epoch: 3, Running Loss: 316348.3505859375 , Val Loss: 10527.404296875
Early Stopping Counter:  3
Epoch: 4, Running Loss: 260474.99877929688 , Val Loss: 7408.66357421875
Early Stopping Counter:  4
Epoch: 5, Running Loss: 168812.5994873047 , Val Loss: 12901.056640625
Model Checkpoint Saved!
Epoch: 6, Running Loss: 66323.42533874512 , Val Loss: 2785.91943359375
Model Checkpoint Saved!
Epoch: 7, Running Loss: 78672.84365844727 , Val Loss: 555.6526489257812
Early Stopping Counter:  1
Epoch: 8, Running Loss: 51596.31997680664 , Val Loss: 19528.56640625
Early Stopping Counter:  2
Epoch: 9, Running Loss: 70233.99475097656 , Val Loss: 13340.3330078125
Model Checkpoint Saved!


In [51]:
# Labeled data

In [52]:
trainloader, validloader, testloader, cat_dims, con_idxs , cat_idxs, y_dim , continuous_mean_std, X_train, y_train, X_valid, y_valid, X_test, y_test,_,_ = prepare_dataset(df2)

Number of classes in train: 4
Number of classes in test: 4
Number of classes in valid: 4


In [56]:
optimizer = optim.AdamW(model.parameters(),lr=0.0001, betas=(0.9,0.999))

In [57]:
modelsave_path='outputs'

In [54]:
print('We are in semi-supervised learning case')

train_bsize = min(opt.ssl_samples//4,opt.batchsize)

We are in semi-supervised learning case


In [55]:
train_ds = DataSetCatCon(X_train, y_train, cat_idxs,opt.dtask,continuous_mean_std)
trainloader = DataLoader(train_ds, batch_size=train_bsize, shuffle=True,num_workers=4)

In [53]:
# Fine-tuning

In [58]:
# Start K-Fold Cross Validation
# Define the number of splits
n_splits = 4
best_valid_auroc = 0
best_valid_accuracy = 0
best_test_auroc = 0
best_test_accuracy = 0
best_valid_rmse = 100000

early_stop_counter = 0
early_stop_patience = 20

# fold_dict = {}

# Define the KFold object
kf = KFold(n_splits=n_splits, shuffle=True, random_state=21)

# Initialize lists to store the train and validation indices for each fold
train_indices_list = []
valid_indices_list = []

# Loop over the splits and get the train and validation indices for each fold
for train_indices, valid_indices in kf.split(X_train['data']):
    train_indices_list.append(train_indices)
    valid_indices_list.append(valid_indices)
best_test_accuracy_list = []
# Loop over the folds and train the model on each fold
for fold in range(n_splits):
    # Get the train and validation indices for this fold
    train_indices = train_indices_list[fold]
    valid_indices = valid_indices_list[fold]

    # Create the train and validation datasets and dataloaders for this fold
    train_ds = DataSetCatCon(X_train, y_train, cat_idxs,opt.dtask,continuous_mean_std)
    trainloader = DataLoader(train_ds, batch_size=train_bsize,num_workers=2, sampler=torch.utils.data.SubsetRandomSampler(train_indices))
    valid_ds = DataSetCatCon(X_train, y_train, cat_idxs,opt.dtask,continuous_mean_std)
    validloader = DataLoader(valid_ds, batch_size=train_bsize, shuffle=False,num_workers=2, sampler=torch.utils.data.SubsetRandomSampler(valid_indices))
    print(f'Training begins now for # {fold} Fold.')
    # Train the model on this fold
    for epoch in range(300):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            optimizer.zero_grad()
            # x_categ is the the categorical data, with y appended as last feature. x_cont has continuous data. cat_mask is an array of ones same shape as x_categ except for last column(corresponding to y's) set to 0s. con_mask is an array of ones same shape as x_cont.
            x_categ, x_cont, y_gts, cat_mask, con_mask = data[0].to(device), data[1].to(device),data[2].to(device),data[3].to(device),data[4].to(device)
            if opt.train_noise_type is not None and opt.train_noise_level>0:
                noise_dict = {
                    'noise_type' : opt.train_noise_type,
                    'lambda' : opt.train_noise_level
                }
                if opt.train_noise_type == 'cutmix':
                    x_categ, x_cont = add_noise(x_categ,x_cont, noise_params = noise_dict)
                elif opt.train_noise_type == 'missing':
                    cat_mask, con_mask = add_noise(cat_mask, con_mask, noise_params = noise_dict)
            # We are converting the data to embeddings in the next step
            _ , x_categ_enc, x_cont_enc = embed_data_mask(x_categ, x_cont, cat_mask, con_mask,model)
            reps = model.transformer(x_categ_enc, x_cont_enc)
            # select only the representations corresponding to y and apply mlp on it in the next step to get the predictions.
            y_reps = reps[:,0,:]

            y_outs = model.mlpfory(y_reps)
            if opt.task == 'regression':
                loss = criterion(y_outs,y_gts)
            else:
                loss = criterion(y_outs,y_gts.squeeze())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(running_loss)
        if epoch%5==0:
            model.eval()
            with torch.no_grad():
                if opt.task in ['binary','multiclass']:
                    accuracy, auroc = classification_scores(model, validloader, device, opt.task)
                    # test_accuracy, test_auroc = classification_scores(model, testloader, device, opt.task)

                    print('[EPOCH %d] VALID ACCURACY: %.3f' %
                        (epoch + 1, accuracy ))
                    # print('[EPOCH %d] TEST ACCURACY: %.3f' %
                    #     (epoch + 1, test_accuracy ))

            if opt.task =='multiclass':
                if accuracy > best_valid_accuracy:
                    best_valid_accuracy = accuracy
                    early_stop_counter = 0
                    print("save model")
                    torch.save({'model': model, 'state_dict': model.state_dict(),'optimizer' : optimizer.state_dict()},modelsave_path+f"model-{fold}.pt")
                else:
                  early_stop_counter +=1
                  if early_stop_counter > early_stop_patience:
                    break

    model.eval()
    with torch.no_grad():
            accuracy, auroc = classification_scores(model, testloader, device, opt.task)
            print('TEST ACCURACY: %.3f' % accuracy)
            best_test_accuracy_list.append(accuracy)



# End K Fold
# Calculate the average of the best accuracy from each fold
average_best_valid_accuracy = sum(best_test_accuracy_list) / len(best_test_accuracy_list)
print('Average best validation accuracy from all folds:', average_best_valid_accuracy)


Training begins now for # 0 Fold.
15.866596043109894
[EPOCH 1] VALID ACCURACY: 59.615
save model
9.502539575099945
9.216988503932953
8.862392604351044
8.98517221212387
8.29756373167038
[EPOCH 6] VALID ACCURACY: 59.615
8.135764181613922
7.597042411565781
7.038891047239304
6.091987729072571
5.425549209117889
[EPOCH 11] VALID ACCURACY: 75.000
save model
5.187266498804092
5.14768123626709
4.472138300538063
4.256547451019287
4.8955088108778
[EPOCH 16] VALID ACCURACY: 84.615
save model
3.5341133773326874
3.731830283999443
2.7514409869909286
2.582195222377777
2.3576101511716843
[EPOCH 21] VALID ACCURACY: 78.846
2.6239263713359833
2.5860855504870415
2.6485532224178314
2.9966431632637978
1.8103970400989056
[EPOCH 26] VALID ACCURACY: 86.538
save model
2.3052387312054634
2.524137370288372
2.9214399978518486
2.2304787896573544
2.583153784275055
[EPOCH 31] VALID ACCURACY: 86.538
2.372879274189472
1.833578396588564
2.1592758372426033
1.9373781271278858
1.83826357498765
[EPOCH 36] VALID ACCURACY: 76.

In [59]:
model = SAINT(
categories = tuple(cat_dims),
num_continuous = len(con_idxs),
dim = 8,              # embedding dimension
dim_out = 1,
depth = 1,             # depth of the network (nr. of transformer blocks)
heads = 4,             # number of attention heads 8
attn_dropout = 0.1,
ff_dropout = 0.8,
mlp_hidden_mults = (4, 2),
cont_embeddings = 'MLP', # options: 'MLP', 'linear', 'hybrid' (MLP with continuous embeddings concatenated to the transformer block outputs)
attentiontype = 'row', # options: 'col', 'row', 'colrow', 'colrowv2'
final_mlp_style = 'sep',
y_dim = y_dim
)
model.to(device)


SAINT(
  (norm): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
  (simple_MLP): ModuleList(
    (0-7): 8 x simple_MLP(
      (layers): Sequential(
        (0): Linear(in_features=1, out_features=100, bias=True)
        (1): ReLU()
        (2): Linear(in_features=100, out_features=8, bias=True)
      )
    )
  )
  (transformer): RowColTransformer(
    (embeds): Embedding(828, 8)
    (layers): ModuleList(
      (0): ModuleList(
        (0): PreNorm(
          (norm): LayerNorm((656,), eps=1e-05, elementwise_affine=True)
          (fn): Residual(
            (fn): Attention(
              (to_qkv): Linear(in_features=656, out_features=768, bias=False)
              (to_out): Linear(in_features=256, out_features=656, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
        (1): PreNorm(
          (norm): LayerNorm((656,), eps=1e-05, elementwise_affine=True)
          (fn): Residual(
            (fn): FeedForward(
              (net)

In [60]:
# Start K-Fold for without pre-train
# Define the number of splits
n_splits = 4
best_valid_auroc = 0
best_valid_accuracy = 0
best_test_auroc = 0
best_test_accuracy = 0
best_valid_rmse = 100000

early_stop_counter = 0
early_stop_patience = 20

# fold_dict = {}

# Define the KFold object
kf = KFold(n_splits=n_splits, shuffle=True, random_state=21)

# Initialize lists to store the train and validation indices for each fold
train_indices_list = []
valid_indices_list = []

# Loop over the splits and get the train and validation indices for each fold
for train_indices, valid_indices in kf.split(X_train['data']):
    train_indices_list.append(train_indices)
    valid_indices_list.append(valid_indices)
best_valid_accuracy_list = []
# Loop over the folds and train the model on each fold
for fold in range(n_splits):
    # Get the train and validation indices for this fold
    train_indices = train_indices_list[fold]
    valid_indices = valid_indices_list[fold]

    # Create the train and validation datasets and dataloaders for this fold
    train_ds = DataSetCatCon(X_train, y_train, cat_idxs,opt.dtask,continuous_mean_std)
    trainloader = DataLoader(train_ds, batch_size=train_bsize,num_workers=2, sampler=torch.utils.data.SubsetRandomSampler(train_indices))
    valid_ds = DataSetCatCon(X_train, y_train, cat_idxs,opt.dtask,continuous_mean_std)
    validloader = DataLoader(valid_ds, batch_size=train_bsize, shuffle=False,num_workers=2, sampler=torch.utils.data.SubsetRandomSampler(valid_indices))
    print(f'Training begins now for # {fold} Fold.')
    # Train the model on this fold
    for epoch in range(300):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            optimizer.zero_grad()
            # x_categ is the the categorical data, with y appended as last feature. x_cont has continuous data. cat_mask is an array of ones same shape as x_categ except for last column(corresponding to y's) set to 0s. con_mask is an array of ones same shape as x_cont.
            x_categ, x_cont, y_gts, cat_mask, con_mask = data[0].to(device), data[1].to(device),data[2].to(device),data[3].to(device),data[4].to(device)
            if opt.train_noise_type is not None and opt.train_noise_level>0:
                noise_dict = {
                    'noise_type' : opt.train_noise_type,
                    'lambda' : opt.train_noise_level
                }
                if opt.train_noise_type == 'cutmix':
                    x_categ, x_cont = add_noise(x_categ,x_cont, noise_params = noise_dict)
                elif opt.train_noise_type == 'missing':
                    cat_mask, con_mask = add_noise(cat_mask, con_mask, noise_params = noise_dict)
            # We are converting the data to embeddings in the next step
            _ , x_categ_enc, x_cont_enc = embed_data_mask(x_categ, x_cont, cat_mask, con_mask,model)
            reps = model.transformer(x_categ_enc, x_cont_enc)
            # select only the representations corresponding to y and apply mlp on it in the next step to get the predictions.
            y_reps = reps[:,0,:]

            y_outs = model.mlpfory(y_reps)
            if opt.task == 'regression':
                loss = criterion(y_outs,y_gts)
            else:
                loss = criterion(y_outs,y_gts.squeeze())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(running_loss)
        if epoch%5==0:
            model.eval()
            with torch.no_grad():
                if opt.task in ['binary','multiclass']:
                    accuracy, auroc = classification_scores(model, validloader, device, opt.task)
                    # test_accuracy, test_auroc = classification_scores(model, testloader, device, opt.task)

                    print('[EPOCH %d] VALID ACCURACY: %.3f' %
                        (epoch + 1, accuracy ))
                    # print('[EPOCH %d] TEST ACCURACY: %.3f' %
                    #     (epoch + 1, test_accuracy ))

            if opt.task =='multiclass':
                if accuracy > best_valid_accuracy:
                    best_valid_accuracy = accuracy
                    early_stop_counter = 0
                    print("save model")
                    torch.save({'model': model, 'state_dict': model.state_dict(),'optimizer' : optimizer.state_dict()},modelsave_path+f"model-{fold}.pt")
                else:
                  early_stop_counter +=1
                  if early_stop_counter > early_stop_patience:
                    break


    model.eval()
    with torch.no_grad():
            accuracy, auroc = classification_scores(model, testloader, device, opt.task)
            print('TEST ACCURACY: %.3f' % accuracy)
            best_valid_accuracy_list.append(accuracy)



# End K Fold
# Calculate the average of the best accuracy from each fold
average_best_valid_accuracy = sum(best_valid_accuracy_list) / len(best_valid_accuracy_list)
print('Average best validation accuracy from all folds:', average_best_valid_accuracy)

Training begins now for # 0 Fold.
12.931325435638428
[EPOCH 1] VALID ACCURACY: 59.615
save model
12.966737389564514
13.037555932998657
13.052281737327576
13.073012590408325
13.054601550102234
[EPOCH 6] VALID ACCURACY: 59.615
13.04453194141388
12.97135615348816
13.053110003471375
13.015082001686096
13.092105746269226
[EPOCH 11] VALID ACCURACY: 59.615
13.125950932502747
13.035520434379578
13.053758978843689
13.085068583488464
13.03072726726532
[EPOCH 16] VALID ACCURACY: 57.692
12.95281457901001
13.056073069572449
12.982487678527832
12.989891171455383
13.040847301483154
[EPOCH 21] VALID ACCURACY: 59.615
13.074813842773438
13.012911796569824
13.027708053588867
13.035239458084106
12.996838092803955
[EPOCH 26] VALID ACCURACY: 59.615
13.049815654754639
13.002563118934631
13.037336826324463
12.996376037597656
13.089592933654785
[EPOCH 31] VALID ACCURACY: 57.692
13.066810965538025
13.017314195632935
12.999440670013428
12.976470947265625
13.008018732070923
[EPOCH 36] VALID ACCURACY: 59.615
13.02