In [1]:
import random
random.seed(12)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy

from tqdm import tqdm

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch
from torch.utils.data import Dataset, ConcatDataset, SubsetRandomSampler, DataLoader
from torchvision import transforms
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import cv2

import math
import pickle

from pytorch_metric_learning import losses, miners, distances, reducers, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator

## Data set

In [2]:
class FERDataset(Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, dff, transforms):
        'Initialization'
        self.transforms = transforms
        self.dff= pd.read_csv(dff) if type(dff) is str else dff
        
        self.dff['pixels'] = [[int(y) for y in x.split()] for x in self.dff['pixels']]

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.dff)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        #ID = self.list_IDs[index]

        # Load data and get label
        X = self.dff.iloc[index]['pixels']
#         X = X.split()
        X = np.array(X, dtype=np.uint8)
        X = X.reshape(48,48)
        
        y = int(self.dff.iloc[index]['emotion'])

        if self.transforms:
            X = self.transforms(image=X)['image']

#             X = torch.cat((X,X,X),0)

        return X, y

In [3]:
batch_size= 64

# NOTE(review): absolute, machine-specific path; point this at your local
# copy of fer2013.csv.
FER_CSV_PATH = '/tf/data/Quan/fer2013/data/csv_file/fer2013.csv'
df = pd.read_csv(FER_CSV_PATH)

# Take independent copies of each split. Boolean-mask selections of `df` are
# slices, and FERDataset rewrites the 'pixels' column, which otherwise raises
# pandas' SettingWithCopyWarning.
df_train = df[df['Usage'] == 'Training'].copy()
df_val = df[df['Usage'] == 'PublicTest'].copy()
df_test = df[df['Usage'] == 'PrivateTest'].copy()


# Augmenting pipeline. Currently NOT used below (see train_set); kept so
# augmentation can be switched back on.
train_transforms = A.Compose([
    A.Resize(48,48),
    A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=10, p=0.5, border_mode=0, value=0),
    A.Normalize(mean=(0.485,), std=(0.229,)),
    ToTensorV2()
])


# Deterministic pipeline: resize, normalise the single grey channel, convert
# to a tensor.
test_transforms = A.Compose([
    A.Resize(48,48),
    A.Normalize(mean=(0.485,), std=(0.229,)),
    ToTensorV2()
])

# To train with augmentation use: FERDataset(df_train, train_transforms)
train_set = FERDataset(df_train, test_transforms) # no augmentation!
val_set = FERDataset(df_val, test_transforms)
test_set = FERDataset(df_test, test_transforms)

# All three official splits are pooled into one dataset; the cross-validation
# cell builds its own per-fold DataLoaders from it, so no loaders are created
# here.
dataset = ConcatDataset([train_set, val_set, test_set])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


## ExpLoss module with residual connection

In [4]:
class ExpLoss(nn.Module):
    """Label-guided batch attention head with an optional residual connection.

    The input feature map is globally average-pooled to one ``depth``-sized
    vector per sample. Two linear projections of those vectors are multiplied
    into a (batch, batch) similarity matrix, which is divided by the batch
    size and zeroed for every pair of samples whose labels differ. Each
    sample's output is the similarity-weighted sum of the pooled vectors of
    the same-label samples in the batch, passed through
    Linear -> BatchNorm1d -> ReLU and, when ``residual`` is set, added to the
    sample's own pooled vector.

    NOTE(review): ``forward`` requires the ground-truth labels and mixes
    features between same-label samples of the batch. Any caller that also
    passes true labels at evaluation time gives the model label information
    at inference; confirm this is intended.

    Args:
        depth: number of channels of the incoming feature map; also the
            output width.
        residual: add the pooled input to the output when True.
    """

    def __init__(self, depth, residual=True):
        super(ExpLoss, self).__init__()

        self.depth = depth
        self.residual = residual

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1,1))
        self.flatten = nn.Flatten()
        # relu / sim_act / att_act are not used by forward(); they are kept so
        # the module's attributes stay as they were.
        self.relu = nn.ReLU()

        self.fc1 = nn.Linear(depth, depth)
        self.fc2 = nn.Linear(depth, depth)

        self.sim_act = nn.Sigmoid()
        self.att_act = nn.Sigmoid()

        self.out_fc = nn.Sequential(nn.Linear(depth, depth),
                                    nn.BatchNorm1d(depth),
                                    nn.ReLU())

    def forward(self, x, labels):
        """Apply the label-masked batch attention.

        Args:
            x: feature map whose first dimension is the batch and which pools
                to ``depth`` values per sample.
            labels: 1-D tensor with one class label per sample.

        Returns:
            ``(features, 0)``: the (batch, depth) attended features and a
            constant ``0`` occupying the auxiliary-loss slot.
        """
        batch_size = x.size(0)

        pooled = self.flatten(self.avgpool(x))

        # Pairwise similarities (batch, batch), scaled by the batch size.
        sims = torch.mm(self.fc1(pooled), self.fc2(pooled).t()) / batch_size

        # filter_mask is 0 for same-label pairs, so (1 - mask) keeps exactly
        # the similarities between samples that share a label.
        same_label = 1 - self.filter_mask(labels).to(sims.device)
        att = sims * same_label

        out = self.out_fc(torch.mm(att, pooled))

        if self.residual:
            out = out + pooled

        return out, 0

    def filter_mask(self, labels):
        """Return an (n, n) mask that is 0 where labels match and 1 elsewhere.

        The diagonal is always 0. The mask is computed with one broadcast
        comparison and is created on the device of ``labels``.
        """
        labels = labels.reshape(-1)
        differs = labels.unsqueeze(0) != labels.unsqueeze(1)
        return differs.to(torch.get_default_dtype())

In [5]:
class sVGG_exp(nn.Module):
    """VGG-style feature extractor followed by an ExpLoss head and an MLP classifier.

    Args:
        features: indexable module container used as the convolutional
            backbone. Its first layer (``features[0]``) is replaced IN PLACE
            by a 1-input-channel 3x3 convolution with 64 output channels, so
            the passed module is modified.
        in_features_classifier: channel count produced by ``features``; used
            as the width of both the ExpLoss head and the classifier input.
        n_classes: number of output logits.
    """

    def __init__(self, features, in_features_classifier, n_classes):
        super(sVGG_exp, self).__init__()
        self.features = features
        # Swap the stem for a single-channel (greyscale) input convolution.
        self.features[0] = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        # The head's width must match both the backbone's channels and the
        # classifier's input, so derive it from the parameter instead of
        # hard-coding 512.
        self.exploss = ExpLoss(in_features_classifier)
        self.classifier = nn.Sequential(nn.Flatten(),
                                        nn.Linear(in_features_classifier, in_features_classifier),
                                        nn.ReLU(inplace=True),
                                        nn.Dropout(0.5, inplace=False),
                                        nn.Linear(in_features_classifier, in_features_classifier // 2),
                                        nn.ReLU(inplace=True),
                                        nn.Dropout(0.5, inplace=False),
                                        nn.Linear(in_features_classifier // 2, n_classes))

    def forward(self, x, labels, return_att=True):
        """Run backbone -> ExpLoss head -> classifier.

        Args:
            x: input image batch.
            labels: per-sample class labels, forwarded to the ExpLoss head.
            return_att: when True, also return the head's output features.

        Returns:
            ``(logits, sim_loss, att)`` if ``return_att`` else
            ``(logits, sim_loss)``, where ``sim_loss`` is the second value
            returned by the ExpLoss head.
        """
        x = self.features(x)
        att, sim_loss = self.exploss(x, labels)

        x = self.classifier(att)

        if return_att:
            return x, sim_loss, att
        return x, sim_loss
        

## KFold

In [15]:
from sklearn.model_selection import KFold
from torchvision.models import vgg16_bn

n_folds = 5
# shuffle=True without a random_state gives a different split on every run
# (random.seed() at the top of the notebook does not seed NumPy). Pin the
# seed so the folds are reproducible; 12 matches the seed used in the first
# cell.
kfold = KFold(n_splits=n_folds, shuffle=True, random_state=12)

In [None]:
import os

batch_size= 64
lr = 0.001
epochs = 50
# Fall back to CPU so the cell still runs on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
optimal_folds = [] # [test loss, test acc]

# torch.save / open() do not create missing directories.
os.makedirs('exploss_crossval', exist_ok=True)

for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
    print('Fold ', fold + 1, ' / ', n_folds, ' ********************* ')

    # Samplers restrict the shared dataset to this fold's indices.
    train_subsampler = SubsetRandomSampler(train_ids)
    test_subsampler = SubsetRandomSampler(test_ids)

    train_loader = DataLoader(dataset, batch_size, sampler=train_subsampler)
    test_loader = DataLoader(dataset, batch_size, sampler=test_subsampler)

    # Fresh, randomly initialised model for every fold.
    model = sVGG_exp(features=vgg16_bn(pretrained=False).features[:34], in_features_classifier=512, n_classes=7)

    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    # mode='max' because the scheduler is stepped with an accuracy.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=5, verbose=True)

    model_path = 'exploss_crossval/resExploss_model_fold' + str(fold+1) + '.pt'

    best_acc = 0.0
    curloss = -1
    hist = []  # per epoch: [train loss, test loss, train acc, test acc]

    for epoch in range(epochs):  # loop over the dataset multiple times

        print('Epoch: ', epoch + 1)
        running_loss = 0.0
        running_acc = 0.0

        # TRAIN
        model.train()
        pbar = tqdm(train_loader, position=0)
        for i, data in enumerate(pbar):
            # data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            # forward + backward + optimize
            outputs, sim_loss, features = model(inputs, labels)
            loss = criterion(outputs, labels) + sim_loss
            n_correct = (torch.argmax(outputs, dim=1) == labels).float().sum()

            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch average
            # is per-sample even when the last batch is smaller.
            running_loss += (loss.item() * inputs.size(0))
            running_acc += n_correct.item()

            pbar.set_description(desc= f'Loss={loss.item()} Batch_id={i} Accuracy={n_correct.item() / inputs.size(0)}')

        avgloss = running_loss / len(train_ids)
        avgacc = running_acc / len(train_ids)
        print('- Avg.loss: %.4f | Avg.acc: %.4f' % (avgloss, avgacc))

        # EVALUATE
        # NOTE(review): the model receives the ground-truth labels here as
        # well, and ExpLoss uses them to mix features between same-label
        # samples of the batch. The reported test accuracy is therefore
        # computed with label information available to the model; confirm
        # this is intended before comparing with label-free baselines.
        model.eval()
        running_valloss = 0.0
        running_valacc = 0.0
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            for i,data in enumerate(test_loader):
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs, sim_loss, features = model(inputs, labels)

                loss = criterion(outputs, labels) + sim_loss
                n_correct = (torch.argmax(outputs, dim=1) == labels).float().sum()

                running_valloss += (loss.item() * inputs.size(0))
                running_valacc += n_correct.item()

        avgvalloss = running_valloss / len(test_ids)
        avgvalacc = running_valacc / len(test_ids)
        print('- Avg. test_loss: %.4f | Avg. test_acc: %.4f' % (avgvalloss, avgvalacc))

        scheduler.step(avgvalacc)

        hist.append([avgloss, avgvalloss, avgacc, avgvalacc])

        # NOTE(review): the checkpoint (and the LR schedule) are selected on
        # the same held-out fold whose accuracy is reported, so the "optimal"
        # figure is an optimistic estimate.
        if best_acc < avgvalacc:
            best_acc = avgvalacc
            curloss = avgvalloss
            torch.save(model, model_path)
            print('* Update optimal model')


    # The history is a pickle despite the .hdf5 extension; the file name is
    # kept unchanged so existing readers still find it.
    with open('exploss_crossval/resExploss_hist_fold' + str(fold+1) + '.hdf5', 'wb') as hist_file:
        pickle.dump(hist, hist_file)
    optimal_folds.append([curloss, best_acc])
    print('Optimal result: test_loss: %.4f | test_acc: %.4f' % (curloss, best_acc))
    print(' - Finished Training of fold ', fold + 1, ' - ')

Fold  1  /  5  ********************* 


  0%|          | 0/449 [00:00<?, ?it/s]

Epoch:  1


Loss=0.8564144372940063 Batch_id=448 Accuracy=0.5675675675675675: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]


- Avg.loss: 1.2299 | Avg.acc: 0.4954


Loss=0.8158001899719238 Batch_id=0 Accuracy=0.546875:   0%|          | 1/449 [00:00<00:51,  8.68it/s]

- Avg. test_loss: 2.3678 | Avg. test_acc: 0.3257
* Update optimal model
Epoch:  2


Loss=0.8583192825317383 Batch_id=448 Accuracy=0.5135135135135135: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]


- Avg.loss: 0.9244 | Avg.acc: 0.5864
- Avg. test_loss: 1.5678 | Avg. test_acc: 0.4622


Loss=0.905745804309845 Batch_id=0 Accuracy=0.625:   0%|          | 1/449 [00:00<01:09,  6.43it/s]

* Update optimal model
Epoch:  3


Loss=1.1571018695831299 Batch_id=448 Accuracy=0.4864864864864865: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]


- Avg.loss: 0.8061 | Avg.acc: 0.6280


Loss=0.7502336502075195 Batch_id=0 Accuracy=0.703125:   0%|          | 1/449 [00:00<00:51,  8.76it/s]

- Avg. test_loss: 1.8505 | Avg. test_acc: 0.4347
Epoch:  4


Loss=1.3950679302215576 Batch_id=448 Accuracy=0.6486486486486487: 100%|██████████| 449/449 [00:39<00:00, 11.51it/s]


- Avg.loss: 0.7085 | Avg.acc: 0.7123
- Avg. test_loss: 1.5015 | Avg. test_acc: 0.6053


Loss=1.0086766481399536 Batch_id=0 Accuracy=0.65625:   0%|          | 1/449 [00:00<01:09,  6.43it/s]

* Update optimal model
Epoch:  5


Loss=1.0339726209640503 Batch_id=448 Accuracy=0.43243243243243246: 100%|██████████| 449/449 [00:39<00:00, 11.51it/s]


- Avg.loss: 0.6566 | Avg.acc: 0.7536
- Avg. test_loss: 0.9531 | Avg. test_acc: 0.7200


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  6


Loss=1.0853368043899536 Batch_id=448 Accuracy=0.5945945945945946: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]


- Avg.loss: 0.5184 | Avg.acc: 0.8007


Loss=0.369344025850296 Batch_id=0 Accuracy=0.953125:   0%|          | 1/449 [00:00<00:50,  8.85it/s]

- Avg. test_loss: 0.8882 | Avg. test_acc: 0.6956
Epoch:  7


Loss=0.5664389133453369 Batch_id=448 Accuracy=0.7837837837837838: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]


- Avg.loss: 0.5316 | Avg.acc: 0.8033


Loss=0.4663119614124298 Batch_id=0 Accuracy=0.90625:   0%|          | 1/449 [00:00<00:50,  8.80it/s]

- Avg. test_loss: 0.8278 | Avg. test_acc: 0.6775
Epoch:  8


Loss=0.3231678009033203 Batch_id=448 Accuracy=0.8918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.51it/s]


- Avg.loss: 0.4853 | Avg.acc: 0.8216
- Avg. test_loss: 0.6580 | Avg. test_acc: 0.7643


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  9


Loss=0.20031319558620453 Batch_id=448 Accuracy=0.972972972972973: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]


- Avg.loss: 0.4546 | Avg.acc: 0.8339


Loss=0.3577318489551544 Batch_id=0 Accuracy=0.9375:   0%|          | 1/449 [00:00<00:52,  8.60it/s]

- Avg. test_loss: 1.0669 | Avg. test_acc: 0.7129
Epoch:  10


Loss=0.8199410438537598 Batch_id=448 Accuracy=0.7027027027027027: 100%|██████████| 449/449 [00:38<00:00, 11.55it/s]


- Avg.loss: 0.4408 | Avg.acc: 0.8430


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 1.3967 | Avg. test_acc: 0.6475
Epoch:  11


Loss=0.5060980916023254 Batch_id=448 Accuracy=0.8378378378378378: 100%|██████████| 449/449 [00:39<00:00, 11.46it/s]


- Avg.loss: 0.4196 | Avg.acc: 0.8536
- Avg. test_loss: 0.5579 | Avg. test_acc: 0.8013


Loss=0.08973328024148941 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<01:09,  6.40it/s]

* Update optimal model
Epoch:  12


Loss=0.17149293422698975 Batch_id=448 Accuracy=0.972972972972973: 100%|██████████| 449/449 [00:38<00:00, 11.54it/s]


- Avg.loss: 0.3220 | Avg.acc: 0.8879


Loss=0.9893635511398315 Batch_id=0 Accuracy=0.53125:   0%|          | 1/449 [00:00<00:50,  8.78it/s]

- Avg. test_loss: 1.1312 | Avg. test_acc: 0.7449
Epoch:  13


Loss=1.7116801738739014 Batch_id=448 Accuracy=0.6216216216216216: 100%|██████████| 449/449 [00:38<00:00, 11.57it/s]


- Avg.loss: 0.3633 | Avg.acc: 0.8823


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.8865 | Avg. test_acc: 0.7446
Epoch:  14


Loss=0.4029677212238312 Batch_id=448 Accuracy=0.918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.46it/s]


- Avg.loss: 0.3077 | Avg.acc: 0.8982
- Avg. test_loss: 0.5881 | Avg. test_acc: 0.8026


Loss=0.09590555727481842 Batch_id=0 Accuracy=0.96875:   0%|          | 1/449 [00:00<01:08,  6.57it/s]

* Update optimal model
Epoch:  15


Loss=1.0071419477462769 Batch_id=448 Accuracy=0.5675675675675675: 100%|██████████| 449/449 [00:38<00:00, 11.56it/s]


- Avg.loss: 0.2745 | Avg.acc: 0.9115


Loss=0.40565016865730286 Batch_id=0 Accuracy=0.796875:   0%|          | 1/449 [00:00<00:50,  8.80it/s]

- Avg. test_loss: 1.3659 | Avg. test_acc: 0.6519
Epoch:  16


Loss=0.2983017861843109 Batch_id=448 Accuracy=0.9459459459459459: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.2846 | Avg.acc: 0.9089
- Avg. test_loss: 0.5071 | Avg. test_acc: 0.8401


Loss=0.07369867712259293 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<01:09,  6.43it/s]

* Update optimal model
Epoch:  17


Loss=0.40266841650009155 Batch_id=448 Accuracy=0.8108108108108109: 100%|██████████| 449/449 [00:38<00:00, 11.61it/s]


- Avg.loss: 0.2437 | Avg.acc: 0.9261
- Avg. test_loss: 0.3649 | Avg. test_acc: 0.8807


Loss=0.1111818179488182 Batch_id=0 Accuracy=0.96875:   0%|          | 1/449 [00:00<01:07,  6.65it/s]

* Update optimal model
Epoch:  18


Loss=0.7412776947021484 Batch_id=448 Accuracy=0.7567567567567568: 100%|██████████| 449/449 [00:38<00:00, 11.58it/s]


- Avg.loss: 0.2259 | Avg.acc: 0.9323


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.7171 | Avg. test_acc: 0.7983
Epoch:  19


Loss=0.20401425659656525 Batch_id=448 Accuracy=0.8918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.40it/s]


- Avg.loss: 0.2072 | Avg.acc: 0.9346
- Avg. test_loss: 0.4059 | Avg. test_acc: 0.8838


Loss=0.047078441828489304 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<01:08,  6.51it/s]

* Update optimal model
Epoch:  20


Loss=0.09892366826534271 Batch_id=448 Accuracy=0.972972972972973: 100%|██████████| 449/449 [00:38<00:00, 11.55it/s]


- Avg.loss: 0.2155 | Avg.acc: 0.9361
- Avg. test_loss: 0.3296 | Avg. test_acc: 0.8920


Loss=0.18717308342456818 Batch_id=0 Accuracy=0.890625:   0%|          | 1/449 [00:00<01:08,  6.49it/s]

* Update optimal model
Epoch:  21


Loss=0.6055008769035339 Batch_id=448 Accuracy=0.8378378378378378: 100%|██████████| 449/449 [00:38<00:00, 11.51it/s]


- Avg.loss: 0.2037 | Avg.acc: 0.9379


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 1.4086 | Avg. test_acc: 0.7354
Epoch:  22


Loss=0.1887921392917633 Batch_id=448 Accuracy=0.918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.39it/s]


- Avg.loss: 0.1966 | Avg.acc: 0.9370


Loss=0.7318978905677795 Batch_id=0 Accuracy=0.765625:   0%|          | 1/449 [00:00<00:50,  8.80it/s]

- Avg. test_loss: 0.4796 | Avg. test_acc: 0.8647
Epoch:  23


Loss=0.18601098656654358 Batch_id=448 Accuracy=0.9459459459459459: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]


- Avg.loss: 0.2034 | Avg.acc: 0.9371


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.7843 | Avg. test_acc: 0.8341
Epoch:  24


Loss=0.15894456207752228 Batch_id=448 Accuracy=0.918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.39it/s]


- Avg.loss: 0.1815 | Avg.acc: 0.9446


Loss=0.3202907145023346 Batch_id=0 Accuracy=0.875:   0%|          | 1/449 [00:00<00:50,  8.83it/s]

- Avg. test_loss: 2.8535 | Avg. test_acc: 0.4887
Epoch:  25


Loss=0.015237221494317055 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]     


- Avg.loss: 0.1580 | Avg.acc: 0.9495
- Avg. test_loss: 0.3230 | Avg. test_acc: 0.8944


Loss=0.06585655361413956 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<01:09,  6.48it/s]

* Update optimal model
Epoch:  26


Loss=0.8564202785491943 Batch_id=448 Accuracy=0.8108108108108109: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]


- Avg.loss: 0.1795 | Avg.acc: 0.9433


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.5389 | Avg. test_acc: 0.8433
Epoch:  27


Loss=0.2318902462720871 Batch_id=448 Accuracy=0.9459459459459459: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]


- Avg.loss: 0.1695 | Avg.acc: 0.9494


Loss=0.07663660496473312 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.86it/s]

- Avg. test_loss: 0.4213 | Avg. test_acc: 0.8530
Epoch:  28


Loss=0.6001963019371033 Batch_id=448 Accuracy=0.918918918918919: 100%|██████████| 449/449 [00:38<00:00, 11.56it/s]


- Avg.loss: 0.1837 | Avg.acc: 0.9442


Loss=0.24800646305084229 Batch_id=0 Accuracy=0.890625:   0%|          | 1/449 [00:00<00:51,  8.70it/s]

- Avg. test_loss: 0.4707 | Avg. test_acc: 0.8706
Epoch:  29


Loss=0.1410718858242035 Batch_id=448 Accuracy=0.918918918918919: 100%|██████████| 449/449 [00:38<00:00, 11.54it/s]


- Avg.loss: 0.1816 | Avg.acc: 0.9416


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.6881 | Avg. test_acc: 0.8224
Epoch:  30


Loss=0.25361722707748413 Batch_id=448 Accuracy=0.8918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.45it/s]


- Avg.loss: 0.1832 | Avg.acc: 0.9446


Loss=0.037372808903455734 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.66it/s]

- Avg. test_loss: 0.7853 | Avg. test_acc: 0.7778
Epoch:  31


Loss=1.0300179719924927 Batch_id=448 Accuracy=0.8378378378378378: 100%|██████████| 449/449 [00:38<00:00, 11.51it/s]


- Avg.loss: 0.1781 | Avg.acc: 0.9442


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.7080 | Avg. test_acc: 0.8279
Epoch    31: reducing learning rate of group 0 to 1.0000e-04.
Epoch:  32


Loss=0.08217966556549072 Batch_id=448 Accuracy=0.972972972972973: 100%|██████████| 449/449 [00:39<00:00, 11.37it/s]


- Avg.loss: 0.0995 | Avg.acc: 0.9690


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2755 | Avg. test_acc: 0.9096
* Update optimal model
Epoch:  33


Loss=0.020020131021738052 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]     


- Avg.loss: 0.0828 | Avg.acc: 0.9730


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2495 | Avg. test_acc: 0.9237
* Update optimal model
Epoch:  34


Loss=0.026196155697107315 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:38<00:00, 11.56it/s]     


- Avg.loss: 0.0837 | Avg.acc: 0.9743


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2437 | Avg. test_acc: 0.9227
Epoch:  35


Loss=0.0020130632910877466 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.45it/s]    


- Avg.loss: 0.0625 | Avg.acc: 0.9787


Loss=0.13668273389339447 Batch_id=0 Accuracy=0.9375:   0%|          | 1/449 [00:00<00:51,  8.77it/s]

- Avg. test_loss: 0.2384 | Avg. test_acc: 0.9086
Epoch:  36


Loss=0.12995877861976624 Batch_id=448 Accuracy=0.972972972972973: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.0808 | Avg.acc: 0.9757


Loss=0.10479117929935455 Batch_id=0 Accuracy=0.953125:   0%|          | 1/449 [00:00<00:50,  8.82it/s]

- Avg. test_loss: 0.2368 | Avg. test_acc: 0.9189
Epoch:  37


Loss=0.6038351655006409 Batch_id=448 Accuracy=0.8648648648648649: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.0538 | Avg.acc: 0.9808


Loss=0.00858826283365488 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.81it/s]

- Avg. test_loss: 0.3082 | Avg. test_acc: 0.9161
Epoch:  38


Loss=0.10449463874101639 Batch_id=448 Accuracy=0.9459459459459459: 100%|██████████| 449/449 [00:38<00:00, 11.51it/s]


- Avg.loss: 0.0571 | Avg.acc: 0.9823


Loss=0.0013263579457998276 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.79it/s]

- Avg. test_loss: 0.3061 | Avg. test_acc: 0.9022
Epoch:  39


Loss=0.001547239487990737 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]     


- Avg.loss: 0.0594 | Avg.acc: 0.9805


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2844 | Avg. test_acc: 0.9065
Epoch    39: reducing learning rate of group 0 to 1.0000e-05.
Epoch:  40


Loss=0.0026268051005899906 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.42it/s]    


- Avg.loss: 0.0535 | Avg.acc: 0.9818


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2403 | Avg. test_acc: 0.9281
* Update optimal model
Epoch:  41


Loss=0.08377863466739655 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.46it/s]      


- Avg.loss: 0.0452 | Avg.acc: 0.9854


Loss=0.0014770656125620008 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.75it/s]

- Avg. test_loss: 0.2405 | Avg. test_acc: 0.9256
Epoch:  42


Loss=0.4249342978000641 Batch_id=448 Accuracy=0.8918918918918919: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]


- Avg.loss: 0.0509 | Avg.acc: 0.9850


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2802 | Avg. test_acc: 0.9195
Epoch:  43


Loss=0.0005104361334815621 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.42it/s]    


- Avg.loss: 0.0534 | Avg.acc: 0.9826


Loss=0.009623968042433262 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.75it/s]

- Avg. test_loss: 0.2615 | Avg. test_acc: 0.9193
Epoch:  44


Loss=0.12318632006645203 Batch_id=448 Accuracy=0.9459459459459459: 100%|██████████| 449/449 [00:39<00:00, 11.51it/s]


- Avg.loss: 0.0510 | Avg.acc: 0.9827


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2966 | Avg. test_acc: 0.9079
Epoch:  45


Loss=0.004036416299641132 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.39it/s]     


- Avg.loss: 0.0547 | Avg.acc: 0.9818


Loss=0.12352496385574341 Batch_id=0 Accuracy=0.953125:   0%|          | 1/449 [00:00<00:51,  8.66it/s]

- Avg. test_loss: 0.2551 | Avg. test_acc: 0.9249
Epoch:  46


Loss=0.1849992722272873 Batch_id=448 Accuracy=0.9459459459459459: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]


- Avg.loss: 0.0444 | Avg.acc: 0.9860


Loss=0.003495868295431137 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.78it/s]

- Avg. test_loss: 0.2382 | Avg. test_acc: 0.9277
Epoch    46: reducing learning rate of group 0 to 1.0000e-06.
Epoch:  47


Loss=0.0003261975944042206 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]    


- Avg.loss: 0.0531 | Avg.acc: 0.9828


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2624 | Avg. test_acc: 0.9007
Epoch:  48


Loss=0.15292870998382568 Batch_id=448 Accuracy=0.9459459459459459: 100%|██████████| 449/449 [00:39<00:00, 11.40it/s]


- Avg.loss: 0.0459 | Avg.acc: 0.9854


Loss=0.018414106220006943 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.66it/s]

- Avg. test_loss: 0.3241 | Avg. test_acc: 0.8996
Epoch:  49


Loss=0.014004996977746487 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.50it/s]     


- Avg.loss: 0.0558 | Avg.acc: 0.9838


Loss=0.009879524819552898 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.65it/s]

- Avg. test_loss: 0.2302 | Avg. test_acc: 0.9257
Epoch:  50


Loss=1.1788554191589355 Batch_id=448 Accuracy=0.8648648648648649: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]


- Avg.loss: 0.0516 | Avg.acc: 0.9830
- Avg. test_loss: 0.2065 | Avg. test_acc: 0.9223
Optimal result: test_loss: 0.2403 | test_acc: 0.9281
 - Finished Training of fold  1  - 
Fold  2  /  5  ********************* 


Loss=1.9803346395492554 Batch_id=0 Accuracy=0.15625:   0%|          | 1/449 [00:00<00:50,  8.84it/s]

Epoch:  1


Loss=1.5067485570907593 Batch_id=448 Accuracy=0.24324324324324326: 100%|██████████| 449/449 [00:38<00:00, 11.55it/s]


- Avg.loss: 1.4956 | Avg.acc: 0.3978


Loss=1.2519142627716064 Batch_id=0 Accuracy=0.421875:   0%|          | 1/449 [00:00<00:50,  8.84it/s]

- Avg. test_loss: 7.7416 | Avg. test_acc: 0.1209
* Update optimal model
Epoch:  2


Loss=0.9286653399467468 Batch_id=448 Accuracy=0.5405405405405406: 100%|██████████| 449/449 [00:38<00:00, 11.54it/s]


- Avg.loss: 1.0570 | Avg.acc: 0.5611
- Avg. test_loss: 5.9388 | Avg. test_acc: 0.2686


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  3


Loss=0.9040740728378296 Batch_id=448 Accuracy=0.6756756756756757: 100%|██████████| 449/449 [00:39<00:00, 11.36it/s]


- Avg.loss: 0.8895 | Avg.acc: 0.6417
- Avg. test_loss: 1.4558 | Avg. test_acc: 0.4663


Loss=1.124502182006836 Batch_id=0 Accuracy=0.5:   0%|          | 1/449 [00:00<01:10,  6.38it/s]

* Update optimal model
Epoch:  4


Loss=1.0863966941833496 Batch_id=448 Accuracy=0.5945945945945946: 100%|██████████| 449/449 [00:39<00:00, 11.50it/s]


- Avg.loss: 0.7361 | Avg.acc: 0.7095
- Avg. test_loss: 2.5637 | Avg. test_acc: 0.4969


Loss=0.7649394869804382 Batch_id=0 Accuracy=0.6875:   0%|          | 1/449 [00:00<01:10,  6.39it/s]

* Update optimal model
Epoch:  5


Loss=0.8934998512268066 Batch_id=448 Accuracy=0.5405405405405406: 100%|██████████| 449/449 [00:39<00:00, 11.46it/s]


- Avg.loss: 0.6693 | Avg.acc: 0.7475
- Avg. test_loss: 1.3746 | Avg. test_acc: 0.5078


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  6


Loss=0.6371941566467285 Batch_id=448 Accuracy=0.7027027027027027: 100%|██████████| 449/449 [00:39<00:00, 11.41it/s]


- Avg.loss: 0.6624 | Avg.acc: 0.7494
- Avg. test_loss: 0.9536 | Avg. test_acc: 0.6081


Loss=0.4133269190788269 Batch_id=0 Accuracy=0.828125:   0%|          | 1/449 [00:00<01:06,  6.73it/s]

* Update optimal model
Epoch:  7


Loss=0.5789621472358704 Batch_id=448 Accuracy=0.7027027027027027: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]


- Avg.loss: 0.5931 | Avg.acc: 0.7812
- Avg. test_loss: 0.7954 | Avg. test_acc: 0.6783


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  8


Loss=0.8227747082710266 Batch_id=448 Accuracy=0.6756756756756757: 100%|██████████| 449/449 [00:39<00:00, 11.38it/s]


- Avg.loss: 0.5579 | Avg.acc: 0.7914
- Avg. test_loss: 0.6118 | Avg. test_acc: 0.7800


Loss=0.21992115676403046 Batch_id=0 Accuracy=0.953125:   0%|          | 1/449 [00:00<01:10,  6.40it/s]

* Update optimal model
Epoch:  9


Loss=1.110330581665039 Batch_id=448 Accuracy=0.4864864864864865: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.5044 | Avg.acc: 0.8200


Loss=0.30036428570747375 Batch_id=0 Accuracy=0.921875:   0%|          | 1/449 [00:00<00:51,  8.63it/s]

- Avg. test_loss: 0.6438 | Avg. test_acc: 0.7673
Epoch:  10


Loss=0.7668842673301697 Batch_id=448 Accuracy=0.7567567567567568: 100%|██████████| 449/449 [00:39<00:00, 11.50it/s]


- Avg.loss: 0.4702 | Avg.acc: 0.8250


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.7159 | Avg. test_acc: 0.7118
Epoch:  11


Loss=1.7789891958236694 Batch_id=448 Accuracy=0.5675675675675675: 100%|██████████| 449/449 [00:39<00:00, 11.42it/s]


- Avg.loss: 0.5070 | Avg.acc: 0.8140
- Avg. test_loss: 0.5483 | Avg. test_acc: 0.7963


Loss=0.20333848893642426 Batch_id=0 Accuracy=0.90625:   0%|          | 1/449 [00:00<01:08,  6.53it/s]

* Update optimal model
Epoch:  12


Loss=0.8028624057769775 Batch_id=448 Accuracy=0.7027027027027027: 100%|██████████| 449/449 [00:38<00:00, 11.54it/s]


- Avg.loss: 0.4315 | Avg.acc: 0.8429


Loss=0.49249207973480225 Batch_id=0 Accuracy=0.765625:   0%|          | 1/449 [00:00<00:50,  8.87it/s]

- Avg. test_loss: 0.8336 | Avg. test_acc: 0.7405
Epoch:  13


Loss=0.6508477330207825 Batch_id=448 Accuracy=0.7297297297297297: 100%|██████████| 449/449 [00:38<00:00, 11.56it/s]


- Avg.loss: 0.4438 | Avg.acc: 0.8414


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 8.2899 | Avg. test_acc: 0.4425
Epoch:  14


Loss=1.9396913051605225 Batch_id=448 Accuracy=0.4864864864864865: 100%|██████████| 449/449 [00:39<00:00, 11.43it/s]


- Avg.loss: 0.4383 | Avg.acc: 0.8390


Loss=0.3017536401748657 Batch_id=0 Accuracy=0.890625:   0%|          | 1/449 [00:00<00:50,  8.87it/s]

- Avg. test_loss: 0.6525 | Avg. test_acc: 0.7704
Epoch:  15


Loss=0.32212820649147034 Batch_id=448 Accuracy=0.918918918918919: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]


- Avg.loss: 0.3640 | Avg.acc: 0.8731


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.7206 | Avg. test_acc: 0.7786
Epoch:  16


Loss=0.4522601366043091 Batch_id=448 Accuracy=0.8378378378378378: 100%|██████████| 449/449 [00:39<00:00, 11.43it/s]


- Avg.loss: 0.3904 | Avg.acc: 0.8696
- Avg. test_loss: 0.4698 | Avg. test_acc: 0.8429


Loss=0.5245044827461243 Batch_id=0 Accuracy=0.78125:   0%|          | 1/449 [00:00<01:07,  6.59it/s]

* Update optimal model
Epoch:  17


Loss=0.25832700729370117 Batch_id=448 Accuracy=0.918918918918919: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.3170 | Avg.acc: 0.8930


Loss=0.34207507967948914 Batch_id=0 Accuracy=0.875:   0%|          | 1/449 [00:00<00:51,  8.77it/s]

- Avg. test_loss: 0.6596 | Avg. test_acc: 0.7660
Epoch:  18


Loss=0.53819340467453 Batch_id=448 Accuracy=0.8918918918918919: 100%|██████████| 449/449 [00:38<00:00, 11.55it/s]


- Avg.loss: 0.2870 | Avg.acc: 0.9039


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.5204 | Avg. test_acc: 0.8176
Epoch:  19


Loss=0.739406168460846 Batch_id=448 Accuracy=0.6216216216216216: 100%|██████████| 449/449 [00:39<00:00, 11.46it/s]


- Avg.loss: 0.3009 | Avg.acc: 0.9021


Loss=0.2382304072380066 Batch_id=0 Accuracy=0.875:   0%|          | 1/449 [00:00<00:51,  8.71it/s]

- Avg. test_loss: 0.7610 | Avg. test_acc: 0.7403
Epoch:  20


Loss=1.5548447370529175 Batch_id=448 Accuracy=0.4864864864864865: 100%|██████████| 449/449 [00:39<00:00, 11.49it/s]


- Avg.loss: 0.2497 | Avg.acc: 0.9233
- Avg. test_loss: 0.4895 | Avg. test_acc: 0.8447


Loss=0.05437486991286278 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<00:52,  8.50it/s]

* Update optimal model
Epoch:  21


Loss=0.44871985912323 Batch_id=448 Accuracy=0.918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]


- Avg.loss: 0.2451 | Avg.acc: 0.9235


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 1.2380 | Avg. test_acc: 0.7116
Epoch:  22


Loss=0.9952898621559143 Batch_id=448 Accuracy=0.5135135135135135: 100%|██████████| 449/449 [00:39<00:00, 11.35it/s]


- Avg.loss: 0.2060 | Avg.acc: 0.9346
- Avg. test_loss: 0.3388 | Avg. test_acc: 0.8944


Loss=0.08951937407255173 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<01:09,  6.47it/s]

* Update optimal model
Epoch:  23


Loss=0.7762160897254944 Batch_id=448 Accuracy=0.7027027027027027: 100%|██████████| 449/449 [00:38<00:00, 11.54it/s]


- Avg.loss: 0.2308 | Avg.acc: 0.9272


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.9167 | Avg. test_acc: 0.7251
Epoch:  24


Loss=1.4486826658248901 Batch_id=448 Accuracy=0.6486486486486487: 100%|██████████| 449/449 [00:39<00:00, 11.45it/s]


- Avg.loss: 0.1983 | Avg.acc: 0.9401


Loss=0.05264317989349365 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<00:51,  8.68it/s]

- Avg. test_loss: 0.4278 | Avg. test_acc: 0.8734
Epoch:  25


Loss=0.3470117151737213 Batch_id=448 Accuracy=0.8918918918918919: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.2355 | Avg.acc: 0.9255


Loss=0.16450577974319458 Batch_id=0 Accuracy=0.953125:   0%|          | 1/449 [00:00<00:51,  8.72it/s]

- Avg. test_loss: 0.4454 | Avg. test_acc: 0.8398
Epoch:  26


Loss=0.6268051862716675 Batch_id=448 Accuracy=0.8648648648648649: 100%|██████████| 449/449 [00:38<00:00, 11.55it/s]


- Avg.loss: 0.2129 | Avg.acc: 0.9321


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.4989 | Avg. test_acc: 0.8465
Epoch:  27


Loss=0.7161698937416077 Batch_id=448 Accuracy=0.8918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]


- Avg.loss: 0.1825 | Avg.acc: 0.9425
- Avg. test_loss: 0.2551 | Avg. test_acc: 0.9093


Loss=0.023896794766187668 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<01:08,  6.52it/s]

* Update optimal model
Epoch:  28


Loss=0.35157108306884766 Batch_id=448 Accuracy=0.8378378378378378: 100%|██████████| 449/449 [00:39<00:00, 11.51it/s]


- Avg.loss: 0.1915 | Avg.acc: 0.9447


Loss=0.016641946509480476 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.79it/s]

- Avg. test_loss: 0.4520 | Avg. test_acc: 0.8677
Epoch:  29


Loss=0.3774411380290985 Batch_id=448 Accuracy=0.8918918918918919: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]


- Avg.loss: 0.1693 | Avg.acc: 0.9483


Loss=0.0770561695098877 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<00:50,  8.86it/s]

- Avg. test_loss: 0.2906 | Avg. test_acc: 0.8975
Epoch:  30


Loss=0.35436367988586426 Batch_id=448 Accuracy=0.8918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]


- Avg.loss: 0.1778 | Avg.acc: 0.9467


Loss=0.21731036901474 Batch_id=0 Accuracy=0.890625:   0%|          | 1/449 [00:00<00:51,  8.68it/s]

- Avg. test_loss: 4.0822 | Avg. test_acc: 0.4916
Epoch:  31


Loss=0.06955866515636444 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.49it/s]      


- Avg.loss: 0.1703 | Avg.acc: 0.9478


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.6561 | Avg. test_acc: 0.8419
Epoch:  32


Loss=0.8556084632873535 Batch_id=448 Accuracy=0.6756756756756757: 100%|██████████| 449/449 [00:39<00:00, 11.45it/s]


- Avg.loss: 0.1738 | Avg.acc: 0.9486


Loss=0.26557257771492004 Batch_id=0 Accuracy=0.921875:   0%|          | 1/449 [00:00<00:51,  8.78it/s]

- Avg. test_loss: 0.4534 | Avg. test_acc: 0.8193
Epoch:  33


Loss=0.5292595028877258 Batch_id=448 Accuracy=0.8378378378378378: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]


- Avg.loss: 0.1569 | Avg.acc: 0.9509


Loss=0.07791311293840408 Batch_id=0 Accuracy=0.96875:   0%|          | 1/449 [00:00<00:51,  8.76it/s]

- Avg. test_loss: 0.4358 | Avg. test_acc: 0.8807
Epoch    33: reducing learning rate of group 0 to 1.0000e-04.
Epoch:  34


Loss=0.07797276228666306 Batch_id=448 Accuracy=0.972972972972973: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]


- Avg.loss: 0.1005 | Avg.acc: 0.9691


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.3158 | Avg. test_acc: 0.9069
Epoch:  35


Loss=0.006497981958091259 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.42it/s]     


- Avg.loss: 0.0815 | Avg.acc: 0.9749
- Avg. test_loss: 0.2995 | Avg. test_acc: 0.9178


Loss=0.10920723527669907 Batch_id=0 Accuracy=0.953125:   0%|          | 1/449 [00:00<01:09,  6.49it/s]

* Update optimal model
Epoch:  36


Loss=0.3619914948940277 Batch_id=448 Accuracy=0.8648648648648649: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]


- Avg.loss: 0.0767 | Avg.acc: 0.9752


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.3092 | Avg. test_acc: 0.9126
Epoch:  37


Loss=0.0071086883544921875 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.40it/s]    


- Avg.loss: 0.0655 | Avg.acc: 0.9788


Loss=0.010775316506624222 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.80it/s]

- Avg. test_loss: 0.3451 | Avg. test_acc: 0.8908
Epoch:  38


Loss=0.1670570969581604 Batch_id=448 Accuracy=0.972972972972973: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.0588 | Avg.acc: 0.9821


Loss=0.0028967319522053003 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.79it/s]

- Avg. test_loss: 0.2991 | Avg. test_acc: 0.8968
Epoch:  39


Loss=0.6460508108139038 Batch_id=448 Accuracy=0.7297297297297297: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.0682 | Avg.acc: 0.9780
- Avg. test_loss: 0.2928 | Avg. test_acc: 0.9199


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  40


Loss=0.0036870893090963364 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]    


- Avg.loss: 0.0646 | Avg.acc: 0.9784


Loss=0.005912124644964933 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.82it/s]

- Avg. test_loss: 0.2899 | Avg. test_acc: 0.9029
Epoch:  41


Loss=0.4311070144176483 Batch_id=448 Accuracy=0.8108108108108109: 100%|██████████| 449/449 [00:38<00:00, 11.57it/s]


- Avg.loss: 0.0714 | Avg.acc: 0.9759


Loss=0.03131125122308731 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.82it/s]

- Avg. test_loss: 0.3429 | Avg. test_acc: 0.8982
Epoch:  42


Loss=0.12928712368011475 Batch_id=448 Accuracy=0.972972972972973: 100%|██████████| 449/449 [00:38<00:00, 11.56it/s]


- Avg.loss: 0.0710 | Avg.acc: 0.9759


Loss=0.05271012336015701 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.72it/s]

- Avg. test_loss: 0.2832 | Avg. test_acc: 0.8996
Epoch:  43


Loss=0.2520104944705963 Batch_id=448 Accuracy=0.9459459459459459: 100%|██████████| 449/449 [00:39<00:00, 11.51it/s]


- Avg.loss: 0.0609 | Avg.acc: 0.9806


Loss=0.005521139595657587 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.73it/s]

- Avg. test_loss: 0.3305 | Avg. test_acc: 0.9050
Epoch:  44


Loss=0.7527603507041931 Batch_id=448 Accuracy=0.5945945945945946: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]


- Avg.loss: 0.0583 | Avg.acc: 0.9812


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.3565 | Avg. test_acc: 0.9025
Epoch:  45


Loss=0.11376631259918213 Batch_id=448 Accuracy=0.9459459459459459: 100%|██████████| 449/449 [00:39<00:00, 11.42it/s]


- Avg.loss: 0.0587 | Avg.acc: 0.9794


Loss=0.0056410119868814945 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.84it/s]

- Avg. test_loss: 0.2952 | Avg. test_acc: 0.8951
Epoch    45: reducing learning rate of group 0 to 1.0000e-05.
Epoch:  46


Loss=0.4677848219871521 Batch_id=448 Accuracy=0.8378378378378378: 100%|██████████| 449/449 [00:38<00:00, 11.54it/s]


- Avg.loss: 0.0521 | Avg.acc: 0.9836


Loss=0.16278159618377686 Batch_id=0 Accuracy=0.890625:   0%|          | 1/449 [00:00<00:50,  8.81it/s]

- Avg. test_loss: 0.3270 | Avg. test_acc: 0.8987
Epoch:  47


Loss=0.046324048191308975 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.51it/s]     


- Avg.loss: 0.0512 | Avg.acc: 0.9830
- Avg. test_loss: 0.2165 | Avg. test_acc: 0.9202


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  48


Loss=0.10105442255735397 Batch_id=448 Accuracy=0.918918918918919: 100%|██████████| 449/449 [00:39<00:00, 11.39it/s]


- Avg.loss: 0.0458 | Avg.acc: 0.9848


Loss=0.003191237337887287 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.75it/s]

- Avg. test_loss: 0.3252 | Avg. test_acc: 0.9037
Epoch:  49


Loss=0.10307157039642334 Batch_id=448 Accuracy=0.972972972972973: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.0508 | Avg.acc: 0.9832


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.3314 | Avg. test_acc: 0.8902
Epoch:  50


Loss=0.009382076561450958 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.43it/s]     


- Avg.loss: 0.0437 | Avg.acc: 0.9852
- Avg. test_loss: 0.2958 | Avg. test_acc: 0.9090
Optimal result: test_loss: 0.2165 | test_acc: 0.9202
 - Finished Training of fold  2  - 
Fold  3  /  5  ********************* 


Loss=1.9279733896255493 Batch_id=0 Accuracy=0.203125:   0%|          | 1/449 [00:00<00:50,  8.94it/s]

Epoch:  1


Loss=0.8514169454574585 Batch_id=448 Accuracy=0.631578947368421: 100%|██████████| 449/449 [00:38<00:00, 11.54it/s]


- Avg.loss: 1.3321 | Avg.acc: 0.4476


Loss=0.8482736945152283 Batch_id=0 Accuracy=0.625:   0%|          | 1/449 [00:00<00:50,  8.79it/s]

- Avg. test_loss: 1.1296 | Avg. test_acc: 0.5176
* Update optimal model
Epoch:  2


Loss=2.8241629600524902 Batch_id=448 Accuracy=0.15789473684210525: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]


- Avg.loss: 0.9653 | Avg.acc: 0.5791
- Avg. test_loss: 1.1215 | Avg. test_acc: 0.5487


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  3


Loss=1.6669842004776 Batch_id=448 Accuracy=0.13157894736842105: 100%|██████████| 449/449 [00:39<00:00, 11.42it/s]


- Avg.loss: 0.8683 | Avg.acc: 0.6288
- Avg. test_loss: 0.8332 | Avg. test_acc: 0.6174


Loss=0.6382387280464172 Batch_id=0 Accuracy=0.640625:   0%|          | 1/449 [00:00<00:52,  8.53it/s]

* Update optimal model
Epoch:  4


Loss=0.8057974576950073 Batch_id=448 Accuracy=0.6052631578947368: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.7996 | Avg.acc: 0.6550


Loss=3.6454570293426514 Batch_id=0 Accuracy=0.46875:   0%|          | 1/449 [00:00<00:51,  8.76it/s]

- Avg. test_loss: 0.9320 | Avg. test_acc: 0.6065
Epoch:  5


Loss=0.590202271938324 Batch_id=448 Accuracy=0.7105263157894737: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.6467 | Avg.acc: 0.7470


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 1.5902 | Avg. test_acc: 0.5976
Epoch:  6


Loss=0.44655463099479675 Batch_id=448 Accuracy=0.7894736842105263: 100%|██████████| 449/449 [00:39<00:00, 11.41it/s]


- Avg.loss: 0.5641 | Avg.acc: 0.7838
- Avg. test_loss: 0.6223 | Avg. test_acc: 0.7528


Loss=0.7336375713348389 Batch_id=0 Accuracy=0.875:   0%|          | 1/449 [00:00<01:10,  6.38it/s]

* Update optimal model
Epoch:  7


Loss=0.3449840843677521 Batch_id=448 Accuracy=0.8947368421052632: 100%|██████████| 449/449 [00:39<00:00, 11.49it/s]


- Avg.loss: 0.5426 | Avg.acc: 0.7892


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 2.1711 | Avg. test_acc: 0.5260
Epoch:  8


Loss=0.7531054019927979 Batch_id=448 Accuracy=0.631578947368421: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]


- Avg.loss: 0.5418 | Avg.acc: 0.8024


Loss=0.41873565316200256 Batch_id=0 Accuracy=0.921875:   0%|          | 1/449 [00:00<00:50,  8.82it/s]

- Avg. test_loss: 1.4531 | Avg. test_acc: 0.5479
Epoch:  9


Loss=0.48401471972465515 Batch_id=448 Accuracy=0.7894736842105263: 100%|██████████| 449/449 [00:38<00:00, 11.53it/s]


- Avg.loss: 0.4553 | Avg.acc: 0.8346
- Avg. test_loss: 0.6329 | Avg. test_acc: 0.7610


Loss=0.2269863337278366 Batch_id=0 Accuracy=0.953125:   0%|          | 1/449 [00:00<01:10,  6.37it/s]

* Update optimal model
Epoch:  10


Loss=0.12018001824617386 Batch_id=448 Accuracy=0.9736842105263158: 100%|██████████| 449/449 [00:39<00:00, 11.49it/s]


- Avg.loss: 0.4289 | Avg.acc: 0.8487


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.7602 | Avg. test_acc: 0.7382
Epoch:  11


Loss=0.1990685909986496 Batch_id=448 Accuracy=0.9210526315789473: 100%|██████████| 449/449 [00:39<00:00, 11.39it/s]


- Avg.loss: 0.3471 | Avg.acc: 0.8813
- Avg. test_loss: 0.5555 | Avg. test_acc: 0.8179


Loss=0.47953447699546814 Batch_id=0 Accuracy=0.796875:   0%|          | 1/449 [00:00<01:09,  6.43it/s]

* Update optimal model
Epoch:  12


Loss=0.19482775032520294 Batch_id=448 Accuracy=0.9210526315789473: 100%|██████████| 449/449 [00:39<00:00, 11.46it/s]


- Avg.loss: 0.3248 | Avg.acc: 0.8978
- Avg. test_loss: 0.4870 | Avg. test_acc: 0.8442


Loss=0.15742960572242737 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<01:10,  6.40it/s]

* Update optimal model
Epoch:  13


Loss=1.5570156574249268 Batch_id=448 Accuracy=0.7105263157894737: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]


- Avg.loss: 0.2615 | Avg.acc: 0.9181


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 1.0506 | Avg. test_acc: 0.7435
Epoch:  14


Loss=0.15864600241184235 Batch_id=448 Accuracy=0.9736842105263158: 100%|██████████| 449/449 [00:39<00:00, 11.42it/s]


- Avg.loss: 0.2517 | Avg.acc: 0.9191


Loss=0.09580014646053314 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<00:51,  8.78it/s]

- Avg. test_loss: 0.6794 | Avg. test_acc: 0.8138
Epoch:  15


Loss=0.2868410050868988 Batch_id=448 Accuracy=0.9473684210526315: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]


- Avg.loss: 0.2867 | Avg.acc: 0.9092


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 1.4392 | Avg. test_acc: 0.6439
Epoch:  16


Loss=0.13358601927757263 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]      


- Avg.loss: 0.2533 | Avg.acc: 0.9213


Loss=0.15648461878299713 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<00:51,  8.75it/s]

- Avg. test_loss: 0.5726 | Avg. test_acc: 0.7847
Epoch:  17


Loss=0.05693623423576355 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]      


- Avg.loss: 0.2145 | Avg.acc: 0.9325


Loss=1.0433461666107178 Batch_id=0 Accuracy=0.65625:   0%|          | 1/449 [00:00<00:50,  8.86it/s]

- Avg. test_loss: 1.3946 | Avg. test_acc: 0.6273
Epoch:  18


Loss=0.09953495860099792 Batch_id=448 Accuracy=0.9736842105263158: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]


- Avg.loss: 0.2034 | Avg.acc: 0.9372


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 1.1064 | Avg. test_acc: 0.7353
Epoch    18: reducing learning rate of group 0 to 1.0000e-04.
Epoch:  19


Loss=2.133416175842285 Batch_id=448 Accuracy=0.7368421052631579: 100%|██████████| 449/449 [00:39<00:00, 11.36it/s]


- Avg.loss: 0.1445 | Avg.acc: 0.9541
- Avg. test_loss: 0.4999 | Avg. test_acc: 0.8628


Loss=0.8722628355026245 Batch_id=0 Accuracy=0.6875:   0%|          | 1/449 [00:00<01:08,  6.51it/s]

* Update optimal model
Epoch:  20


Loss=0.054143521934747696 Batch_id=448 Accuracy=0.9736842105263158: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]


- Avg.loss: 0.1125 | Avg.acc: 0.9643
- Avg. test_loss: 0.2845 | Avg. test_acc: 0.9079


Loss=0.004405001178383827 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<01:08,  6.50it/s]

* Update optimal model
Epoch:  21


Loss=0.020102914422750473 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]     


- Avg.loss: 0.0796 | Avg.acc: 0.9757
- Avg. test_loss: 0.2806 | Avg. test_acc: 0.9135


Loss=0.06487839668989182 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<01:08,  6.52it/s]

* Update optimal model
Epoch:  22


Loss=0.7211451530456543 Batch_id=448 Accuracy=0.7368421052631579: 100%|██████████| 449/449 [00:38<00:00, 11.51it/s]


- Avg.loss: 0.0787 | Avg.acc: 0.9757
- Avg. test_loss: 0.3080 | Avg. test_acc: 0.9144


Loss=0.002895342418923974 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<01:08,  6.51it/s]

* Update optimal model
Epoch:  23


Loss=0.22607946395874023 Batch_id=448 Accuracy=0.8947368421052632: 100%|██████████| 449/449 [00:39<00:00, 11.46it/s]


- Avg.loss: 0.0809 | Avg.acc: 0.9757


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.4416 | Avg. test_acc: 0.8693
Epoch:  24


Loss=0.39276644587516785 Batch_id=448 Accuracy=0.8947368421052632: 100%|██████████| 449/449 [00:39<00:00, 11.41it/s]


- Avg.loss: 0.0779 | Avg.acc: 0.9761


Loss=0.004940242972224951 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.64it/s]

- Avg. test_loss: 0.3296 | Avg. test_acc: 0.8902
Epoch:  25


Loss=0.26916107535362244 Batch_id=448 Accuracy=0.868421052631579: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]


- Avg.loss: 0.0677 | Avg.acc: 0.9769


Loss=0.026373181492090225 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<00:52,  8.60it/s]

- Avg. test_loss: 0.2944 | Avg. test_acc: 0.9136
Epoch:  26


Loss=0.050699666142463684 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:38<00:00, 11.52it/s]     


- Avg.loss: 0.0783 | Avg.acc: 0.9744


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2610 | Avg. test_acc: 0.9108
Epoch:  27


Loss=0.6550506353378296 Batch_id=448 Accuracy=0.7368421052631579: 100%|██████████| 449/449 [00:39<00:00, 11.40it/s]


- Avg.loss: 0.0556 | Avg.acc: 0.9825


Loss=0.1508484035730362 Batch_id=0 Accuracy=0.953125:   0%|          | 1/449 [00:00<00:51,  8.72it/s]

- Avg. test_loss: 0.2999 | Avg. test_acc: 0.9108
Epoch:  28


Loss=0.043281134217977524 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.45it/s]     


- Avg.loss: 0.0708 | Avg.acc: 0.9772
- Avg. test_loss: 0.2876 | Avg. test_acc: 0.9232


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  29


Loss=0.7291671633720398 Batch_id=448 Accuracy=0.8157894736842105: 100%|██████████| 449/449 [00:39<00:00, 11.44it/s]


- Avg.loss: 0.0600 | Avg.acc: 0.9817


Loss=0.5788686275482178 Batch_id=0 Accuracy=0.828125:   0%|          | 1/449 [00:00<00:50,  8.83it/s]

- Avg. test_loss: 0.2903 | Avg. test_acc: 0.9099
Epoch:  30


Loss=0.6526706218719482 Batch_id=448 Accuracy=0.7368421052631579: 100%|██████████| 449/449 [00:39<00:00, 11.46it/s]


- Avg.loss: 0.0707 | Avg.acc: 0.9792


Loss=0.029427846893668175 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.83it/s]

- Avg. test_loss: 0.3692 | Avg. test_acc: 0.8926
Epoch:  31


Loss=0.004145320970565081 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.50it/s]     


- Avg.loss: 0.0648 | Avg.acc: 0.9809
- Avg. test_loss: 0.2765 | Avg. test_acc: 0.9241


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  32


Loss=0.017513422295451164 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.39it/s]     


- Avg.loss: 0.0564 | Avg.acc: 0.9827


Loss=0.006682279985398054 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.66it/s]

- Avg. test_loss: 0.3863 | Avg. test_acc: 0.9150
Epoch:  33


Loss=0.2954237759113312 Batch_id=448 Accuracy=0.8157894736842105: 100%|██████████| 449/449 [00:39<00:00, 11.49it/s]


- Avg.loss: 0.0616 | Avg.acc: 0.9829


Loss=0.00330716441385448 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.80it/s]

- Avg. test_loss: 0.2704 | Avg. test_acc: 0.9167
Epoch:  34


Loss=0.010868620127439499 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.51it/s]     


- Avg.loss: 0.0677 | Avg.acc: 0.9789


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2669 | Avg. test_acc: 0.9193
Epoch:  35


Loss=0.21855856478214264 Batch_id=448 Accuracy=0.868421052631579: 100%|██████████| 449/449 [00:39<00:00, 11.40it/s]


- Avg.loss: 0.0549 | Avg.acc: 0.9836


Loss=0.002347191795706749 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:51,  8.70it/s]

- Avg. test_loss: 0.2903 | Avg. test_acc: 0.9151
Epoch:  36


Loss=0.7722010612487793 Batch_id=448 Accuracy=0.6842105263157895: 100%|██████████| 449/449 [00:39<00:00, 11.47it/s]


- Avg.loss: 0.0625 | Avg.acc: 0.9816


  0%|          | 0/449 [00:00<?, ?it/s]

- Avg. test_loss: 0.2724 | Avg. test_acc: 0.9143
Epoch:  37


Loss=0.012639102526009083 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.40it/s]     


- Avg.loss: 0.0590 | Avg.acc: 0.9824


Loss=0.003999821841716766 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<00:50,  8.79it/s]

- Avg. test_loss: 0.2406 | Avg. test_acc: 0.9154
Epoch    37: reducing learning rate of group 0 to 1.0000e-05.
Epoch:  38


Loss=0.17883922159671783 Batch_id=448 Accuracy=0.9210526315789473: 100%|██████████| 449/449 [00:39<00:00, 11.48it/s]


- Avg.loss: 0.0454 | Avg.acc: 0.9852


Loss=0.11569508165121078 Batch_id=0 Accuracy=0.953125:   0%|          | 1/449 [00:00<00:50,  8.80it/s]

- Avg. test_loss: 0.2387 | Avg. test_acc: 0.9221
Epoch:  39


Loss=0.26992619037628174 Batch_id=448 Accuracy=0.9210526315789473: 100%|██████████| 449/449 [00:39<00:00, 11.50it/s]


- Avg.loss: 0.0458 | Avg.acc: 0.9860
- Avg. test_loss: 0.2125 | Avg. test_acc: 0.9335


  0%|          | 0/449 [00:00<?, ?it/s]

* Update optimal model
Epoch:  40


Loss=0.0007145774434320629 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:39<00:00, 11.41it/s]    


- Avg.loss: 0.0390 | Avg.acc: 0.9874
- Avg. test_loss: 0.2254 | Avg. test_acc: 0.9349


Loss=0.00679016625508666 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<01:08,  6.52it/s]

* Update optimal model
Epoch:  41


Loss=0.036302000284194946 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:38<00:00, 11.54it/s]     


- Avg.loss: 0.0369 | Avg.acc: 0.9882


Loss=0.07433376461267471 Batch_id=0 Accuracy=0.984375:   0%|          | 1/449 [00:00<00:50,  8.84it/s]

- Avg. test_loss: 0.1873 | Avg. test_acc: 0.9292
Epoch:  42


Loss=0.0610218308866024 Batch_id=448 Accuracy=1.0: 100%|██████████| 449/449 [00:38<00:00, 11.54it/s]       


- Avg.loss: 0.0415 | Avg.acc: 0.9897
- Avg. test_loss: 0.2359 | Avg. test_acc: 0.9360


Loss=0.0015432958025485277 Batch_id=0 Accuracy=1.0:   0%|          | 1/449 [00:00<01:08,  6.49it/s]

* Update optimal model
Epoch:  43


Loss=0.032886479049921036 Batch_id=376 Accuracy=1.0:  84%|████████▍ | 377/449 [00:32<00:06, 11.48it/s]     

In [None]:
# Persist the per-fold optimal results to disk.
# A context manager guarantees the file is flushed and closed even if
# pickling raises, instead of leaving the handle to the garbage collector.
# NOTE(review): the payload is a pickle although the file name ends in .hdf5;
# the path is kept as-is because other code may load it by this name — confirm
# before renaming.
with open('exploss_crossval/optimal_folds.hdf5', 'wb') as folds_file:
    pickle.dump(optimal_folds, folds_file)

In [19]:
# Show the collected cross-validation results: one [test_loss, test_acc] pair per fold.
optimal_folds

[[0.24028798388709063, 0.9281136806910003],
 [0.21653891619897944, 0.9201727500696573],
 [0.17440381800836052, 0.9423157308067438],
 [0.25318973042331433, 0.910965584506061],
 [0.2775112829517983, 0.9175142817333147]]