In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as  F
import torch.nn as nn
from pathlib import Path
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from collections import OrderedDict
from tqdm import tqdm
import sys
import time
from sklearn.metrics import accuracy_score

In [2]:
# Use the GPU when PyTorch can see one and fall back to the CPU otherwise, so
# the notebook still runs end-to-end on CPU-only hosts.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Gates the cuDNN determinism flags set in reproducibilitySeed().
use_gpu = DEVICE == 'cuda'

In [3]:
def reproducibilitySeed(seed=0):
    """
    Ensure reproducibility of results by seeding the torch and numpy RNGs.

    Args:
        seed: integer seed applied to both libraries. Defaults to 0, the
            value that used to be hard-coded.

    When the notebook-level ``use_gpu`` flag is set, cuDNN is additionally
    switched to deterministic mode and its benchmark mode is turned off.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    if use_gpu:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

reproducibilitySeed()

### *Loading CIFAR10*

In [4]:


# Passed as `num_workers` to both DataLoaders built in get_cifar().
NUM_WORKERS = 2


class TensorImgSet(Dataset):
    """Dataset over an ``(images, targets)`` pair with an optional transform.

    ``tensors[0]`` holds the images and ``tensors[1]`` the targets; the
    transform, when given, is applied to the image only.
    """

    def __init__(self, tensors, transform=None):
        images, labels = tensors[0], tensors[1]
        self.tensors = tensors
        self.imgs = images
        self.targets = labels
        self.transform = transform
        # Length is fixed at construction time from the image container
        self.len = len(images)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        sample = self.imgs[index]
        sample = self.transform(sample) if self.transform else sample
        return sample, self.targets[index]



def get_cifar(num_classes=100, dataset_dir="./data", batch_size=128,
              use_cifar_10_1=False):
    """Build the CIFAR train and test DataLoaders.

    CIFAR10 is selected when ``num_classes == 10``; any other value selects
    CIFAR100. Both splits are downloaded into ``dataset_dir`` if needed.
    The training split is augmented with a padded random crop and a random
    horizontal flip; both splits are converted to tensors and normalized.
    ``use_cifar_10_1`` is accepted but never read.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    is_cifar10 = num_classes == 10
    print("Loading CIFAR10..." if is_cifar10 else "Loading CIFAR100...")
    if is_cifar10:
        dataset = torchvision.datasets.CIFAR10
        normalize = transforms.Normalize(
            (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    else:
        dataset = torchvision.datasets.CIFAR100
        normalize = transforms.Normalize(
            mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276])

    # Steps shared by both splits; augmentation is prepended for training only
    tensor_steps = [transforms.ToTensor(), normalize]
    augment_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]

    trainset = dataset(root=dataset_dir, train=True, download=True,
                       transform=transforms.Compose(augment_steps + tensor_steps))
    testset = dataset(root=dataset_dir, train=False, download=True,
                      transform=transforms.Compose(tensor_steps))

    loader_options = dict(batch_size=batch_size,
                          num_workers=NUM_WORKERS,
                          pin_memory=True)
    train_loader = torch.utils.data.DataLoader(
        trainset, shuffle=True, **loader_options)
    test_loader = torch.utils.data.DataLoader(
        testset, shuffle=False, **loader_options)
    return train_loader, test_loader

### *Creating Models*

In [5]:
class BasicBlock(nn.Module):
    """Residual block with two 3x3 convolutions.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut is the
    identity unless the stride or channel count changes, in which case it is
    a 1x1 convolution followed by BN. ``forward`` returns the sum without a
    trailing ReLU; the enclosing network decides where activations go.

    Args:
        in_planes: number of input channels.
        planes: number of output channels (``expansion`` is 1).
        stride: stride of the first convolution and of the projection shortcut.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity by default; projection when the output shape differs
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        # A leading `out = F.relu(x)` used to sit here, but its result was
        # overwritten before being read, so dropping it leaves outputs unchanged.
        # NOTE(review): that statement suggests the block input may have been
        # meant to be rectified first; doing so would change the outputs of
        # already-trained checkpoints, so it is deliberately not done here.
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return out


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions.

    The last convolution widens the output to ``expansion * planes`` channels.
    The shortcut is the identity unless the stride or channel count changes,
    in which case it is a 1x1 convolution followed by BN. ``forward`` returns
    the sum without a trailing ReLU.

    Args:
        in_planes: number of input channels.
        planes: width of the two inner convolutions.
        stride: stride of the 3x3 convolution and of the projection shortcut.
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)

        # Identity by default; projection when the output shape differs
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        # A leading `out = F.relu(x)` used to sit here, but its result was
        # overwritten before being read, so dropping it leaves outputs unchanged.
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        return out


class ResNet(nn.Module):
    """Four-stage ResNet with a 3x3 stem and base stage widths 64/128/256/512.

    Args:
        block: residual block class; it must expose ``expansion`` and accept
            ``(in_planes, planes, stride)``.
        num_blocks: number of blocks in each of the four stages.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # Running input width; read and advanced by _make_layer
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)
        # NOTE(review): these are the base widths; with a block whose
        # expansion is > 1 the real feature maps are `expansion` times wider.
        # Confirm that callers of get_channel_num() expect the base widths.
        self.n_channels = [64, 128, 256, 512]

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, is_feat=False, use_relu=True):
        """Compute the logits, optionally with intermediate features.

        Args:
            x: input batch with 3 channels.
            is_feat: when True return ``([feat1..feat4], pool, out)`` instead
                of ``out`` alone.
            use_relu: when True a ReLU follows the stem and stages 1-3. The
                ReLU after stage 4 is applied regardless.
        """
        out = self.conv1(x)
        out = self.bn1(out)
        if use_relu:
            out = F.relu(out)
        feat1 = self.layer1(out)
        if use_relu:
            feat1 = F.relu(feat1)
        feat2 = self.layer2(feat1)
        if use_relu:
            feat2 = F.relu(feat2)
        feat3 = self.layer3(feat2)
        if use_relu:
            feat3 = F.relu(feat3)

        feat4 = self.layer4(feat3)
        feat4 = F.relu(feat4)
        # A 4x4 window reduces the stage-4 map to 1x1 for 32x32 inputs
        # (strides 1, 2, 2, 2), which is what the linear layer's size assumes.
        pool = F.avg_pool2d(feat4, 4)
        pool = pool.view(pool.size(0), -1)
        out = self.linear(pool)

        if is_feat:
            return [feat1, feat2, feat3, feat4], pool, out

        return out

    def get_bn_before_relu(self):
        """Return the last BatchNorm of the final block of each stage.

        Raises:
            TypeError: if the stages are built from neither Bottleneck nor
                BasicBlock. This used to print the message and then fail
                with an unrelated UnboundLocalError.
        """
        if isinstance(self.layer1[0], Bottleneck):
            last_bn = 'bn3'
        elif isinstance(self.layer1[0], BasicBlock):
            last_bn = 'bn2'
        else:
            raise TypeError('ResNet unknown block error !!!')

        stages = (self.layer1, self.layer2, self.layer3, self.layer4)
        return [getattr(stage[-1], last_bn) for stage in stages]

    def get_channel_num(self):
        """Return the per-stage channel list stored at construction."""
        return self.n_channels

    def extract_feature(self, x, preReLU=False):
        """Return ``([feat1..feat4], out)``.

        Unlike ``forward``, no ReLU is applied after the stem or between
        stages here. The returned features are rectified unless ``preReLU``
        is True, and the logits are always computed from the rectified
        stage-4 output.
        """

        x = self.conv1(x)
        x = self.bn1(x)

        feat1 = self.layer1(x)
        feat2 = self.layer2(feat1)
        feat3 = self.layer3(feat2)
        feat4 = self.layer4(feat3)

        x = F.relu(feat4)
        x = F.avg_pool2d(x, 4)
        x = x.view(x.size(0), -1)
        out = self.linear(x)

        if not preReLU:
            feat1 = F.relu(feat1)
            feat2 = F.relu(feat2)
            feat3 = F.relu(feat3)
            feat4 = F.relu(feat4)

        return [feat1, feat2, feat3, feat4], out


class ResNetSmall(nn.Module):
    """Three-stage ResNet with base stage widths 16/32/64 and a 64-channel stem.

    Args:
        block: residual block class; it must expose ``expansion`` and accept
            ``(in_planes, planes, stride)``.
        num_blocks: number of blocks in each of the three stages.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNetSmall, self).__init__()
        # Running input width; read and advanced by _make_layer. The stem
        # emits 64 channels, so stage 1 starts from 64 rather than 16.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        # 256 * expansion = (64 * expansion channels) x (2 x 2 pooled map):
        # a 32x32 input is 8x8 after stage 3 and 2x2 after the 4x4 pooling.
        self.linear = nn.Linear(256 * block.expansion, num_classes)
        self.n_channels = [16, 32, 64]

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, is_feat=False, use_relu=True):
        """Compute the logits, optionally with intermediate features.

        Args:
            x: input batch with 3 channels.
            is_feat: when True return ``([feat1, feat2, feat3], pool, out)``
                instead of ``out`` alone.
            use_relu: when True a ReLU follows the stem and stages 1-2. The
                ReLU after stage 3 is applied regardless.
        """
        out = self.conv1(x)
        out = self.bn1(out)
        if use_relu:
            out = F.relu(out)
        feat1 = self.layer1(out)
        if use_relu:
            feat1 = F.relu(feat1)
        feat2 = self.layer2(feat1)
        if use_relu:
            feat2 = F.relu(feat2)
        feat3 = self.layer3(feat2)

        # the last relu is always included
        feat3 = F.relu(feat3)
        pool = F.avg_pool2d(feat3, 4)
        pool = pool.view(pool.size(0), -1)
        out = self.linear(pool)

        if is_feat:
            return [feat1, feat2, feat3], pool, out

        return out

    def get_bn_before_relu(self):
        """Return the last BatchNorm of the final block of each stage.

        Raises:
            TypeError: if the stages are built from neither Bottleneck nor
                BasicBlock. This used to print the message and then fail
                with an unrelated UnboundLocalError.
        """
        if isinstance(self.layer1[0], Bottleneck):
            last_bn = 'bn3'
        elif isinstance(self.layer1[0], BasicBlock):
            last_bn = 'bn2'
        else:
            raise TypeError('ResNet unknown block error !!!')

        stages = (self.layer1, self.layer2, self.layer3)
        return [getattr(stage[-1], last_bn) for stage in stages]

    def get_channel_num(self):
        """Return the per-stage channel list stored at construction."""
        return self.n_channels

    def extract_feature(self, x, preReLU=False):
        """Return ``([feat1, feat2, feat3], out)``.

        Unlike ``forward``, no ReLU is applied after the stem or between
        stages here. The returned features are rectified unless ``preReLU``
        is True, and the logits are always computed from the rectified
        stage-3 output.
        """

        x = self.conv1(x)
        x = self.bn1(x)

        feat1 = self.layer1(x)
        feat2 = self.layer2(feat1)
        feat3 = self.layer3(feat2)

        x = F.relu(feat3)
        x = F.avg_pool2d(x, 4)
        x = x.view(x.size(0), -1)
        out = self.linear(x)

        if not preReLU:
            feat1 = F.relu(feat1)
            feat2 = F.relu(feat2)
            feat3 = F.relu(feat3)

        return [feat1, feat2, feat3], out


def resnet8(**kwargs):
    """ResNetSmall of BasicBlocks, 1 block per stage; kwargs go to the constructor."""
    blocks_per_stage = [1] * 3
    return ResNetSmall(BasicBlock, blocks_per_stage, **kwargs)


def resnet14(**kwargs):
    """ResNetSmall of BasicBlocks, 2 blocks per stage; kwargs go to the constructor."""
    blocks_per_stage = [2] * 3
    return ResNetSmall(BasicBlock, blocks_per_stage, **kwargs)


def resnet20(**kwargs):
    """ResNetSmall of BasicBlocks, 3 blocks per stage; kwargs go to the constructor."""
    blocks_per_stage = [3] * 3
    return ResNetSmall(BasicBlock, blocks_per_stage, **kwargs)


def resnet26(**kwargs):
    """ResNetSmall of BasicBlocks, 4 blocks per stage; kwargs go to the constructor."""
    blocks_per_stage = [4] * 3
    return ResNetSmall(BasicBlock, blocks_per_stage, **kwargs)


def resnet32(**kwargs):
    """ResNetSmall of BasicBlocks, 5 blocks per stage; kwargs go to the constructor."""
    blocks_per_stage = [5] * 3
    return ResNetSmall(BasicBlock, blocks_per_stage, **kwargs)


def resnet44(**kwargs):
    """ResNetSmall of BasicBlocks, 7 blocks per stage; kwargs go to the constructor."""
    blocks_per_stage = [7] * 3
    return ResNetSmall(BasicBlock, blocks_per_stage, **kwargs)


def resnet56(**kwargs):
    """ResNetSmall of BasicBlocks, 9 blocks per stage; kwargs go to the constructor."""
    blocks_per_stage = [9] * 3
    return ResNetSmall(BasicBlock, blocks_per_stage, **kwargs)


def resnet10(**kwargs):
    """ResNet of BasicBlocks with stage depths (1, 1, 1, 1); kwargs go to the constructor."""
    stage_depths = [1, 1, 1, 1]
    return ResNet(BasicBlock, stage_depths, **kwargs)


def resnet18(**kwargs):
    """ResNet of BasicBlocks with stage depths (2, 2, 2, 2); kwargs go to the constructor."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths, **kwargs)


def resnet34(**kwargs):
    """ResNet of BasicBlocks with stage depths (3, 4, 6, 3); kwargs go to the constructor."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, **kwargs)


def resnet50(**kwargs):
    """ResNet of Bottlenecks with stage depths (3, 4, 6, 3); kwargs go to the constructor."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)


def resnet101(**kwargs):
    """ResNet of Bottlenecks with stage depths (3, 4, 23, 3); kwargs go to the constructor."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)


def resnet152(**kwargs):
    """ResNet of Bottlenecks with stage depths (3, 8, 36, 3); kwargs go to the constructor."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)


def test():
    """Smoke test: push one random 3x32x32 input through resnet18 and print the output size."""
    model = resnet18()
    dummy_batch = torch.randn(1, 3, 32, 32)
    logits = model(dummy_batch)
    print(logits.size())

In [6]:
def load_checkpoint(model, checkpoint_path, device):
    """Load weights from a checkpoint into ``model`` and return the model.

    Two checkpoint layouts are accepted:
      * a dict holding the weights under ``"model_state_dict"`` (the layout
        written by ``save``), and
      * a bare state dict, as produced by
        ``torch.save(model.state_dict(), path)``.

    A leading ``"module."`` on parameter names is stripped, and a
    ``DataParallel`` model receives the weights on its wrapped module.

    Args:
        model: network (optionally wrapped in ``torch.nn.DataParallel``).
        checkpoint_path: path or file-like object passed to ``torch.load``.
        device: device specifier used as ``map_location``.

    Returns:
        The same ``model`` object, with the weights loaded.
    """
    device = torch.device(device)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    model_ckp = torch.load(checkpoint_path, map_location=device)

    if isinstance(model_ckp, dict) and "model_state_dict" in model_ckp:
        state_dict = model_ckp["model_state_dict"]
    else:
        state_dict = model_ckp

    # handle both dataparallel and normal models
    prefix = "module."
    model_tmp_dict = OrderedDict(
        (name[len(prefix):] if name.startswith(prefix) else name, value)
        for name, value in state_dict.items()
    )

    target = model.module if isinstance(model, torch.nn.DataParallel) else model
    target.load_state_dict(model_tmp_dict)
    return model

In [7]:
# Teacher network: resnet18() with its default number of output classes.
teacher_model=resnet18()
# NOTE(review): hard-coded, environment-specific path; adjust when running elsewhere.
PATH="../input/model-weights/best_model (1).pt"
# The model is still on the CPU at this point, so map the stored tensors to
# the CPU as well; this also keeps a checkpoint saved from a GPU loadable on
# a CPU-only host.
teacher_model.load_state_dict(torch.load(PATH, map_location="cpu"))

<All keys matched successfully>

In [8]:
# Switch the teacher to evaluation mode; train_fn only runs it under torch.no_grad().
teacher_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [9]:
# Move the teacher to the configured device.
teacher_model.to(DEVICE)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [10]:
#avg_val_loss, score = valid_fn(test_loader, teacher_model, criterion, DEVICE)

In [11]:
#score

In [12]:
#for param in teacher_model.parameters():
       # param.requires_grad=False

In [13]:
#for param in teacher_model.linear.parameters():
    #param.requires_grad = True

In [14]:
#import torchvision.models as models
#resnet18 = models.resnet18(pretrained=True)

In [15]:
#teacher_model=resnet18

In [16]:
#teacher_model.fc

In [17]:
#for param in teacher_model.parameters():
   # param.requires_grad = False

In [18]:
#teacher_model.fc= nn.Linear(512,10)

In [19]:
# Student network: resnet20(), i.e. ResNetSmall built from BasicBlock with 3 blocks per stage.
student_model=resnet20()

In [20]:
learning_rate=0.1
num_epochs=200
# Nesterov SGD on the student. The rate now comes from `learning_rate`
# instead of a second hard-coded literal (same value, 0.1).
optimizer_student =torch.optim.SGD(student_model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0005, nesterov=True)
# Multiply the learning rate by 0.1 at epochs 66 and 132.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases; confirm against the installed version.
scheduler_student = torch.optim.lr_scheduler.MultiStepLR(optimizer_student, milestones=[66,132], gamma=0.1,verbose=True)

Adjusting learning rate of group 0 to 1.0000e-01.


In [21]:
def freeze_teacher(t_net):
    """Disable gradients on every parameter of ``t_net``, put it in eval mode, and return it."""
    # Module-level switch: flips requires_grad on all parameters at once
    t_net.requires_grad_(False)
    # Evaluation mode for the teacher
    t_net.eval()
    return t_net

In [22]:
# Move the student to the configured device.
student_model.to(DEVICE)

ResNetSmall(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, 

In [23]:
#avg_val_loss, score = valid_fn(test_loader, student_model, criterion, DEVICE)
#score

In [24]:
def init_progress_bar(train_loader):
    """Create a tqdm bar with a total of ``len(train_loader) * train_loader.batch_size``.

    The bar shows description, percentage, counts, elapsed/remaining time and
    postfix; the graphical bar itself is left out of the format.
    """
    samples_per_batch = train_loader.batch_size
    layout = "".join([
        "{desc}{percentage:3.0f}%",
        # "|{bar}|",
        " {n_fmt}/{total_fmt} [{elapsed} < {remaining}]",
        "{postfix}",
    ])
    # Checked before the bar is created: is stderr attached to a terminal?
    stderr_redirected = not sys.stderr.isatty()
    progress = tqdm(total=len(train_loader) * samples_per_batch,
                    bar_format=layout)
    if stderr_redirected:
        # a trick to allow execution in environments where stderr is redirected
        progress._time = lambda: 0.0
    return progress

In [25]:
# Debug leftover: prints the function object's repr; it does not create a progress bar.
print(init_progress_bar)

<function init_progress_bar at 0x7f2b5f9ba5f0>


In [26]:
class AverageMeter(object):
    """Running weighted mean.

    Attributes: ``val`` (latest value), ``sum`` (weighted sum), ``count``
    (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count

def get_score(y_true, y_pred):
    """Score ``y_pred`` against ``y_true`` with sklearn's ``accuracy_score``."""
    accuracy = accuracy_score(y_true, y_pred)
    return accuracy

In [27]:
def train_fn(train_loader, model, criterion, optimizer, scheduler, device, alpha, temperature, teacher=None):
    """Train the student for one epoch with knowledge distillation.

    The per-batch loss is

        alpha * T^2 * KL(softmax(teacher / T) || softmax(student / T))
        + (1 - alpha) * cross_entropy(student, labels)

    with ``T = temperature``.

    Args:
        train_loader: iterable of ``(images, labels)`` batches supporting ``len()``.
        model: student network; put in training mode here.
        criterion: not used; kept so existing calls keep working.
        optimizer: optimizer that updates the student.
        scheduler: not used; kept so existing calls keep working.
        device: device the batches are moved to.
        alpha: weight of the distillation term; ``1 - alpha`` weights the
            hard-label term.
        temperature: softmax temperature applied to both sets of logits.
        teacher: network producing the soft targets. When omitted, the
            notebook-level ``teacher_model`` is used, as before. Its
            train/eval mode is not changed here.

    Returns:
        The sample-weighted mean of the combined loss over the epoch.
    """
    if teacher is None:
        teacher = teacher_model

    losses = AverageMeter()

    # Set once per epoch rather than once per batch
    model.train()
    for images, labels in tqdm(train_loader, total=len(train_loader)):
        images = images.to(device, dtype=torch.float)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # Soft targets only: no gradient flows into the teacher
        with torch.no_grad():
            large_logits = teacher(images)
        y_preds = model(images)

        soft_targets_loss = F.kl_div(
            F.log_softmax(y_preds / temperature, dim=1),
            F.softmax(large_logits / temperature, dim=1),
            reduction='batchmean')
        label_loss = F.cross_entropy(y_preds, labels)
        # Weighted sum of the two losses; the soft term is scaled by T^2
        loss = alpha * (temperature * temperature) * soft_targets_loss + (1 - alpha) * label_loss

        losses.update(loss.item(), batch_size)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    return losses.avg

def valid_fn(test_loader, model, criterion, device):
    """Evaluate ``model`` on ``test_loader``.

    Args:
        test_loader: iterable of ``(images, labels)`` batches supporting ``len()``.
        model: network to evaluate; put in evaluation mode here.
        criterion: callable ``criterion(logits, labels)`` returning a scalar loss.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, score)``: the sample-weighted mean loss, and the result
        of ``get_score`` on the argmax predictions.
    """
    losses = AverageMeter()
    preds = []
    valid_labels = []

    model.eval()
    for images, labels in tqdm(test_loader, total=len(test_loader)):
        images = images.to(device, dtype=torch.float)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # Neither the forward pass nor the loss needs an autograd graph here
        with torch.no_grad():
            y_preds = model(images)
            loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)

        preds.append(y_preds.softmax(1).to('cpu').numpy())
        valid_labels.append(labels.to('cpu').numpy())

    predictions = np.concatenate(preds)
    valid_labels = np.concatenate(valid_labels)

    score = get_score(valid_labels, predictions.argmax(1))

    return losses.avg, score

In [28]:
def calculate_loss(model, data, target, optimizer=None):
    """Run one classification step: forward, cross-entropy, backward, optimizer step.

    Args:
        model: network to train.
        data: input batch passed to ``model``.
        target: class indices for ``F.cross_entropy``.
        optimizer: optimizer to step. When omitted, the notebook-level
            ``optimizer_teacher`` is used, as before.

    Returns:
        ``(output, loss)``: the model output and the loss tensor.

    NOTE(review): gradients are not zeroed here, so the caller has to call
    ``zero_grad()`` between steps.
    """
    if optimizer is None:
        # Backward-compatible fallback. `optimizer_teacher` must exist at
        # notebook level; no visible cell defines it.
        optimizer = optimizer_teacher
    # Standard Learning Loss ( Classification Loss)
    output = model(data)
    loss = F.cross_entropy(output, target)
    loss.backward()
    optimizer.step()
    return output, loss

In [29]:
 def save(model, epoch, name):
     """Write ``model``'s state dict to ``name``, wrapped under the "model_state_dict" key.

     ``epoch`` is accepted but not stored.
     """
     checkpoint = {"model_state_dict": model.state_dict()}
     torch.save(checkpoint, name)

In [30]:
# CIFAR10 loaders with batch size 128; `use_cifar_10_1` is passed but get_cifar never reads it.
train_loader,test_loader=get_cifar(num_classes=10, dataset_dir="./data", batch_size=128,
              use_cifar_10_1=False)

Loading CIFAR10...
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [31]:
# Hard-label cross-entropy; valid_fn calls it for the validation loss (train_fn accepts it but does not call it).
criterion = nn.CrossEntropyLoss()

In [32]:
# Distillation hyper-parameters forwarded to train_fn (formerly inline literals)
ALPHA = 0.5
TEMPERATURE = 5

best_score = 0

# The student only needs to be moved once, not once per epoch
student_model.to(DEVICE)

# NOTE(review): the best checkpoint is selected on test_loader, so the reported
# best accuracy is an optimistic estimate. Confirm whether a separate
# validation split is wanted.
for epoch in range(num_epochs):
    avg_loss = train_fn(train_loader, student_model, criterion, optimizer_student, None, DEVICE, ALPHA, TEMPERATURE)
    avg_val_loss, score = valid_fn(test_loader, student_model, criterion, DEVICE)

    # The scheduler is stepped once per epoch, after that epoch's optimizer updates
    scheduler_student.step()

    print(f"Epoch {epoch+1}")
    print(f"Accuracy: {score} | Train loss: {avg_loss} | Valid loss: {avg_val_loss}")

    if score > best_score:
        print("YES")
        best_score = score
        # Overwrites the previous best weights in the working directory
        torch.save(student_model.state_dict(), "best_model.pt")

100%|██████████| 391/391 [00:40<00:00,  9.59it/s]
100%|██████████| 79/79 [00:03<00:00, 24.80it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 1
Accuracy: 0.4834 | Train loss: 6.0680620237731935 | Valid loss: 1.8559828826904297
YES


100%|██████████| 391/391 [00:34<00:00, 11.35it/s]
100%|██████████| 79/79 [00:03<00:00, 25.27it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 2
Accuracy: 0.618 | Train loss: 4.421740653686523 | Valid loss: 1.3260414360046386
YES



100%|██████████| 391/391 [00:34<00:00, 11.40it/s]
100%|██████████| 79/79 [00:03<00:00, 21.68it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 3
Accuracy: 0.6925 | Train loss: 3.5251644717407227 | Valid loss: 1.0786912509918214
YES



100%|██████████| 391/391 [00:34<00:00, 11.17it/s]
100%|██████████| 79/79 [00:03<00:00, 22.86it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 4
Accuracy: 0.7473 | Train loss: 2.9863833493041994 | Valid loss: 0.9095211145401001
YES



100%|██████████| 391/391 [00:34<00:00, 11.27it/s]
100%|██████████| 79/79 [00:03<00:00, 22.88it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 5
Accuracy: 0.7374 | Train loss: 2.616706748428345 | Valid loss: 0.9685753994941712



100%|██████████| 391/391 [00:34<00:00, 11.23it/s]
100%|██████████| 79/79 [00:03<00:00, 24.38it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 6
Accuracy: 0.7403 | Train loss: 2.397597225036621 | Valid loss: 1.0848851614952086



100%|██████████| 391/391 [00:34<00:00, 11.28it/s]
100%|██████████| 79/79 [00:03<00:00, 24.68it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 7
Accuracy: 0.7803 | Train loss: 2.245342003555298 | Valid loss: 0.864412455368042
YES



100%|██████████| 391/391 [00:34<00:00, 11.32it/s]
100%|██████████| 79/79 [00:03<00:00, 22.21it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 8
Accuracy: 0.7845 | Train loss: 2.137675756072998 | Valid loss: 0.8717600147247314
YES



100%|██████████| 391/391 [00:34<00:00, 11.23it/s]
100%|██████████| 79/79 [00:04<00:00, 19.30it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 9
Accuracy: 0.7804 | Train loss: 2.0340521461486816 | Valid loss: 0.8829697156906128



100%|██████████| 391/391 [00:35<00:00, 10.89it/s]
100%|██████████| 79/79 [00:04<00:00, 18.16it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 10
Accuracy: 0.7971 | Train loss: 1.9776791635894775 | Valid loss: 0.7771355924606324
YES



100%|██████████| 391/391 [00:35<00:00, 11.06it/s]
100%|██████████| 79/79 [00:03<00:00, 21.94it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 11
Accuracy: 0.7859 | Train loss: 1.930433922805786 | Valid loss: 0.8576076077461243



100%|██████████| 391/391 [00:35<00:00, 11.09it/s]
100%|██████████| 79/79 [00:03<00:00, 23.47it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 12
Accuracy: 0.7928 | Train loss: 1.8875936553955077 | Valid loss: 0.8391471921920777



100%|██████████| 391/391 [00:35<00:00, 10.89it/s]
100%|██████████| 79/79 [00:03<00:00, 20.94it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 13
Accuracy: 0.7986 | Train loss: 1.826411861152649 | Valid loss: 0.8291585610389709
YES


100%|██████████| 391/391 [00:35<00:00, 10.89it/s]
100%|██████████| 79/79 [00:03<00:00, 23.42it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 14
Accuracy: 0.773 | Train loss: 1.7984142112731933 | Valid loss: 0.972421668434143



100%|██████████| 391/391 [00:35<00:00, 11.01it/s]
100%|██████████| 79/79 [00:03<00:00, 23.19it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 15
Accuracy: 0.7941 | Train loss: 1.7602069706344605 | Valid loss: 0.8725285497665405



100%|██████████| 391/391 [00:35<00:00, 11.07it/s]
100%|██████████| 79/79 [00:04<00:00, 17.22it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 16
Accuracy: 0.8001 | Train loss: 1.7538905289840698 | Valid loss: 0.8050944854736328
YES



100%|██████████| 391/391 [00:35<00:00, 10.87it/s]
100%|██████████| 79/79 [00:03<00:00, 22.84it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 17
Accuracy: 0.8345 | Train loss: 1.7220897900009156 | Valid loss: 0.6305189762115478
YES



100%|██████████| 391/391 [00:34<00:00, 11.41it/s]
100%|██████████| 79/79 [00:04<00:00, 19.35it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 18
Accuracy: 0.813 | Train loss: 1.7092482363128663 | Valid loss: 0.75810716714859



100%|██████████| 391/391 [00:36<00:00, 10.85it/s]
100%|██████████| 79/79 [00:03<00:00, 24.49it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 19
Accuracy: 0.8137 | Train loss: 1.675018653717041 | Valid loss: 0.7608640751838684



100%|██████████| 391/391 [00:36<00:00, 10.57it/s]
100%|██████████| 79/79 [00:03<00:00, 24.51it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 20
Accuracy: 0.7886 | Train loss: 1.6686491609573364 | Valid loss: 0.8558456897735596



100%|██████████| 391/391 [00:33<00:00, 11.53it/s]
100%|██████████| 79/79 [00:04<00:00, 19.68it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 21
Accuracy: 0.7664 | Train loss: 1.645394005279541 | Valid loss: 1.0026601537704467



100%|██████████| 391/391 [00:36<00:00, 10.58it/s]
100%|██████████| 79/79 [00:03<00:00, 24.47it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 22
Accuracy: 0.8284 | Train loss: 1.654553323287964 | Valid loss: 0.7086695845603943



100%|██████████| 391/391 [00:34<00:00, 11.21it/s]
100%|██████████| 79/79 [00:03<00:00, 22.63it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 23
Accuracy: 0.8224 | Train loss: 1.6150367836380004 | Valid loss: 0.7386015398025513



100%|██████████| 391/391 [00:36<00:00, 10.74it/s]
100%|██████████| 79/79 [00:03<00:00, 24.60it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 24
Accuracy: 0.8389 | Train loss: 1.6202921860504151 | Valid loss: 0.6228412868499756
YES



100%|██████████| 391/391 [00:34<00:00, 11.38it/s]
100%|██████████| 79/79 [00:03<00:00, 22.23it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 25
Accuracy: 0.8313 | Train loss: 1.6041802279663087 | Valid loss: 0.6630909468650817



100%|██████████| 391/391 [00:36<00:00, 10.71it/s]
100%|██████████| 79/79 [00:03<00:00, 22.10it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 26
Accuracy: 0.7506 | Train loss: 1.5979086755752563 | Valid loss: 1.2251624802589416



100%|██████████| 391/391 [00:33<00:00, 11.54it/s]
100%|██████████| 79/79 [00:04<00:00, 16.71it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 27
Accuracy: 0.8171 | Train loss: 1.5707899099349976 | Valid loss: 0.783026754951477



100%|██████████| 391/391 [00:35<00:00, 11.05it/s]
100%|██████████| 79/79 [00:03<00:00, 23.28it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 28
Accuracy: 0.8211 | Train loss: 1.5566498546600342 | Valid loss: 0.7141798590660096



100%|██████████| 391/391 [00:35<00:00, 10.95it/s]
100%|██████████| 79/79 [00:04<00:00, 17.21it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 29
Accuracy: 0.8195 | Train loss: 1.5704580110549926 | Valid loss: 0.7380633942604065



100%|██████████| 391/391 [00:34<00:00, 11.33it/s]
100%|██████████| 79/79 [00:03<00:00, 23.10it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 30
Accuracy: 0.7863 | Train loss: 1.552940584716797 | Valid loss: 0.9670036221504211



100%|██████████| 391/391 [00:37<00:00, 10.50it/s]
100%|██████████| 79/79 [00:04<00:00, 19.69it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 31
Accuracy: 0.8071 | Train loss: 1.5516723258972167 | Valid loss: 0.8209571599960327



100%|██████████| 391/391 [00:33<00:00, 11.59it/s]
100%|██████████| 79/79 [00:03<00:00, 21.41it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 32
Accuracy: 0.815 | Train loss: 1.5485044537734984 | Valid loss: 0.7676167891025544



100%|██████████| 391/391 [00:37<00:00, 10.32it/s]
100%|██████████| 79/79 [00:03<00:00, 24.46it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 33
Accuracy: 0.8387 | Train loss: 1.5475721881866455 | Valid loss: 0.6662679796218872



100%|██████████| 391/391 [00:33<00:00, 11.65it/s]
100%|██████████| 79/79 [00:03<00:00, 22.42it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 34
Accuracy: 0.8328 | Train loss: 1.536700569229126 | Valid loss: 0.6719585720062256


100%|██████████| 391/391 [00:34<00:00, 11.35it/s]
100%|██████████| 79/79 [00:03<00:00, 24.28it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 35
Accuracy: 0.833 | Train loss: 1.530026190109253 | Valid loss: 0.6764285117149353



100%|██████████| 391/391 [00:34<00:00, 11.44it/s]
100%|██████████| 79/79 [00:04<00:00, 17.93it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 36
Accuracy: 0.8146 | Train loss: 1.5207777460098266 | Valid loss: 0.7513252853393555



100%|██████████| 391/391 [00:36<00:00, 10.58it/s]
100%|██████████| 79/79 [00:04<00:00, 18.23it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 37
Accuracy: 0.8335 | Train loss: 1.513306707763672 | Valid loss: 0.6705650583267212



100%|██████████| 391/391 [00:33<00:00, 11.55it/s]
100%|██████████| 79/79 [00:03<00:00, 24.38it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 38
Accuracy: 0.8309 | Train loss: 1.5361967358398438 | Valid loss: 0.7387189070701599



100%|██████████| 391/391 [00:34<00:00, 11.37it/s]
100%|██████████| 79/79 [00:03<00:00, 24.33it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 39
Accuracy: 0.771 | Train loss: 1.5095775786590575 | Valid loss: 1.0683064146995545



100%|██████████| 391/391 [00:38<00:00, 10.17it/s]
100%|██████████| 79/79 [00:03<00:00, 24.59it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 40
Accuracy: 0.796 | Train loss: 1.5044623094558716 | Valid loss: 0.9175789601325989



100%|██████████| 391/391 [00:33<00:00, 11.61it/s]
100%|██████████| 79/79 [00:03<00:00, 22.25it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 41
Accuracy: 0.8077 | Train loss: 1.5170368143463135 | Valid loss: 0.7560154761314392



100%|██████████| 391/391 [00:33<00:00, 11.67it/s]
100%|██████████| 79/79 [00:03<00:00, 21.25it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 42
Accuracy: 0.8236 | Train loss: 1.5057657006835938 | Valid loss: 0.7074376526832581



100%|██████████| 391/391 [00:33<00:00, 11.64it/s]
100%|██████████| 79/79 [00:03<00:00, 24.83it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 43
Accuracy: 0.7651 | Train loss: 1.506234967803955 | Valid loss: 1.0557164313316345



100%|██████████| 391/391 [00:38<00:00, 10.19it/s]
100%|██████████| 79/79 [00:03<00:00, 24.48it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 44
Accuracy: 0.7925 | Train loss: 1.490966626815796 | Valid loss: 0.9173130766868591



100%|██████████| 391/391 [00:33<00:00, 11.62it/s]
100%|██████████| 79/79 [00:04<00:00, 18.12it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 45
Accuracy: 0.8201 | Train loss: 1.500975193710327 | Valid loss: 0.7881186166286468



100%|██████████| 391/391 [00:34<00:00, 11.50it/s]
100%|██████████| 79/79 [00:03<00:00, 22.53it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 46
Accuracy: 0.8386 | Train loss: 1.4927625518417358 | Valid loss: 0.6613145757675171



100%|██████████| 391/391 [00:37<00:00, 10.56it/s]
100%|██████████| 79/79 [00:04<00:00, 15.86it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 47
Accuracy: 0.8226 | Train loss: 1.5011912688446045 | Valid loss: 0.7515381911277771


100%|██████████| 391/391 [00:34<00:00, 11.41it/s]
100%|██████████| 79/79 [00:03<00:00, 24.30it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 48
Accuracy: 0.8232 | Train loss: 1.49289337184906 | Valid loss: 0.7399001829147339



100%|██████████| 391/391 [00:34<00:00, 11.47it/s]
100%|██████████| 79/79 [00:03<00:00, 21.37it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 49
Accuracy: 0.7894 | Train loss: 1.4831201539611816 | Valid loss: 1.0061967758178711



100%|██████████| 391/391 [00:33<00:00, 11.53it/s]
100%|██████████| 79/79 [00:03<00:00, 24.44it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 50
Accuracy: 0.809 | Train loss: 1.4841702629852296 | Valid loss: 0.8381411518096924



100%|██████████| 391/391 [00:40<00:00,  9.76it/s]
100%|██████████| 79/79 [00:03<00:00, 24.06it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 51
Accuracy: 0.8392 | Train loss: 1.485460069961548 | Valid loss: 0.6375224055767059
YES


100%|██████████| 391/391 [00:34<00:00, 11.50it/s]
100%|██████████| 79/79 [00:03<00:00, 22.55it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 52
Accuracy: 0.7671 | Train loss: 1.4893734533309937 | Valid loss: 1.0758880860328675



100%|██████████| 391/391 [00:34<00:00, 11.48it/s]
100%|██████████| 79/79 [00:03<00:00, 24.63it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 53
Accuracy: 0.817 | Train loss: 1.4677501531982422 | Valid loss: 0.8112660380363464



100%|██████████| 391/391 [00:33<00:00, 11.51it/s]
100%|██████████| 79/79 [00:04<00:00, 15.81it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 54
Accuracy: 0.8288 | Train loss: 1.486396813583374 | Valid loss: 0.7113731054306031



100%|██████████| 391/391 [00:37<00:00, 10.32it/s]
100%|██████████| 79/79 [00:04<00:00, 18.97it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 55
Accuracy: 0.7899 | Train loss: 1.4629900857543945 | Valid loss: 0.9256727378845215


100%|██████████| 391/391 [00:34<00:00, 11.39it/s]
100%|██████████| 79/79 [00:03<00:00, 22.66it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 56
Accuracy: 0.8186 | Train loss: 1.4616959429550171 | Valid loss: 0.7425760622024536



100%|██████████| 391/391 [00:33<00:00, 11.63it/s]
100%|██████████| 79/79 [00:03<00:00, 25.08it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 57
Accuracy: 0.8288 | Train loss: 1.4718684257125854 | Valid loss: 0.721045396232605


100%|██████████| 391/391 [00:38<00:00, 10.23it/s]
100%|██████████| 79/79 [00:04<00:00, 16.75it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 58
Accuracy: 0.8084 | Train loss: 1.4683775389862062 | Valid loss: 0.7730891680717469



100%|██████████| 391/391 [00:33<00:00, 11.55it/s]
100%|██████████| 79/79 [00:03<00:00, 24.49it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 59
Accuracy: 0.8233 | Train loss: 1.4704750568389893 | Valid loss: 0.7288425892829895



100%|██████████| 391/391 [00:33<00:00, 11.53it/s]
100%|██████████| 79/79 [00:03<00:00, 24.04it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 60
Accuracy: 0.8329 | Train loss: 1.4768289295959474 | Valid loss: 0.7000473380088806



100%|██████████| 391/391 [00:33<00:00, 11.56it/s]
100%|██████████| 79/79 [00:03<00:00, 24.61it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 61
Accuracy: 0.8183 | Train loss: 1.47717142578125 | Valid loss: 0.7964387778997422



100%|██████████| 391/391 [00:40<00:00,  9.57it/s]
100%|██████████| 79/79 [00:03<00:00, 24.10it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 62
Accuracy: 0.8363 | Train loss: 1.4541972734832764 | Valid loss: 0.6564207605361938


100%|██████████| 391/391 [00:33<00:00, 11.65it/s]
100%|██████████| 79/79 [00:04<00:00, 18.89it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 63
Accuracy: 0.8251 | Train loss: 1.4600476205825805 | Valid loss: 0.7506424706935882



100%|██████████| 391/391 [00:33<00:00, 11.66it/s]
100%|██████████| 79/79 [00:03<00:00, 24.33it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 64
Accuracy: 0.7941 | Train loss: 1.453534398727417 | Valid loss: 0.9149568721771241


100%|██████████| 391/391 [00:34<00:00, 11.47it/s]
100%|██████████| 79/79 [00:03<00:00, 24.18it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 65
Accuracy: 0.8298 | Train loss: 1.4628129441833495 | Valid loss: 0.6845538856506348



100%|██████████| 391/391 [00:40<00:00,  9.61it/s]
100%|██████████| 79/79 [00:03<00:00, 22.89it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 66
Accuracy: 0.8164 | Train loss: 1.4585285511016846 | Valid loss: 0.8328799399375916



100%|██████████| 391/391 [00:34<00:00, 11.41it/s]
100%|██████████| 79/79 [00:03<00:00, 24.46it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 67
Accuracy: 0.906 | Train loss: 0.9759113849639892 | Valid loss: 0.37556705322265627
YES


100%|██████████| 391/391 [00:33<00:00, 11.67it/s]
100%|██████████| 79/79 [00:03<00:00, 25.08it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 68
Accuracy: 0.9114 | Train loss: 0.8147821802520752 | Valid loss: 0.36290670232772826
YES



100%|██████████| 391/391 [00:35<00:00, 11.13it/s]
100%|██████████| 79/79 [00:04<00:00, 16.62it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 69
Accuracy: 0.9121 | Train loss: 0.7706343342208862 | Valid loss: 0.35567050256729127
YES



100%|██████████| 391/391 [00:39<00:00,  9.94it/s]
100%|██████████| 79/79 [00:03<00:00, 24.83it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 70
Accuracy: 0.913 | Train loss: 0.7357067172241211 | Valid loss: 0.3492842935562134
YES



100%|██████████| 391/391 [00:33<00:00, 11.68it/s]
100%|██████████| 79/79 [00:03<00:00, 25.03it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 71
Accuracy: 0.9137 | Train loss: 0.7189598069572448 | Valid loss: 0.34450633878707887
YES



100%|██████████| 391/391 [00:33<00:00, 11.59it/s]
100%|██████████| 79/79 [00:03<00:00, 20.44it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 72
Accuracy: 0.9149 | Train loss: 0.6973585005569458 | Valid loss: 0.3574694416999817
YES



100%|██████████| 391/391 [00:36<00:00, 10.68it/s]
100%|██████████| 79/79 [00:04<00:00, 17.21it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 73
Accuracy: 0.9169 | Train loss: 0.6764501787948608 | Valid loss: 0.34461981854438783
YES


100%|██████████| 391/391 [00:36<00:00, 10.74it/s]
100%|██████████| 79/79 [00:03<00:00, 24.75it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 74
Accuracy: 0.9177 | Train loss: 0.6601614823913574 | Valid loss: 0.3545379502773285
YES



100%|██████████| 391/391 [00:33<00:00, 11.79it/s]
100%|██████████| 79/79 [00:03<00:00, 24.57it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 75
Accuracy: 0.9169 | Train loss: 0.6476432795810699 | Valid loss: 0.3488758499145508



100%|██████████| 391/391 [00:33<00:00, 11.66it/s]
100%|██████████| 79/79 [00:03<00:00, 22.21it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 76
Accuracy: 0.9173 | Train loss: 0.6358189624977112 | Valid loss: 0.3519696434497833



100%|██████████| 391/391 [00:33<00:00, 11.77it/s]
100%|██████████| 79/79 [00:03<00:00, 24.56it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 77
Accuracy: 0.9154 | Train loss: 0.633663869228363 | Valid loss: 0.35769450407028197



100%|██████████| 391/391 [00:33<00:00, 11.70it/s]
100%|██████████| 79/79 [00:03<00:00, 24.62it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 78
Accuracy: 0.9172 | Train loss: 0.6223564216995239 | Valid loss: 0.35806852293014524



100%|██████████| 391/391 [00:33<00:00, 11.63it/s]
100%|██████████| 79/79 [00:03<00:00, 22.20it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 79
Accuracy: 0.9161 | Train loss: 0.6118100097846985 | Valid loss: 0.3562238612651825



100%|██████████| 391/391 [00:34<00:00, 11.42it/s]
100%|██████████| 79/79 [00:04<00:00, 16.92it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 80
Accuracy: 0.9138 | Train loss: 0.6118995854949951 | Valid loss: 0.3610575843811035



100%|██████████| 391/391 [00:39<00:00,  9.93it/s]
100%|██████████| 79/79 [00:03<00:00, 24.53it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 81
Accuracy: 0.9145 | Train loss: 0.5968702331352234 | Valid loss: 0.361551806306839



100%|██████████| 391/391 [00:33<00:00, 11.57it/s]
100%|██████████| 79/79 [00:03<00:00, 22.03it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 82
Accuracy: 0.9163 | Train loss: 0.5948857726860046 | Valid loss: 0.3644990415096283



100%|██████████| 391/391 [00:33<00:00, 11.59it/s]
100%|██████████| 79/79 [00:03<00:00, 24.32it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 83
Accuracy: 0.9137 | Train loss: 0.6011240916442872 | Valid loss: 0.3718390000343323


100%|██████████| 391/391 [00:33<00:00, 11.64it/s]
100%|██████████| 79/79 [00:03<00:00, 24.05it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 84
Accuracy: 0.9132 | Train loss: 0.5958672159767151 | Valid loss: 0.37969548635482786



100%|██████████| 391/391 [00:34<00:00, 11.41it/s]
100%|██████████| 79/79 [00:03<00:00, 24.30it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 85
Accuracy: 0.9163 | Train loss: 0.5990824493217468 | Valid loss: 0.3594734482765198



100%|██████████| 391/391 [00:33<00:00, 11.62it/s]
100%|██████████| 79/79 [00:03<00:00, 19.95it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 86
Accuracy: 0.9125 | Train loss: 0.5927139099788665 | Valid loss: 0.39338529500961306



100%|██████████| 391/391 [00:34<00:00, 11.41it/s]
100%|██████████| 79/79 [00:05<00:00, 15.67it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 87
Accuracy: 0.9151 | Train loss: 0.5869995415306092 | Valid loss: 0.3827118753910065



100%|██████████| 391/391 [00:41<00:00,  9.49it/s]
100%|██████████| 79/79 [00:03<00:00, 24.39it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 88
Accuracy: 0.9162 | Train loss: 0.597877690229416 | Valid loss: 0.3649259454727173



100%|██████████| 391/391 [00:34<00:00, 11.30it/s]
100%|██████████| 79/79 [00:03<00:00, 24.63it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 89
Accuracy: 0.9179 | Train loss: 0.5725365182113648 | Valid loss: 0.3711622465133667
YES



100%|██████████| 391/391 [00:34<00:00, 11.42it/s]
100%|██████████| 79/79 [00:03<00:00, 23.04it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 90
Accuracy: 0.9173 | Train loss: 0.5842245012950897 | Valid loss: 0.36850229988098143



100%|██████████| 391/391 [00:34<00:00, 11.25it/s]
100%|██████████| 79/79 [00:03<00:00, 24.74it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 91
Accuracy: 0.9126 | Train loss: 0.5805128273773194 | Valid loss: 0.3789386202812195



100%|██████████| 391/391 [00:33<00:00, 11.56it/s]
100%|██████████| 79/79 [00:03<00:00, 21.93it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 92
Accuracy: 0.9162 | Train loss: 0.5869549562835693 | Valid loss: 0.3607866636753082



100%|██████████| 391/391 [00:33<00:00, 11.71it/s]
100%|██████████| 79/79 [00:03<00:00, 24.80it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 93
Accuracy: 0.9162 | Train loss: 0.5891439785385132 | Valid loss: 0.366998974943161



100%|██████████| 391/391 [00:34<00:00, 11.47it/s]
100%|██████████| 79/79 [00:03<00:00, 24.47it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 94
Accuracy: 0.9111 | Train loss: 0.5863866506576538 | Valid loss: 0.406586515045166



100%|██████████| 391/391 [00:45<00:00,  8.66it/s]
100%|██████████| 79/79 [00:03<00:00, 25.09it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 95
Accuracy: 0.9095 | Train loss: 0.589281224193573 | Valid loss: 0.39880988664627076



100%|██████████| 391/391 [00:33<00:00, 11.75it/s]
100%|██████████| 79/79 [00:03<00:00, 24.18it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 96
Accuracy: 0.9166 | Train loss: 0.5803866481971741 | Valid loss: 0.3719761856079102



100%|██████████| 391/391 [00:33<00:00, 11.71it/s]
100%|██████████| 79/79 [00:03<00:00, 22.24it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 97
Accuracy: 0.9152 | Train loss: 0.588620516910553 | Valid loss: 0.3724422486782074



100%|██████████| 391/391 [00:33<00:00, 11.76it/s]
100%|██████████| 79/79 [00:03<00:00, 24.85it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 98
Accuracy: 0.9089 | Train loss: 0.5910874782752991 | Valid loss: 0.40323143367767333



100%|██████████| 391/391 [00:33<00:00, 11.53it/s]
100%|██████████| 79/79 [00:03<00:00, 24.37it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 99
Accuracy: 0.9009 | Train loss: 0.5921946837997436 | Valid loss: 0.4703923719406128


100%|██████████| 391/391 [00:34<00:00, 11.37it/s]
100%|██████████| 79/79 [00:03<00:00, 21.24it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 100
Accuracy: 0.9088 | Train loss: 0.5970148996543885 | Valid loss: 0.41132854471206665



100%|██████████| 391/391 [00:34<00:00, 11.38it/s]
100%|██████████| 79/79 [00:03<00:00, 24.72it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 101
Accuracy: 0.9112 | Train loss: 0.5913483397102356 | Valid loss: 0.3878987536907196



100%|██████████| 391/391 [00:41<00:00,  9.31it/s]
100%|██████████| 79/79 [00:04<00:00, 16.96it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 102
Accuracy: 0.903 | Train loss: 0.5959748370933533 | Valid loss: 0.4302804663658142



100%|██████████| 391/391 [00:37<00:00, 10.53it/s]
100%|██████████| 79/79 [00:03<00:00, 24.76it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 103
Accuracy: 0.9126 | Train loss: 0.5984616786766053 | Valid loss: 0.3869655663013458



100%|██████████| 391/391 [00:34<00:00, 11.46it/s]
100%|██████████| 79/79 [00:03<00:00, 21.44it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 104
Accuracy: 0.9122 | Train loss: 0.5899475020313263 | Valid loss: 0.39373335952758787



100%|██████████| 391/391 [00:33<00:00, 11.62it/s]
100%|██████████| 79/79 [00:03<00:00, 22.13it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 105
Accuracy: 0.902 | Train loss: 0.5958609357643128 | Valid loss: 0.4353572376012802



100%|██████████| 391/391 [00:33<00:00, 11.71it/s]
100%|██████████| 79/79 [00:03<00:00, 24.62it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 106
Accuracy: 0.9104 | Train loss: 0.5945777762985229 | Valid loss: 0.39509372463226317



100%|██████████| 391/391 [00:33<00:00, 11.50it/s]
100%|██████████| 79/79 [00:03<00:00, 22.31it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 107
Accuracy: 0.9102 | Train loss: 0.6049810729598999 | Valid loss: 0.40371261565685274



100%|██████████| 391/391 [00:34<00:00, 11.50it/s]
100%|██████████| 79/79 [00:03<00:00, 25.46it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 108
Accuracy: 0.9064 | Train loss: 0.6015485320472718 | Valid loss: 0.42090754809379577



100%|██████████| 391/391 [00:36<00:00, 10.73it/s]
100%|██████████| 79/79 [00:04<00:00, 16.14it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 109
Accuracy: 0.9121 | Train loss: 0.6002523062992096 | Valid loss: 0.4009872646331787



100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
100%|██████████| 79/79 [00:04<00:00, 18.73it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 110
Accuracy: 0.9096 | Train loss: 0.5983696744537353 | Valid loss: 0.40573061380386355



100%|██████████| 391/391 [00:33<00:00, 11.85it/s]
100%|██████████| 79/79 [00:03<00:00, 25.48it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 111
Accuracy: 0.9102 | Train loss: 0.5989253341865539 | Valid loss: 0.4029323440551758



100%|██████████| 391/391 [00:33<00:00, 11.74it/s]
100%|██████████| 79/79 [00:03<00:00, 21.88it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 112
Accuracy: 0.9073 | Train loss: 0.6048439047908784 | Valid loss: 0.40911090030670166



100%|██████████| 391/391 [00:33<00:00, 11.66it/s]
100%|██████████| 79/79 [00:03<00:00, 24.94it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 113
Accuracy: 0.9143 | Train loss: 0.6010168999481201 | Valid loss: 0.37185374636650087



100%|██████████| 391/391 [00:33<00:00, 11.68it/s]
100%|██████████| 79/79 [00:03<00:00, 25.50it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 114
Accuracy: 0.9083 | Train loss: 0.5965677423477173 | Valid loss: 0.4044218377113342



100%|██████████| 391/391 [00:33<00:00, 11.76it/s]
100%|██████████| 79/79 [00:03<00:00, 22.46it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 115
Accuracy: 0.9079 | Train loss: 0.5975712395763397 | Valid loss: 0.41653162724375725



100%|██████████| 391/391 [00:32<00:00, 11.97it/s]
100%|██████████| 79/79 [00:03<00:00, 25.32it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 116
Accuracy: 0.9077 | Train loss: 0.5959124324989319 | Valid loss: 0.4123590121269226



100%|██████████| 391/391 [00:43<00:00,  8.96it/s]
100%|██████████| 79/79 [00:04<00:00, 17.77it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 117
Accuracy: 0.9017 | Train loss: 0.6035911465454101 | Valid loss: 0.4439268168449402



100%|██████████| 391/391 [00:34<00:00, 11.42it/s]
100%|██████████| 79/79 [00:03<00:00, 22.81it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 118
Accuracy: 0.9083 | Train loss: 0.6087015815734863 | Valid loss: 0.41886722166538237



100%|██████████| 391/391 [00:33<00:00, 11.84it/s]
100%|██████████| 79/79 [00:03<00:00, 25.09it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 119
Accuracy: 0.9027 | Train loss: 0.595565984249115 | Valid loss: 0.4528974135398865



100%|██████████| 391/391 [00:33<00:00, 11.84it/s]
100%|██████████| 79/79 [00:03<00:00, 25.39it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 120
Accuracy: 0.9043 | Train loss: 0.5948590893936158 | Valid loss: 0.42619939374923704



100%|██████████| 391/391 [00:32<00:00, 11.88it/s]
100%|██████████| 79/79 [00:03<00:00, 22.38it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 121
Accuracy: 0.9028 | Train loss: 0.5990189911937713 | Valid loss: 0.4512252126455307



100%|██████████| 391/391 [00:33<00:00, 11.64it/s]
100%|██████████| 79/79 [00:03<00:00, 22.10it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 122
Accuracy: 0.9103 | Train loss: 0.5915647350120544 | Valid loss: 0.39740849052667615



100%|██████████| 391/391 [00:33<00:00, 11.83it/s]
100%|██████████| 79/79 [00:03<00:00, 24.99it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 123
Accuracy: 0.9012 | Train loss: 0.5993729944038391 | Valid loss: 0.4373713780999184



100%|██████████| 391/391 [00:37<00:00, 10.39it/s]
100%|██████████| 79/79 [00:04<00:00, 16.03it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 124
Accuracy: 0.9003 | Train loss: 0.5951777054214478 | Valid loss: 0.45158005638122556



100%|██████████| 391/391 [00:40<00:00,  9.58it/s]
100%|██████████| 79/79 [00:03<00:00, 25.72it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 125
Accuracy: 0.9029 | Train loss: 0.5963073554611206 | Valid loss: 0.42372951620817184



100%|██████████| 391/391 [00:33<00:00, 11.75it/s]
100%|██████████| 79/79 [00:03<00:00, 22.31it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 126
Accuracy: 0.901 | Train loss: 0.5875796858024597 | Valid loss: 0.4511375782966614



100%|██████████| 391/391 [00:32<00:00, 11.92it/s]
100%|██████████| 79/79 [00:03<00:00, 25.27it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 127
Accuracy: 0.9075 | Train loss: 0.5977082199859619 | Valid loss: 0.4102508620977402



100%|██████████| 391/391 [00:33<00:00, 11.73it/s]
100%|██████████| 79/79 [00:03<00:00, 24.49it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 128
Accuracy: 0.9015 | Train loss: 0.5828054281234741 | Valid loss: 0.43447194051742555



100%|██████████| 391/391 [00:33<00:00, 11.54it/s]
100%|██████████| 79/79 [00:03<00:00, 22.16it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 129
Accuracy: 0.9062 | Train loss: 0.5917926601219178 | Valid loss: 0.42093634848594663



100%|██████████| 391/391 [00:33<00:00, 11.74it/s]
100%|██████████| 79/79 [00:03<00:00, 24.91it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 130
Accuracy: 0.9057 | Train loss: 0.598060080165863 | Valid loss: 0.42245987797677514



100%|██████████| 391/391 [00:33<00:00, 11.78it/s]
100%|██████████| 79/79 [00:04<00:00, 19.14it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 131
Accuracy: 0.9058 | Train loss: 0.5834129086303711 | Valid loss: 0.4400376772403717



100%|██████████| 391/391 [00:42<00:00,  9.24it/s]
100%|██████████| 79/79 [00:05<00:00, 15.34it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 132
Accuracy: 0.907 | Train loss: 0.5775604133033753 | Valid loss: 0.4373533835887909



100%|██████████| 391/391 [00:37<00:00, 10.45it/s]
100%|██████████| 79/79 [00:03<00:00, 24.63it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 133
Accuracy: 0.9265 | Train loss: 0.42914523315429687 | Valid loss: 0.325257239818573
YES



100%|██████████| 391/391 [00:34<00:00, 11.32it/s]
100%|██████████| 79/79 [00:03<00:00, 24.05it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 134
Accuracy: 0.9282 | Train loss: 0.37094393002510073 | Valid loss: 0.31575552587509154
YES



100%|██████████| 391/391 [00:33<00:00, 11.69it/s]
100%|██████████| 79/79 [00:03<00:00, 19.88it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 135
Accuracy: 0.9285 | Train loss: 0.3492877177619934 | Valid loss: 0.31500302357673643
YES


100%|██████████| 391/391 [00:33<00:00, 11.63it/s]
100%|██████████| 79/79 [00:03<00:00, 22.03it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 136
Accuracy: 0.931 | Train loss: 0.3418600731086731 | Valid loss: 0.3076446098804474
YES



100%|██████████| 391/391 [00:33<00:00, 11.77it/s]
100%|██████████| 79/79 [00:03<00:00, 25.57it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 137
Accuracy: 0.9316 | Train loss: 0.33244540658950805 | Valid loss: 0.306367310667038
YES



100%|██████████| 391/391 [00:33<00:00, 11.60it/s]
100%|██████████| 79/79 [00:03<00:00, 24.12it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 138
Accuracy: 0.9311 | Train loss: 0.32966563844680785 | Valid loss: 0.3053909903526306



100%|██████████| 391/391 [00:34<00:00, 11.31it/s]
100%|██████████| 79/79 [00:04<00:00, 16.97it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 139
Accuracy: 0.9325 | Train loss: 0.32209649044036864 | Valid loss: 0.31095861825942994
YES



100%|██████████| 391/391 [00:42<00:00,  9.18it/s]
100%|██████████| 79/79 [00:05<00:00, 14.30it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 140
Accuracy: 0.932 | Train loss: 0.31612790864944457 | Valid loss: 0.3047232413291931



100%|██████████| 391/391 [00:33<00:00, 11.61it/s]
100%|██████████| 79/79 [00:03<00:00, 22.07it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 141
Accuracy: 0.9316 | Train loss: 0.3147610336303711 | Valid loss: 0.30934013800621035


100%|██████████| 391/391 [00:33<00:00, 11.84it/s]
100%|██████████| 79/79 [00:03<00:00, 24.51it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 142
Accuracy: 0.9328 | Train loss: 0.3071037864971161 | Valid loss: 0.3072947557449341
YES



100%|██████████| 391/391 [00:33<00:00, 11.68it/s]
100%|██████████| 79/79 [00:03<00:00, 24.88it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 143
Accuracy: 0.9324 | Train loss: 0.3058872573375702 | Valid loss: 0.3049090033531189



100%|██████████| 391/391 [00:34<00:00, 11.49it/s]
100%|██████████| 79/79 [00:03<00:00, 23.88it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 144
Accuracy: 0.9323 | Train loss: 0.29862142246246337 | Valid loss: 0.3073415204524994



100%|██████████| 391/391 [00:34<00:00, 11.50it/s]
100%|██████████| 79/79 [00:03<00:00, 24.55it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 145
Accuracy: 0.9328 | Train loss: 0.2992662948036194 | Valid loss: 0.30751140122413634



100%|██████████| 391/391 [00:33<00:00, 11.60it/s]
100%|██████████| 79/79 [00:03<00:00, 21.67it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 146
Accuracy: 0.9299 | Train loss: 0.2949023163986206 | Valid loss: 0.3071384394168854



100%|██████████| 391/391 [00:40<00:00,  9.60it/s]
100%|██████████| 79/79 [00:05<00:00, 15.41it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 147
Accuracy: 0.9328 | Train loss: 0.2925429537630081 | Valid loss: 0.30136333994865416



100%|██████████| 391/391 [00:42<00:00,  9.16it/s]
100%|██████████| 79/79 [00:03<00:00, 24.35it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 148
Accuracy: 0.9318 | Train loss: 0.29114927775383 | Valid loss: 0.30385383682250977



100%|██████████| 391/391 [00:34<00:00, 11.42it/s]
100%|██████████| 79/79 [00:03<00:00, 24.18it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 149
Accuracy: 0.9322 | Train loss: 0.28823829652786254 | Valid loss: 0.3051855764389038



100%|██████████| 391/391 [00:34<00:00, 11.25it/s]
100%|██████████| 79/79 [00:03<00:00, 24.14it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 150
Accuracy: 0.9316 | Train loss: 0.28164519696235657 | Valid loss: 0.3082926644325256



100%|██████████| 391/391 [00:34<00:00, 11.20it/s]
100%|██████████| 79/79 [00:03<00:00, 24.20it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 151
Accuracy: 0.932 | Train loss: 0.2831230256843567 | Valid loss: 0.30643885273933413



100%|██████████| 391/391 [00:34<00:00, 11.45it/s]
100%|██████████| 79/79 [00:03<00:00, 24.32it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 152
Accuracy: 0.9324 | Train loss: 0.2808395327758789 | Valid loss: 0.307029238986969



100%|██████████| 391/391 [00:34<00:00, 11.27it/s]
100%|██████████| 79/79 [00:03<00:00, 24.05it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 153
Accuracy: 0.9303 | Train loss: 0.2840535877418518 | Valid loss: 0.3101268341064453



100%|██████████| 391/391 [00:34<00:00, 11.45it/s]
100%|██████████| 79/79 [00:03<00:00, 21.47it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 154
Accuracy: 0.9331 | Train loss: 0.27951471062660216 | Valid loss: 0.3101828582763672
YES



100%|██████████| 391/391 [00:34<00:00, 11.28it/s]
100%|██████████| 79/79 [00:03<00:00, 24.06it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 155
Accuracy: 0.9328 | Train loss: 0.27623436153888703 | Valid loss: 0.30843600564002993



100%|██████████| 391/391 [00:34<00:00, 11.45it/s]
100%|██████████| 79/79 [00:03<00:00, 21.71it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 156
Accuracy: 0.9339 | Train loss: 0.2726192637252808 | Valid loss: 0.3095841452121735
YES



100%|██████████| 391/391 [00:34<00:00, 11.26it/s]
100%|██████████| 79/79 [00:04<00:00, 19.58it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 157
Accuracy: 0.9345 | Train loss: 0.27390705080032346 | Valid loss: 0.3089134747505188
YES



100%|██████████| 391/391 [00:34<00:00, 11.25it/s]
100%|██████████| 79/79 [00:03<00:00, 21.48it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 158
Accuracy: 0.9317 | Train loss: 0.2744658638858795 | Valid loss: 0.3078631295204163



100%|██████████| 391/391 [00:34<00:00, 11.46it/s]
100%|██████████| 79/79 [00:03<00:00, 24.45it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 159
Accuracy: 0.9324 | Train loss: 0.2679856504440308 | Valid loss: 0.31260530898571015



100%|██████████| 391/391 [00:33<00:00, 11.57it/s]
100%|██████████| 79/79 [00:03<00:00, 21.04it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 160
Accuracy: 0.9311 | Train loss: 0.269117695980072 | Valid loss: 0.3161885494232178



100%|██████████| 391/391 [00:40<00:00,  9.65it/s]
100%|██████████| 79/79 [00:04<00:00, 15.92it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 161
Accuracy: 0.9327 | Train loss: 0.27084451806545257 | Valid loss: 0.3125919144630432



100%|██████████| 391/391 [00:44<00:00,  8.73it/s]
100%|██████████| 79/79 [00:03<00:00, 23.81it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 162
Accuracy: 0.9303 | Train loss: 0.26644005074501037 | Valid loss: 0.315624566078186



100%|██████████| 391/391 [00:34<00:00, 11.35it/s]
100%|██████████| 79/79 [00:03<00:00, 21.14it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 163
Accuracy: 0.9319 | Train loss: 0.26921767308235167 | Valid loss: 0.3180634222507477


100%|██████████| 391/391 [00:34<00:00, 11.22it/s]
100%|██████████| 79/79 [00:03<00:00, 23.77it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 164
Accuracy: 0.9308 | Train loss: 0.26405351026535034 | Valid loss: 0.3124244126319885



100%|██████████| 391/391 [00:34<00:00, 11.36it/s]
100%|██████████| 79/79 [00:03<00:00, 21.23it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 165
Accuracy: 0.9315 | Train loss: 0.26137455757141115 | Valid loss: 0.31184599289894105



100%|██████████| 391/391 [00:34<00:00, 11.29it/s]
100%|██████████| 79/79 [00:04<00:00, 18.40it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 166
Accuracy: 0.9311 | Train loss: 0.2612602595329285 | Valid loss: 0.31649557185173033



100%|██████████| 391/391 [00:34<00:00, 11.33it/s]
100%|██████████| 79/79 [00:03<00:00, 21.32it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 167
Accuracy: 0.9328 | Train loss: 0.25977018469810487 | Valid loss: 0.31721214303970335



100%|██████████| 391/391 [00:34<00:00, 11.37it/s]
100%|██████████| 79/79 [00:03<00:00, 23.62it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 168
Accuracy: 0.9327 | Train loss: 0.26073006174087526 | Valid loss: 0.3146393104553223



100%|██████████| 391/391 [00:34<00:00, 11.25it/s]
100%|██████████| 79/79 [00:03<00:00, 21.26it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 169
Accuracy: 0.9313 | Train loss: 0.26009738810539246 | Valid loss: 0.3141059189796448



100%|██████████| 391/391 [00:34<00:00, 11.35it/s]
100%|██████████| 79/79 [00:03<00:00, 23.46it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 170
Accuracy: 0.9327 | Train loss: 0.25634074145317076 | Valid loss: 0.3166464190483093



100%|██████████| 391/391 [00:34<00:00, 11.34it/s]
100%|██████████| 79/79 [00:03<00:00, 22.84it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 171
Accuracy: 0.9342 | Train loss: 0.25614080549240115 | Valid loss: 0.31778087291717527



100%|██████████| 391/391 [00:34<00:00, 11.19it/s]
100%|██████████| 79/79 [00:03<00:00, 20.99it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 172
Accuracy: 0.9296 | Train loss: 0.2579442768573761 | Valid loss: 0.32479842433929446



100%|██████████| 391/391 [00:34<00:00, 11.23it/s]
100%|██████████| 79/79 [00:03<00:00, 21.49it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 173
Accuracy: 0.933 | Train loss: 0.25597887170314787 | Valid loss: 0.3184926097869873



100%|██████████| 391/391 [00:34<00:00, 11.34it/s]
100%|██████████| 79/79 [00:03<00:00, 23.74it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 174
Accuracy: 0.9318 | Train loss: 0.25814149828910826 | Valid loss: 0.323415672492981



100%|██████████| 391/391 [00:42<00:00,  9.20it/s]
100%|██████████| 79/79 [00:04<00:00, 17.04it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 175
Accuracy: 0.9312 | Train loss: 0.25090752559185026 | Valid loss: 0.32047411532402037



100%|██████████| 391/391 [00:45<00:00,  8.62it/s]
100%|██████████| 79/79 [00:03<00:00, 21.22it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 176
Accuracy: 0.9323 | Train loss: 0.2524546530437469 | Valid loss: 0.3237093753814697



100%|██████████| 391/391 [00:35<00:00, 11.14it/s]
100%|██████████| 79/79 [00:03<00:00, 23.63it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 177
Accuracy: 0.9311 | Train loss: 0.25018155952453613 | Valid loss: 0.3162970307826996



100%|██████████| 391/391 [00:34<00:00, 11.35it/s]
100%|██████████| 79/79 [00:03<00:00, 21.77it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 178
Accuracy: 0.9314 | Train loss: 0.2506019710445404 | Valid loss: 0.3222152642250061



100%|██████████| 391/391 [00:34<00:00, 11.23it/s]
100%|██████████| 79/79 [00:03<00:00, 24.60it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 179
Accuracy: 0.9318 | Train loss: 0.25081151767730714 | Valid loss: 0.3202977010726929


100%|██████████| 391/391 [00:34<00:00, 11.46it/s]
100%|██████████| 79/79 [00:03<00:00, 21.92it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 180
Accuracy: 0.9305 | Train loss: 0.25022571027755736 | Valid loss: 0.3196082266330719



100%|██████████| 391/391 [00:34<00:00, 11.49it/s]
100%|██████████| 79/79 [00:04<00:00, 19.55it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 181
Accuracy: 0.9329 | Train loss: 0.24766638787269593 | Valid loss: 0.3203247399330139



100%|██████████| 391/391 [00:34<00:00, 11.46it/s]
100%|██████████| 79/79 [00:04<00:00, 19.15it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 182
Accuracy: 0.9328 | Train loss: 0.24611430390834807 | Valid loss: 0.3203515791893005



100%|██████████| 391/391 [00:34<00:00, 11.42it/s]
100%|██████████| 79/79 [00:03<00:00, 23.99it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 183
Accuracy: 0.9317 | Train loss: 0.24642891040802 | Valid loss: 0.32185294885635374



100%|██████████| 391/391 [00:34<00:00, 11.28it/s]
100%|██████████| 79/79 [00:03<00:00, 21.64it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 184
Accuracy: 0.9316 | Train loss: 0.24766307011604308 | Valid loss: 0.3265190927505493


100%|██████████| 391/391 [00:34<00:00, 11.35it/s]
100%|██████████| 79/79 [00:03<00:00, 23.93it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 185
Accuracy: 0.9321 | Train loss: 0.24566180198669432 | Valid loss: 0.32103427963256836



100%|██████████| 391/391 [00:34<00:00, 11.44it/s]
100%|██████████| 79/79 [00:03<00:00, 24.20it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 186
Accuracy: 0.9323 | Train loss: 0.24701742635726928 | Valid loss: 0.31954605493545535



100%|██████████| 391/391 [00:34<00:00, 11.32it/s]
100%|██████████| 79/79 [00:03<00:00, 23.64it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 187
Accuracy: 0.9323 | Train loss: 0.24372070706367494 | Valid loss: 0.3198401876449585



100%|██████████| 391/391 [00:35<00:00, 11.16it/s]
100%|██████████| 79/79 [00:03<00:00, 20.41it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 188
Accuracy: 0.9336 | Train loss: 0.24172085560798645 | Valid loss: 0.3266596501350403



100%|██████████| 391/391 [00:40<00:00,  9.61it/s]
100%|██████████| 79/79 [00:05<00:00, 15.78it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 189
Accuracy: 0.9299 | Train loss: 0.24355245079517365 | Valid loss: 0.32266055669784544



100%|██████████| 391/391 [00:44<00:00,  8.81it/s]
100%|██████████| 79/79 [00:04<00:00, 16.85it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 190
Accuracy: 0.931 | Train loss: 0.24235444751739502 | Valid loss: 0.3272381830215454



100%|██████████| 391/391 [00:34<00:00, 11.20it/s]
100%|██████████| 79/79 [00:04<00:00, 17.56it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 191
Accuracy: 0.9328 | Train loss: 0.24318806328773498 | Valid loss: 0.3216883764743805



100%|██████████| 391/391 [00:34<00:00, 11.24it/s]
100%|██████████| 79/79 [00:03<00:00, 23.70it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 192
Accuracy: 0.9309 | Train loss: 0.2413692953300476 | Valid loss: 0.3207400890350342



100%|██████████| 391/391 [00:34<00:00, 11.39it/s]
100%|██████████| 79/79 [00:03<00:00, 20.89it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 193
Accuracy: 0.9314 | Train loss: 0.23702201944351195 | Valid loss: 0.32660194664001463



100%|██████████| 391/391 [00:34<00:00, 11.35it/s]
100%|██████████| 79/79 [00:03<00:00, 23.61it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 194
Accuracy: 0.9309 | Train loss: 0.23713304441452027 | Valid loss: 0.3264253071784973



100%|██████████| 391/391 [00:34<00:00, 11.30it/s]
100%|██████████| 79/79 [00:03<00:00, 21.28it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 195
Accuracy: 0.9312 | Train loss: 0.24196737248420716 | Valid loss: 0.32388197469711305



100%|██████████| 391/391 [00:34<00:00, 11.31it/s]
100%|██████████| 79/79 [00:03<00:00, 24.08it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 196
Accuracy: 0.9317 | Train loss: 0.23952987688064575 | Valid loss: 0.3248234746456146



100%|██████████| 391/391 [00:34<00:00, 11.30it/s]
100%|██████████| 79/79 [00:03<00:00, 21.44it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 197
Accuracy: 0.9319 | Train loss: 0.23688102320194243 | Valid loss: 0.3218430280685425



100%|██████████| 391/391 [00:35<00:00, 11.15it/s]
100%|██████████| 79/79 [00:03<00:00, 23.92it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 198
Accuracy: 0.93 | Train loss: 0.235200298743248 | Valid loss: 0.329350901222229



100%|██████████| 391/391 [00:34<00:00, 11.36it/s]
100%|██████████| 79/79 [00:03<00:00, 21.39it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 199
Accuracy: 0.9295 | Train loss: 0.23935989603042604 | Valid loss: 0.33261438217163086



100%|██████████| 391/391 [00:34<00:00, 11.33it/s]
100%|██████████| 79/79 [00:03<00:00, 23.52it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 200
Accuracy: 0.932 | Train loss: 0.23279077295303344 | Valid loss: 0.3289026490688324





In [33]:
# Show the value of best_score as this cell's output.
# NOTE(review): best_score is assigned in an earlier cell that is not visible
# here; presumably it holds the highest validation accuracy recorded during
# training -- confirm against the training loop.
best_score

0.9345