In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as  F
import torch.nn as nn
from pathlib import Path
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from collections import OrderedDict
from tqdm import tqdm
import sys
import time
from sklearn.metrics import accuracy_score

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU so
# the notebook still runs (slowly) on machines without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Kept as a separate flag because reproducibilitySeed() reads it.
use_gpu = DEVICE == 'cuda'

In [3]:
def reproducibilitySeed(seed=0):
    """
    Seed the PyTorch and NumPy random number generators for repeatable runs.

    Args:
        seed: Integer used for both generators. Defaults to 0, which is the
            value this notebook has always used.

    When the module-level flag ``use_gpu`` is truthy, cuDNN is additionally
    switched to deterministic mode and its benchmark autotuner is disabled.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    if use_gpu:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# Seed the RNGs once, up front, before any model or data loader is created.
reproducibilitySeed()

### *Loading CIFAR-10*

In [4]:


# Number of worker processes handed to every DataLoader built by get_cifar().
NUM_WORKERS = 2


class TensorImgSet(Dataset):
    """Dataset over an ``(images, targets)`` pair with an optional image transform.

    ``tensors[0]`` supplies the samples and ``tensors[1]`` the matching targets;
    both are indexed with the same integer. The transform, when given, is
    applied to the sample only.
    """

    def __init__(self, tensors, transform=None):
        images, labels = tensors[0], tensors[1]
        self.tensors = tensors
        self.imgs = images
        self.targets = labels
        self.transform = transform
        # The length is captured once, from the image container.
        self.len = len(images)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        sample = self.imgs[index]
        sample = self.transform(sample) if self.transform else sample
        return sample, self.targets[index]



def get_cifar(num_classes=100, dataset_dir="./data", batch_size=128,
              use_cifar_10_1=False):
    """Build train and test ``DataLoader``s for CIFAR-10 or CIFAR-100.

    Args:
        num_classes: ``10`` selects CIFAR-10; every other value selects
            CIFAR-100.
        dataset_dir: Root directory given to the torchvision dataset (the
            dataset is requested with ``download=True``).
        batch_size: Batch size used by both loaders.
        use_cifar_10_1: Accepted for signature compatibility; this function
            does not read it.

    Returns:
        ``(train_loader, test_loader)``. The training set gets random-crop and
        horizontal-flip augmentation and is shuffled; the test set gets
        neither.
    """
    if num_classes != 10:
        print("Loading CIFAR100...")
        dataset = torchvision.datasets.CIFAR100
        normalize = transforms.Normalize(
            mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276])
    else:
        print("Loading CIFAR10...")
        dataset = torchvision.datasets.CIFAR10
        normalize = transforms.Normalize(
            (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

    # Training pipeline: augmentation first, then tensor conversion + normalization.
    augmented = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    trainset = dataset(root=dataset_dir, train=True,
                       download=True, transform=augmented)

    # Evaluation pipeline: tensor conversion + normalization only.
    plain = transforms.Compose([transforms.ToTensor(), normalize])
    testset = dataset(root=dataset_dir, train=False,
                      download=True, transform=plain)

    # Settings common to both loaders; only the shuffle flag differs.
    shared = dict(batch_size=batch_size,
                  num_workers=NUM_WORKERS,
                  pin_memory=True)
    train_loader = torch.utils.data.DataLoader(trainset, shuffle=True, **shared)
    test_loader = torch.utils.data.DataLoader(testset, shuffle=False, **shared)
    return train_loader, test_loader

### *Creating Models*

In [5]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by BatchNorm.

    The shortcut is the identity unless the stride or the channel count
    changes, in which case it is a 1x1 convolution followed by BatchNorm.
    ``forward`` returns the raw sum ``bn2(conv2(...)) + shortcut(x)``; no ReLU
    is applied to that sum inside the block.
    """
    # Output channels of the block are ``expansion * planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        """
        Args:
            in_planes: Channels of the incoming feature map.
            planes: Channels produced by both convolutions.
            stride: Stride of the first convolution (and of the projection
                shortcut, when one is needed).
        """
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        """Return ``bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x)``."""
        # A ReLU of `x` used to be computed here and then overwritten on the
        # very next line, so it never influenced the result; that wasted
        # computation has been dropped. The input is consumed exactly as given.
        # NOTE(review): presumably `x = F.relu(x)` was intended. Do not switch
        # to it without retraining - it would change the output of stages with
        # more than one block and so invalidate existing checkpoints.
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return out


class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions with BatchNorm.

    The final 1x1 convolution widens the feature map to ``expansion * planes``
    channels. The shortcut is the identity unless the stride or the channel
    count changes, in which case it is a 1x1 convolution plus BatchNorm.
    ``forward`` returns the raw sum; no ReLU is applied to it inside the block.
    """
    # Output channels of the block are ``expansion * planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        """
        Args:
            in_planes: Channels of the incoming feature map.
            planes: Width of the two inner convolutions.
            stride: Stride of the 3x3 convolution (and of the projection
                shortcut, when one is needed).
        """
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        """Return ``bn3(conv3(relu(bn2(conv2(relu(bn1(conv1(x)))))))) + shortcut(x)``."""
        # A ReLU of `x` used to be computed here and then overwritten on the
        # very next line, so it never influenced the result; that wasted
        # computation has been dropped. The input is consumed exactly as given.
        # NOTE(review): presumably `x = F.relu(x)` was intended. Do not switch
        # to it without retraining - it would change the output of stages with
        # more than one block and so invalidate existing checkpoints.
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        return out


class ResNet(nn.Module):
    """Four-stage residual network with a 3x3 stem and a linear classifier.

    The stages use base widths 64/128/256/512 and the last three halve the
    spatial resolution. The head average-pools with a fixed 4x4 window and
    feeds ``512 * block.expansion`` features to the classifier, which lines up
    for 32x32 inputs (4x4 map after the last stage).
    """

    def __init__(self, block, num_blocks, num_classes=10):
        """
        Args:
            block: Residual block class. It must expose an ``expansion``
                attribute and accept ``(in_planes, planes, stride)``.
            num_blocks: Sequence with the number of blocks for each of the
                four stages.
            num_classes: Output size of the final linear layer.
        """
        super(ResNet, self).__init__()
        # Running channel count; _make_layer updates it as blocks are stacked.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)
        # NOTE(review): these are the base widths; a stage actually emits
        # ``planes * block.expansion`` channels, so for Bottleneck networks
        # get_channel_num() under-reports by a factor of 4 - confirm intent.
        self.n_channels = [64, 128, 256, 512]

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, is_feat=False, use_relu=True):
        """Classify ``x``.

        Args:
            x: Input image batch.
            is_feat: When true, also return the stage outputs and the pooled
                feature vector.
            use_relu: When true, apply ReLU after the stem and after stages
                1-3. The ReLU after stage 4 is applied regardless.

        Returns:
            The logits, or ``([feat1, feat2, feat3, feat4], pool, logits)``
            when ``is_feat`` is true.
        """
        out = self.conv1(x)
        out = self.bn1(out)
        if use_relu:
            out = F.relu(out)
        feat1 = self.layer1(out)
        if use_relu:
            feat1 = F.relu(feat1)
        feat2 = self.layer2(feat1)
        if use_relu:
            feat2 = F.relu(feat2)
        feat3 = self.layer3(feat2)
        if use_relu:
            feat3 = F.relu(feat3)

        feat4 = self.layer4(feat3)
        feat4 = F.relu(feat4)
        pool = F.avg_pool2d(feat4, 4)
        pool = pool.view(pool.size(0), -1)
        out = self.linear(pool)

        if is_feat:
            return[feat1, feat2, feat3, feat4], pool, out

        return out

    def get_bn_before_relu(self):
        """Return the last BatchNorm layer of the final block of each stage.

        Raises:
            TypeError: If the stages are not built from ``Bottleneck`` or
                ``BasicBlock``. Previously this case printed a message and
                then failed with an unrelated ``UnboundLocalError``.
        """
        if isinstance(self.layer1[0], Bottleneck):
            bn1 = self.layer1[-1].bn3
            bn2 = self.layer2[-1].bn3
            bn3 = self.layer3[-1].bn3
            bn4 = self.layer4[-1].bn3
        elif isinstance(self.layer1[0], BasicBlock):
            bn1 = self.layer1[-1].bn2
            bn2 = self.layer2[-1].bn2
            bn3 = self.layer3[-1].bn2
            bn4 = self.layer4[-1].bn2
        else:
            raise TypeError('ResNet unknown block error !!!')

        return [bn1, bn2, bn3, bn4]

    def get_channel_num(self):
        """Return the per-stage base channel widths stored at construction."""
        return self.n_channels

    def extract_feature(self, x, preReLU=False):
        """Return ``([feat1, feat2, feat3, feat4], logits)``.

        With ``preReLU`` true the stage outputs are returned as produced;
        otherwise each is passed through ReLU before being returned.

        NOTE(review): unlike ``forward(use_relu=True)``, no ReLU is inserted
        after the stem or between stages here, so the logits from this method
        are in general not the ones ``forward`` produces - confirm intent.
        """

        x = self.conv1(x)
        x = self.bn1(x)

        feat1 = self.layer1(x)
        feat2 = self.layer2(feat1)
        feat3 = self.layer3(feat2)
        feat4 = self.layer4(feat3)

        x = F.relu(feat4)
        x = F.avg_pool2d(x, 4)
        x = x.view(x.size(0), -1)
        out = self.linear(x)

        if not preReLU:
            feat1 = F.relu(feat1)
            feat2 = F.relu(feat2)
            feat3 = F.relu(feat3)
            feat4 = F.relu(feat4)

        return [feat1, feat2, feat3, feat4], out


class ResNetSmall(nn.Module):
    """Three-stage residual network with a 3x3 stem and a linear classifier.

    The stem emits 64 channels; the stages use base widths 16/32/64 and the
    last two halve the spatial resolution. The head average-pools with a fixed
    4x4 window and the classifier expects ``256 * block.expansion`` features,
    which lines up for 32x32 inputs (8x8 map -> 2x2 after pooling, times
    ``64 * block.expansion`` channels).
    """

    def __init__(self, block, num_blocks, num_classes=10):
        """
        Args:
            block: Residual block class. It must expose an ``expansion``
                attribute and accept ``(in_planes, planes, stride)``.
            num_blocks: Sequence with the number of blocks for each of the
                three stages.
            num_classes: Output size of the final linear layer.
        """
        super(ResNetSmall, self).__init__()
        # Running channel count; _make_layer updates it as blocks are stacked.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(256 * block.expansion, num_classes)
        # Base widths of the three stages (not multiplied by block.expansion).
        self.n_channels = [16, 32, 64]

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, is_feat=False, use_relu=True):
        """Classify ``x``.

        Args:
            x: Input image batch.
            is_feat: When true, also return the stage outputs and the pooled
                feature vector.
            use_relu: When true, apply ReLU after the stem and after stages
                1-2. The ReLU after stage 3 is applied regardless.

        Returns:
            The logits, or ``([feat1, feat2, feat3], pool, logits)`` when
            ``is_feat`` is true.
        """
        out = self.conv1(x)
        out = self.bn1(out)
        if use_relu:
            out = F.relu(out)
        feat1 = self.layer1(out)
        if use_relu:
            feat1 = F.relu(feat1)
        feat2 = self.layer2(feat1)
        if use_relu:
            feat2 = F.relu(feat2)
        feat3 = self.layer3(feat2)

        # the last relu is always included
        feat3 = F.relu(feat3)
        pool = F.avg_pool2d(feat3, 4)
        pool = pool.view(pool.size(0), -1)
        out = self.linear(pool)

        if is_feat:
            return[feat1, feat2, feat3], pool, out

        return out

    def get_bn_before_relu(self):
        """Return the last BatchNorm layer of the final block of each stage.

        Raises:
            TypeError: If the stages are not built from ``Bottleneck`` or
                ``BasicBlock``. Previously this case printed a message and
                then failed with an unrelated ``UnboundLocalError``.
        """
        if isinstance(self.layer1[0], Bottleneck):
            bn1 = self.layer1[-1].bn3
            bn2 = self.layer2[-1].bn3
            bn3 = self.layer3[-1].bn3
        elif isinstance(self.layer1[0], BasicBlock):
            bn1 = self.layer1[-1].bn2
            bn2 = self.layer2[-1].bn2
            bn3 = self.layer3[-1].bn2
        else:
            raise TypeError('ResNet unknown block error !!!')

        return [bn1, bn2, bn3]

    def get_channel_num(self):
        """Return the per-stage base channel widths stored at construction."""
        return self.n_channels

    def extract_feature(self, x, preReLU=False):
        """Return ``([feat1, feat2, feat3], logits)``.

        With ``preReLU`` true the stage outputs are returned as produced;
        otherwise each is passed through ReLU before being returned.

        NOTE(review): unlike ``forward(use_relu=True)``, no ReLU is inserted
        after the stem or between stages here, so the logits from this method
        are in general not the ones ``forward`` produces - confirm intent.
        """

        x = self.conv1(x)
        x = self.bn1(x)

        feat1 = self.layer1(x)
        feat2 = self.layer2(feat1)
        feat3 = self.layer3(feat2)

        x = F.relu(feat3)
        x = F.avg_pool2d(x, 4)
        x = x.view(x.size(0), -1)
        out = self.linear(x)

        if not preReLU:
            feat1 = F.relu(feat1)
            feat2 = F.relu(feat2)
            feat3 = F.relu(feat3)

        return [feat1, feat2, feat3], out


def resnet8(**kwargs):
    """ResNetSmall with 1 BasicBlock per stage; ``kwargs`` go to the constructor."""
    return ResNetSmall(BasicBlock, [1] * 3, **kwargs)


def resnet14(**kwargs):
    """ResNetSmall with 2 BasicBlocks per stage; ``kwargs`` go to the constructor."""
    return ResNetSmall(BasicBlock, [2] * 3, **kwargs)


def resnet20(**kwargs):
    """ResNetSmall with 3 BasicBlocks per stage; ``kwargs`` go to the constructor."""
    return ResNetSmall(BasicBlock, [3] * 3, **kwargs)


def resnet26(**kwargs):
    """ResNetSmall with 4 BasicBlocks per stage; ``kwargs`` go to the constructor."""
    return ResNetSmall(BasicBlock, [4] * 3, **kwargs)


def resnet32(**kwargs):
    """ResNetSmall with 5 BasicBlocks per stage; ``kwargs`` go to the constructor."""
    return ResNetSmall(BasicBlock, [5] * 3, **kwargs)


def resnet44(**kwargs):
    """ResNetSmall with 7 BasicBlocks per stage; ``kwargs`` go to the constructor."""
    return ResNetSmall(BasicBlock, [7] * 3, **kwargs)


def resnet56(**kwargs):
    """ResNetSmall with 9 BasicBlocks per stage; ``kwargs`` go to the constructor."""
    return ResNetSmall(BasicBlock, [9] * 3, **kwargs)


def resnet10(**kwargs):
    """ResNet built from BasicBlock with stage depths 1-1-1-1."""
    stage_depths = [1, 1, 1, 1]
    return ResNet(BasicBlock, stage_depths, **kwargs)


def resnet18(**kwargs):
    """ResNet built from BasicBlock with stage depths 2-2-2-2."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths, **kwargs)


def resnet34(**kwargs):
    """ResNet built from BasicBlock with stage depths 3-4-6-3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, **kwargs)


def resnet50(**kwargs):
    """ResNet built from Bottleneck with stage depths 3-4-6-3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)


def resnet101(**kwargs):
    """ResNet built from Bottleneck with stage depths 3-4-23-3."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)


def resnet152(**kwargs):
    """ResNet built from Bottleneck with stage depths 3-8-36-3."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)


def test():
    """Smoke test: push one random 32x32 RGB image through resnet18 and print the output size."""
    model = resnet18()
    logits = model(torch.randn(1, 3, 32, 32))
    print(logits.size())

In [6]:
def load_checkpoint(model, checkpoint_path, device):
    """Load weights stored under ``"model_state_dict"`` in a checkpoint file into ``model``.

    Works whether or not the checkpoint was written from a ``DataParallel``
    wrapper (a leading ``"module."`` in a key is stripped) and whether or not
    ``model`` itself is such a wrapper.

    Args:
        model: Module (or ``torch.nn.DataParallel`` wrapper) to populate.
        checkpoint_path: File readable by ``torch.load``.
        device: Device spec the stored tensors are mapped to while loading.

    Returns:
        The same ``model`` object that was passed in.
    """
    # NOTE: torch.load unpickles the file - only open checkpoints you trust.
    checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))

    wrapper_prefix = "module."
    cleaned = OrderedDict(
        (key[len(wrapper_prefix):] if key.startswith(wrapper_prefix) else key, tensor)
        for key, tensor in checkpoint["model_state_dict"].items()
    )

    # A DataParallel wrapper keeps the real network in `.module`.
    receiver = model.module if isinstance(model, torch.nn.DataParallel) else model
    receiver.load_state_dict(cleaned)
    return model

In [7]:
teacher_model = resnet18()
PATH = "../input/model-weights/best_model (1).pt"
# The file is expected to hold a bare state_dict. Map the stored tensors to
# the CPU while reading so the checkpoint also opens on a machine that lacks
# the device it was saved from; the model is moved to its device separately.
teacher_model.load_state_dict(torch.load(PATH, map_location="cpu"))

<All keys matched successfully>

In [8]:
# Switch the teacher to evaluation mode. eval() is the cell's last expression,
# so the module it returns is echoed as the cell output.
teacher_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [9]:
# Move the teacher to DEVICE; the returned module is echoed as the cell output.
teacher_model.to(DEVICE)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [10]:
#avg_val_loss, score = valid_fn(test_loader, teacher_model, criterion, DEVICE)

In [11]:
#score

In [12]:
#for param in teacher_model.parameters():
       # param.requires_grad=False

In [13]:
#for param in teacher_model.linear.parameters():
    #param.requires_grad = True

In [14]:
#import torchvision.models as models
#resnet18 = models.resnet18(pretrained=True)

In [15]:
#teacher_model=resnet18

In [16]:
#teacher_model.fc

In [17]:
#for param in teacher_model.parameters():
   # param.requires_grad = False

In [18]:
#teacher_model.fc= nn.Linear(512,10)

In [19]:
# Student network: ResNetSmall with one BasicBlock per stage and the default
# number of output classes.
student_model=resnet8()

In [20]:
learning_rate = 0.1
num_epochs = 200
# SGD with Nesterov momentum; the initial rate now comes from `learning_rate`
# instead of a second hard-coded copy of the same value.
optimizer_student = torch.optim.SGD(student_model.parameters(), lr=learning_rate,
                                    momentum=0.9, weight_decay=0.0005, nesterov=True)
# Multiply the learning rate by `gamma` at epochs 66 and 132.
# NOTE(review): `verbose` is what prints the "Adjusting learning rate" lines;
# newer PyTorch releases appear to deprecate it - confirm against the
# installed version before upgrading.
scheduler_student = torch.optim.lr_scheduler.MultiStepLR(optimizer_student, milestones=[66, 132],
                                                         gamma=0.1, verbose=True)

Adjusting learning rate of group 0 to 1.0000e-01.


In [21]:
def freeze_teacher(t_net):
    """Disable gradients for every parameter of ``t_net`` and put it in eval mode.

    The module is modified in place and also returned, so the call can be
    used inline.
    """
    # Stop autograd from tracking any of the teacher's weights.
    for weight in t_net.parameters():
        weight.requires_grad = False

    # Evaluation mode for the whole module tree.
    t_net.eval()

    return t_net

In [22]:
# Move the student to DEVICE; the returned module is echoed as the cell output.
student_model.to(DEVICE)

ResNetSmall(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(32, eps=1e-05,

In [23]:
#avg_val_loss, score = valid_fn(test_loader, student_model, criterion, DEVICE)
#score

In [24]:
def init_progress_bar(train_loader):
    """Create a tqdm bar whose total is ``len(train_loader) * train_loader.batch_size``.

    The layout shows description, percentage, counts, elapsed/remaining time
    and postfix; the graphical ``|{bar}|`` segment is intentionally left out.
    When stderr is not a TTY the bar is still created, but its clock is
    replaced by a constant so it can run with redirected stderr.
    """
    per_batch = train_loader.batch_size
    layout = "".join((
        "{desc}{percentage:3.0f}%",
        " {n_fmt}/{total_fmt} [{elapsed} < {remaining}]",
        "{postfix}",
    ))
    stderr_is_tty = sys.stderr.isatty()
    progress = tqdm(total=len(train_loader) * per_batch,
                    bar_format=layout)
    if not stderr_is_tty:
        # a trick to allow execution in environments where stderr is redirected
        progress._time = lambda: 0.0
    return progress

In [25]:
# NOTE(review): this prints the repr of the function object itself, not a
# progress bar - looks like leftover debugging.
print(init_progress_bar)

<function init_progress_bar at 0x7fb5df020b00>


In [26]:
class AverageMeter:
    """Running weighted average that also remembers the latest value.

    Attributes set by ``reset``/``update``: ``val`` (last value), ``sum``
    (weighted total), ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

def get_score(y_true, y_pred):
    """Score predictions against the ground truth with sklearn's ``accuracy_score``."""
    accuracy = accuracy_score(y_true, y_pred)
    return accuracy

In [27]:
def train_fn(train_loader, model, criterion, optimizer, scheduler, device, alpha, temperature,
             teacher=None):
    """Train ``model`` for one epoch with knowledge distillation.

    The per-batch loss is
    ``alpha * T^2 * KL(softmax(teacher/T) || softmax(student/T))
    + (1 - alpha) * cross_entropy(student, labels)`` with ``T = temperature``.

    Args:
        train_loader: Iterable of ``(images, labels)`` batches that supports ``len``.
        model: Student network to optimize; switched to train mode here.
        criterion: Unused; kept so existing positional calls keep working.
        optimizer: Optimizer over the student's parameters.
        scheduler: Unused; kept so existing positional calls keep working.
        device: Device the batches are moved to.
        alpha: Weight of the distillation term; ``1 - alpha`` weights the
            hard-label term.
        temperature: Softmax temperature applied to both sets of logits.
        teacher: Network providing the soft targets. When omitted, the
            notebook-level ``teacher_model`` is used, as before. Its mode is
            not changed here, so put it in eval mode beforehand.

    Returns:
        The sample-weighted mean of the combined loss over the epoch.
    """
    if teacher is None:
        # Backward-compatible default: the teacher defined at notebook level.
        teacher = teacher_model

    losses = AverageMeter()

    # Train mode only needs to be set once per epoch, not once per batch.
    model.train()
    for images, labels in tqdm(train_loader, total=len(train_loader)):
        images = images.to(device, dtype=torch.float)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # The teacher only supplies targets, so no graph is built for it.
        with torch.no_grad():
            large_logits = teacher(images)
        y_preds = model(images)

        soft_targets_loss = F.kl_div(F.log_softmax(y_preds / temperature, dim=1),
                                     F.softmax(large_logits / temperature, dim=1),
                                     reduction='batchmean')
        label_loss = F.cross_entropy(y_preds, labels)
        # Weighted sum of the two losses; T^2 keeps the soft-target gradients
        # on a scale comparable to the hard-label term.
        loss = alpha * (temperature * temperature) * soft_targets_loss + (1 - alpha) * label_loss

        losses.update(loss.item(), batch_size)

        # Clear gradients before the backward pass so that anything left over
        # from an interrupted earlier run cannot leak into this step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return losses.avg

def valid_fn(test_loader, model, criterion, device):
    """Evaluate ``model`` on ``test_loader``.

    Args:
        test_loader: Iterable of ``(images, labels)`` batches that supports ``len``.
        model: Network to evaluate; it is switched to eval mode and left there.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, score)``: the sample-weighted mean loss, and the result
        of ``get_score`` on the arg-max predictions.
    """
    losses = AverageMeter()
    preds = []
    valid_labels = []

    model.eval()
    # Nothing in this loop needs gradients, including the loss.
    with torch.no_grad():
        for images, labels in tqdm(test_loader, total=len(test_loader)):
            images = images.to(device, dtype=torch.float)
            labels = labels.to(device)

            y_preds = model(images)
            loss = criterion(y_preds, labels)
            losses.update(loss.item(), labels.size(0))

            preds.append(y_preds.softmax(1).to('cpu').numpy())
            valid_labels.append(labels.to('cpu').numpy())

    predictions = np.concatenate(preds)
    valid_labels = np.concatenate(valid_labels)

    score = get_score(valid_labels, predictions.argmax(1))

    return losses.avg, score

In [28]:
def calculate_loss(model, data, target, optimizer=None):
    """Run one supervised step: forward, cross-entropy, backward, optimizer step.

    Gradients are not zeroed here; the caller has to do that between steps.

    Args:
        model: Network being trained.
        data: Input batch.
        target: Class-index labels for ``data``.
        optimizer: Optimizer to step. When omitted, the notebook-level
            ``optimizer_teacher`` is used, which must then exist.

    Returns:
        ``(output, loss)``: the model output and the loss tensor.
    """
    if optimizer is None:
        # Legacy behavior: rely on a global optimizer defined by the notebook.
        optimizer = optimizer_teacher
    # Standard Learning Loss ( Classification Loss)
    output = model(data)
    loss = F.cross_entropy(output, target)
    loss.backward()
    optimizer.step()
    return output, loss

In [29]:
 def save(model, epoch, name):
     """Write ``model``'s state dict to ``name`` under the key ``"model_state_dict"``.

     ``epoch`` is accepted but not stored.
     """
     payload = {"model_state_dict": model.state_dict()}
     torch.save(payload, name)

In [30]:
# CIFAR-10 loaders with 128-image batches, stored under ./data.
train_loader, test_loader = get_cifar(
    num_classes=10,
    dataset_dir="./data",
    batch_size=128,
    use_cifar_10_1=False,
)

Loading CIFAR10...
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [31]:
# Cross-entropy on hard labels; valid_fn uses it to compute the validation loss.
criterion = nn.CrossEntropyLoss()

In [32]:
# Distillation hyper-parameters handed to train_fn.
ALPHA = 0.5        # weight of the soft-target (teacher) term
TEMPERATURE = 5    # softmax temperature for teacher and student logits

best_score = 0

# Moving the model once is enough; it does not need to be repeated per epoch.
student_model.to(DEVICE)

for epoch in range(num_epochs):
    avg_loss = train_fn(train_loader, student_model, criterion, optimizer_student, None, DEVICE,
                        ALPHA, TEMPERATURE)
    avg_val_loss, score = valid_fn(test_loader, student_model, criterion, DEVICE)

    # The learning-rate schedule advances once per epoch.
    scheduler_student.step()

    print(f"Epoch {epoch+1}")
    print(f"Accuracy: {score} | Train loss: {avg_loss} | Valid loss: {avg_val_loss}")

    # NOTE(review): the best checkpoint is selected on the test loader, so the
    # reported best accuracy is optimistically biased - consider a held-out
    # validation split.
    if score > best_score:
        print("YES")
        best_score = score
        torch.save(student_model.state_dict(), "best_model.pt")

100%|██████████| 391/391 [00:29<00:00, 13.11it/s]
100%|██████████| 79/79 [00:02<00:00, 33.11it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 1
Accuracy: 0.5911 | Train loss: 5.2459932881164555 | Valid loss: 1.4532183403015138
YES



100%|██████████| 391/391 [00:24<00:00, 16.29it/s]
100%|██████████| 79/79 [00:02<00:00, 29.26it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 2
Accuracy: 0.6962 | Train loss: 3.6960182596588136 | Valid loss: 1.0531272861480714
YES



100%|██████████| 391/391 [00:23<00:00, 16.85it/s]
100%|██████████| 79/79 [00:02<00:00, 34.13it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 3
Accuracy: 0.6674 | Train loss: 3.136332599945068 | Valid loss: 1.352709206867218



100%|██████████| 391/391 [00:23<00:00, 16.36it/s]
100%|██████████| 79/79 [00:02<00:00, 29.98it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 4
Accuracy: 0.7334 | Train loss: 2.8956486252593994 | Valid loss: 0.9737015485763549
YES


100%|██████████| 391/391 [00:24<00:00, 16.22it/s]
100%|██████████| 79/79 [00:02<00:00, 32.39it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 5
Accuracy: 0.7267 | Train loss: 2.7338262342071533 | Valid loss: 1.063637871170044



100%|██████████| 391/391 [00:23<00:00, 16.77it/s]
100%|██████████| 79/79 [00:02<00:00, 27.26it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 6
Accuracy: 0.755 | Train loss: 2.6197181665802 | Valid loss: 0.8998661253929138
YES



100%|██████████| 391/391 [00:23<00:00, 16.80it/s]
100%|██████████| 79/79 [00:02<00:00, 28.66it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 7
Accuracy: 0.7348 | Train loss: 2.542781431427002 | Valid loss: 0.992973273897171



100%|██████████| 391/391 [00:24<00:00, 16.20it/s]
100%|██████████| 79/79 [00:02<00:00, 27.26it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 8
Accuracy: 0.7487 | Train loss: 2.4900185099792482 | Valid loss: 0.8576085222244263


100%|██████████| 391/391 [00:23<00:00, 16.48it/s]
100%|██████████| 79/79 [00:02<00:00, 29.43it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 9
Accuracy: 0.7674 | Train loss: 2.4486731803512574 | Valid loss: 0.7804741245269775
YES



100%|██████████| 391/391 [00:24<00:00, 15.95it/s]
100%|██████████| 79/79 [00:02<00:00, 33.06it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 10
Accuracy: 0.7222 | Train loss: 2.4044998119354246 | Valid loss: 1.0155027564048766



100%|██████████| 391/391 [00:24<00:00, 16.17it/s]
100%|██████████| 79/79 [00:02<00:00, 27.32it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 11
Accuracy: 0.7521 | Train loss: 2.3758681364440917 | Valid loss: 0.8664790639877319



100%|██████████| 391/391 [00:23<00:00, 16.45it/s]
100%|██████████| 79/79 [00:02<00:00, 34.51it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 12
Accuracy: 0.7031 | Train loss: 2.342071209259033 | Valid loss: 1.230457631111145



100%|██████████| 391/391 [00:24<00:00, 16.02it/s]
100%|██████████| 79/79 [00:02<00:00, 32.66it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 13
Accuracy: 0.7243 | Train loss: 2.3290884481048586 | Valid loss: 1.0909849885940552



100%|██████████| 391/391 [00:24<00:00, 16.05it/s]
100%|██████████| 79/79 [00:02<00:00, 31.03it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 14
Accuracy: 0.7884 | Train loss: 2.291749322090149 | Valid loss: 0.7251778803348541
YES



100%|██████████| 391/391 [00:24<00:00, 15.85it/s]
100%|██████████| 79/79 [00:02<00:00, 33.85it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 15
Accuracy: 0.7783 | Train loss: 2.2937952362823486 | Valid loss: 0.8178634259223938



100%|██████████| 391/391 [00:24<00:00, 15.69it/s]
100%|██████████| 79/79 [00:02<00:00, 33.04it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 16
Accuracy: 0.7398 | Train loss: 2.2746856816864014 | Valid loss: 1.018584143638611



100%|██████████| 391/391 [00:24<00:00, 15.82it/s]
100%|██████████| 79/79 [00:02<00:00, 32.52it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 17
Accuracy: 0.7748 | Train loss: 2.2502646057128906 | Valid loss: 0.8388200464725495



100%|██████████| 391/391 [00:25<00:00, 15.51it/s]
100%|██████████| 79/79 [00:02<00:00, 32.00it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 18
Accuracy: 0.794 | Train loss: 2.2495349439239503 | Valid loss: 0.7385698530197143
YES



100%|██████████| 391/391 [00:23<00:00, 16.88it/s]
100%|██████████| 79/79 [00:03<00:00, 23.78it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 19
Accuracy: 0.762 | Train loss: 2.2463958057403564 | Valid loss: 0.8647711307525635


100%|██████████| 391/391 [00:23<00:00, 16.37it/s]
100%|██████████| 79/79 [00:02<00:00, 28.82it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 20
Accuracy: 0.7333 | Train loss: 2.210467993850708 | Valid loss: 1.0644457162857055



100%|██████████| 391/391 [00:25<00:00, 15.44it/s]
100%|██████████| 79/79 [00:02<00:00, 33.85it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 21
Accuracy: 0.8042 | Train loss: 2.201124726791382 | Valid loss: 0.6953538774490357
YES



100%|██████████| 391/391 [00:24<00:00, 15.72it/s]
100%|██████████| 79/79 [00:02<00:00, 26.52it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 22
Accuracy: 0.6342 | Train loss: 2.2049269438934327 | Valid loss: 1.5623875034332275



100%|██████████| 391/391 [00:24<00:00, 15.86it/s]
100%|██████████| 79/79 [00:03<00:00, 25.86it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 23
Accuracy: 0.786 | Train loss: 2.1907079554748536 | Valid loss: 0.7557400109291077



100%|██████████| 391/391 [00:23<00:00, 16.37it/s]
100%|██████████| 79/79 [00:02<00:00, 29.46it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 24
Accuracy: 0.7367 | Train loss: 2.1772275148010256 | Valid loss: 1.0328127502441407



100%|██████████| 391/391 [00:24<00:00, 15.82it/s]
100%|██████████| 79/79 [00:02<00:00, 28.61it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 25
Accuracy: 0.7782 | Train loss: 2.1733144013977053 | Valid loss: 0.8014558641433716



100%|██████████| 391/391 [00:24<00:00, 15.74it/s]
100%|██████████| 79/79 [00:02<00:00, 27.06it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 26
Accuracy: 0.8043 | Train loss: 2.1719592234039307 | Valid loss: 0.6840579719543457
YES



100%|██████████| 391/391 [00:25<00:00, 15.51it/s]
100%|██████████| 79/79 [00:02<00:00, 33.45it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 27
Accuracy: 0.7895 | Train loss: 2.163428930892944 | Valid loss: 0.715128225183487



100%|██████████| 391/391 [00:23<00:00, 16.34it/s]
100%|██████████| 79/79 [00:02<00:00, 28.30it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 28
Accuracy: 0.772 | Train loss: 2.146210626068115 | Valid loss: 0.9023986013412476



100%|██████████| 391/391 [00:25<00:00, 15.38it/s]
100%|██████████| 79/79 [00:02<00:00, 33.27it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 29
Accuracy: 0.7782 | Train loss: 2.15388124671936 | Valid loss: 0.8747119518756866



100%|██████████| 391/391 [00:25<00:00, 15.15it/s]
100%|██████████| 79/79 [00:02<00:00, 31.86it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 30
Accuracy: 0.768 | Train loss: 2.150815556793213 | Valid loss: 0.832497080039978



100%|██████████| 391/391 [00:24<00:00, 15.79it/s]
100%|██████████| 79/79 [00:03<00:00, 23.29it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 31
Accuracy: 0.7885 | Train loss: 2.147092815170288 | Valid loss: 0.7372188472747803



100%|██████████| 391/391 [00:22<00:00, 17.11it/s]
100%|██████████| 79/79 [00:02<00:00, 28.94it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 32
Accuracy: 0.7623 | Train loss: 2.143392378692627 | Valid loss: 0.8681532872200012



100%|██████████| 391/391 [00:23<00:00, 16.52it/s]
100%|██████████| 79/79 [00:02<00:00, 28.18it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 33
Accuracy: 0.7306 | Train loss: 2.132242653961182 | Valid loss: 1.032042335319519



100%|██████████| 391/391 [00:25<00:00, 15.34it/s]
100%|██████████| 79/79 [00:02<00:00, 26.46it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 34
Accuracy: 0.7868 | Train loss: 2.1255972885131835 | Valid loss: 0.8156742771148682



100%|██████████| 391/391 [00:23<00:00, 16.34it/s]
100%|██████████| 79/79 [00:02<00:00, 33.80it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 35
Accuracy: 0.7693 | Train loss: 2.1219397579193116 | Valid loss: 0.9345163139343262



100%|██████████| 391/391 [00:23<00:00, 16.30it/s]
100%|██████████| 79/79 [00:03<00:00, 22.34it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 36
Accuracy: 0.7693 | Train loss: 2.138714711456299 | Valid loss: 0.8088102536201477



100%|██████████| 391/391 [00:23<00:00, 16.79it/s]
100%|██████████| 79/79 [00:02<00:00, 32.61it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 37
Accuracy: 0.7885 | Train loss: 2.1255067198562623 | Valid loss: 0.7415057097434997



100%|██████████| 391/391 [00:23<00:00, 16.52it/s]
100%|██████████| 79/79 [00:02<00:00, 32.07it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 38
Accuracy: 0.7703 | Train loss: 2.115901065826416 | Valid loss: 0.8404972694396973



100%|██████████| 391/391 [00:26<00:00, 14.81it/s]
100%|██████████| 79/79 [00:02<00:00, 33.31it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 39
Accuracy: 0.7767 | Train loss: 2.1169390394592287 | Valid loss: 0.8032568841934205



100%|██████████| 391/391 [00:23<00:00, 16.63it/s]
100%|██████████| 79/79 [00:02<00:00, 32.50it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 40
Accuracy: 0.7959 | Train loss: 2.1069826663970948 | Valid loss: 0.7413589211702347



100%|██████████| 391/391 [00:24<00:00, 16.24it/s]
100%|██████████| 79/79 [00:04<00:00, 17.45it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 41
Accuracy: 0.7763 | Train loss: 2.104770355834961 | Valid loss: 0.8628595997810363



100%|██████████| 391/391 [00:23<00:00, 16.53it/s]
100%|██████████| 79/79 [00:02<00:00, 31.85it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 42
Accuracy: 0.7151 | Train loss: 2.1055888455200193 | Valid loss: 1.245012561225891



100%|██████████| 391/391 [00:23<00:00, 16.39it/s]
100%|██████████| 79/79 [00:02<00:00, 33.50it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 43
Accuracy: 0.7179 | Train loss: 2.0986851176834107 | Valid loss: 1.2360649406433106



100%|██████████| 391/391 [00:26<00:00, 14.59it/s]
100%|██████████| 79/79 [00:02<00:00, 31.67it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 44
Accuracy: 0.7784 | Train loss: 2.093701414260864 | Valid loss: 0.7931320343017578



100%|██████████| 391/391 [00:23<00:00, 16.31it/s]
100%|██████████| 79/79 [00:02<00:00, 32.97it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 45
Accuracy: 0.7756 | Train loss: 2.090849201774597 | Valid loss: 0.8194795572280884



100%|██████████| 391/391 [00:23<00:00, 16.29it/s]
100%|██████████| 79/79 [00:03<00:00, 20.65it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 46
Accuracy: 0.7511 | Train loss: 2.0970375886154176 | Valid loss: 0.9553573280334473



100%|██████████| 391/391 [00:26<00:00, 14.71it/s]
100%|██████████| 79/79 [00:02<00:00, 32.11it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 47
Accuracy: 0.7864 | Train loss: 2.0780113919067382 | Valid loss: 0.7717699928283691


100%|██████████| 391/391 [00:24<00:00, 16.16it/s]
100%|██████████| 79/79 [00:02<00:00, 28.33it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 48
Accuracy: 0.7993 | Train loss: 2.08648649520874 | Valid loss: 0.7105571467876435



100%|██████████| 391/391 [00:27<00:00, 13.98it/s]
100%|██████████| 79/79 [00:02<00:00, 32.72it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 49
Accuracy: 0.8084 | Train loss: 2.0727580799865724 | Valid loss: 0.6628872490882873
YES



100%|██████████| 391/391 [00:24<00:00, 16.00it/s]
100%|██████████| 79/79 [00:02<00:00, 31.18it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 50
Accuracy: 0.7815 | Train loss: 2.084904588317871 | Valid loss: 0.8177093288421631



100%|██████████| 391/391 [00:23<00:00, 16.67it/s]
100%|██████████| 79/79 [00:02<00:00, 29.87it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 51
Accuracy: 0.7812 | Train loss: 2.0957587628173826 | Valid loss: 0.8293620406150818



100%|██████████| 391/391 [00:27<00:00, 14.44it/s]
100%|██████████| 79/79 [00:02<00:00, 31.95it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 52
Accuracy: 0.7856 | Train loss: 2.079748377532959 | Valid loss: 0.8576093012809753



100%|██████████| 391/391 [00:22<00:00, 17.17it/s]
100%|██████████| 79/79 [00:02<00:00, 33.69it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 53
Accuracy: 0.8151 | Train loss: 2.0830108187103273 | Valid loss: 0.6395877294063568
YES



100%|██████████| 391/391 [00:26<00:00, 14.95it/s]
100%|██████████| 79/79 [00:03<00:00, 24.64it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 54
Accuracy: 0.7672 | Train loss: 2.091962220611572 | Valid loss: 0.8480924875259399



100%|██████████| 391/391 [00:23<00:00, 16.70it/s]
100%|██████████| 79/79 [00:02<00:00, 33.94it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 55
Accuracy: 0.784 | Train loss: 2.0808982690429687 | Valid loss: 0.8733763078689575



100%|██████████| 391/391 [00:23<00:00, 16.65it/s]
100%|██████████| 79/79 [00:02<00:00, 29.35it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 56
Accuracy: 0.7702 | Train loss: 2.074464808769226 | Valid loss: 0.8972636824607849



100%|██████████| 391/391 [00:26<00:00, 14.69it/s]
100%|██████████| 79/79 [00:02<00:00, 33.23it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 57
Accuracy: 0.8019 | Train loss: 2.0760507538223267 | Valid loss: 0.7155349405050278



100%|██████████| 391/391 [00:22<00:00, 17.03it/s]
100%|██████████| 79/79 [00:03<00:00, 21.51it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 58
Accuracy: 0.798 | Train loss: 2.0579007740020754 | Valid loss: 0.7406924889564515


100%|██████████| 391/391 [00:22<00:00, 17.05it/s]
100%|██████████| 79/79 [00:02<00:00, 33.71it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 59
Accuracy: 0.7892 | Train loss: 2.0592841082000732 | Valid loss: 0.790846566581726



100%|██████████| 391/391 [00:26<00:00, 14.54it/s]
100%|██████████| 79/79 [00:02<00:00, 29.38it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 60
Accuracy: 0.8093 | Train loss: 2.0822468092346194 | Valid loss: 0.6479056288719177


100%|██████████| 391/391 [00:23<00:00, 16.78it/s]
100%|██████████| 79/79 [00:02<00:00, 32.16it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 61
Accuracy: 0.7858 | Train loss: 2.0595430970001223 | Valid loss: 0.7783059041023255



100%|██████████| 391/391 [00:23<00:00, 16.61it/s]
100%|██████████| 79/79 [00:02<00:00, 32.11it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 62
Accuracy: 0.7597 | Train loss: 2.062592986984253 | Valid loss: 1.0235928668498993



100%|██████████| 391/391 [00:24<00:00, 16.25it/s]
100%|██████████| 79/79 [00:02<00:00, 33.58it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 63
Accuracy: 0.7648 | Train loss: 2.068670427284241 | Valid loss: 0.9135405831336975



100%|██████████| 391/391 [00:22<00:00, 17.28it/s]
100%|██████████| 79/79 [00:02<00:00, 33.43it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 64
Accuracy: 0.7996 | Train loss: 2.0637632543945315 | Valid loss: 0.7261215435028077



100%|██████████| 391/391 [00:27<00:00, 14.25it/s]
100%|██████████| 79/79 [00:02<00:00, 33.52it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 65
Accuracy: 0.7666 | Train loss: 2.065695621643066 | Valid loss: 0.9639230689048767



100%|██████████| 391/391 [00:23<00:00, 16.95it/s]
100%|██████████| 79/79 [00:02<00:00, 35.24it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 66
Accuracy: 0.7874 | Train loss: 2.0675211535644533 | Valid loss: 0.7903426122665406



100%|██████████| 391/391 [00:23<00:00, 16.47it/s]
100%|██████████| 79/79 [00:02<00:00, 29.56it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 67
Accuracy: 0.8728 | Train loss: 1.687469095954895 | Valid loss: 0.43068924078941345
YES



100%|██████████| 391/391 [00:23<00:00, 16.76it/s]
100%|██████████| 79/79 [00:02<00:00, 34.58it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 68
Accuracy: 0.8733 | Train loss: 1.5996438777923585 | Valid loss: 0.42616369910240176
YES



100%|██████████| 391/391 [00:23<00:00, 16.70it/s]
100%|██████████| 79/79 [00:02<00:00, 28.03it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 69
Accuracy: 0.874 | Train loss: 1.5633667018508912 | Valid loss: 0.4335493165493011
YES



100%|██████████| 391/391 [00:28<00:00, 13.63it/s]
100%|██████████| 79/79 [00:02<00:00, 31.29it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 70
Accuracy: 0.8751 | Train loss: 1.539978230819702 | Valid loss: 0.42285932846069335
YES



100%|██████████| 391/391 [00:23<00:00, 16.44it/s]
100%|██████████| 79/79 [00:02<00:00, 27.87it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 71
Accuracy: 0.8774 | Train loss: 1.5134261266326905 | Valid loss: 0.41495453395843507
YES


100%|██████████| 391/391 [00:24<00:00, 16.12it/s]
100%|██████████| 79/79 [00:02<00:00, 32.58it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 72
Accuracy: 0.8793 | Train loss: 1.5045610260009765 | Valid loss: 0.4209101012706757
YES



100%|██████████| 391/391 [00:23<00:00, 16.59it/s]
100%|██████████| 79/79 [00:02<00:00, 33.71it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 73
Accuracy: 0.8799 | Train loss: 1.49420069480896 | Valid loss: 0.4194325143098831
YES



100%|██████████| 391/391 [00:23<00:00, 16.55it/s]
100%|██████████| 79/79 [00:02<00:00, 32.39it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 74
Accuracy: 0.879 | Train loss: 1.4850942763137818 | Valid loss: 0.42239613938331605



100%|██████████| 391/391 [00:26<00:00, 14.70it/s]
100%|██████████| 79/79 [00:02<00:00, 28.93it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 75
Accuracy: 0.8789 | Train loss: 1.4793572687149048 | Valid loss: 0.411213853764534



100%|██████████| 391/391 [00:23<00:00, 16.85it/s]
100%|██████████| 79/79 [00:02<00:00, 33.98it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 76
Accuracy: 0.8755 | Train loss: 1.4775854949951173 | Valid loss: 0.4271201462745666



100%|██████████| 391/391 [00:23<00:00, 16.96it/s]
100%|██████████| 79/79 [00:02<00:00, 32.93it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 77
Accuracy: 0.8771 | Train loss: 1.4741759379196167 | Valid loss: 0.4198689656257629



100%|██████████| 391/391 [00:23<00:00, 16.69it/s]
100%|██████████| 79/79 [00:02<00:00, 34.29it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 78
Accuracy: 0.8796 | Train loss: 1.4660754024505616 | Valid loss: 0.4033542531967163



100%|██████████| 391/391 [00:23<00:00, 16.71it/s]
100%|██████████| 79/79 [00:02<00:00, 33.97it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 79
Accuracy: 0.8828 | Train loss: 1.46290417137146 | Valid loss: 0.39709066519737246
YES



100%|██████████| 391/391 [00:27<00:00, 14.30it/s]
100%|██████████| 79/79 [00:03<00:00, 23.41it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 80
Accuracy: 0.88 | Train loss: 1.4600545812225343 | Valid loss: 0.41992094311714173



100%|██████████| 391/391 [00:22<00:00, 17.46it/s]
100%|██████████| 79/79 [00:02<00:00, 33.29it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 81
Accuracy: 0.8787 | Train loss: 1.461013398475647 | Valid loss: 0.4156077988624573



100%|██████████| 391/391 [00:24<00:00, 16.08it/s]
100%|██████████| 79/79 [00:02<00:00, 32.11it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 82
Accuracy: 0.8687 | Train loss: 1.4627421419906617 | Valid loss: 0.45904476752281187



100%|██████████| 391/391 [00:23<00:00, 16.78it/s]
100%|██████████| 79/79 [00:02<00:00, 28.29it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 83
Accuracy: 0.8781 | Train loss: 1.4644609492492675 | Valid loss: 0.4169638627052307



100%|██████████| 391/391 [00:23<00:00, 16.98it/s]
100%|██████████| 79/79 [00:02<00:00, 32.23it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 84
Accuracy: 0.8757 | Train loss: 1.4534242136764526 | Valid loss: 0.421073835849762



100%|██████████| 391/391 [00:26<00:00, 14.96it/s]
100%|██████████| 79/79 [00:04<00:00, 16.96it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 85
Accuracy: 0.879 | Train loss: 1.4533411102294922 | Valid loss: 0.42350765647888183



100%|██████████| 391/391 [00:23<00:00, 16.69it/s]
100%|██████████| 79/79 [00:02<00:00, 33.09it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 86
Accuracy: 0.8701 | Train loss: 1.452407183456421 | Valid loss: 0.45476249887943265



100%|██████████| 391/391 [00:23<00:00, 16.70it/s]
100%|██████████| 79/79 [00:02<00:00, 28.47it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 87
Accuracy: 0.8788 | Train loss: 1.4560933234024047 | Valid loss: 0.4184303363323212



100%|██████████| 391/391 [00:22<00:00, 17.11it/s]
100%|██████████| 79/79 [00:02<00:00, 35.13it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 88
Accuracy: 0.8754 | Train loss: 1.4450984339904784 | Valid loss: 0.4383120222091675


100%|██████████| 391/391 [00:24<00:00, 16.18it/s]
100%|██████████| 79/79 [00:02<00:00, 31.47it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 89
Accuracy: 0.8765 | Train loss: 1.4501090004730224 | Valid loss: 0.4353365689754486



100%|██████████| 391/391 [00:25<00:00, 15.60it/s]
100%|██████████| 79/79 [00:04<00:00, 18.06it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 90
Accuracy: 0.8646 | Train loss: 1.4507833004379271 | Valid loss: 0.49327942943573



100%|██████████| 391/391 [00:25<00:00, 15.25it/s]
100%|██████████| 79/79 [00:02<00:00, 28.41it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 91
Accuracy: 0.8783 | Train loss: 1.454761519088745 | Valid loss: 0.4216985266208649



100%|██████████| 391/391 [00:23<00:00, 16.91it/s]
100%|██████████| 79/79 [00:02<00:00, 33.85it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 92
Accuracy: 0.8681 | Train loss: 1.4480263193130494 | Valid loss: 0.4593830427646637



100%|██████████| 391/391 [00:23<00:00, 16.88it/s]
100%|██████████| 79/79 [00:02<00:00, 34.78it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 93
Accuracy: 0.8738 | Train loss: 1.451712084197998 | Valid loss: 0.43926611104011537



100%|██████████| 391/391 [00:22<00:00, 17.24it/s]
100%|██████████| 79/79 [00:02<00:00, 28.94it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 94
Accuracy: 0.8715 | Train loss: 1.4563686330032348 | Valid loss: 0.4615521440982819



100%|██████████| 391/391 [00:23<00:00, 16.88it/s]
100%|██████████| 79/79 [00:03<00:00, 20.35it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 95
Accuracy: 0.875 | Train loss: 1.4511290543746949 | Valid loss: 0.4390024739742279



100%|██████████| 391/391 [00:28<00:00, 13.73it/s]
100%|██████████| 79/79 [00:02<00:00, 26.66it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 96
Accuracy: 0.8622 | Train loss: 1.4539307921218871 | Valid loss: 0.492607169342041


100%|██████████| 391/391 [00:23<00:00, 16.74it/s]
100%|██████████| 79/79 [00:02<00:00, 30.38it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 97
Accuracy: 0.8698 | Train loss: 1.4614700790405273 | Valid loss: 0.4528697712898254



100%|██████████| 391/391 [00:24<00:00, 16.21it/s]
100%|██████████| 79/79 [00:02<00:00, 33.34it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 98
Accuracy: 0.8721 | Train loss: 1.4578680799102783 | Valid loss: 0.4562162552833557



100%|██████████| 391/391 [00:23<00:00, 16.96it/s]
100%|██████████| 79/79 [00:02<00:00, 34.04it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 99
Accuracy: 0.8737 | Train loss: 1.454938731689453 | Valid loss: 0.457194584941864



100%|██████████| 391/391 [00:24<00:00, 16.01it/s]
100%|██████████| 79/79 [00:02<00:00, 27.69it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 100
Accuracy: 0.8749 | Train loss: 1.462174452934265 | Valid loss: 0.4261653784751892



100%|██████████| 391/391 [00:28<00:00, 13.70it/s]
100%|██████████| 79/79 [00:02<00:00, 33.31it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 101
Accuracy: 0.8724 | Train loss: 1.4566146274566651 | Valid loss: 0.4306483284831047



100%|██████████| 391/391 [00:22<00:00, 17.69it/s]
100%|██████████| 79/79 [00:02<00:00, 29.66it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 102
Accuracy: 0.8719 | Train loss: 1.45764461643219 | Valid loss: 0.4688887195587158



100%|██████████| 391/391 [00:23<00:00, 16.39it/s]
100%|██████████| 79/79 [00:02<00:00, 31.25it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 103
Accuracy: 0.8696 | Train loss: 1.458063579788208 | Valid loss: 0.4514457100868225



100%|██████████| 391/391 [00:24<00:00, 15.95it/s]
100%|██████████| 79/79 [00:02<00:00, 30.62it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 104
Accuracy: 0.8677 | Train loss: 1.4465420373916626 | Valid loss: 0.45002357625961303



100%|██████████| 391/391 [00:24<00:00, 15.98it/s]
100%|██████████| 79/79 [00:02<00:00, 34.40it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 105
Accuracy: 0.878 | Train loss: 1.4679697858047485 | Valid loss: 0.40965341720581055


100%|██████████| 391/391 [00:28<00:00, 13.64it/s]
100%|██████████| 79/79 [00:03<00:00, 20.29it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 106
Accuracy: 0.8404 | Train loss: 1.4531523852920532 | Valid loss: 0.5699550251483917



100%|██████████| 391/391 [00:24<00:00, 15.71it/s]
100%|██████████| 79/79 [00:02<00:00, 32.98it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 107
Accuracy: 0.8589 | Train loss: 1.4538647959518434 | Valid loss: 0.4890398461341858



100%|██████████| 391/391 [00:22<00:00, 17.47it/s]
100%|██████████| 79/79 [00:02<00:00, 30.97it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 108
Accuracy: 0.8707 | Train loss: 1.4627762517929077 | Valid loss: 0.4499023980140686



100%|██████████| 391/391 [00:22<00:00, 17.06it/s]
100%|██████████| 79/79 [00:02<00:00, 35.17it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 109
Accuracy: 0.8677 | Train loss: 1.4542396138763427 | Valid loss: 0.4663334119796753


100%|██████████| 391/391 [00:22<00:00, 17.06it/s]
100%|██████████| 79/79 [00:02<00:00, 34.13it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 110
Accuracy: 0.8749 | Train loss: 1.4487161077499389 | Valid loss: 0.4367157715320587



100%|██████████| 391/391 [00:23<00:00, 16.57it/s]
100%|██████████| 79/79 [00:04<00:00, 17.77it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 111
Accuracy: 0.8707 | Train loss: 1.4573065018463134 | Valid loss: 0.4474979465484619



100%|██████████| 391/391 [00:26<00:00, 14.72it/s]
100%|██████████| 79/79 [00:02<00:00, 29.65it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 112
Accuracy: 0.8778 | Train loss: 1.4668670462036133 | Valid loss: 0.42943150033950805



100%|██████████| 391/391 [00:22<00:00, 17.14it/s]
100%|██████████| 79/79 [00:02<00:00, 34.37it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 113
Accuracy: 0.8765 | Train loss: 1.4512048748779296 | Valid loss: 0.4351240891456604



100%|██████████| 391/391 [00:22<00:00, 17.02it/s]
100%|██████████| 79/79 [00:02<00:00, 32.43it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 114
Accuracy: 0.8592 | Train loss: 1.4480751175308229 | Valid loss: 0.48197796379327773



100%|██████████| 391/391 [00:23<00:00, 16.55it/s]
100%|██████████| 79/79 [00:02<00:00, 29.66it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 115
Accuracy: 0.8668 | Train loss: 1.4555742918777466 | Valid loss: 0.46031029472351076



100%|██████████| 391/391 [00:22<00:00, 17.47it/s]
100%|██████████| 79/79 [00:02<00:00, 32.62it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 116
Accuracy: 0.8552 | Train loss: 1.460204626312256 | Valid loss: 0.5071972789764404



100%|██████████| 391/391 [00:28<00:00, 13.93it/s]
100%|██████████| 79/79 [00:03<00:00, 21.51it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 117
Accuracy: 0.8732 | Train loss: 1.4583174825286864 | Valid loss: 0.4361868668317795


100%|██████████| 391/391 [00:23<00:00, 16.78it/s]
100%|██████████| 79/79 [00:02<00:00, 34.23it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 118
Accuracy: 0.8722 | Train loss: 1.4553443015289307 | Valid loss: 0.4530606694459915



100%|██████████| 391/391 [00:22<00:00, 17.26it/s]
100%|██████████| 79/79 [00:02<00:00, 35.53it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 119
Accuracy: 0.8759 | Train loss: 1.453363445930481 | Valid loss: 0.42227767610549927



100%|██████████| 391/391 [00:22<00:00, 17.15it/s]
100%|██████████| 79/79 [00:02<00:00, 30.52it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 120
Accuracy: 0.8654 | Train loss: 1.4594427587890626 | Valid loss: 0.48647419939041137



100%|██████████| 391/391 [00:21<00:00, 17.90it/s]
100%|██████████| 79/79 [00:02<00:00, 26.50it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 121
Accuracy: 0.8592 | Train loss: 1.4557619005203246 | Valid loss: 0.5132104122161866



100%|██████████| 391/391 [00:24<00:00, 16.24it/s]
100%|██████████| 79/79 [00:03<00:00, 21.83it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 122
Accuracy: 0.8624 | Train loss: 1.4451940705108643 | Valid loss: 0.47675495772361753



100%|██████████| 391/391 [00:27<00:00, 13.98it/s]
100%|██████████| 79/79 [00:02<00:00, 32.44it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 123
Accuracy: 0.8522 | Train loss: 1.4605614260482789 | Valid loss: 0.5291010647296905



100%|██████████| 391/391 [00:22<00:00, 17.35it/s]
100%|██████████| 79/79 [00:02<00:00, 29.72it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 124
Accuracy: 0.8624 | Train loss: 1.44694281124115 | Valid loss: 0.5043749778747558



100%|██████████| 391/391 [00:22<00:00, 17.15it/s]
100%|██████████| 79/79 [00:02<00:00, 35.91it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 125
Accuracy: 0.8781 | Train loss: 1.4623797646331786 | Valid loss: 0.418309218454361



100%|██████████| 391/391 [00:22<00:00, 17.73it/s]
100%|██████████| 79/79 [00:02<00:00, 27.68it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 126
Accuracy: 0.8728 | Train loss: 1.4489954662704467 | Valid loss: 0.4432024876356125



100%|██████████| 391/391 [00:21<00:00, 17.97it/s]
100%|██████████| 79/79 [00:02<00:00, 35.57it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 127
Accuracy: 0.853 | Train loss: 1.456100567779541 | Valid loss: 0.52225290350914


100%|██████████| 391/391 [00:27<00:00, 14.23it/s]
100%|██████████| 79/79 [00:03<00:00, 22.71it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 128
Accuracy: 0.8698 | Train loss: 1.4481241052246094 | Valid loss: 0.46386630644798277



100%|██████████| 391/391 [00:24<00:00, 15.70it/s]
100%|██████████| 79/79 [00:02<00:00, 28.55it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 129
Accuracy: 0.87 | Train loss: 1.4549949206161499 | Valid loss: 0.4572920751571655



100%|██████████| 391/391 [00:23<00:00, 16.82it/s]
100%|██████████| 79/79 [00:02<00:00, 35.12it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 130
Accuracy: 0.8697 | Train loss: 1.456803493423462 | Valid loss: 0.44901716833114624



100%|██████████| 391/391 [00:23<00:00, 16.98it/s]
100%|██████████| 79/79 [00:02<00:00, 33.08it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 131
Accuracy: 0.8705 | Train loss: 1.449252235069275 | Valid loss: 0.4551506029129028



100%|██████████| 391/391 [00:22<00:00, 17.15it/s]
100%|██████████| 79/79 [00:02<00:00, 26.40it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 132
Accuracy: 0.8618 | Train loss: 1.4442706924438478 | Valid loss: 0.5151310690879822



100%|██████████| 391/391 [00:25<00:00, 15.21it/s]
100%|██████████| 79/79 [00:03<00:00, 20.14it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 133
Accuracy: 0.8941 | Train loss: 1.3128188554763793 | Valid loss: 0.3674096836566925
YES



100%|██████████| 391/391 [00:29<00:00, 13.15it/s]
100%|██████████| 79/79 [00:02<00:00, 32.90it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 134
Accuracy: 0.8923 | Train loss: 1.2769666068649292 | Valid loss: 0.36535610210895536



100%|██████████| 391/391 [00:25<00:00, 15.56it/s]
100%|██████████| 79/79 [00:02<00:00, 33.19it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 135
Accuracy: 0.8919 | Train loss: 1.2607534320259095 | Valid loss: 0.3682646331310272



100%|██████████| 391/391 [00:23<00:00, 16.95it/s]
100%|██████████| 79/79 [00:02<00:00, 34.40it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 136
Accuracy: 0.8937 | Train loss: 1.2579464547729493 | Valid loss: 0.3703447157382965



100%|██████████| 391/391 [00:23<00:00, 16.59it/s]
100%|██████████| 79/79 [00:02<00:00, 28.57it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 137
Accuracy: 0.8948 | Train loss: 1.250980763092041 | Valid loss: 0.3608965449333191
YES


100%|██████████| 391/391 [00:23<00:00, 16.66it/s]
100%|██████████| 79/79 [00:02<00:00, 27.64it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 138
Accuracy: 0.894 | Train loss: 1.2460487007904053 | Valid loss: 0.3580980136394501



100%|██████████| 391/391 [00:22<00:00, 17.19it/s]
100%|██████████| 79/79 [00:02<00:00, 34.92it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 139
Accuracy: 0.893 | Train loss: 1.2408249195861816 | Valid loss: 0.363515816116333



100%|██████████| 391/391 [00:22<00:00, 17.20it/s]
100%|██████████| 79/79 [00:02<00:00, 28.27it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 140
Accuracy: 0.8971 | Train loss: 1.2434231605911255 | Valid loss: 0.35819362058639526
YES



100%|██████████| 391/391 [00:22<00:00, 17.05it/s]
100%|██████████| 79/79 [00:02<00:00, 34.65it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 141
Accuracy: 0.8944 | Train loss: 1.235989807395935 | Valid loss: 0.3644328718662262


100%|██████████| 391/391 [00:22<00:00, 17.18it/s]
100%|██████████| 79/79 [00:02<00:00, 28.65it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 142
Accuracy: 0.8935 | Train loss: 1.2352693390274048 | Valid loss: 0.35804956834316254



100%|██████████| 391/391 [00:22<00:00, 17.23it/s]
100%|██████████| 79/79 [00:03<00:00, 20.83it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 143
Accuracy: 0.8948 | Train loss: 1.2339738005828857 | Valid loss: 0.36460878248214723



100%|██████████| 391/391 [00:29<00:00, 13.24it/s]
100%|██████████| 79/79 [00:02<00:00, 29.54it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 144
Accuracy: 0.8938 | Train loss: 1.235817244567871 | Valid loss: 0.3623934335708618



100%|██████████| 391/391 [00:23<00:00, 16.74it/s]
100%|██████████| 79/79 [00:02<00:00, 34.00it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 145
Accuracy: 0.8929 | Train loss: 1.2292281300354004 | Valid loss: 0.36620391297340393



100%|██████████| 391/391 [00:22<00:00, 17.28it/s]
100%|██████████| 79/79 [00:02<00:00, 34.23it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 146
Accuracy: 0.8946 | Train loss: 1.2234483070373534 | Valid loss: 0.362254775762558



100%|██████████| 391/391 [00:22<00:00, 17.41it/s]
100%|██████████| 79/79 [00:02<00:00, 28.96it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 147
Accuracy: 0.8955 | Train loss: 1.2237197128295898 | Valid loss: 0.3596883786201477



100%|██████████| 391/391 [00:22<00:00, 17.02it/s]
100%|██████████| 79/79 [00:02<00:00, 34.20it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 148
Accuracy: 0.8941 | Train loss: 1.222674441204071 | Valid loss: 0.36415909276008607



100%|██████████| 391/391 [00:23<00:00, 16.96it/s]
100%|██████████| 79/79 [00:02<00:00, 33.42it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 149
Accuracy: 0.8925 | Train loss: 1.2226631604003906 | Valid loss: 0.3635714005947113



100%|██████████| 391/391 [00:23<00:00, 16.78it/s]
100%|██████████| 79/79 [00:02<00:00, 30.73it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 150
Accuracy: 0.8941 | Train loss: 1.2207009065628052 | Valid loss: 0.36007177720069883



100%|██████████| 391/391 [00:22<00:00, 17.26it/s]
100%|██████████| 79/79 [00:02<00:00, 32.32it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 151
Accuracy: 0.8955 | Train loss: 1.220209896736145 | Valid loss: 0.3585992887973785


100%|██████████| 391/391 [00:22<00:00, 17.44it/s]
100%|██████████| 79/79 [00:02<00:00, 35.08it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 152
Accuracy: 0.8934 | Train loss: 1.2198136115646363 | Valid loss: 0.3696607421875



100%|██████████| 391/391 [00:22<00:00, 17.04it/s]
100%|██████████| 79/79 [00:02<00:00, 35.42it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 153
Accuracy: 0.8937 | Train loss: 1.2166697240066529 | Valid loss: 0.36516737990379333



100%|██████████| 391/391 [00:29<00:00, 13.48it/s]
100%|██████████| 79/79 [00:03<00:00, 22.31it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 154
Accuracy: 0.8937 | Train loss: 1.2197557697677612 | Valid loss: 0.3724964757919311



100%|██████████| 391/391 [00:25<00:00, 15.45it/s]
100%|██████████| 79/79 [00:02<00:00, 29.30it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 155
Accuracy: 0.8959 | Train loss: 1.2247838777542115 | Valid loss: 0.3600522777557373


100%|██████████| 391/391 [00:22<00:00, 17.70it/s]
100%|██████████| 79/79 [00:02<00:00, 34.39it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 156
Accuracy: 0.8944 | Train loss: 1.2149718524551392 | Valid loss: 0.3619368604183197



100%|██████████| 391/391 [00:22<00:00, 17.31it/s]
100%|██████████| 79/79 [00:02<00:00, 35.00it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 157
Accuracy: 0.8955 | Train loss: 1.2133294874954224 | Valid loss: 0.36205226082801817



100%|██████████| 391/391 [00:22<00:00, 17.23it/s]
100%|██████████| 79/79 [00:02<00:00, 31.55it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 158
Accuracy: 0.8939 | Train loss: 1.2136872959136964 | Valid loss: 0.36702404351234436



100%|██████████| 391/391 [00:21<00:00, 17.81it/s]
100%|██████████| 79/79 [00:02<00:00, 27.34it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 159
Accuracy: 0.8947 | Train loss: 1.2097256575012207 | Valid loss: 0.3662039174079895



100%|██████████| 391/391 [00:22<00:00, 17.06it/s]
100%|██████████| 79/79 [00:02<00:00, 34.72it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 160
Accuracy: 0.8942 | Train loss: 1.2160410380554199 | Valid loss: 0.3694980230331421



100%|██████████| 391/391 [00:22<00:00, 17.08it/s]
100%|██████████| 79/79 [00:02<00:00, 29.08it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 161
Accuracy: 0.8922 | Train loss: 1.2078663946914674 | Valid loss: 0.36663564138412474



100%|██████████| 391/391 [00:22<00:00, 17.45it/s]
100%|██████████| 79/79 [00:02<00:00, 35.26it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 162
Accuracy: 0.8951 | Train loss: 1.2088774798965454 | Valid loss: 0.36349221415519717



100%|██████████| 391/391 [00:22<00:00, 17.52it/s]
100%|██████████| 79/79 [00:02<00:00, 33.83it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 163
Accuracy: 0.8944 | Train loss: 1.212663652496338 | Valid loss: 0.3637817650794983



100%|██████████| 391/391 [00:24<00:00, 16.08it/s]
100%|██████████| 79/79 [00:03<00:00, 21.43it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 164
Accuracy: 0.8937 | Train loss: 1.2039389461517334 | Valid loss: 0.3642935595035553



100%|██████████| 391/391 [00:29<00:00, 13.34it/s]
100%|██████████| 79/79 [00:02<00:00, 27.65it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 165
Accuracy: 0.8942 | Train loss: 1.2034868618392944 | Valid loss: 0.3598230676651001



100%|██████████| 391/391 [00:22<00:00, 17.20it/s]
100%|██████████| 79/79 [00:02<00:00, 33.98it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 166
Accuracy: 0.8964 | Train loss: 1.2079791135787963 | Valid loss: 0.36208615341186523



100%|██████████| 391/391 [00:21<00:00, 17.90it/s]
100%|██████████| 79/79 [00:02<00:00, 27.13it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 167
Accuracy: 0.8928 | Train loss: 1.2075090739822387 | Valid loss: 0.3655889443397522



100%|██████████| 391/391 [00:21<00:00, 17.91it/s]
100%|██████████| 79/79 [00:02<00:00, 35.20it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 168
Accuracy: 0.8932 | Train loss: 1.2012581756210328 | Valid loss: 0.3637892292022705



100%|██████████| 391/391 [00:23<00:00, 16.97it/s]
100%|██████████| 79/79 [00:02<00:00, 30.78it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 169
Accuracy: 0.8929 | Train loss: 1.2005584756469727 | Valid loss: 0.3669009956359863



100%|██████████| 391/391 [00:21<00:00, 17.91it/s]
100%|██████████| 79/79 [00:02<00:00, 36.14it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 170
Accuracy: 0.8948 | Train loss: 1.201174807472229 | Valid loss: 0.3630205909729004



100%|██████████| 391/391 [00:23<00:00, 16.87it/s]
100%|██████████| 79/79 [00:02<00:00, 35.16it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 171
Accuracy: 0.8942 | Train loss: 1.2042155499267577 | Valid loss: 0.36071900362968445



100%|██████████| 391/391 [00:22<00:00, 17.06it/s]
100%|██████████| 79/79 [00:02<00:00, 28.65it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 172
Accuracy: 0.8944 | Train loss: 1.200637116317749 | Valid loss: 0.36409578919410707



100%|██████████| 391/391 [00:22<00:00, 17.26it/s]
100%|██████████| 79/79 [00:02<00:00, 35.55it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 173
Accuracy: 0.8916 | Train loss: 1.2072845294952392 | Valid loss: 0.369783136510849


100%|██████████| 391/391 [00:22<00:00, 17.54it/s]
100%|██████████| 79/79 [00:02<00:00, 31.09it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 174
Accuracy: 0.8935 | Train loss: 1.2009292221450805 | Valid loss: 0.35974010255336764


100%|██████████| 391/391 [00:27<00:00, 14.37it/s]
100%|██████████| 79/79 [00:03<00:00, 23.64it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 175
Accuracy: 0.8931 | Train loss: 1.1944147814559936 | Valid loss: 0.3688544098854065



100%|██████████| 391/391 [00:27<00:00, 14.27it/s]
100%|██████████| 79/79 [00:02<00:00, 29.31it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 176
Accuracy: 0.892 | Train loss: 1.1947657234954834 | Valid loss: 0.3680252477169037



100%|██████████| 391/391 [00:22<00:00, 17.32it/s]
100%|██████████| 79/79 [00:02<00:00, 34.85it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 177
Accuracy: 0.8937 | Train loss: 1.2034832590103148 | Valid loss: 0.369477618932724



100%|██████████| 391/391 [00:22<00:00, 17.68it/s]
100%|██████████| 79/79 [00:03<00:00, 26.22it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 178
Accuracy: 0.8941 | Train loss: 1.201838476638794 | Valid loss: 0.3671757544994354



100%|██████████| 391/391 [00:22<00:00, 17.61it/s]
100%|██████████| 79/79 [00:02<00:00, 33.28it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 179
Accuracy: 0.8909 | Train loss: 1.1994615476226806 | Valid loss: 0.3680688755989075



100%|██████████| 391/391 [00:23<00:00, 16.68it/s]
100%|██████████| 79/79 [00:02<00:00, 34.22it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 180
Accuracy: 0.8936 | Train loss: 1.1949782386398315 | Valid loss: 0.371642312335968



100%|██████████| 391/391 [00:22<00:00, 17.51it/s]
100%|██████████| 79/79 [00:02<00:00, 33.21it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 181
Accuracy: 0.8935 | Train loss: 1.196342746734619 | Valid loss: 0.35940567922592165



100%|██████████| 391/391 [00:23<00:00, 16.63it/s]
100%|██████████| 79/79 [00:02<00:00, 33.60it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 182
Accuracy: 0.895 | Train loss: 1.193190768470764 | Valid loss: 0.36526467962265013



100%|██████████| 391/391 [00:23<00:00, 16.95it/s]
100%|██████████| 79/79 [00:02<00:00, 30.74it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 183
Accuracy: 0.894 | Train loss: 1.1895148907470703 | Valid loss: 0.36514121346473694



100%|██████████| 391/391 [00:22<00:00, 17.33it/s]
100%|██████████| 79/79 [00:02<00:00, 35.43it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 184
Accuracy: 0.8914 | Train loss: 1.1894827464675903 | Valid loss: 0.3762835000514984



100%|██████████| 391/391 [00:23<00:00, 16.95it/s]
100%|██████████| 79/79 [00:02<00:00, 33.94it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 185
Accuracy: 0.8933 | Train loss: 1.188984739112854 | Valid loss: 0.3628194869995117



100%|██████████| 391/391 [00:30<00:00, 12.99it/s]
100%|██████████| 79/79 [00:03<00:00, 21.44it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 186
Accuracy: 0.8939 | Train loss: 1.1960937320709228 | Valid loss: 0.3623726310253143



100%|██████████| 391/391 [00:25<00:00, 15.40it/s]
100%|██████████| 79/79 [00:02<00:00, 33.78it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 187
Accuracy: 0.892 | Train loss: 1.1896703522109986 | Valid loss: 0.3666425159931183



100%|██████████| 391/391 [00:22<00:00, 17.41it/s]
100%|██████████| 79/79 [00:02<00:00, 26.68it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 188
Accuracy: 0.8934 | Train loss: 1.1934747325897217 | Valid loss: 0.3622869616985321



100%|██████████| 391/391 [00:22<00:00, 17.48it/s]
100%|██████████| 79/79 [00:02<00:00, 30.40it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 189
Accuracy: 0.8941 | Train loss: 1.191749142074585 | Valid loss: 0.36720313935279847



100%|██████████| 391/391 [00:22<00:00, 17.29it/s]
100%|██████████| 79/79 [00:02<00:00, 34.76it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 190
Accuracy: 0.8949 | Train loss: 1.1993249289512635 | Valid loss: 0.37413579282760623



100%|██████████| 391/391 [00:23<00:00, 16.97it/s]
100%|██████████| 79/79 [00:02<00:00, 35.25it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 191
Accuracy: 0.8942 | Train loss: 1.194734785118103 | Valid loss: 0.37056834836006164



100%|██████████| 391/391 [00:22<00:00, 17.30it/s]
100%|██████████| 79/79 [00:02<00:00, 31.65it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 192
Accuracy: 0.8938 | Train loss: 1.1900806985092163 | Valid loss: 0.36356641340255735



100%|██████████| 391/391 [00:23<00:00, 16.90it/s]
100%|██████████| 79/79 [00:02<00:00, 34.79it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 193
Accuracy: 0.8929 | Train loss: 1.1853979079818726 | Valid loss: 0.36477699255943297



100%|██████████| 391/391 [00:23<00:00, 16.70it/s]
100%|██████████| 79/79 [00:02<00:00, 35.72it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 194
Accuracy: 0.8939 | Train loss: 1.1939055999755859 | Valid loss: 0.3583343743801117



100%|██████████| 391/391 [00:23<00:00, 16.92it/s]
100%|██████████| 79/79 [00:02<00:00, 32.63it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 195
Accuracy: 0.8909 | Train loss: 1.1893105825805663 | Valid loss: 0.3774225848197937



100%|██████████| 391/391 [00:24<00:00, 16.18it/s]
100%|██████████| 79/79 [00:03<00:00, 23.39it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 196
Accuracy: 0.8954 | Train loss: 1.1938683680725097 | Valid loss: 0.36758722648620606



100%|██████████| 391/391 [00:29<00:00, 13.07it/s]
100%|██████████| 79/79 [00:03<00:00, 24.14it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 197
Accuracy: 0.8928 | Train loss: 1.1890825383377075 | Valid loss: 0.366200097823143



100%|██████████| 391/391 [00:23<00:00, 16.81it/s]
100%|██████████| 79/79 [00:02<00:00, 34.92it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 198
Accuracy: 0.8943 | Train loss: 1.1841402425384522 | Valid loss: 0.3654029008388519



100%|██████████| 391/391 [00:22<00:00, 17.22it/s]
100%|██████████| 79/79 [00:02<00:00, 29.81it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 199
Accuracy: 0.8948 | Train loss: 1.1915472158050537 | Valid loss: 0.3620737578868866



100%|██████████| 391/391 [00:22<00:00, 17.56it/s]
100%|██████████| 79/79 [00:02<00:00, 33.19it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 200
Accuracy: 0.8951 | Train loss: 1.1866149585342407 | Valid loss: 0.3673864377975464





In [33]:
best_score

0.8971