In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as  F
import torch.nn as nn
from pathlib import Path
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from collections import OrderedDict
from tqdm import tqdm
import sys
import time
from sklearn.metrics import accuracy_score

In [2]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs (slowly) on CPU-only machines instead of failing on the
# first `.to(DEVICE)`.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Read by reproducibilitySeed() to decide whether to set the cuDNN flags.
use_gpu = DEVICE == 'cuda'

In [3]:
def reproducibilitySeed():
    """
    Seed the torch and NumPy global RNGs with 0.

    When the module-level ``use_gpu`` flag is truthy, cuDNN is additionally
    put in deterministic mode and its benchmark mode is switched off.
    """
    seed_value = 0
    torch.manual_seed(seed_value)
    np.random.seed(seed_value)

    if not use_gpu:
        return

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed torch / NumPy once, before any loader or model is created below.
reproducibilitySeed()

In [4]:
# Number of worker processes handed to both DataLoaders built by get_cifar().
NUM_WORKERS = 2


class TensorImgSet(Dataset):
    """Dataset over an ``(images, targets)`` pair with an optional transform.

    ``tensors[0]`` supplies the samples and ``tensors[1]`` the targets; both
    are indexed with the same integer. The transform, when truthy, is applied
    to the sample only.
    """

    def __init__(self, tensors, transform=None):
        images, labels = tensors[0], tensors[1]
        self.tensors = tensors
        self.imgs = images
        self.targets = labels
        self.transform = transform
        # Cached once; __len__ simply reports this value.
        self.len = len(images)

    def __getitem__(self, index):
        raw = self.imgs[index]
        sample = self.transform(raw) if self.transform else raw
        label = self.targets[index]
        return sample, label

    def __len__(self):
        return self.len

def get_cifar(num_classes=100, dataset_dir="./data", batch_size=128,
              use_cifar_10_1=False):
    """Build CIFAR train/test DataLoaders.

    Args:
        num_classes: 10 selects CIFAR-10; any other value selects CIFAR-100.
        dataset_dir: root directory the torchvision dataset downloads to / reads from.
        batch_size: batch size used by both loaders.
        use_cifar_10_1: accepted for signature compatibility; not used here.

    Returns:
        ``(train_loader, test_loader)``. The train loader shuffles and applies
        random-crop (pad 4) + horizontal-flip augmentation; the test loader
        keeps dataset order and only converts to tensor and normalizes.
    """
    if num_classes != 10:
        print("Loading CIFAR100...")
        dataset = torchvision.datasets.CIFAR100
        normalize = transforms.Normalize(
            mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276])
    else:
        print("Loading CIFAR10...")
        dataset = torchvision.datasets.CIFAR10
        normalize = transforms.Normalize(
            (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

    # Training pipeline: augmentation first, then tensor conversion + normalization.
    augmentation = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    train_transform = transforms.Compose(
        augmentation + [transforms.ToTensor(), normalize])
    trainset = dataset(root=dataset_dir, train=True,
                       download=True, transform=train_transform)

    # Evaluation pipeline: no augmentation.
    test_transform = transforms.Compose([transforms.ToTensor(), normalize])
    testset = dataset(root=dataset_dir, train=False,
                      download=True, transform=test_transform)

    # Settings shared by both loaders; only `shuffle` differs.
    shared = dict(batch_size=batch_size,
                  num_workers=NUM_WORKERS,
                  pin_memory=True)
    train_loader = torch.utils.data.DataLoader(trainset, shuffle=True, **shared)
    test_loader = torch.utils.data.DataLoader(testset, shuffle=False, **shared)
    return train_loader, test_loader


In [5]:
# Build the CIFAR-100 loaders; get_cifar passes download=True, so the archive
# is fetched into ./data when it is not already there.
train_loader,test_loader=get_cifar(num_classes=100, dataset_dir="./data", batch_size=128,
              use_cifar_10_1=False)

Loading CIFAR100...
Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
# Cross-entropy criterion. NOTE: `criterion` is rebound to an equivalent
# nn.CrossEntropyLoss() in a later cell before the training loop uses it.
criterion=nn.CrossEntropyLoss()

In [7]:
class BasicBlock(nn.Module):
    """Residual block of two 3x3 convolutions with an additive shortcut.

    The block applies no activation to its input and none to its output:
    ``bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x)``. Any ReLU between blocks
    or stages is the caller's responsibility.

    Args:
        in_planes: number of input channels.
        planes: number of output channels (``expansion`` is 1).
        stride: stride of the first convolution and of the projection shortcut.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut unless the shape changes, in which case a
        # 1x1 conv + BN projects the input to the output shape.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        # The raw input feeds both conv1 and the shortcut. A former
        # `out = F.relu(x)` here was dead code (overwritten immediately) and is
        # dropped; wiring a ReLU in instead would change the computed function.
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return out


class Bottleneck(nn.Module):
    """Residual bottleneck block (1x1 -> 3x3 -> 1x1) with an additive shortcut.

    Output has ``expansion * planes`` channels. As with ``BasicBlock`` no
    activation is applied to the block input or to the block output; the
    caller decides where ReLUs sit between blocks/stages.

    Args:
        in_planes: number of input channels.
        planes: bottleneck width; the block emits ``4 * planes`` channels.
        stride: stride of the 3x3 convolution and of the projection shortcut.
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion * planes)

        # Identity shortcut unless the shape changes, in which case a
        # 1x1 conv + BN projects the input to the output shape.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        # The raw input feeds both conv1 and the shortcut. A former
        # `out = F.relu(x)` here was dead code (overwritten immediately) and is
        # dropped; wiring a ReLU in instead would change the function that
        # existing checkpoints of this architecture were trained with.
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        return out


class ResNet(nn.Module):
    """Four-stage ResNet with a 3x3 stride-1 stem (no max-pool).

    Stage widths are 64/128/256/512 planes; stages 2-4 downsample by 2.
    The head is ``avg_pool2d(kernel=4)`` + flatten + linear, whose input size
    ``512 * block.expansion`` matches a 4x4 final feature map (i.e. 32x32
    inputs).

    Args:
        block: residual block class with an ``expansion`` class attribute and
            an ``(in_planes, planes, stride)`` constructor.
        num_blocks: four ints, the number of blocks in each stage.
        num_classes: output size of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=100):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)
        # Actual channel count of each stage output. Scaling by the block
        # expansion keeps this correct for Bottleneck (x4); it is unchanged
        # for BasicBlock (x1).
        self.n_channels = [planes * block.expansion
                           for planes in (64, 128, 256, 512)]

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this block emits.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, is_feat=False, use_relu=True):
        """Compute class logits.

        Args:
            x: input batch of images.
            is_feat: when True return ``(pool, logits)`` where ``pool`` is the
                flattened pooled feature fed to the linear layer.
            use_relu: when True apply ReLU after the stem and after stages
                1-3. The ReLU after stage 4 is always applied.
        """
        out = self.conv1(x)
        out = self.bn1(out)
        if use_relu:
            out = F.relu(out)
        feat1 = self.layer1(out)
        if use_relu:
            feat1 = F.relu(feat1)
        feat2 = self.layer2(feat1)
        if use_relu:
            feat2 = F.relu(feat2)
        feat3 = self.layer3(feat2)
        if use_relu:
            feat3 = F.relu(feat3)

        feat4 = self.layer4(feat3)
        feat4 = F.relu(feat4)
        pool = F.avg_pool2d(feat4, 4)
        pool = pool.view(pool.size(0), -1)
        out = self.linear(pool)

        if is_feat:
            return pool, out

        return out

    def get_bn_before_relu(self):
        """Return the last BatchNorm of the final block of every stage.

        Raises:
            TypeError: if the blocks are neither ``Bottleneck`` nor
                ``BasicBlock`` (previously this printed a message and then
                failed with an unrelated UnboundLocalError).
        """
        if isinstance(self.layer1[0], Bottleneck):
            bn1 = self.layer1[-1].bn3
            bn2 = self.layer2[-1].bn3
            bn3 = self.layer3[-1].bn3
            bn4 = self.layer4[-1].bn3
        elif isinstance(self.layer1[0], BasicBlock):
            bn1 = self.layer1[-1].bn2
            bn2 = self.layer2[-1].bn2
            bn3 = self.layer3[-1].bn2
            bn4 = self.layer4[-1].bn2
        else:
            raise TypeError('ResNet unknown block error !!!')

        return [bn1, bn2, bn3, bn4]

    def get_channel_num(self):
        """Return the channel count of each stage output."""
        return self.n_channels

    def extract_feature(self, x, preReLU=False):
        """Return ``([feat1, feat2, feat3, feat4], logits)``.

        The stages are chained on their un-rectified outputs and the stem has
        no ReLU, so ``logits`` equals ``forward(x, use_relu=False)``.
        ``preReLU`` only controls whether the *returned* features are passed
        through ReLU (False) or left as-is (True).
        """
        x = self.conv1(x)
        x = self.bn1(x)

        feat1 = self.layer1(x)
        feat2 = self.layer2(feat1)
        feat3 = self.layer3(feat2)
        feat4 = self.layer4(feat3)

        x = F.relu(feat4)
        x = F.avg_pool2d(x, 4)
        x = x.view(x.size(0), -1)
        out = self.linear(x)

        if not preReLU:
            feat1 = F.relu(feat1)
            feat2 = F.relu(feat2)
            feat3 = F.relu(feat3)
            feat4 = F.relu(feat4)

        return [feat1, feat2, feat3, feat4], out


class ResNetSmall(nn.Module):
    """Three-stage ResNet with a 64-channel 3x3 stem and 16/32/64-plane stages.

    Stages 2 and 3 downsample by 2. The head is ``avg_pool2d(kernel=4)`` +
    flatten + linear. For 32x32 inputs the last feature map is 8x8, so pooling
    leaves a 2x2 grid and the flattened size is
    ``64 * expansion * 4 == 256 * expansion``, which is what the linear layer
    expects.

    Args:
        block: residual block class with an ``expansion`` class attribute and
            an ``(in_planes, planes, stride)`` constructor.
        num_blocks: three ints, the number of blocks in each stage.
        num_classes: output size of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=100):
        super(ResNetSmall, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(256 * block.expansion, num_classes)
        # Actual channel count of each stage output. Scaling by the block
        # expansion keeps this correct for Bottleneck (x4); it is unchanged
        # for BasicBlock (x1).
        self.n_channels = [planes * block.expansion
                           for planes in (16, 32, 64)]

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this block emits.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x, is_feat=False, use_relu=True):
        """Compute class logits.

        Args:
            x: input batch of images.
            is_feat: when True return ``(pool, logits)`` where ``pool`` is the
                flattened pooled feature fed to the linear layer.
            use_relu: when True apply ReLU after the stem and after stages
                1-2. The ReLU after stage 3 is always applied.
        """
        out = self.conv1(x)
        out = self.bn1(out)
        if use_relu:
            out = F.relu(out)
        feat1 = self.layer1(out)
        if use_relu:
            feat1 = F.relu(feat1)
        feat2 = self.layer2(feat1)
        if use_relu:
            feat2 = F.relu(feat2)
        feat3 = self.layer3(feat2)

        # the last relu is always included
        feat3 = F.relu(feat3)
        pool = F.avg_pool2d(feat3, 4)
        pool = pool.view(pool.size(0), -1)
        out = self.linear(pool)

        if is_feat:
            return pool, out

        return out

    def get_bn_before_relu(self):
        """Return the last BatchNorm of the final block of every stage.

        Raises:
            TypeError: if the blocks are neither ``Bottleneck`` nor
                ``BasicBlock`` (previously this printed a message and then
                failed with an unrelated UnboundLocalError).
        """
        if isinstance(self.layer1[0], Bottleneck):
            bn1 = self.layer1[-1].bn3
            bn2 = self.layer2[-1].bn3
            bn3 = self.layer3[-1].bn3
        elif isinstance(self.layer1[0], BasicBlock):
            bn1 = self.layer1[-1].bn2
            bn2 = self.layer2[-1].bn2
            bn3 = self.layer3[-1].bn2
        else:
            raise TypeError('ResNet unknown block error !!!')

        return [bn1, bn2, bn3]

    def get_channel_num(self):
        """Return the channel count of each stage output."""
        return self.n_channels

    def extract_feature(self, x, preReLU=False):
        """Return ``([feat1, feat2, feat3], logits)``.

        The stages are chained on their un-rectified outputs and the stem has
        no ReLU, so ``logits`` equals ``forward(x, use_relu=False)``.
        ``preReLU`` only controls whether the *returned* features are passed
        through ReLU (False) or left as-is (True).
        """
        x = self.conv1(x)
        x = self.bn1(x)

        feat1 = self.layer1(x)
        feat2 = self.layer2(feat1)
        feat3 = self.layer3(feat2)

        x = F.relu(feat3)
        x = F.avg_pool2d(x, 4)
        x = x.view(x.size(0), -1)
        out = self.linear(x)

        if not preReLU:
            feat1 = F.relu(feat1)
            feat2 = F.relu(feat2)
            feat3 = F.relu(feat3)

        return [feat1, feat2, feat3], out


def resnet8(**kwargs):
    """ResNetSmall of BasicBlocks, 1 block per stage."""
    return ResNetSmall(BasicBlock, [1] * 3, **kwargs)


def resnet14(**kwargs):
    """ResNetSmall of BasicBlocks, 2 blocks per stage."""
    return ResNetSmall(BasicBlock, [2] * 3, **kwargs)


def resnet20(**kwargs):
    """ResNetSmall of BasicBlocks, 3 blocks per stage."""
    return ResNetSmall(BasicBlock, [3] * 3, **kwargs)


def resnet26(**kwargs):
    """ResNetSmall of BasicBlocks, 4 blocks per stage."""
    return ResNetSmall(BasicBlock, [4] * 3, **kwargs)


def resnet32(**kwargs):
    """ResNetSmall of BasicBlocks, 5 blocks per stage."""
    return ResNetSmall(BasicBlock, [5] * 3, **kwargs)


def resnet44(**kwargs):
    """ResNetSmall of BasicBlocks, 7 blocks per stage."""
    return ResNetSmall(BasicBlock, [7] * 3, **kwargs)


def resnet56(**kwargs):
    """ResNetSmall of BasicBlocks, 9 blocks per stage."""
    return ResNetSmall(BasicBlock, [9] * 3, **kwargs)


def resnet10(**kwargs):
    """ResNet of BasicBlocks, 1 block per stage."""
    return ResNet(BasicBlock, [1] * 4, **kwargs)


def resnet18(**kwargs):
    """ResNet of BasicBlocks, 2 blocks per stage."""
    return ResNet(BasicBlock, [2] * 4, **kwargs)


def resnet34(**kwargs):
    """ResNet of BasicBlocks with stage depths 3/4/6/3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths, **kwargs)


def resnet50(**kwargs):
    """ResNet of Bottleneck blocks with stage depths 3/4/6/3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)


def resnet101(**kwargs):
    """ResNet of Bottleneck blocks with stage depths 3/4/23/3."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)


def resnet152(**kwargs):
    """ResNet of Bottleneck blocks with stage depths 3/8/36/3."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths, **kwargs)


def test():
    """Smoke test: push one random 32x32 RGB image through resnet18 and print the output size."""
    # Build the model before drawing the input so the global RNG is consumed
    # in the same order as before.
    model = resnet18()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(sample)
    print(logits.size())
test()

torch.Size([1, 100])


In [8]:
class ResNet50(nn.Module):
    """torchvision ResNet-50 trunk without the classification layer.

    The stem, the four residual stages and the average pool are re-registered
    on this module under their torchvision names; the ``fc`` layer is not
    copied.
    """
    output_size = 2048

    def __init__(self, pretrained=True):
        super().__init__()
        backbone = torchvision.models.resnet50(pretrained=pretrained)

        kept = ('conv1', 'bn1', 'relu', 'maxpool',
                'layer1', 'layer2', 'layer3', 'layer4', 'avgpool')
        for module_name in kept:
            self.add_module(module_name, getattr(backbone, module_name))

    def forward(self, x, get_ha=False):
        """Return the pooled feature, or ``(b1, b2, b3, b4, pool)`` when ``get_ha`` is true."""
        stem = self.conv1(x)
        stem = self.bn1(stem)
        stem = self.relu(stem)
        stem = self.maxpool(stem)

        b1 = self.layer1(stem)
        b2 = self.layer2(b1)
        b3 = self.layer3(b2)
        b4 = self.layer4(b3)
        pool = self.avgpool(b4)

        return (b1, b2, b3, b4, pool) if get_ha else pool

In [9]:
class LinearEmbedding(nn.Module):
    """Linear projection head on top of a feature-extracting ``base`` module.

    Args:
        base: module called as ``base(x)`` (returns the pooled feature) or
            ``base(x, True)`` (returns ``b1, b2, b3, b4, pool``).
        output_size: flattened size of the pooled feature.
        embedding_size: size of the produced embedding.
        normalize: when truthy, L2-normalize the embedding along dim 1.
    """

    def __init__(self, base, output_size=512, embedding_size=100, normalize=True):
        super().__init__()
        self.base = base
        self.linear = nn.Linear(output_size, embedding_size)
        self.normalize = normalize

    def forward(self, x, get_ha=False):
        """Return the embedding, or ``(b1, b2, b3, b4, pool, embedding)`` when ``get_ha`` is true.

        ``pool`` in the tuple is the flattened ``(batch, -1)`` view that was
        fed to the linear layer.
        """
        base_out = self.base(x, True) if get_ha else self.base(x)
        if get_ha:
            b1, b2, b3, b4, pooled = base_out
        else:
            pooled = base_out

        flat = pooled.view(x.size(0), -1)
        embedding = self.linear(flat)
        if self.normalize:
            embedding = F.normalize(embedding, p=2, dim=1)

        if not get_ha:
            return embedding
        return b1, b2, b3, b4, flat, embedding

In [10]:
def pdist(e, squared=False, eps=1e-12):
    """Pairwise Euclidean distance matrix between the rows of ``e``.

    Uses ``|a|^2 + |b|^2 - 2 a.b``, clamped from below at ``eps`` before the
    optional square root. The diagonal is set to exactly 0.

    Args:
        e: 2-D tensor, one embedding per row.
        squared: return squared distances when True.
        eps: lower clamp applied to the squared distances.
    """
    sq_norms = torch.sum(e.pow(2), dim=1)
    gram = torch.matmul(e, e.t())
    dist = torch.clamp(sq_norms[:, None] + sq_norms[None, :] - 2 * gram, min=eps)

    if not squared:
        dist = torch.sqrt(dist)

    # Work on a copy so the in-place diagonal write below does not touch the
    # tensor produced by the ops above.
    dist = dist.clone()
    diagonal = range(len(e))
    dist[diagonal, diagonal] = 0
    return dist

In [11]:
class RkdDistance(nn.Module):
    """Distance-wise relational distillation loss.

    Both embeddings are turned into pairwise-distance matrices (via ``pdist``),
    each matrix is divided by the mean of its strictly positive entries, and
    the two are compared with a mean-reduced smooth-L1 loss. The teacher side
    is computed under ``torch.no_grad()``.

    Note: if a matrix has no strictly positive entry (e.g. a batch of one
    sample) the mean is taken over an empty selection and the loss is NaN.
    """

    def forward(self, student, teacher):
        with torch.no_grad():
            t_d = pdist(teacher, squared=False)
            mean_td = t_d[t_d > 0].mean()
            t_d = t_d / mean_td

        d = pdist(student, squared=False)
        mean_d = d[d > 0].mean()
        d = d / mean_d

        # 'mean' replaces the deprecated alias 'elementwise_mean' (same
        # reduction, without the deprecation warning on every call).
        loss = F.smooth_l1_loss(d, t_d, reduction='mean')
        return loss

In [12]:
class RKdAngle(nn.Module):
    """Angle-wise relational distillation loss.

    For inputs of shape N x C, the pairwise difference tensor (N x N x C) is
    L2-normalized along the last dim and multiplied with its own transpose
    per anchor, giving the cosine for every (anchor, j, k) triplet. Student
    and teacher cosines are compared with a mean-reduced smooth-L1 loss; the
    teacher side is computed under ``torch.no_grad()``.
    """

    def forward(self, student, teacher):
        # N x C
        # N x N x C

        with torch.no_grad():
            td = (teacher.unsqueeze(0) - teacher.unsqueeze(1))
            norm_td = F.normalize(td, p=2, dim=2)
            t_angle = torch.bmm(norm_td, norm_td.transpose(1, 2)).view(-1)

        sd = (student.unsqueeze(0) - student.unsqueeze(1))
        norm_sd = F.normalize(sd, p=2, dim=2)
        s_angle = torch.bmm(norm_sd, norm_sd.transpose(1, 2)).view(-1)

        # 'mean' replaces the deprecated alias 'elementwise_mean' (same
        # reduction, without the deprecation warning on every call).
        loss = F.smooth_l1_loss(s_angle, t_angle, reduction='mean')
        return loss

In [13]:
# Teacher: ResNet built from Bottleneck blocks ([3, 4, 6, 3]) with the default 100 classes.
teacher_model=resnet50()

In [14]:
PATH="../input/resnet50cifar100/best_model (2).pt"
# map_location='cpu' lets a checkpoint saved from CUDA tensors load on a host
# without a GPU; load_state_dict copies the values into the model's own
# parameters, and the model is moved to DEVICE in a later cell.
# SECURITY: torch.load unpickles the file - only load checkpoints you trust.
teacher_model.load_state_dict(torch.load(PATH, map_location='cpu'))

<All keys matched successfully>

In [15]:
class AverageMeter(object):
    """Running weighted average of a scalar series.

    Attributes (all reset to 0 by ``reset``):
        val: the most recent value passed to ``update``.
        sum: sum of ``val * n`` over all updates.
        count: sum of ``n`` over all updates.
        avg: ``sum / count`` after the latest update.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every statistic back to 0."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

def get_score(y_true, y_pred):
    """Score predictions against labels with sklearn's ``accuracy_score``."""
    score = accuracy_score(y_true, y_pred)
    return score

In [16]:
# Put the teacher in eval mode; as the cell's last expression this also displays the module repr.
teacher_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (

In [17]:
# Student: ResNetSmall with three stages of 3 BasicBlocks each (default 100 classes).
student_model=resnet20()

In [18]:
# Move the teacher's parameters and buffers to DEVICE (the returned module is displayed).
teacher_model.to(DEVICE)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (

In [19]:
learning_rate = 0.1
num_epochs = 200
# SGD with Nesterov momentum; the initial LR now comes from `learning_rate`
# instead of a duplicated literal, so changing the constant actually takes effect.
optimizer_student = torch.optim.SGD(student_model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0005, nesterov=True)
# LR is multiplied by 0.1 at epochs 60, 120 and 160.
scheduler_student = torch.optim.lr_scheduler.MultiStepLR(optimizer_student, milestones=[60,120,160], gamma=0.1,verbose=True)

Adjusting learning rate of group 0 to 1.0000e-01.


In [20]:
# Move the student's parameters and buffers to DEVICE (the returned module is displayed).
student_model.to(DEVICE)

ResNetSmall(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, 

In [21]:
def train_fn(train_loader, model, criterion, optimizer, scheduler, device, temperature, lambda_d, lambda_a):
    """Train ``model`` for one epoch by distilling from the global ``teacher_model``.

    Per batch the loss is::

        T^2 * KL(softmax(student/T) || softmax(teacher/T))   # soft targets
        + cross_entropy(student, labels)                     # hard labels
        + lambda_d * dist_criterion(student_pool, teacher_pool)
        + lambda_a * angle_criterion(student_pool, teacher_pool)

    ``teacher_model``, ``dist_criterion`` and ``angle_criterion`` are read
    from module-level globals. Both models must accept ``is_feat=True`` and
    return ``(pooled_feature, logits)``.

    Args:
        train_loader: iterable of ``(images, labels)`` batches supporting ``len``.
        model: student network being optimized.
        criterion: unused; kept for call-site compatibility.
        optimizer: optimizer over the student's parameters.
        scheduler: unused; kept for call-site compatibility.
        device: device the batches are moved to.
        temperature: softmax temperature ``T`` for the soft-target term.
        lambda_d: weight of the distance-wise relational loss.
        lambda_a: weight of the angle-wise relational loss.

    Returns:
        The sample-weighted mean of the per-batch total loss.
    """
    losses = AverageMeter()

    # Mode switches are loop-invariant, so do them once. The teacher is forced
    # into eval mode: in train mode its BatchNorm layers would keep updating
    # their running statistics even under no_grad.
    teacher_model.eval()
    model.train()

    for images, labels in tqdm(train_loader, total=len(train_loader)):
        images = images.to(device, dtype=torch.float)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # Teacher targets carry no gradient.
        with torch.no_grad():
            teacher_pool, large_logits = teacher_model(images, is_feat=True)
        student_pool, y_preds = model(images, is_feat=True)

        soft_targets_loss = F.kl_div(
            F.log_softmax(y_preds / temperature, dim=1),
            F.softmax(large_logits / temperature, dim=1),
            reduction='batchmean')
        label_loss = F.cross_entropy(y_preds, labels)
        distance_loss = dist_criterion(student_pool, teacher_pool)
        angle_loss = angle_criterion(student_pool, teacher_pool)

        # T^2 rescales the soft-target term before it is summed with the others.
        loss = ((temperature * temperature) * soft_targets_loss
                + label_loss
                + lambda_d * distance_loss
                + lambda_a * angle_loss)
        losses.update(loss.item(), batch_size)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    return losses.avg

def valid_fn(test_loader, model, criterion, device):
    """Evaluate ``model`` on every batch of ``test_loader``.

    Args:
        test_loader: iterable of ``(images, labels)`` batches supporting ``len``.
        model: network called as ``model(images)`` and returning logits.
        criterion: callable ``criterion(logits, labels)`` returning a scalar loss.
        device: device the batches are moved to.

    Returns:
        ``(avg_loss, score)``: the sample-weighted mean loss and the value of
        ``get_score`` on the arg-max predictions over the whole loader.
    """
    losses = AverageMeter()

    model.eval()
    preds = []
    valid_labels = []
    for images, labels in tqdm(test_loader, total=len(test_loader)):
        images = images.to(device, dtype=torch.float)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # Nothing here needs a graph, so forward pass and loss both run under no_grad.
        with torch.no_grad():
            y_preds = model(images)
            loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)

        preds.append(y_preds.softmax(1).to('cpu').numpy())
        valid_labels.append(labels.to('cpu').numpy())

    predictions = np.concatenate(preds)
    valid_labels = np.concatenate(valid_labels)

    score = get_score(valid_labels, predictions.argmax(1))

    return losses.avg, score

In [22]:
# Relational distillation criteria; train_fn reads these two names as module-level globals.
dist_criterion = RkdDistance()
angle_criterion = RKdAngle()
# Cross-entropy handed to valid_fn for the reported validation loss
# (rebinds the `criterion` created in an earlier cell).
criterion = nn.CrossEntropyLoss()

In [23]:
def calculate_loss(model, data, target, optimizer=None):
    """Run one supervised cross-entropy step and return ``(output, loss)``.

    Gradients are NOT zeroed here; the caller is expected to call
    ``optimizer.zero_grad()`` between steps.

    Args:
        model: network called as ``model(data)`` and returning logits.
        data: input batch.
        target: class-index labels for ``data``.
        optimizer: optimizer to step. When omitted the function falls back to
            the module-level ``optimizer_teacher`` as before. That name is not
            defined anywhere in the visible notebook, so pass an optimizer
            explicitly unless it has been created elsewhere.

    Returns:
        ``(output, loss)``: the logits and the loss tensor computed before the
        optimizer step.
    """
    if optimizer is None:
        # Legacy behaviour: rely on a global created elsewhere.
        optimizer = optimizer_teacher

    # Standard Learning Loss ( Classification Loss)
    output = model(data)
    loss = F.cross_entropy(output, target)
    loss.backward()
    optimizer.step()
    return output, loss

In [24]:
best_score = 0

# One transfer before the loop is enough; repeating it every epoch is a no-op.
student_model.to(DEVICE)

# Epoch count comes from `num_epochs` (configured next to the optimizer)
# rather than a second hard-coded 200.
for epoch in range(num_epochs):
    start_time = time.time()
    # train_fn does not use its criterion/scheduler arguments; the scheduler
    # is stepped once per epoch below instead.
    avg_loss = train_fn(train_loader, student_model, criterion, optimizer_student, None, DEVICE,
                        temperature=4, lambda_d=25, lambda_a=50)
    avg_val_loss, score = valid_fn(test_loader, student_model, criterion, DEVICE)

    scheduler_student.step()

    print(f"Epoch {epoch+1}")
    print(f"Accuracy: {score} | Train loss: {avg_loss} | Valid loss: {avg_val_loss}")

    # NOTE(review): the "best" checkpoint is selected on the test split, so
    # best_score is an optimistic estimate of generalization.
    if score > best_score:
        print("YES")
        best_score = score
        torch.save(student_model.state_dict(), "best_model.pt")

    elapsed = time.time() - start_time

100%|██████████| 391/391 [00:53<00:00,  7.33it/s]
100%|██████████| 79/79 [00:02<00:00, 27.34it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 1
Accuracy: 0.1905 | Train loss: 12.768819295349122 | Valid loss: 3.4315646770477293
YES



100%|██████████| 391/391 [00:46<00:00,  8.41it/s]
100%|██████████| 79/79 [00:03<00:00, 23.97it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 2
Accuracy: 0.2548 | Train loss: 11.08469990600586 | Valid loss: 3.253253562927246
YES


100%|██████████| 391/391 [00:46<00:00,  8.46it/s]
100%|██████████| 79/79 [00:02<00:00, 26.65it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 3
Accuracy: 0.3387 | Train loss: 10.064585584106446 | Valid loss: 2.6931901756286623
YES



100%|██████████| 391/391 [00:46<00:00,  8.43it/s]
100%|██████████| 79/79 [00:03<00:00, 24.00it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 4
Accuracy: 0.3173 | Train loss: 9.09524854522705 | Valid loss: 2.8857390106201173



100%|██████████| 391/391 [00:46<00:00,  8.49it/s]
100%|██████████| 79/79 [00:02<00:00, 26.91it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 5
Accuracy: 0.383 | Train loss: 8.399331358337403 | Valid loss: 2.5329729637145997
YES



100%|██████████| 391/391 [00:46<00:00,  8.46it/s]
100%|██████████| 79/79 [00:03<00:00, 22.43it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 6
Accuracy: 0.4361 | Train loss: 7.895563379058838 | Valid loss: 2.2372500694274904
YES



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:03<00:00, 26.25it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 7
Accuracy: 0.4592 | Train loss: 7.558157205810547 | Valid loss: 2.136507911872864
YES



100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:03<00:00, 24.54it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 8
Accuracy: 0.4577 | Train loss: 7.248992279205322 | Valid loss: 2.14999740486145



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:02<00:00, 27.01it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 9
Accuracy: 0.4504 | Train loss: 7.047020035552978 | Valid loss: 2.2149876636505126



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:03<00:00, 25.22it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 10
Accuracy: 0.4892 | Train loss: 6.900907771759033 | Valid loss: 1.9941044448852538
YES



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:03<00:00, 21.13it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 11
Accuracy: 0.4706 | Train loss: 6.732235449523926 | Valid loss: 2.111750601005554



100%|██████████| 391/391 [00:47<00:00,  8.32it/s]
100%|██████████| 79/79 [00:03<00:00, 23.76it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 12
Accuracy: 0.491 | Train loss: 6.604928817443848 | Valid loss: 1.989502505493164
YES


100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:04<00:00, 19.24it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 13
Accuracy: 0.4732 | Train loss: 6.499069280853272 | Valid loss: 2.1122471616744996



100%|██████████| 391/391 [00:46<00:00,  8.48it/s]
100%|██████████| 79/79 [00:03<00:00, 24.29it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 14
Accuracy: 0.4796 | Train loss: 6.411416404113769 | Valid loss: 2.1184360847473145



100%|██████████| 391/391 [00:46<00:00,  8.46it/s]
100%|██████████| 79/79 [00:03<00:00, 23.07it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 15
Accuracy: 0.5166 | Train loss: 6.342113076477051 | Valid loss: 1.9669174633026123
YES


100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:02<00:00, 26.79it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 16
Accuracy: 0.5269 | Train loss: 6.2869976669311525 | Valid loss: 1.8406429992675781
YES



100%|██████████| 391/391 [00:46<00:00,  8.46it/s]
100%|██████████| 79/79 [00:03<00:00, 21.39it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 17
Accuracy: 0.5345 | Train loss: 6.213721573486328 | Valid loss: 1.8288174810409545
YES



100%|██████████| 391/391 [00:46<00:00,  8.44it/s]
100%|██████████| 79/79 [00:03<00:00, 25.86it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 18
Accuracy: 0.5137 | Train loss: 6.1726793635559085 | Valid loss: 1.9466293655395508



100%|██████████| 391/391 [00:46<00:00,  8.43it/s]
100%|██████████| 79/79 [00:04<00:00, 18.25it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 19
Accuracy: 0.5137 | Train loss: 6.109864019317627 | Valid loss: 1.942410350227356


100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:02<00:00, 26.69it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 20
Accuracy: 0.5279 | Train loss: 6.0679750440979 | Valid loss: 1.8242299015045167


100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:04<00:00, 19.14it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 21
Accuracy: 0.5111 | Train loss: 6.015177316589355 | Valid loss: 1.9787889589309693



100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:02<00:00, 27.07it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 22
Accuracy: 0.5469 | Train loss: 6.0071317504882815 | Valid loss: 1.78942071723938
YES



100%|██████████| 391/391 [00:46<00:00,  8.34it/s]
100%|██████████| 79/79 [00:03<00:00, 24.56it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 23
Accuracy: 0.5284 | Train loss: 5.979992589416504 | Valid loss: 1.8350874305725098



100%|██████████| 391/391 [00:46<00:00,  8.43it/s]
100%|██████████| 79/79 [00:03<00:00, 24.86it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 24
Accuracy: 0.5191 | Train loss: 5.914753184356689 | Valid loss: 1.9094398063659668



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:03<00:00, 24.04it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 25
Accuracy: 0.5383 | Train loss: 5.903348768157959 | Valid loss: 1.7983540060043335


100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:03<00:00, 21.94it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 26
Accuracy: 0.5424 | Train loss: 5.8632181358337405 | Valid loss: 1.8247099960327149


100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:02<00:00, 27.56it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 27
Accuracy: 0.4973 | Train loss: 5.8467476675415035 | Valid loss: 2.155651400756836



100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:04<00:00, 18.25it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 28
Accuracy: 0.5314 | Train loss: 5.821039920043945 | Valid loss: 1.8616576099395752



100%|██████████| 391/391 [00:46<00:00,  8.33it/s]
100%|██████████| 79/79 [00:03<00:00, 26.12it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 29
Accuracy: 0.5454 | Train loss: 5.7920292984008785 | Valid loss: 1.7970846797943114



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:03<00:00, 25.16it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 30
Accuracy: 0.536 | Train loss: 5.784130750579834 | Valid loss: 1.85265080909729



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:03<00:00, 25.55it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 31
Accuracy: 0.5298 | Train loss: 5.763003161621094 | Valid loss: 1.852724404335022



100%|██████████| 391/391 [00:47<00:00,  8.30it/s]
100%|██████████| 79/79 [00:03<00:00, 25.51it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 32
Accuracy: 0.5393 | Train loss: 5.754849680328369 | Valid loss: 1.801655613708496



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:03<00:00, 22.55it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 33
Accuracy: 0.5431 | Train loss: 5.739804600219727 | Valid loss: 1.800593229675293



100%|██████████| 391/391 [00:46<00:00,  8.36it/s]
100%|██████████| 79/79 [00:03<00:00, 26.12it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 34
Accuracy: 0.5631 | Train loss: 5.7007888047790525 | Valid loss: 1.7283151929855347
YES



100%|██████████| 391/391 [00:46<00:00,  8.36it/s]
100%|██████████| 79/79 [00:03<00:00, 23.84it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 35
Accuracy: 0.5313 | Train loss: 5.699866780700684 | Valid loss: 1.8411972116470336



100%|██████████| 391/391 [00:46<00:00,  8.40it/s]
100%|██████████| 79/79 [00:02<00:00, 27.13it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 36
Accuracy: 0.5495 | Train loss: 5.719936823425293 | Valid loss: 1.7904563316345214



100%|██████████| 391/391 [00:46<00:00,  8.36it/s]
100%|██████████| 79/79 [00:03<00:00, 24.11it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 37
Accuracy: 0.5433 | Train loss: 5.684247231445313 | Valid loss: 1.8515059209823608



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:02<00:00, 27.16it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 38
Accuracy: 0.5244 | Train loss: 5.678430082397461 | Valid loss: 1.9514253231048584



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:02<00:00, 27.52it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 39
Accuracy: 0.5231 | Train loss: 5.652159736633301 | Valid loss: 1.92140245552063



100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:02<00:00, 27.39it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 40
Accuracy: 0.5344 | Train loss: 5.630550619354248 | Valid loss: 1.8409625888824463



100%|██████████| 391/391 [00:46<00:00,  8.40it/s]
100%|██████████| 79/79 [00:02<00:00, 26.58it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 41
Accuracy: 0.5506 | Train loss: 5.6278877137756345 | Valid loss: 1.7919261806488036



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:02<00:00, 27.38it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 42
Accuracy: 0.5453 | Train loss: 5.625197903289795 | Valid loss: 1.7689904914855956



100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:02<00:00, 27.55it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 43
Accuracy: 0.5154 | Train loss: 5.606600997772217 | Valid loss: 1.947978009223938



100%|██████████| 391/391 [00:46<00:00,  8.44it/s]
100%|██████████| 79/79 [00:02<00:00, 27.24it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 44
Accuracy: 0.5556 | Train loss: 5.603478600311279 | Valid loss: 1.723874584197998



100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:03<00:00, 26.25it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 45
Accuracy: 0.561 | Train loss: 5.6051065409851075 | Valid loss: 1.6654225303649903



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:02<00:00, 26.57it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 46
Accuracy: 0.5236 | Train loss: 5.577453545684815 | Valid loss: 1.914587382888794



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:03<00:00, 23.96it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 47
Accuracy: 0.5784 | Train loss: 5.567607305297852 | Valid loss: 1.6662926879882813
YES


100%|██████████| 391/391 [00:46<00:00,  8.41it/s]
100%|██████████| 79/79 [00:02<00:00, 26.83it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 48
Accuracy: 0.5582 | Train loss: 5.588811587219238 | Valid loss: 1.7529407402038575



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:03<00:00, 26.32it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 49
Accuracy: 0.5009 | Train loss: 5.558875791168213 | Valid loss: 2.129378884124756



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:03<00:00, 26.30it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 50
Accuracy: 0.527 | Train loss: 5.544875475006103 | Valid loss: 1.8342727130889893



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:02<00:00, 27.27it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 51
Accuracy: 0.5481 | Train loss: 5.5394952272033695 | Valid loss: 1.7964753721237183



100%|██████████| 391/391 [00:46<00:00,  8.44it/s]
100%|██████████| 79/79 [00:03<00:00, 22.64it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 52
Accuracy: 0.5522 | Train loss: 5.547942945404053 | Valid loss: 1.7941687599182128



100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:02<00:00, 27.28it/s]


Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 53
Accuracy: 0.5438 | Train loss: 5.5101395584106445 | Valid loss: 1.7532272354125977


100%|██████████| 391/391 [00:46<00:00,  8.40it/s]
100%|██████████| 79/79 [00:04<00:00, 19.52it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 54
Accuracy: 0.5575 | Train loss: 5.518982931518555 | Valid loss: 1.7598959617614747



100%|██████████| 391/391 [00:46<00:00,  8.36it/s]
100%|██████████| 79/79 [00:02<00:00, 26.53it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 55
Accuracy: 0.5241 | Train loss: 5.530556439971924 | Valid loss: 1.9833354740142821



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:04<00:00, 18.56it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 56
Accuracy: 0.5457 | Train loss: 5.530064601135254 | Valid loss: 1.8477675491333008



100%|██████████| 391/391 [00:47<00:00,  8.29it/s]
100%|██████████| 79/79 [00:03<00:00, 26.07it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 57
Accuracy: 0.5382 | Train loss: 5.5142739268493655 | Valid loss: 1.8588215648651123



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:03<00:00, 24.24it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 58
Accuracy: 0.4973 | Train loss: 5.526522110748291 | Valid loss: 2.0887818508148195



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:03<00:00, 23.76it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Epoch 59
Accuracy: 0.5686 | Train loss: 5.511076864471436 | Valid loss: 1.7113307411193848



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:03<00:00, 21.98it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 60
Accuracy: 0.5074 | Train loss: 5.512207584991455 | Valid loss: 2.026469123077393



100%|██████████| 391/391 [00:46<00:00,  8.34it/s]
100%|██████████| 79/79 [00:03<00:00, 21.80it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 61
Accuracy: 0.6615 | Train loss: 4.4587554602050785 | Valid loss: 1.2406431327819825
YES



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:02<00:00, 27.48it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 62
Accuracy: 0.6688 | Train loss: 4.1462489639282225 | Valid loss: 1.232780694580078
YES



100%|██████████| 391/391 [00:46<00:00,  8.32it/s]
100%|██████████| 79/79 [00:03<00:00, 26.25it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 63
Accuracy: 0.6706 | Train loss: 4.038261229248047 | Valid loss: 1.2135851291656494
YES



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:02<00:00, 27.76it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 64
Accuracy: 0.6725 | Train loss: 3.977205286254883 | Valid loss: 1.2172750869750977
YES



100%|██████████| 391/391 [00:46<00:00,  8.40it/s]
100%|██████████| 79/79 [00:03<00:00, 21.43it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 65
Accuracy: 0.6741 | Train loss: 3.9107290068817138 | Valid loss: 1.209852399635315
YES



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:02<00:00, 27.07it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 66
Accuracy: 0.6785 | Train loss: 3.876210212554932 | Valid loss: 1.2058720495223998
YES



100%|██████████| 391/391 [00:46<00:00,  8.41it/s]
100%|██████████| 79/79 [00:03<00:00, 26.18it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 67
Accuracy: 0.6767 | Train loss: 3.842342932281494 | Valid loss: 1.212095862197876



100%|██████████| 391/391 [00:47<00:00,  8.24it/s]
100%|██████████| 79/79 [00:03<00:00, 22.36it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 68
Accuracy: 0.6774 | Train loss: 3.8063061546325683 | Valid loss: 1.220715309906006



100%|██████████| 391/391 [00:46<00:00,  8.36it/s]
100%|██████████| 79/79 [00:03<00:00, 26.29it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 69
Accuracy: 0.6762 | Train loss: 3.7769437964630126 | Valid loss: 1.2006689447402954



100%|██████████| 391/391 [00:47<00:00,  8.24it/s]
100%|██████████| 79/79 [00:03<00:00, 20.12it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 70
Accuracy: 0.6795 | Train loss: 3.778866645965576 | Valid loss: 1.216966216468811
YES



100%|██████████| 391/391 [00:46<00:00,  8.34it/s]
100%|██████████| 79/79 [00:02<00:00, 27.27it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 71
Accuracy: 0.6739 | Train loss: 3.7544679537963868 | Valid loss: 1.2117611335754395



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:02<00:00, 26.76it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 72
Accuracy: 0.6807 | Train loss: 3.7319870726013185 | Valid loss: 1.2055486173629761
YES



100%|██████████| 391/391 [00:47<00:00,  8.24it/s]
100%|██████████| 79/79 [00:03<00:00, 23.42it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 73
Accuracy: 0.6784 | Train loss: 3.718319437484741 | Valid loss: 1.2038278818130492



100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:02<00:00, 27.49it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 74
Accuracy: 0.6784 | Train loss: 3.7096952378845214 | Valid loss: 1.213663073158264



100%|██████████| 391/391 [00:47<00:00,  8.29it/s]
100%|██████████| 79/79 [00:03<00:00, 20.59it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 75
Accuracy: 0.6771 | Train loss: 3.7077640450286866 | Valid loss: 1.2140300672531128



100%|██████████| 391/391 [00:46<00:00,  8.43it/s]
100%|██████████| 79/79 [00:02<00:00, 26.93it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 76
Accuracy: 0.6714 | Train loss: 3.694904107055664 | Valid loss: 1.2505081182479858



100%|██████████| 391/391 [00:46<00:00,  8.36it/s]
100%|██████████| 79/79 [00:03<00:00, 25.61it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 77
Accuracy: 0.6748 | Train loss: 3.6939777378082277 | Valid loss: 1.22866090965271



100%|██████████| 391/391 [00:47<00:00,  8.29it/s]
100%|██████████| 79/79 [00:03<00:00, 25.82it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 78
Accuracy: 0.6677 | Train loss: 3.6771439533996584 | Valid loss: 1.2565580011367798



100%|██████████| 391/391 [00:47<00:00,  8.32it/s]
100%|██████████| 79/79 [00:02<00:00, 26.89it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 79
Accuracy: 0.6792 | Train loss: 3.683374750213623 | Valid loss: 1.2217558500289918



100%|██████████| 391/391 [00:46<00:00,  8.32it/s]
100%|██████████| 79/79 [00:04<00:00, 17.91it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 80
Accuracy: 0.6717 | Train loss: 3.6753223430633546 | Valid loss: 1.2662193830490112



100%|██████████| 391/391 [00:46<00:00,  8.33it/s]
100%|██████████| 79/79 [00:03<00:00, 26.07it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 81
Accuracy: 0.6735 | Train loss: 3.6577010511016845 | Valid loss: 1.2286179502487182


100%|██████████| 391/391 [00:47<00:00,  8.30it/s]
100%|██████████| 79/79 [00:03<00:00, 23.18it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 82
Accuracy: 0.6805 | Train loss: 3.6729037600708008 | Valid loss: 1.2058833898544312



100%|██████████| 391/391 [00:46<00:00,  8.32it/s]
100%|██████████| 79/79 [00:02<00:00, 27.02it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 83
Accuracy: 0.6749 | Train loss: 3.671122861175537 | Valid loss: 1.2248349397659302



100%|██████████| 391/391 [00:47<00:00,  8.28it/s]
100%|██████████| 79/79 [00:02<00:00, 27.39it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 84
Accuracy: 0.6702 | Train loss: 3.666568038330078 | Valid loss: 1.2647655139923095



100%|██████████| 391/391 [00:46<00:00,  8.40it/s]
100%|██████████| 79/79 [00:04<00:00, 19.33it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 85
Accuracy: 0.6754 | Train loss: 3.6541758146667482 | Valid loss: 1.2333542726516724



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:02<00:00, 26.87it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 86
Accuracy: 0.6705 | Train loss: 3.6752543982696535 | Valid loss: 1.2588416343688964



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:03<00:00, 24.35it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 87
Accuracy: 0.6717 | Train loss: 3.6554595380401613 | Valid loss: 1.2632398635864257



100%|██████████| 391/391 [00:47<00:00,  8.26it/s]
100%|██████████| 79/79 [00:02<00:00, 26.83it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 88
Accuracy: 0.6756 | Train loss: 3.6535015895080565 | Valid loss: 1.2325811464309693



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:02<00:00, 26.36it/s]


Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 89
Accuracy: 0.6677 | Train loss: 3.6596383280944824 | Valid loss: 1.2814587722778321


100%|██████████| 391/391 [00:47<00:00,  8.25it/s]
100%|██████████| 79/79 [00:02<00:00, 27.06it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 90
Accuracy: 0.6678 | Train loss: 3.643154190597534 | Valid loss: 1.2726959217071534



100%|██████████| 391/391 [00:46<00:00,  8.46it/s]
100%|██████████| 79/79 [00:03<00:00, 26.13it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 91
Accuracy: 0.6608 | Train loss: 3.6375862493133546 | Valid loss: 1.3224870903015136



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:02<00:00, 28.41it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 92
Accuracy: 0.6738 | Train loss: 3.641625475997925 | Valid loss: 1.2569480657577514



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:02<00:00, 27.04it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 93
Accuracy: 0.6754 | Train loss: 3.6473416079711916 | Valid loss: 1.2312198692321776



100%|██████████| 391/391 [00:46<00:00,  8.43it/s]
100%|██████████| 79/79 [00:03<00:00, 24.88it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 94
Accuracy: 0.6639 | Train loss: 3.6553319646453857 | Valid loss: 1.2853266590118408



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:02<00:00, 28.02it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 95
Accuracy: 0.6673 | Train loss: 3.6421148973846433 | Valid loss: 1.2646809429168702



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:02<00:00, 27.30it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 96
Accuracy: 0.6684 | Train loss: 3.639780238952637 | Valid loss: 1.2656524417877197



100%|██████████| 391/391 [00:46<00:00,  8.34it/s]
100%|██████████| 79/79 [00:02<00:00, 27.43it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 97
Accuracy: 0.6563 | Train loss: 3.6355058115386965 | Valid loss: 1.3394702278137207



100%|██████████| 391/391 [00:46<00:00,  8.47it/s]
100%|██████████| 79/79 [00:03<00:00, 19.82it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 98
Accuracy: 0.6672 | Train loss: 3.641788819580078 | Valid loss: 1.2820359783172608



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:03<00:00, 26.00it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 99
Accuracy: 0.6703 | Train loss: 3.633244655075073 | Valid loss: 1.2783400806427



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:03<00:00, 24.90it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 100
Accuracy: 0.6658 | Train loss: 3.6421554260253908 | Valid loss: 1.2788547494888305



100%|██████████| 391/391 [00:47<00:00,  8.30it/s]
100%|██████████| 79/79 [00:03<00:00, 26.03it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 101
Accuracy: 0.6622 | Train loss: 3.6442786494445802 | Valid loss: 1.293389630508423



100%|██████████| 391/391 [00:46<00:00,  8.47it/s]
100%|██████████| 79/79 [00:05<00:00, 15.77it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 102
Accuracy: 0.6603 | Train loss: 3.637704542388916 | Valid loss: 1.3164223079681396



100%|██████████| 391/391 [00:46<00:00,  8.47it/s]
100%|██████████| 79/79 [00:02<00:00, 28.69it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 103
Accuracy: 0.6746 | Train loss: 3.6374533066558836 | Valid loss: 1.2461668933868408



100%|██████████| 391/391 [00:47<00:00,  8.31it/s]
100%|██████████| 79/79 [00:03<00:00, 21.63it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 104
Accuracy: 0.6645 | Train loss: 3.618164414138794 | Valid loss: 1.292273847770691



100%|██████████| 391/391 [00:46<00:00,  8.41it/s]
100%|██████████| 79/79 [00:02<00:00, 27.67it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 105
Accuracy: 0.6571 | Train loss: 3.6329120053100588 | Valid loss: 1.3213490909576415



100%|██████████| 391/391 [00:46<00:00,  8.49it/s]
100%|██████████| 79/79 [00:04<00:00, 18.20it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 106
Accuracy: 0.6619 | Train loss: 3.605947534942627 | Valid loss: 1.3196687398910523



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:03<00:00, 25.29it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 107
Accuracy: 0.6653 | Train loss: 3.6192361067199705 | Valid loss: 1.2894280393600464



100%|██████████| 391/391 [00:46<00:00,  8.32it/s]
100%|██████████| 79/79 [00:02<00:00, 26.92it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 108
Accuracy: 0.6607 | Train loss: 3.6225466202545165 | Valid loss: 1.3233214834213256



100%|██████████| 391/391 [00:46<00:00,  8.40it/s]
100%|██████████| 79/79 [00:03<00:00, 22.65it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 109
Accuracy: 0.6654 | Train loss: 3.603583647766113 | Valid loss: 1.2770719398498536



100%|██████████| 391/391 [00:45<00:00,  8.53it/s]
100%|██████████| 79/79 [00:03<00:00, 22.54it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 110
Accuracy: 0.6695 | Train loss: 3.617936156387329 | Valid loss: 1.2903870344161987



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:03<00:00, 24.15it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 111
Accuracy: 0.67 | Train loss: 3.6068959538269043 | Valid loss: 1.281130094718933



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:03<00:00, 25.97it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 112
Accuracy: 0.6735 | Train loss: 3.606205608596802 | Valid loss: 1.2474442405700683



100%|██████████| 391/391 [00:46<00:00,  8.40it/s]
100%|██████████| 79/79 [00:03<00:00, 24.26it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 113
Accuracy: 0.6652 | Train loss: 3.601406053466797 | Valid loss: 1.2874663202285768



100%|██████████| 391/391 [00:46<00:00,  8.49it/s]
100%|██████████| 79/79 [00:02<00:00, 26.59it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 114
Accuracy: 0.6702 | Train loss: 3.598729217453003 | Valid loss: 1.295976512336731



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:03<00:00, 25.10it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 115
Accuracy: 0.6692 | Train loss: 3.5921397598266602 | Valid loss: 1.2849313255310058



100%|██████████| 391/391 [00:46<00:00,  8.32it/s]
100%|██████████| 79/79 [00:02<00:00, 26.87it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 116
Accuracy: 0.6606 | Train loss: 3.6091603693389893 | Valid loss: 1.3115011528015137



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:04<00:00, 19.55it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 117
Accuracy: 0.6756 | Train loss: 3.588989895019531 | Valid loss: 1.2983760431289673



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:02<00:00, 27.28it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 118
Accuracy: 0.6728 | Train loss: 3.596020030212402 | Valid loss: 1.245433082008362



100%|██████████| 391/391 [00:46<00:00,  8.33it/s]
100%|██████████| 79/79 [00:03<00:00, 26.11it/s]

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch 119
Accuracy: 0.6718 | Train loss: 3.581015878982544 | Valid loss: 1.2779916482925415



100%|██████████| 391/391 [00:47<00:00,  8.30it/s]
100%|██████████| 79/79 [00:03<00:00, 24.12it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 120
Accuracy: 0.6602 | Train loss: 3.571062819519043 | Valid loss: 1.3157424486160278



100%|██████████| 391/391 [00:45<00:00,  8.53it/s]
100%|██████████| 79/79 [00:02<00:00, 28.00it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 121
Accuracy: 0.7008 | Train loss: 3.1732457691955567 | Valid loss: 1.1379299982070923
YES


100%|██████████| 391/391 [00:47<00:00,  8.23it/s]
100%|██████████| 79/79 [00:03<00:00, 24.76it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 122
Accuracy: 0.7046 | Train loss: 3.0505182231903074 | Valid loss: 1.1252947193145753
YES



100%|██████████| 391/391 [00:47<00:00,  8.30it/s]
100%|██████████| 79/79 [00:02<00:00, 26.83it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 123
Accuracy: 0.7069 | Train loss: 3.0159090577697754 | Valid loss: 1.1207727922439574
YES



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:04<00:00, 18.21it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 124
Accuracy: 0.7046 | Train loss: 2.9960100453186036 | Valid loss: 1.119762621307373



100%|██████████| 391/391 [00:45<00:00,  8.51it/s]
100%|██████████| 79/79 [00:02<00:00, 26.75it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 125
Accuracy: 0.706 | Train loss: 2.966465616455078 | Valid loss: 1.1252638647079467



100%|██████████| 391/391 [00:47<00:00,  8.26it/s]
100%|██████████| 79/79 [00:02<00:00, 26.67it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 126
Accuracy: 0.7068 | Train loss: 2.959488553237915 | Valid loss: 1.1221457405090332



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:02<00:00, 27.01it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 127
Accuracy: 0.708 | Train loss: 2.949776927108765 | Valid loss: 1.1260947546005249
YES



100%|██████████| 391/391 [00:46<00:00,  8.40it/s]
100%|██████████| 79/79 [00:03<00:00, 20.34it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 128
Accuracy: 0.7058 | Train loss: 2.9455140126037596 | Valid loss: 1.1192431980133057



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:03<00:00, 24.23it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 129
Accuracy: 0.7093 | Train loss: 2.9332597773742677 | Valid loss: 1.120528087615967
YES



100%|██████████| 391/391 [00:47<00:00,  8.30it/s]
100%|██████████| 79/79 [00:02<00:00, 27.02it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 130
Accuracy: 0.7035 | Train loss: 2.9310856030273436 | Valid loss: 1.12683748588562



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:03<00:00, 24.40it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 131
Accuracy: 0.7068 | Train loss: 2.9066612495422364 | Valid loss: 1.1278428031921386



100%|██████████| 391/391 [00:45<00:00,  8.53it/s]
100%|██████████| 79/79 [00:02<00:00, 29.54it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 132
Accuracy: 0.7042 | Train loss: 2.9095403623199463 | Valid loss: 1.1240644445419312



100%|██████████| 391/391 [00:46<00:00,  8.36it/s]
100%|██████████| 79/79 [00:03<00:00, 24.11it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 133
Accuracy: 0.7079 | Train loss: 2.9066146968078614 | Valid loss: 1.1256698734283448



100%|██████████| 391/391 [00:46<00:00,  8.32it/s]
100%|██████████| 79/79 [00:02<00:00, 27.23it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 134
Accuracy: 0.7039 | Train loss: 2.899545623474121 | Valid loss: 1.1225306465148925



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:04<00:00, 18.18it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 135
Accuracy: 0.7072 | Train loss: 2.89619903175354 | Valid loss: 1.1245692293167113



100%|██████████| 391/391 [00:46<00:00,  8.40it/s]
100%|██████████| 79/79 [00:02<00:00, 26.70it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 136
Accuracy: 0.7034 | Train loss: 2.884283203125 | Valid loss: 1.1203717613220214



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:03<00:00, 26.15it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 137
Accuracy: 0.7056 | Train loss: 2.886072237548828 | Valid loss: 1.1269243579864503



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:02<00:00, 26.81it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 138
Accuracy: 0.7041 | Train loss: 2.8706466149902345 | Valid loss: 1.1275426343917847



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:02<00:00, 27.05it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 139
Accuracy: 0.7053 | Train loss: 2.8762020412445066 | Valid loss: 1.1265592601776122



100%|██████████| 391/391 [00:46<00:00,  8.32it/s]
100%|██████████| 79/79 [00:03<00:00, 25.49it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 140
Accuracy: 0.7056 | Train loss: 2.8663806771087645 | Valid loss: 1.1264960733413696



100%|██████████| 391/391 [00:47<00:00,  8.28it/s]
100%|██████████| 79/79 [00:02<00:00, 27.21it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 141
Accuracy: 0.7066 | Train loss: 2.856396382446289 | Valid loss: 1.1266134956359863



100%|██████████| 391/391 [00:46<00:00,  8.49it/s]
100%|██████████| 79/79 [00:04<00:00, 19.16it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 142
Accuracy: 0.7045 | Train loss: 2.867585587539673 | Valid loss: 1.1301038736343383



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:03<00:00, 26.12it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 143
Accuracy: 0.7052 | Train loss: 2.8489874280548095 | Valid loss: 1.133769365119934



100%|██████████| 391/391 [00:46<00:00,  8.32it/s]
100%|██████████| 79/79 [00:02<00:00, 27.20it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 144
Accuracy: 0.7055 | Train loss: 2.8581618798065187 | Valid loss: 1.133059458732605



100%|██████████| 391/391 [00:46<00:00,  8.33it/s]
100%|██████████| 79/79 [00:03<00:00, 20.03it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 145
Accuracy: 0.7042 | Train loss: 2.8393169886016847 | Valid loss: 1.1377997247695923



100%|██████████| 391/391 [00:46<00:00,  8.47it/s]
100%|██████████| 79/79 [00:03<00:00, 23.96it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 146
Accuracy: 0.7044 | Train loss: 2.8643263152313234 | Valid loss: 1.132852089691162



100%|██████████| 391/391 [00:47<00:00,  8.31it/s]
100%|██████████| 79/79 [00:03<00:00, 24.19it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 147
Accuracy: 0.7046 | Train loss: 2.849344277038574 | Valid loss: 1.1297121854782104



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:03<00:00, 22.67it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 148
Accuracy: 0.7053 | Train loss: 2.833729165802002 | Valid loss: 1.125423575592041



100%|██████████| 391/391 [00:45<00:00,  8.51it/s]
100%|██████████| 79/79 [00:03<00:00, 23.74it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 149
Accuracy: 0.704 | Train loss: 2.842831024246216 | Valid loss: 1.1338327320098878



100%|██████████| 391/391 [00:47<00:00,  8.30it/s]
100%|██████████| 79/79 [00:02<00:00, 27.60it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 150
Accuracy: 0.7048 | Train loss: 2.8430793476867677 | Valid loss: 1.1369535120010377



100%|██████████| 391/391 [00:47<00:00,  8.29it/s]
100%|██████████| 79/79 [00:03<00:00, 25.33it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 151
Accuracy: 0.7038 | Train loss: 2.8324102950286867 | Valid loss: 1.1436183839797973



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:04<00:00, 19.04it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 152
Accuracy: 0.7027 | Train loss: 2.8222560209655763 | Valid loss: 1.1359959791183472


100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:02<00:00, 29.00it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 153
Accuracy: 0.7055 | Train loss: 2.825822731933594 | Valid loss: 1.1429097511291504



100%|██████████| 391/391 [00:46<00:00,  8.32it/s]
100%|██████████| 79/79 [00:02<00:00, 27.29it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 154
Accuracy: 0.7034 | Train loss: 2.8287013055419923 | Valid loss: 1.1447954845428467



100%|██████████| 391/391 [00:46<00:00,  8.37it/s]
100%|██████████| 79/79 [00:03<00:00, 22.38it/s]


Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 155
Accuracy: 0.7039 | Train loss: 2.8207297440338133 | Valid loss: 1.1435341430664063


100%|██████████| 391/391 [00:46<00:00,  8.50it/s]
100%|██████████| 79/79 [00:03<00:00, 25.12it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 156
Accuracy: 0.7045 | Train loss: 2.8152401638031006 | Valid loss: 1.1374606746673583



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:02<00:00, 27.72it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 157
Accuracy: 0.7032 | Train loss: 2.8178942218017577 | Valid loss: 1.13908614654541



100%|██████████| 391/391 [00:47<00:00,  8.22it/s]
100%|██████████| 79/79 [00:03<00:00, 24.88it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 158
Accuracy: 0.7031 | Train loss: 2.8108020097351076 | Valid loss: 1.1350353942871094



100%|██████████| 391/391 [00:45<00:00,  8.52it/s]
100%|██████████| 79/79 [00:02<00:00, 28.52it/s]

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch 159
Accuracy: 0.7055 | Train loss: 2.816058041229248 | Valid loss: 1.142383703804016



100%|██████████| 391/391 [00:47<00:00,  8.31it/s]
100%|██████████| 79/79 [00:03<00:00, 24.55it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 160
Accuracy: 0.7024 | Train loss: 2.7995174434661867 | Valid loss: 1.1342228927612306



100%|██████████| 391/391 [00:47<00:00,  8.30it/s]
100%|██████████| 79/79 [00:03<00:00, 23.11it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 161
Accuracy: 0.7051 | Train loss: 2.751010984954834 | Valid loss: 1.1275232753753661



100%|██████████| 391/391 [00:45<00:00,  8.53it/s]
100%|██████████| 79/79 [00:04<00:00, 16.29it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 162
Accuracy: 0.7068 | Train loss: 2.7445163854980468 | Valid loss: 1.1272436853408814



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:02<00:00, 28.55it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 163
Accuracy: 0.7072 | Train loss: 2.7306495822143555 | Valid loss: 1.128717765045166



100%|██████████| 391/391 [00:47<00:00,  8.29it/s]
100%|██████████| 79/79 [00:02<00:00, 28.61it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 164
Accuracy: 0.7068 | Train loss: 2.738125458908081 | Valid loss: 1.1298054048538209



100%|██████████| 391/391 [00:46<00:00,  8.47it/s]
100%|██████████| 79/79 [00:04<00:00, 16.05it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 165
Accuracy: 0.7066 | Train loss: 2.7332545845794676 | Valid loss: 1.1286318887710571



100%|██████████| 391/391 [00:46<00:00,  8.46it/s]
100%|██████████| 79/79 [00:02<00:00, 29.31it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 166
Accuracy: 0.7082 | Train loss: 2.729515671157837 | Valid loss: 1.1304402770996094



100%|██████████| 391/391 [00:46<00:00,  8.33it/s]
100%|██████████| 79/79 [00:03<00:00, 22.58it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 167
Accuracy: 0.7084 | Train loss: 2.7333009367370606 | Valid loss: 1.12527781124115



100%|██████████| 391/391 [00:46<00:00,  8.39it/s]
100%|██████████| 79/79 [00:03<00:00, 22.55it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 168
Accuracy: 0.705 | Train loss: 2.722669853363037 | Valid loss: 1.13027849445343



100%|██████████| 391/391 [00:46<00:00,  8.47it/s]
100%|██████████| 79/79 [00:03<00:00, 24.79it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 169
Accuracy: 0.707 | Train loss: 2.7244901027679442 | Valid loss: 1.129777438545227



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:02<00:00, 28.05it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 170
Accuracy: 0.7075 | Train loss: 2.720713637161255 | Valid loss: 1.1276940435409546



100%|██████████| 391/391 [00:47<00:00,  8.32it/s]
100%|██████████| 79/79 [00:03<00:00, 22.23it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 171
Accuracy: 0.7085 | Train loss: 2.7290458849334716 | Valid loss: 1.127722472000122



100%|██████████| 391/391 [00:45<00:00,  8.51it/s]
100%|██████████| 79/79 [00:02<00:00, 28.01it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 172
Accuracy: 0.7072 | Train loss: 2.725581100769043 | Valid loss: 1.1282091417312623



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:03<00:00, 22.01it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 173
Accuracy: 0.7078 | Train loss: 2.7160228829956057 | Valid loss: 1.1288926706314086



100%|██████████| 391/391 [00:47<00:00,  8.31it/s]
100%|██████████| 79/79 [00:03<00:00, 25.89it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 174
Accuracy: 0.706 | Train loss: 2.72040295173645 | Valid loss: 1.1285284238815307



100%|██████████| 391/391 [00:46<00:00,  8.50it/s]
100%|██████████| 79/79 [00:03<00:00, 25.55it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 175
Accuracy: 0.7058 | Train loss: 2.7289619549560546 | Valid loss: 1.1287186082839966



100%|██████████| 391/391 [00:47<00:00,  8.19it/s]
100%|██████████| 79/79 [00:03<00:00, 23.63it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 176
Accuracy: 0.707 | Train loss: 2.7195993060302732 | Valid loss: 1.1291679939270018



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:03<00:00, 24.77it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 177
Accuracy: 0.7078 | Train loss: 2.7240858666992187 | Valid loss: 1.132203190803528



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:03<00:00, 22.82it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 178
Accuracy: 0.7059 | Train loss: 2.7226063830566405 | Valid loss: 1.1287089853286743



100%|██████████| 391/391 [00:47<00:00,  8.28it/s]
100%|██████████| 79/79 [00:02<00:00, 27.32it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 179
Accuracy: 0.7068 | Train loss: 2.726799395370483 | Valid loss: 1.1276742794036865



100%|██████████| 391/391 [00:47<00:00,  8.17it/s]
100%|██████████| 79/79 [00:02<00:00, 27.94it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 180
Accuracy: 0.7071 | Train loss: 2.7141896379089356 | Valid loss: 1.1285750038146973



100%|██████████| 391/391 [00:46<00:00,  8.44it/s]
100%|██████████| 79/79 [00:05<00:00, 15.55it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 181
Accuracy: 0.7053 | Train loss: 2.7171689530944825 | Valid loss: 1.1330986804962158



100%|██████████| 391/391 [00:46<00:00,  8.41it/s]
100%|██████████| 79/79 [00:02<00:00, 27.83it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 182
Accuracy: 0.7074 | Train loss: 2.7161678841400145 | Valid loss: 1.1302015617370607



100%|██████████| 391/391 [00:47<00:00,  8.28it/s]
100%|██████████| 79/79 [00:03<00:00, 24.09it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 183
Accuracy: 0.7061 | Train loss: 2.7127628630065916 | Valid loss: 1.131193586540222



100%|██████████| 391/391 [00:46<00:00,  8.42it/s]
100%|██████████| 79/79 [00:04<00:00, 19.54it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 184
Accuracy: 0.7073 | Train loss: 2.712003807449341 | Valid loss: 1.1317324228286743



100%|██████████| 391/391 [00:46<00:00,  8.38it/s]
100%|██████████| 79/79 [00:02<00:00, 27.94it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 185
Accuracy: 0.7069 | Train loss: 2.7104144591522217 | Valid loss: 1.1301843032836913



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:02<00:00, 27.56it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 186
Accuracy: 0.7058 | Train loss: 2.7044338214111328 | Valid loss: 1.1294564264297486



100%|██████████| 391/391 [00:47<00:00,  8.25it/s]
100%|██████████| 79/79 [00:03<00:00, 22.30it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 187
Accuracy: 0.706 | Train loss: 2.7189943826293947 | Valid loss: 1.1339593357086182



100%|██████████| 391/391 [00:45<00:00,  8.54it/s]
100%|██████████| 79/79 [00:03<00:00, 25.49it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 188
Accuracy: 0.706 | Train loss: 2.708960012664795 | Valid loss: 1.1301390027999878



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:02<00:00, 29.31it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 189
Accuracy: 0.7066 | Train loss: 2.711976780090332 | Valid loss: 1.1268788215637207



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:03<00:00, 24.60it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 190
Accuracy: 0.7063 | Train loss: 2.6935719435119627 | Valid loss: 1.132045112991333



100%|██████████| 391/391 [00:46<00:00,  8.49it/s]
100%|██████████| 79/79 [00:02<00:00, 29.21it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 191
Accuracy: 0.7062 | Train loss: 2.699705531311035 | Valid loss: 1.1299947067260743



100%|██████████| 391/391 [00:46<00:00,  8.35it/s]
100%|██████████| 79/79 [00:03<00:00, 25.41it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 192
Accuracy: 0.7064 | Train loss: 2.7114665200805663 | Valid loss: 1.131903171157837



100%|██████████| 391/391 [00:47<00:00,  8.20it/s]
100%|██████████| 79/79 [00:02<00:00, 27.71it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 193
Accuracy: 0.7055 | Train loss: 2.707962063369751 | Valid loss: 1.131672529602051



100%|██████████| 391/391 [00:45<00:00,  8.51it/s]
100%|██████████| 79/79 [00:03<00:00, 25.06it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 194
Accuracy: 0.7059 | Train loss: 2.7080348672485353 | Valid loss: 1.1336706855773926



100%|██████████| 391/391 [00:47<00:00,  8.21it/s]
100%|██████████| 79/79 [00:02<00:00, 27.83it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 195
Accuracy: 0.7077 | Train loss: 2.7079046046447752 | Valid loss: 1.1293615447998047



100%|██████████| 391/391 [00:47<00:00,  8.27it/s]
100%|██████████| 79/79 [00:02<00:00, 27.87it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 196
Accuracy: 0.707 | Train loss: 2.709723265762329 | Valid loss: 1.1280424699783325



100%|██████████| 391/391 [00:46<00:00,  8.45it/s]
100%|██████████| 79/79 [00:02<00:00, 27.16it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 197
Accuracy: 0.7075 | Train loss: 2.703980841217041 | Valid loss: 1.132608974647522



100%|██████████| 391/391 [00:47<00:00,  8.22it/s]
100%|██████████| 79/79 [00:02<00:00, 28.43it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 198
Accuracy: 0.7071 | Train loss: 2.7045868785858156 | Valid loss: 1.1298591506958007



100%|██████████| 391/391 [00:47<00:00,  8.16it/s]
100%|██████████| 79/79 [00:03<00:00, 23.61it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 199
Accuracy: 0.7071 | Train loss: 2.704822714920044 | Valid loss: 1.1312798254013061



100%|██████████| 391/391 [00:45<00:00,  8.50it/s]
100%|██████████| 79/79 [00:02<00:00, 26.99it/s]

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 200
Accuracy: 0.7077 | Train loss: 2.705971658859253 | Valid loss: 1.1319500926971435



