# Imports and Installs

In [1]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
# import standard PyTorch modules
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter # TensorBoard support
import torch.profiler
# import torchvision module to handle image manipulation
import torchvision
from torchvision import datasets 
import torchvision.transforms as transforms
import torchvision.models as models
from torchsummary import summary
from tqdm import tqdm
# calculate train time, writing train data to files etc.
import time
import pandas as pd
import json
import math
import copy
from IPython.display import clear_output
from time import perf_counter
from torch.autograd import Variable
torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)     # On by default, leave it here for clarity

<torch.autograd.grad_mode.set_grad_enabled at 0x7fdba2d7fe80>

In [3]:
import numpy as np
from torch.utils.data import Dataset, DataLoader, random_split

In [4]:
import matplotlib.pyplot as plt
import seaborn

In [5]:
import matplotlib.image as mpimg

In [6]:
# check PyTorch versions
print(torch.__version__)
print(torchvision.__version__)

2.0.0+cu118
0.15.1+cu118


In [7]:
import wandb

# Logging into WandB


In [8]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mjain-49[0m ([33mdl-codes[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

# Loading Dataset


In [9]:
from torchvision import transforms
from torchvision.transforms import ToTensor, Normalize

train_transforms = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
])

test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
])


In [10]:
dataset_train = torchvision.datasets.CIFAR10('./data', download=True, train=True, transform=train_transforms)

# Download and load the test set
dataset_test = torchvision.datasets.CIFAR10('./data', download=True, train=False, transform=test_transforms)

Files already downloaded and verified
Files already downloaded and verified


In [11]:
trainset, valset = torch.utils.data.random_split(dataset_train, [40000, 10000])
train_loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=0)
val_loader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=True, num_workers=0)
test_loader = DataLoader(dataset_test, batch_size=64, shuffle=False)

# set cuda to device

In [12]:
#torch.cuda.memory_summary(device=None, abbreviated=True)

In [13]:
torch.cuda.empty_cache()

In [14]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


#Some key definitions

## Count of correct predictions

In [15]:
def log_test_predictions(images, labels, outputs, predicted, test_table, log_counter):
  # obtain confidence scores for all classes
  scores = F.softmax(outputs.data, dim=1)
  log_scores = scores.cpu().numpy()
  log_images = images.cpu().numpy()
  log_labels = labels.cpu().numpy()
  log_preds = predicted.cpu().numpy()
  # adding ids based on the order of the images
  _id = 0
  for i, l, p, s in zip(log_images, log_labels, log_preds, log_scores):
    # add required info to data table:
    # id, image pixels, model's guess, true label, scores for all classes
    img_id = str(_id) + "_" + str(log_counter)
    test_table.add_data(img_id, wandb.Image(np.transpose(i, (1, 2, 0))), p, l, *s)
    _id += 1
    if _id == NUM_IMAGES_PER_BATCH:
      break


In [16]:
def get_num_correct(preds, labels):
  return preds.argmax(dim=1).eq(labels).sum().item()

In [17]:
def get_topk_correct(preds,labels,k=5):
    batch_size = preds.shape[0]

    # get the top-5 predictions for each sample
    topk_values, topk_indices = torch.topk(preds, k=5, dim=1)

    # check if the target is in the top-5 predictions
    correct_topk = topk_indices.eq(labels.view(-1, 1).expand_as(topk_indices))
    return correct_topk
## calculate top-5 accuracy
#top5_accuracy = correct_topk.float().sum() / batch_size

## taking value of parameters and trying them in all combinations

In [18]:
# import modules to build RunBuilder and RunManager helper classes
from collections  import OrderedDict
from collections import namedtuple
from itertools import product

# Read in the hyper-parameters and return a Run namedtuple containing all the 
# combinations of hyper-parameters
class RunBuilder():
  @staticmethod
  def get_runs(params):

    Run = namedtuple('Run', params.keys())

    runs = []
    for v in product(*params.values()):
      runs.append(Run(*v))
    
    return runs

## defining Manager class 

This helps in calculating both train and validation accuracy and losses also help us to maintain a tensorboard and print all the data collected for every epoch and save it in a dataset 

In [19]:
# Helper class, help track loss, accuracy, epoch time, run time, 
# hyper-parameters etc. Also record to TensorBoard and write into csv, json
class RunManager():
  def __init__(self):

    # tracking every epoch count, loss, accuracy, time
    self.epoch_count = 0
    self.epoch_loss = 0
    self.epoch_val_loss = 0
    self.epoch_num_correct_train = 0
    self.epoch_num_correct_test = 0
    self.epoch_start_time = None

    # tracking every run count, run data, hyper-params used, time
    self.run_params = None
    self.run_count = 0
    self.run_data = []
    self.run_start_time = None

    # record model, loader and TensorBoard 
    self.network = None
    self.trainloader = None
    self.valloader = None
    self.tb = None

  # record the count, hyper-param, model, loader of each run
  # record sample images and network graph to TensorBoard  
  def begin_run(self, run, network, trainloader,valloader):

    self.run_start_time = time.time()

    self.run_params = run
    self.run_count += 1

    self.network = network
    self.trainloader = trainloader
    self.valloader = valloader
    self.tb = SummaryWriter(comment=f'-{run}')

    wandb.init(
        # set the wandb project where this run will be logged
        project="dl_project_2023",
        
        # track hyperparameters and run metadata
        config={
        "learning_rate": run.lr,
        "architecture": run.arch,
        "dataset": run.dataset,
        "epochs": 30,
        "activation_function":run.act_func,
        "Optimizer":run.opt,
        "Regularisation":run.reg
        }
    )
    #images, labels = next(iter(self.trainloader))
    #images, labels = images.to(device), labels.to(device)
    #grid = torchvision.utils.make_grid(images)

    #self.tb.add_image('images', grid)
    #self.tb.add_graph(self.network, images)

  # when run ends, close TensorBoard, zero epoch count
  def end_run(self):
    wandb.finish()
    self.tb.close()
    self.epoch_count = 0

  # zero epoch count, loss, accuracy, 
  def begin_epoch(self):
    self.epoch_start_time = time.time()

    self.epoch_count += 1
    self.epoch_loss = 0
    self.epoch_val_loss = 0
    self.epoch_num_correct_train = 0
    self.epoch_num_correct_test = 0
    print("Training Epoch",self.epoch_count)

  # 
  def end_epoch(self):
    # calculate epoch duration and run duration(accumulate)
    epoch_duration = time.time() - self.epoch_start_time
    run_duration = time.time() - self.run_start_time

    # record epoch loss and accuracy
    train_loss = self.epoch_loss / len(self.trainloader.dataset)
    val_loss = self.epoch_val_loss / len(self.valloader.dataset)
    train_accuracy = self.epoch_num_correct_train / len(self.trainloader.dataset)
    test_accuracy = self.epoch_num_correct_test / len(self.valloader.dataset)
    # Record epoch loss and accuracy to TensorBoard 
    self.tb.add_scalar('Train Loss', train_loss, self.epoch_count)
    self.tb.add_scalar('train Accuracy', train_accuracy, self.epoch_count)
    self.tb.add_scalar('Validation Loss', val_loss, self.epoch_count)
    self.tb.add_scalar('Validation Accuracy', test_accuracy, self.epoch_count)

    # Record params to TensorBoard
    #for name, param in self.network.named_parameters():
      #self.tb.add_histogram(name, param, self.epoch_count)
      #self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)
    
    # Write into 'results' (OrderedDict) for all run related data
    results = OrderedDict()
    results["run"] = self.run_count
    results["epoch"] = self.epoch_count
    results["train_loss"] = train_loss
    results["val_loss"] = val_loss
    results["train_accuracy"] = train_accuracy
    results["val_accuracy"] = test_accuracy
    results["epoch duration"] = epoch_duration
    results["run duration"] = run_duration
    print("train_acc = ", train_accuracy, "train_loss = ", train_loss, "Validation_acc = ", test_accuracy, "Validation_loss = ", val_loss)
    wandb.log({"train_acc": train_accuracy, "train_loss": train_loss, "Validation_acc": test_accuracy, "Validation_loss": val_loss})
    
    # Record hyper-params into 'results'
    for k,v in self.run_params._asdict().items(): results[k] = v
    self.run_data.append(results)
    df = pd.DataFrame.from_dict(self.run_data, orient = 'columns')

    # display epoch information and show progress
    # clear_output(wait=True)
    # display(df)

  # accumulate loss of batch into entire epoch loss
  def track_loss(self, loss):
    # multiply batch size so variety of batch sizes can be compared
    self.epoch_loss += loss.item() * self.trainloader.batch_size
  def track_val_loss(self, loss):
    # multiply batch size so variety of batch sizes can be compared
    self.epoch_val_loss += loss.item() * self.valloader.batch_size
  # accumulate number of corrects of batch into entire epoch num_correct
  def track_num_correct_train(self, preds, labels):
    self.epoch_num_correct_train += self._get_num_correct(preds, labels)
  def track_num_correct_test(self, preds, labels):
    self.epoch_num_correct_test += self._get_num_correct(preds, labels)
  @torch.no_grad()
  def _get_num_correct(self, preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()
  
  # save end results of all runs into csv, json for further a
  def save(self, fileName):

    pd.DataFrame.from_dict(
        self.run_data, 
        orient = 'columns',
    ).to_csv(f'{fileName}.csv')

    with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
      json.dump(self.run_data, f, ensure_ascii=False, indent=4)

# Defining Shake Drop Regularisation


In [20]:
class ShakeDropFunction(torch.autograd.Function):

    @staticmethod
    def forward(ctx, x, training=True, p_drop=0.5, alpha_range=[-1, 1]):
        if training:
            gate = torch.cuda.FloatTensor([0]).bernoulli_(1 - p_drop)
            ctx.save_for_backward(gate)
            if gate.item() == 0:
                alpha = torch.cuda.FloatTensor(x.size(0)).uniform_(*alpha_range)
                alpha = alpha.view(alpha.size(0), 1, 1, 1).expand_as(x)
                return alpha * x
            else:
                return x
        else:
            return (1 - p_drop) * x

    @staticmethod
    def backward(ctx, grad_output):
        gate = ctx.saved_tensors[0]
        if gate.item() == 0:
            beta = torch.cuda.FloatTensor(grad_output.size(0)).uniform_(0, 1)
            beta = beta.view(beta.size(0), 1, 1, 1).expand_as(grad_output)
            beta = Variable(beta)
            return beta * grad_output, None, None, None
        else:
            return grad_output, None, None, None


class ShakeDrop(nn.Module):

    def __init__(self, p_drop=0.5, alpha_range=[-1, 1]):
        super(ShakeDrop, self).__init__()
        self.p_drop = p_drop
        self.alpha_range = alpha_range

    def forward(self, x):
        return ShakeDropFunction.apply(x, self.training, self.p_drop, self.alpha_range)

# Building Network as per given Instructions

In [21]:
def nothing(x):
  return x

In [22]:
def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    outchannel_ratio = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, p_shakedrop = 1.0, reg='no_reg'):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn3 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        if reg == 'shake-drop':
            self.drop = ShakeDrop(p_shakedrop)
        elif reg == 'dropout':
            self.drop = nn.Dropout(p=p_shakedrop)
        else:
            self.drop = nothing

    def forward(self, x):

        out = self.bn1(x)
        out = self.conv1(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn3(out)
        out = self.drop(out)

        if self.downsample is not None:
            shortcut = self.downsample(x)
            featuremap_size = shortcut.size()[2:4]
        else:
            shortcut = x
            featuremap_size = out.size()[2:4]

        batch_size = out.size()[0]
        residual_channel = out.size()[1]
        shortcut_channel = shortcut.size()[1]

        
        
        
        
        if residual_channel != shortcut_channel:
            padding = torch.autograd.Variable(
                torch.cuda.FloatTensor(batch_size, residual_channel - shortcut_channel, featuremap_size[0],
                                       featuremap_size[1]).fill_(0))
            out = out + torch.cat((shortcut, padding), 1)
        else:
            out = out + shortcut

        return out


class Bottleneck(nn.Module):
    outchannel_ratio = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None,p_shakedrop=1.0, reg='no_reg'):
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, (planes * 1), kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d((planes * 1))
        self.conv3 = nn.Conv2d((planes * 1), planes * Bottleneck.outchannel_ratio, kernel_size=1, bias=False)
        self.bn4 = nn.BatchNorm2d(planes * Bottleneck.outchannel_ratio)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        if reg == 'shake-drop':
            self.drop = ShakeDrop(p_shakedrop)
        elif reg == 'dropout':
            self.drop = nn.Dropout(p=p_shakedrop)
        else:
            self.drop = nothing

    def forward(self, x):
        out = self.bn1(x)
        out = self.conv1(out)

        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv2(out)

        out = self.bn3(out)
        out = self.relu(out)
        out = self.conv3(out)

        out = self.bn4(out)

        out = self.drop(out)
        if self.downsample is not None:
            shortcut = self.downsample(x)
            featuremap_size = shortcut.size()[2:4]
        else:
            shortcut = x
            featuremap_size = out.size()[2:4]

        batch_size = out.size()[0]
        residual_channel = out.size()[1]
        shortcut_channel = shortcut.size()[1]

        if residual_channel != shortcut_channel:
            padding = torch.autograd.Variable(
                torch.cuda.FloatTensor(batch_size, residual_channel - shortcut_channel, featuremap_size[0],
                                       featuremap_size[1]).fill_(0))
            out += torch.cat((shortcut, padding), 1)
        else:
            out += shortcut

        return out


class PyramidNet_ShakeDrop(nn.Module):

    def __init__(self, depth, alpha, num_classes, bottleneck=False, reg='no_reg'):
        super(PyramidNet_ShakeDrop, self).__init__()
        blocks = {18: BasicBlock, 34: BasicBlock, 50: Bottleneck, 101: Bottleneck, 152: Bottleneck, 200: Bottleneck}
        layers_list = {18: [2, 2, 2, 2], 34: [3, 4, 6, 3], 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3],
                  200: [3, 24, 36, 3]}

        if layers_list.get(depth) is None:
            if bottleneck == True:
                blocks[depth] = Bottleneck
                temp_cfg = int((depth - 2) / 12)
            else:
                blocks[depth] = BasicBlock
                temp_cfg = int((depth - 2) / 8)

            layers_list[depth] = [temp_cfg, temp_cfg, temp_cfg, temp_cfg]
            print('=> the layer configuration for each stage is set to', layers_list[depth])

        # self.u_idx is the index of self.p_drop
        # self.p_drop is initialized to an geometric sequence, also can refer to the parameter setting method in the paper
        self.u_idx = 0
        all_depth = sum(layers_list[depth])
        self.p_drop = [0.5/all_depth * (i + 1) for i in range(all_depth)]

        self.inplanes = 64
        self.addrate = alpha / (sum(layers_list[depth]) * 1.0)

        self.input_featuremap_dim = self.inplanes
        # down1
        self.conv1 = nn.Conv2d(3, self.input_featuremap_dim, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.input_featuremap_dim)
        self.relu = nn.ReLU(inplace=True)
        # down2
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.featuremap_dim = self.input_featuremap_dim
        self.layer1 = self.pyramidal_make_layer(blocks[depth], layers_list[depth][0],reg=reg)
        # down 3,4,5
        self.layer2 = self.pyramidal_make_layer(blocks[depth], layers_list[depth][1], stride=2, reg=reg)
        self.layer3 = self.pyramidal_make_layer(blocks[depth], layers_list[depth][2], stride=2, reg=reg)
        self.layer4 = self.pyramidal_make_layer(blocks[depth], layers_list[depth][3], stride=2, reg=reg)

        self.final_featuremap_dim = self.input_featuremap_dim
        self.bn_final = nn.BatchNorm2d(self.final_featuremap_dim)
        self.relu_final = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(self.final_featuremap_dim, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def pyramidal_make_layer(self, block, block_depth, stride=1, reg='no_reg'):
        downsample = None
        if stride != 1:  # or self.inplanes != int(round(featuremap_dim_1st)) * block.outchannel_ratio:
            downsample = nn.AvgPool2d((2, 2), stride=(2, 2), ceil_mode=True)

        layers = []
        self.featuremap_dim = self.featuremap_dim + self.addrate
        layers.append(block(self.input_featuremap_dim, int(round(self.featuremap_dim)), stride, downsample,self.p_drop[self.u_idx],reg=reg))
        self.u_idx += 1
        for i in range(1, block_depth):
            temp_featuremap_dim = self.featuremap_dim + self.addrate
            layers.append(
                block(int(round(self.featuremap_dim)) * block.outchannel_ratio, int(round(temp_featuremap_dim)), 1, None,self.p_drop[self.u_idx]))
            self.u_idx += 1
            self.featuremap_dim = temp_featuremap_dim
        self.input_featuremap_dim = int(round(self.featuremap_dim)) * block.outchannel_ratio

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.bn_final(x)
        x = self.relu_final(x)
        # x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


# Training the Model

we write the code to train our network 

In [23]:
params = OrderedDict(
    lr = [.001],
    batch_size = [64],
    shuffle = [True],
    arch = ["pyramidNet-110"],
    dataset = ["Cifar10"],
    act_func = ["ReLU"],
    opt = ["ADAM"],
    reg = ['dropout']

)
epochs = 30
# Number of batches to log from the test data for each test step
# (default set low to simplify demo)
NUM_BATCHES_TO_LOG = 10 #79

# Number of images to log per test batch
# (default set low to simplify demo)
NUM_IMAGES_PER_BATCH = 32 #128

In [24]:
m = RunManager()



for run in RunBuilder.get_runs(params):
    
    model = PyramidNet_ShakeDrop(depth=110, alpha=270, num_classes=10, reg=run.reg).to(device)
    
    if run.act_func == "Tanh":
        model.relu = nn.Tanh()
    
    if run.opt == "ADAM":
        optimizer = optim.Adam(model.parameters(), lr=run.lr)
    elif run.opt == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=run.lr)
    
    m.begin_run(run, model, train_loader,val_loader)
    for epoch in range(epochs):
        
        m.begin_epoch()
        batch_count = 0
        for batch in tqdm(train_loader):
            images = batch[0]
            labels = batch[1]
            images, labels = images.to(device), labels.to(device)
            preds = model(images)
            loss = F.cross_entropy(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            m.track_loss(loss)
            m.track_num_correct_train(preds, labels)
        for batch in tqdm(val_loader):
        
            images = batch[0]
            labels = batch[1]
            images, labels = images.to(device), labels.to(device)
            preds = model(images)
            loss = F.cross_entropy(preds, labels)
            m.track_val_loss(loss)
            m.track_num_correct_test(preds, labels)
        
        m.end_epoch()
    columns=["id", "image", "guess", "truth"]
    for digit in range(10):
      columns.append("score_" + str(digit))
    test_table = wandb.Table(columns=columns)
    log_counter = 0
    sum = 0
    for batch in test_loader:
            
        images = batch[0]
        labels = batch[1]
        images, labels = images.to(device), labels.to(device)
        preds = model(images)
        _, predicted = torch.max(preds.data, 1)
        #loss = F.cross_entropy(preds, labels)

        #optimizer.zero_grad()
        #loss.backward()
        #optimizer.step()
        if log_counter < NUM_BATCHES_TO_LOG:
            log_test_predictions(images, labels, preds, predicted, test_table, log_counter)
            log_counter += 1
        #m.track_val_loss(loss)
        sum += get_num_correct(preds, labels)
    print('Test Accuracy is ',sum/len(test_loader.dataset))
    wandb.log({"test_predictions" : test_table})
    m.end_run()

# when all runs are done, save results to files
m.save('results_pyramidNet-110')

=> the layer configuration for each stage is set to [13, 13, 13, 13]


Training Epoch 1


100%|██████████| 625/625 [01:24<00:00,  7.36it/s]
100%|██████████| 157/157 [00:09<00:00, 16.49it/s]


train_acc =  0.439475 train_loss =  1.534812271118164 Validation_acc =  0.5164 Validation_loss =  1.321688229370117
Training Epoch 2


100%|██████████| 625/625 [01:16<00:00,  8.17it/s]
100%|██████████| 157/157 [00:10<00:00, 15.30it/s]


train_acc =  0.56295 train_loss =  1.219044563484192 Validation_acc =  0.5839 Validation_loss =  1.1632534271240234
Training Epoch 3


100%|██████████| 625/625 [01:16<00:00,  8.14it/s]
100%|██████████| 157/157 [00:09<00:00, 16.91it/s]


train_acc =  0.619275 train_loss =  1.0715324453353883 Validation_acc =  0.6238 Validation_loss =  1.0534807006835938
Training Epoch 4


100%|██████████| 625/625 [01:16<00:00,  8.13it/s]
100%|██████████| 157/157 [00:09<00:00, 15.89it/s]


train_acc =  0.658975 train_loss =  0.9672967629432678 Validation_acc =  0.6474 Validation_loss =  1.0211552478790282
Training Epoch 5


100%|██████████| 625/625 [01:17<00:00,  8.10it/s]
100%|██████████| 157/157 [00:10<00:00, 15.64it/s]


train_acc =  0.681375 train_loss =  0.9013655522346496 Validation_acc =  0.6807 Validation_loss =  0.9267399787902832
Training Epoch 6


100%|██████████| 625/625 [01:16<00:00,  8.12it/s]
100%|██████████| 157/157 [00:09<00:00, 17.37it/s]


train_acc =  0.7095 train_loss =  0.8276527692317963 Validation_acc =  0.7069 Validation_loss =  0.8417357391357422
Training Epoch 7


100%|██████████| 625/625 [01:16<00:00,  8.16it/s]
100%|██████████| 157/157 [00:09<00:00, 15.76it/s]


train_acc =  0.72825 train_loss =  0.7785090065956116 Validation_acc =  0.7148 Validation_loss =  0.8384621101379395
Training Epoch 8


100%|██████████| 625/625 [01:16<00:00,  8.19it/s]
100%|██████████| 157/157 [00:09<00:00, 16.51it/s]


train_acc =  0.746925 train_loss =  0.7333882863998413 Validation_acc =  0.7273 Validation_loss =  0.8001389125823974
Training Epoch 9


100%|██████████| 625/625 [01:15<00:00,  8.25it/s]
100%|██████████| 157/157 [00:09<00:00, 16.02it/s]


train_acc =  0.75695 train_loss =  0.6962113155841827 Validation_acc =  0.7406 Validation_loss =  0.7569013101577758
Training Epoch 10


100%|██████████| 625/625 [01:16<00:00,  8.19it/s]
100%|██████████| 157/157 [00:09<00:00, 15.82it/s]


train_acc =  0.7691 train_loss =  0.6605822034358978 Validation_acc =  0.7435 Validation_loss =  0.7461442785263062
Training Epoch 11


100%|██████████| 625/625 [01:16<00:00,  8.18it/s]
100%|██████████| 157/157 [00:09<00:00, 16.91it/s]


train_acc =  0.7817 train_loss =  0.6278314087390899 Validation_acc =  0.7581 Validation_loss =  0.7021161689758301
Training Epoch 12


100%|██████████| 625/625 [01:16<00:00,  8.17it/s]
100%|██████████| 157/157 [00:09<00:00, 15.80it/s]


train_acc =  0.786025 train_loss =  0.6094140236854553 Validation_acc =  0.7672 Validation_loss =  0.6894272026062012
Training Epoch 13


100%|██████████| 625/625 [01:15<00:00,  8.24it/s]
100%|██████████| 157/157 [00:09<00:00, 17.22it/s]


train_acc =  0.797925 train_loss =  0.5774062523841857 Validation_acc =  0.7695 Validation_loss =  0.6771221000671387
Training Epoch 14


100%|██████████| 625/625 [01:16<00:00,  8.20it/s]
100%|██████████| 157/157 [00:09<00:00, 15.82it/s]


train_acc =  0.808225 train_loss =  0.5462269353628159 Validation_acc =  0.7732 Validation_loss =  0.6652543827056885
Training Epoch 15


100%|██████████| 625/625 [01:15<00:00,  8.24it/s]
100%|██████████| 157/157 [00:09<00:00, 16.11it/s]


train_acc =  0.8173 train_loss =  0.5252034623146057 Validation_acc =  0.7816 Validation_loss =  0.6403742870330811
Training Epoch 16


100%|██████████| 625/625 [01:15<00:00,  8.28it/s]
100%|██████████| 157/157 [00:09<00:00, 16.29it/s]


train_acc =  0.825625 train_loss =  0.5008059145212174 Validation_acc =  0.7898 Validation_loss =  0.6246906663894654
Training Epoch 17


100%|██████████| 625/625 [01:16<00:00,  8.16it/s]
100%|██████████| 157/157 [00:09<00:00, 16.04it/s]


train_acc =  0.830025 train_loss =  0.4815726857185364 Validation_acc =  0.7809 Validation_loss =  0.6573236986160278
Training Epoch 18


100%|██████████| 625/625 [01:16<00:00,  8.20it/s]
100%|██████████| 157/157 [00:08<00:00, 17.68it/s]


train_acc =  0.83965 train_loss =  0.4635285535812378 Validation_acc =  0.79 Validation_loss =  0.6279816486358643
Training Epoch 19


100%|██████████| 625/625 [01:16<00:00,  8.22it/s]
100%|██████████| 157/157 [00:09<00:00, 15.95it/s]


train_acc =  0.843925 train_loss =  0.44039249465465546 Validation_acc =  0.7871 Validation_loss =  0.6335332213401794
Training Epoch 20


100%|██████████| 625/625 [01:16<00:00,  8.20it/s]
100%|██████████| 157/157 [00:08<00:00, 17.48it/s]


train_acc =  0.849375 train_loss =  0.427140296292305 Validation_acc =  0.7997 Validation_loss =  0.6109304080963135
Training Epoch 21


100%|██████████| 625/625 [01:15<00:00,  8.25it/s]
100%|██████████| 157/157 [00:09<00:00, 16.05it/s]


train_acc =  0.8566 train_loss =  0.40612103090286256 Validation_acc =  0.7964 Validation_loss =  0.61684361743927
Training Epoch 22


100%|██████████| 625/625 [01:16<00:00,  8.22it/s]
100%|██████████| 157/157 [00:09<00:00, 16.67it/s]


train_acc =  0.862275 train_loss =  0.3883163670063019 Validation_acc =  0.7981 Validation_loss =  0.6152241847038269
Training Epoch 23


100%|██████████| 625/625 [01:15<00:00,  8.23it/s]
100%|██████████| 157/157 [00:09<00:00, 16.12it/s]


train_acc =  0.868375 train_loss =  0.3710276901245117 Validation_acc =  0.798 Validation_loss =  0.619141981124878
Training Epoch 24


100%|██████████| 625/625 [01:17<00:00,  8.09it/s]
100%|██████████| 157/157 [00:10<00:00, 15.49it/s]


train_acc =  0.8728 train_loss =  0.35900491148233415 Validation_acc =  0.8038 Validation_loss =  0.6215565608978272
Training Epoch 25


100%|██████████| 625/625 [01:16<00:00,  8.16it/s]
100%|██████████| 157/157 [00:08<00:00, 17.52it/s]


train_acc =  0.8782 train_loss =  0.34501007573604586 Validation_acc =  0.8044 Validation_loss =  0.6125572639465332
Training Epoch 26


100%|██████████| 625/625 [01:16<00:00,  8.15it/s]
100%|██████████| 157/157 [00:09<00:00, 15.91it/s]


train_acc =  0.8857 train_loss =  0.32134156230688093 Validation_acc =  0.8028 Validation_loss =  0.6211304546356201
Training Epoch 27


100%|██████████| 625/625 [01:16<00:00,  8.18it/s]
100%|██████████| 157/157 [00:09<00:00, 16.48it/s]


train_acc =  0.886875 train_loss =  0.31705840629339216 Validation_acc =  0.8081 Validation_loss =  0.6177968559265137
Training Epoch 28


100%|██████████| 625/625 [01:16<00:00,  8.20it/s]
100%|██████████| 157/157 [00:10<00:00, 15.61it/s]


train_acc =  0.896 train_loss =  0.29362230796813965 Validation_acc =  0.8099 Validation_loss =  0.6236858552932739
Training Epoch 29


100%|██████████| 625/625 [01:16<00:00,  8.20it/s]
100%|██████████| 157/157 [00:09<00:00, 15.99it/s]


train_acc =  0.900475 train_loss =  0.2776105480790138 Validation_acc =  0.8042 Validation_loss =  0.62913772315979
Training Epoch 30


100%|██████████| 625/625 [01:16<00:00,  8.22it/s]
100%|██████████| 157/157 [00:09<00:00, 16.76it/s]


train_acc =  0.90495 train_loss =  0.2694366504430771 Validation_acc =  0.8041 Validation_loss =  0.6447902956008911
Test Accuracy is  0.8181


0,1
Validation_acc,▁▃▄▄▅▆▆▆▆▆▇▇▇▇▇█▇█▇███████████
Validation_loss,█▆▅▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_acc,▁▃▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████
train_loss,█▆▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁

0,1
Validation_acc,0.8041
Validation_loss,0.64479
train_acc,0.90495
train_loss,0.26944
