In [1]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import ast
import time
from typing import Type, Any, Callable, Union, List, Optional
from torch import Tensor 
from torchsummary import summary
if torch.cuda.is_available():
    print("Using GPUs")
    device = torch.device("cuda") 
else:
    device = torch.device("cpu")

Using GPUs


In [2]:
torch.manual_seed(43)
batch_size = 32

### for CIFAR 10
# stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
## for CIFAR 100
stats = ((0.507, 0.487, 0.441), (0.267, 0.256, 0.276))

transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(*stats),
    torchvision.transforms.RandomCrop(32, padding=4, padding_mode='constant'),
    torchvision.transforms.RandomHorizontalFlip(p=0.5)
])

train_set = torchvision.datasets.CIFAR100(root="data", train=True, download=True, transform=transform)
train_size = len(train_set)
test_set = torchvision.datasets.CIFAR100(root="data", train=False, download=True, transform=transform)
test_set, validation_set = torch.utils.data.random_split(test_set, [5000, 5000])
test_size = len(test_set)
validation_size = len(validation_set)


train_loader = torch.utils.data.DataLoader(train_set, batch_size, shuffle=True, num_workers=4, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size, num_workers=4, pin_memory=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size, num_workers=4, pin_memory=True)

data_loaders = {"train": train_loader, "test": test_loader, "validation": validation_loader}
dataset_sizes = {"train": train_size, "test": test_size, "validation": validation_size}
print(dataset_sizes)

Files already downloaded and verified
Files already downloaded and verified
{'train': 50000, 'test': 5000, 'validation': 5000}


In [3]:
class BasicBlock(nn.Module):
    """Basic DSNet. Given input [in_channels, height, width], 
    - First pass through Conv2d(in_channels, in_channels) + BatchNorm + ReLU 
        -> Output dimensions: [in_channels, height, width] (1)
    - Then, add with the ("normalization and channel-wise weight")(input)
        -> Output dimensions: [in_channels, height width] (2)
    - Pass through another Conv2d(outchannels, outchannels) + BN + ReLU
        -> Output dimensions: [in_channels, height, width] (3)
    - Add again with ("normalized + channel-wise weight")(1) and ("normalized + channel-wise weight")(2)
        -> Output dimensions: [in_channels, height, width]
    Caveat: The normalization and channel-wise weight is not shared.
    Attributes:
        in_planes: # of Input channels
        n_models: Number of layers. Have to specify here as we need to connect all the layers
    """

    def __init__(self, planes, n_models, device=torch.device("cpu"), stride=1, down=False, downsample=None):
        super().__init__()

        self.layers = nn.ModuleList([])
        self.channel_wise_w_list = []  # Result is list of list of weights at each steps
        self.norm_layers = nn.ModuleList([])
        self.downsample = downsample
        
        if down:
            inplanes = planes//2
            self.downsample = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride)
        else:
            inplanes = planes

        for i in range(n_models):
            if i == 0:
                first_conv = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1, stride=stride)
            else:
                first_conv = nn.Conv2d(planes, planes, kernel_size=3, padding=1, stride=1)
            
            self.layers.append(nn.Sequential(
                first_conv,
                nn.BatchNorm2d(planes),
                nn.ReLU(inplace=True),
                nn.Conv2d(planes, planes, kernel_size=3, padding=1),
                nn.BatchNorm2d(planes)
            ))

            self.norm_layers.append(
                nn.ModuleList([nn.GroupNorm(num_groups=4, num_channels=planes).to(device) for _ in range(i+1)])
            )

            # One variable for each channel for each time, [[w00], [w10, w11], [w20, w21, w22], ...]
            self.channel_wise_w_list.append(
                [nn.Parameter(torch.randn(1, planes, 1, 1).to(device), requires_grad=True)
                 for _ in range(i+1)]
            )
            
            for j, p_list in enumerate(self.channel_wise_w_list):
                for k, p in enumerate(p_list):
                    self.register_parameter("channel_weight_{}_{}".format(j,k), p)
                       
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:


        if self.downsample is not None:
            original_x = x
            x = self.downsample(x)
        
        # Consisting of output of each layer.
        outputs = [x]
        
        for i,(layer, ch_ws, norm_layer) in enumerate\
        (zip(self.layers, self.channel_wise_w_list, self.norm_layers)):
            
            if i==0 and self.downsample is not None:
                output = layer(original_x)
            else:
                output = layer(outputs[-1])

            assert len(outputs) == len(ch_ws), "Length not equal"
            dense_normalized_inputs = [norm(o) * ch_weight
                                       for o, ch_weight, norm in zip(outputs, ch_ws, norm_layer)]
            for dense_normalized_input in dense_normalized_inputs:

                output += dense_normalized_input

            output = self.relu(output)
            outputs.append(output)

        return outputs[-1]



class DSNet(nn.Module):
    """Defining the whole model. 
    In high level: 
        - Input -> [batch, 3, height, width]
        - Beginning Layer -> [batch, 3, height, width]
        - First Block: n*BasicBlock(16) -> [batch, 16, height, width]
        - Transition: TransitionBlock(16, 32) -> [batch, 32, height, width]
        - Second Block: n*BasicBlock(32) -> [batch, 32, height, width]
        - Transition: TransitionBlock(32, 64) -> [batch, 32, height, width]
        - Third Block: n*BasicBlock(64) -> [batch, 64, height, width]
        - FinalLayer: AdaptiveAvgPool2d + Linear(64, num_classes)
    Attributes:
        model_n: # of layers, based on CIFAR-ResNet 
        num_classes: Number of classes
        device: needed for GPU vs CPU.
    """

    def __init__(self, model_n, num_classes: int = 10, device=torch.device("cpu")):
        super().__init__()

        self.residual_layers = nn.ModuleList([])
        self.model_n = model_n
        self.device = device

        # begining layers
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)

        # ResNet blocks [16, 32, 64]
        # first block, 16 channels
        self.residual_layers.append(BasicBlock(16, self.model_n, device).to(device))
        
        # second block, 32 channels
        self.residual_layers.append(BasicBlock(32, self.model_n, device, stride=2, down=True).to(device))

        # third block, 64 channels
        self.residual_layers.append(BasicBlock(64, self.model_n, device, stride=2, down=True).to(device))


        # output layers
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x: Tensor) -> Tensor:
        # begining layers
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        # DSNet blocks
        for i, layer in enumerate(self.residual_layers):
            x = layer(x)

        # output layers
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [4]:
#### Train Configurations, based on DSNet and ResNet paper
model_n = 8
epochs = 100 ### should be 180
milestones = [int(epochs*0.5), int(epochs*0.75)]
momentum = 0.9
weight_decay = 0.0001
gamma = 0.1
lr = 0.1

model = DSNet(model_n, num_classes=100, device=device)
model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

summary(model, (3, 32, 32))
print('Total Number of Parameters:', sum(p.numel() for p in model.parameters()))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             448
       BatchNorm2d-2           [-1, 16, 32, 32]              32
              ReLU-3           [-1, 16, 32, 32]               0
            Conv2d-4           [-1, 16, 32, 32]           2,320
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
            Conv2d-7           [-1, 16, 32, 32]           2,320
       BatchNorm2d-8           [-1, 16, 32, 32]              32
         GroupNorm-9           [-1, 16, 32, 32]              32
             ReLU-10           [-1, 16, 32, 32]               0
           Conv2d-11           [-1, 16, 32, 32]           2,320
      BatchNorm2d-12           [-1, 16, 32, 32]              32
             ReLU-13           [-1, 16, 32, 32]               0
           Conv2d-14           [-1, 16,

In [5]:
### Train loop + validation/ also test at the end
print("Configuration: ", "model:ResNet(small)", " model_n:", model_n, " batch size:", batch_size, 
      " optimizer:SGD", " lr:", lr, " epochs:", epochs)

all_epoch_loss = {"train": [], "validation": []}
all_epoch_acc = {"train":  [], "validation": []}

print("----------------------------- Train --------------------------------")
for epoch in range(epochs):
    start_time = time.time()
    print("Epoch {}/{}".format(epoch+1, epochs))
    print("-" * 30)
    
    
    epoch_loss = {"train": 0.0, "validation": 0.0}
    epoch_acc = {"train": 0.0, "validation": 0.0}
    
    running_loss = {"train": 0.0, "validation": 0.0}
    running_corrects = {"train": 0, "validation": 0}
    
    for phase in ["train", "validation"]:
        if phase == "train":
            model.train(True)
        else:
            model.train(False)
        
        for data in data_loaders[phase]:
            inputs, labels = data 
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad() # clear all gradients
            
            outputs = model(inputs) # batch_size x num_classes
            _, preds = torch.max(outputs.data, 1) # values, indices
            loss = loss_fn(outputs, labels)
            
            if phase == "train":
                loss.backward()  # compute gradients
                optimizer.step() # update weights/biases
               
            running_loss[phase] += loss.data.item() * inputs.size(0)
            running_corrects[phase] += torch.sum(preds == labels.data).item()
        
        all_epoch_loss[phase].append(running_loss[phase] / dataset_sizes[phase])
        all_epoch_acc[phase].append(running_corrects[phase] / dataset_sizes[phase])
        
        epoch_loss[phase] = running_loss[phase] / dataset_sizes[phase]
        epoch_acc[phase] =  running_corrects[phase] / dataset_sizes[phase]

    # Visualize the loss and accuracy values.
    print({
        'time': np.round(time.time()-start_time, 5),
        'train_loss': np.round(epoch_loss["train"], 5),
        'train_acc': np.round(epoch_acc["train"], 5),
        'val_loss': np.round(epoch_loss["validation"], 5),
        'val_acc': np.round(epoch_acc["validation"], 5),
    })
    
    scheduler.step()

    
    
with open('DSNet_16_bs_32_opt_SGD_lr_1_epochs_100_loss.txt', 'w') as f_loss:
    print(all_epoch_loss, file=f_loss)
    
with open('DSNet_16_bs_32_opt_SGD_lr_1_epochs_100_acc.txt', 'w') as f_acc:
    print(all_epoch_acc, file=f_acc)
    
### evaluating the model with test set
print("----------------------------- Test --------------------------------")
with torch.no_grad():
    model.eval()
    running_loss = 0
    running_corrects = 0

    for data in test_loader:
        inputs, labels = data 

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad() # clear all gradients

        outputs = model(inputs) # batch_size x num_classes
        _, preds = torch.max(outputs.data, 1) # values, indices
        loss = loss_fn(outputs, labels)

        running_loss += loss.data.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data).item()

    # Visualize the loss and accuracy values.
    print({
    'time': np.round(time.time()-start_time, 5),
    'test_loss': np.round(running_loss/ dataset_sizes['test'], 5),
    'test_acc': np.round(running_corrects/ dataset_sizes['test'], 5),
    })

Configuration:  model:ResNet(small)  model_n: 16  batch size: 32  optimizer:SGD  lr: 0.1  epochs: 100
----------------------------- Train --------------------------------
Epoch 1/100
------------------------------
{'time': 279.71594, 'train_loss': 4.01924, 'train_acc': 0.08278, 'val_loss': 3.5675, 'val_acc': 0.148}
Epoch 2/100
------------------------------
{'time': 292.86714, 'train_loss': 3.30653, 'train_acc': 0.19474, 'val_loss': 3.19971, 'val_acc': 0.2188}
Epoch 3/100
------------------------------
{'time': 283.46673, 'train_loss': 2.9321, 'train_acc': 0.26618, 'val_loss': 2.77584, 'val_acc': 0.3004}
Epoch 4/100
------------------------------
{'time': 289.4087, 'train_loss': 2.62582, 'train_acc': 0.32358, 'val_loss': 2.60833, 'val_acc': 0.338}
Epoch 5/100
------------------------------
{'time': 287.04795, 'train_loss': 2.38185, 'train_acc': 0.37352, 'val_loss': 2.40697, 'val_acc': 0.377}
Epoch 6/100
------------------------------
{'time': 285.86358, 'train_loss': 2.18485, 'train_ac

{'time': 287.78348, 'train_loss': 0.24747, 'train_acc': 0.92142, 'val_loss': 1.55283, 'val_acc': 0.6694}
Epoch 56/100
------------------------------
{'time': 288.41983, 'train_loss': 0.2316, 'train_acc': 0.92496, 'val_loss': 1.60121, 'val_acc': 0.6682}
Epoch 57/100
------------------------------
{'time': 295.69739, 'train_loss': 0.22243, 'train_acc': 0.9282, 'val_loss': 1.60443, 'val_acc': 0.6688}
Epoch 58/100
------------------------------
{'time': 281.26394, 'train_loss': 0.21477, 'train_acc': 0.93126, 'val_loss': 1.61174, 'val_acc': 0.6742}
Epoch 59/100
------------------------------
{'time': 299.68252, 'train_loss': 0.20878, 'train_acc': 0.932, 'val_loss': 1.61015, 'val_acc': 0.6754}
Epoch 60/100
------------------------------
{'time': 285.37876, 'train_loss': 0.20375, 'train_acc': 0.93278, 'val_loss': 1.67472, 'val_acc': 0.6594}
Epoch 61/100
------------------------------
{'time': 297.01413, 'train_loss': 0.19225, 'train_acc': 0.93768, 'val_loss': 1.62396, 'val_acc': 0.6768}
Epoch

In [5]:
### Train loop + validation/ also test at the end
print("Configuration: ", "model:ResNet(small)", " model_n:", model_n, " batch size:", batch_size, 
      " optimizer:SGD", " lr:", lr, " epochs:", epochs)

all_epoch_loss = {"train": [], "validation": []}
all_epoch_acc = {"train":  [], "validation": []}

print("----------------------------- Train --------------------------------")
for epoch in range(epochs):
    start_time = time.time()
    print("Epoch {}/{}".format(epoch+1, epochs))
    print("-" * 30)
    
    
    epoch_loss = {"train": 0.0, "validation": 0.0}
    epoch_acc = {"train": 0.0, "validation": 0.0}
    
    running_loss = {"train": 0.0, "validation": 0.0}
    running_corrects = {"train": 0, "validation": 0}
    
    for phase in ["train", "validation"]:
        if phase == "train":
            model.train(True)
        else:
            model.train(False)
        
        for data in data_loaders[phase]:
            inputs, labels = data 
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad() # clear all gradients
            
            outputs = model(inputs) # batch_size x num_classes
            _, preds = torch.max(outputs.data, 1) # values, indices
            loss = loss_fn(outputs, labels)
            
            if phase == "train":
                loss.backward()  # compute gradients
                optimizer.step() # update weights/biases
               
            running_loss[phase] += loss.data.item() * inputs.size(0)
            running_corrects[phase] += torch.sum(preds == labels.data).item()
        
        all_epoch_loss[phase].append(running_loss[phase] / dataset_sizes[phase])
        all_epoch_acc[phase].append(running_corrects[phase] / dataset_sizes[phase])
        
        epoch_loss[phase] = running_loss[phase] / dataset_sizes[phase]
        epoch_acc[phase] =  running_corrects[phase] / dataset_sizes[phase]

    # Visualize the loss and accuracy values.
    print({
        'time': np.round(time.time()-start_time, 5),
        'train_loss': np.round(epoch_loss["train"], 5),
        'train_acc': np.round(epoch_acc["train"], 5),
        'val_loss': np.round(epoch_loss["validation"], 5),
        'val_acc': np.round(epoch_acc["validation"], 5),
    })
    
    scheduler.step()

    
    
with open('DSNet_3_bs_32_opt_SGD_lr_1_epochs_100_loss.txt', 'w') as f_loss:
    print(all_epoch_loss, file=f_loss)
    
with open('DSNet_3_bs_32_opt_SGD_lr_1_epochs_100_acc.txt', 'w') as f_acc:
    print(all_epoch_acc, file=f_acc)
    
### evaluating the model with test set
print("----------------------------- Test --------------------------------")
with torch.no_grad():
    model.eval()
    running_loss = 0
    running_corrects = 0

    for data in test_loader:
        inputs, labels = data 

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad() # clear all gradients

        outputs = model(inputs) # batch_size x num_classes
        _, preds = torch.max(outputs.data, 1) # values, indices
        loss = loss_fn(outputs, labels)

        running_loss += loss.data.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data).item()

    # Visualize the loss and accuracy values.
    print({
    'time': np.round(time.time()-start_time, 5),
    'test_loss': np.round(running_loss/ dataset_sizes['test'], 5),
    'test_acc': np.round(running_corrects/ dataset_sizes['test'], 5),
    })

Configuration:  model:ResNet(small)  model_n: 3  batch size: 32  optimizer:SGD  lr: 0.1  epochs: 100
----------------------------- Train --------------------------------
Epoch 1/100
------------------------------
{'time': 43.03238, 'train_loss': 3.93896, 'train_acc': 0.08844, 'val_loss': 3.7258, 'val_acc': 0.1158}
Epoch 2/100
------------------------------
{'time': 39.82316, 'train_loss': 3.32306, 'train_acc': 0.18918, 'val_loss': 3.63235, 'val_acc': 0.1532}
Epoch 3/100
------------------------------
{'time': 39.09446, 'train_loss': 2.93877, 'train_acc': 0.25852, 'val_loss': 2.92494, 'val_acc': 0.2666}
Epoch 4/100
------------------------------
{'time': 38.06372, 'train_loss': 2.67967, 'train_acc': 0.3105, 'val_loss': 2.64212, 'val_acc': 0.314}
Epoch 5/100
------------------------------
{'time': 38.86179, 'train_loss': 2.48919, 'train_acc': 0.35034, 'val_loss': 2.58991, 'val_acc': 0.3532}
Epoch 6/100
------------------------------
{'time': 40.15102, 'train_loss': 2.33412, 'train_acc': 

{'time': 38.4888, 'train_loss': 0.74898, 'train_acc': 0.77456, 'val_loss': 1.45537, 'val_acc': 0.6276}
Epoch 57/100
------------------------------
{'time': 47.45086, 'train_loss': 0.74303, 'train_acc': 0.7748, 'val_loss': 1.43415, 'val_acc': 0.6318}
Epoch 58/100
------------------------------
{'time': 39.86901, 'train_loss': 0.74032, 'train_acc': 0.77564, 'val_loss': 1.41765, 'val_acc': 0.638}
Epoch 59/100
------------------------------
{'time': 39.40376, 'train_loss': 0.73168, 'train_acc': 0.77786, 'val_loss': 1.41036, 'val_acc': 0.634}
Epoch 60/100
------------------------------
{'time': 39.53482, 'train_loss': 0.72805, 'train_acc': 0.77914, 'val_loss': 1.44855, 'val_acc': 0.6326}
Epoch 61/100
------------------------------
{'time': 37.93341, 'train_loss': 0.72714, 'train_acc': 0.77912, 'val_loss': 1.44036, 'val_acc': 0.6354}
Epoch 62/100
------------------------------
{'time': 38.85932, 'train_loss': 0.71507, 'train_acc': 0.78402, 'val_loss': 1.45224, 'val_acc': 0.6284}
Epoch 63/100

In [5]:
### Train loop + validation/ also test at the end
print("Configuration: ", "model:ResNet(small)", " model_n:", model_n, " batch size:", batch_size, 
      " optimizer:SGD", " lr:", lr, " epochs:", epochs)

all_epoch_loss = {"train": [], "validation": []}
all_epoch_acc = {"train":  [], "validation": []}

print("----------------------------- Train --------------------------------")
for epoch in range(epochs):
    start_time = time.time()
    print("Epoch {}/{}".format(epoch+1, epochs))
    print("-" * 30)
    
    
    epoch_loss = {"train": 0.0, "validation": 0.0}
    epoch_acc = {"train": 0.0, "validation": 0.0}
    
    running_loss = {"train": 0.0, "validation": 0.0}
    running_corrects = {"train": 0, "validation": 0}
    
    for phase in ["train", "validation"]:
        if phase == "train":
            model.train(True)
        else:
            model.train(False)
        
        for data in data_loaders[phase]:
            inputs, labels = data 
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad() # clear all gradients
            
            outputs = model(inputs) # batch_size x num_classes
            _, preds = torch.max(outputs.data, 1) # values, indices
            loss = loss_fn(outputs, labels)
            
            if phase == "train":
                loss.backward()  # compute gradients
                optimizer.step() # update weights/biases
               
            running_loss[phase] += loss.data.item() * inputs.size(0)
            running_corrects[phase] += torch.sum(preds == labels.data).item()
        
        all_epoch_loss[phase].append(running_loss[phase] / dataset_sizes[phase])
        all_epoch_acc[phase].append(running_corrects[phase] / dataset_sizes[phase])
        
        epoch_loss[phase] = running_loss[phase] / dataset_sizes[phase]
        epoch_acc[phase] =  running_corrects[phase] / dataset_sizes[phase]

    # Visualize the loss and accuracy values.
    print({
        'time': np.round(time.time()-start_time, 5),
        'train_loss': np.round(epoch_loss["train"], 5),
        'train_acc': np.round(epoch_acc["train"], 5),
        'val_loss': np.round(epoch_loss["validation"], 5),
        'val_acc': np.round(epoch_acc["validation"], 5),
    })
    
    scheduler.step()

    
    
with open('DSNet_8_bs_32_opt_SGD_lr_1_epochs_100_loss.txt', 'w') as f_loss:
    print(all_epoch_loss, file=f_loss)
    
with open('DSNet_8_bs_32_opt_SGD_lr_1_epochs_100_acc.txt', 'w') as f_acc:
    print(all_epoch_acc, file=f_acc)
    
### evaluating the model with test set
print("----------------------------- Test --------------------------------")
with torch.no_grad():
    model.eval()
    running_loss = 0
    running_corrects = 0

    for data in test_loader:
        inputs, labels = data 

        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad() # clear all gradients

        outputs = model(inputs) # batch_size x num_classes
        _, preds = torch.max(outputs.data, 1) # values, indices
        loss = loss_fn(outputs, labels)

        running_loss += loss.data.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data).item()

    # Visualize the loss and accuracy values.
    print({
    'time': np.round(time.time()-start_time, 5),
    'test_loss': np.round(running_loss/ dataset_sizes['test'], 5),
    'test_acc': np.round(running_corrects/ dataset_sizes['test'], 5),
    })

Configuration:  model:ResNet(small)  model_n: 8  batch size: 32  optimizer:SGD  lr: 0.1  epochs: 100
----------------------------- Train --------------------------------
Epoch 1/100
------------------------------
{'time': 110.94868, 'train_loss': 4.0303, 'train_acc': 0.07534, 'val_loss': 3.60206, 'val_acc': 0.1384}
Epoch 2/100
------------------------------
{'time': 101.95567, 'train_loss': 3.30763, 'train_acc': 0.18874, 'val_loss': 3.16371, 'val_acc': 0.2228}
Epoch 3/100
------------------------------
{'time': 104.07128, 'train_loss': 2.88058, 'train_acc': 0.27182, 'val_loss': 2.70973, 'val_acc': 0.3124}
Epoch 4/100
------------------------------
{'time': 110.60394, 'train_loss': 2.60071, 'train_acc': 0.3298, 'val_loss': 2.5096, 'val_acc': 0.3434}
Epoch 5/100
------------------------------
{'time': 102.97587, 'train_loss': 2.41331, 'train_acc': 0.36602, 'val_loss': 2.43238, 'val_acc': 0.3772}
Epoch 6/100
------------------------------
{'time': 101.99725, 'train_loss': 2.25006, 'train_

{'time': 114.85168, 'train_loss': 0.45815, 'train_acc': 0.85558, 'val_loss': 1.51351, 'val_acc': 0.6424}
Epoch 56/100
------------------------------
{'time': 110.55982, 'train_loss': 0.44635, 'train_acc': 0.85822, 'val_loss': 1.53981, 'val_acc': 0.6498}
Epoch 57/100
------------------------------
{'time': 109.60731, 'train_loss': 0.43899, 'train_acc': 0.86186, 'val_loss': 1.57624, 'val_acc': 0.6422}
Epoch 58/100
------------------------------
{'time': 115.54546, 'train_loss': 0.42794, 'train_acc': 0.86502, 'val_loss': 1.56144, 'val_acc': 0.645}
Epoch 59/100
------------------------------
{'time': 112.33629, 'train_loss': 0.42334, 'train_acc': 0.865, 'val_loss': 1.5888, 'val_acc': 0.644}
Epoch 60/100
------------------------------
{'time': 109.69264, 'train_loss': 0.41142, 'train_acc': 0.86996, 'val_loss': 1.58164, 'val_acc': 0.6442}
Epoch 61/100
------------------------------
{'time': 108.66302, 'train_loss': 0.41188, 'train_acc': 0.8681, 'val_loss': 1.60067, 'val_acc': 0.6418}
Epoch 6