In [27]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import os
import numpy
from PIL import Image
import torch
import numpy as np
from torch import nn
from tqdm.auto import tqdm
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib.image as implt
import pandas as pd
import  glob
import random
import cv2

In [29]:
os.rmdir('.virtual_documents') 

FileNotFoundError: [Errno 2] No such file or directory: '.virtual_documents'

In [None]:
class Generator(nn.Module):
    '''
    Generator Class
    Values:
        z_dim: the dimension of the noise vector, a scalar
        im_chan: the number of channels of the output image, a scalar
              (MNIST is black-and-white, so 1 channel is your default)
        hidden_dim: the inner dimension, a scalar
    '''
    def __init__(self, input_dim=171, im_chan=3, hidden_dim=64):
        super(Generator, self).__init__()
        self.input_dim = input_dim
        # Build the neural network
        self.gen = nn.Sequential(
            self.make_gen_block(input_dim, hidden_dim * 4),
            self.make_gen_block(hidden_dim * 4, hidden_dim * 2, kernel_size=4, stride=1),
            self.make_gen_block(hidden_dim * 2, hidden_dim),
            self.make_gen_block(hidden_dim, im_chan, kernel_size=4, final_layer=True),
        )

    def make_gen_block(self, input_channels, output_channels, kernel_size=3, stride=2, final_layer=False):
        '''
        Function to return a sequence of operations corresponding to a generator block of DCGAN;
        a transposed convolution, a batchnorm (except in the final layer), and an activation.
        Parameters:
            input_channels: how many channels the input feature representation has
            output_channels: how many channels the output feature representation should have
            kernel_size: the size of each convolutional filter, equivalent to (kernel_size, kernel_size)
            stride: the stride of the convolution
            final_layer: a boolean, true if it is the final layer and false otherwise 
                      (affects activation and batchnorm)
        '''
        if not final_layer:
            return nn.Sequential(
                nn.ConvTranspose2d(input_channels, output_channels, kernel_size, stride),
                nn.BatchNorm2d(output_channels),
                nn.ReLU(inplace=True),
            )
        else:
            return nn.Sequential(
                nn.ConvTranspose2d(input_channels, output_channels, kernel_size, stride),
                nn.Tanh(),
            )

    def forward(self, noise):
        '''
        Function for completing a forward pass of the generator: Given a noise tensor,
        returns generated images.
        Parameters:
            noise: a noise tensor with dimensions (n_samples, z_dim)
        '''
        x = noise.view(len(noise), self.input_dim, 1, 1)
        return self.gen(x)

In [None]:
def get_one_hot_labels(labels, n_classes=35):
    return F.one_hot(labels, n_classes)

def combine_vectors(x, y):
    return torch.cat((x.float(), y.float()), 1)

def get_input_dimensions(z_dim, img_shape=(3, 64, 64), n_classes=35):
    gen_input_dim = z_dim + n_classes
    disc_input_dim = img_shape[0] + n_classes
    
    return gen_input_dim, disc_input_dim

def get_noise(n_samples, z_dim, device='cpu'):
    '''
    Function for creating noise vectors: Given the dimensions (n_samples, z_dim)
    creates a tensor of that shape filled with random numbers from the normal distribution.
    Parameters:
      n_samples: the number of samples to generate, a scalar
      z_dim: the dimension of the noise vector, a scalar
      device: the device type
    '''
    return torch.randn(n_samples, z_dim, device=device)

In [None]:
z_dim = 128
gen_in_dim, _ = get_input_dimensions(z_dim)

In [None]:
device='cuda'
gen = Generator(input_dim=gen_in_dim).to(device)

In [None]:
def show_tensor_images(image_tensor, num_images=25, size=(1, 64, 64)):
    '''
    Function for visualizing images: Given a tensor of images, number of images, and
    size per image, plots and prints the images in an uniform grid.
    '''
    image_tensor = (image_tensor + 1) / 2
    image_unflat = image_tensor.detach().cpu()
    image_grid = make_grid(image_unflat[:num_images], nrow=5)
    plt.imshow(image_grid.permute(1, 2, 0).squeeze())
    plt.show()
    
def make_grad_hook():
    '''
    Function to keep track of gradients for visualization purposes, 
    which fills the grads list when using model.apply(grad_hook).
    '''
    grads = []
    def grad_hook(m):
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
            grads.append(m.weight.grad)
    return grads, grad_hook


In [None]:
classes= sorted(os.listdir('/kaggle/input/lisadataset/croppedimages'))

In [None]:
gen_weights = torch.load('/kaggle/input/notebookdfef0d9731/models/generator-gtsrb.pth')
gen.load_state_dict(gen_weights)
gen.eval()
count = 2800*35

labels=[]
for i in range(35):
    labels += [i]*2800


test_noise = get_noise(count, z_dim, device)
test_label = torch.tensor(labels)
test_OH_label = get_one_hot_labels(test_label.to(device))

In [None]:
test_noise_and_label = combine_vectors(test_noise, test_OH_label)
test_image_fake = gen(test_noise_and_label)

In [None]:
for i in classes:
    os.mkdir(i)

In [None]:
def add_nofilter(img):
    return img

def add_highBeam(img):
    
    # Define gamma and gain
    gamma = 0.7
    gain = 1

    # Apply gamma correction
    x_transformed = gain * torch.pow((img), gamma)

    return transforms.functional.adjust_gamma(img, 0.1,1)

def add_fog(img):
    # Define fog intensity and contrast reduction
    fog_intensity = 0.2  # adjust this value to control fog density
    contrast_reduction = 0.5  # adjust this value to control contrast

    # Create a fog layer
    fog_layer = torch.rand_like(img) * fog_intensity

    # Reduce contrast and sharpness
    img_foggy = torch.pow(img, contrast_reduction)

    # Combine the fog layer and the image
    img_foggy = img_foggy * (1 - fog_layer) + fog_layer
    img_foggy = img_foggy*0.35
    
    return img_foggy

def add_snow(img_batch):
    img_batch = img_batch[None,:]
    imgcopy = img_batch.clone()
    num_images, _, row, col = img_batch.shape
    for i in range(num_images):
        number_of_pixels = random.randint(15, 25)
        for _ in range(number_of_pixels): 
            y_coord = random.randint(0, row - 1)
            x_coord = random.randint(0, col - 1) 
            imgcopy[i, :, y_coord, x_coord] = 1.0
    return imgcopy[0]

def add_rain(img_batch):
    img_batch = img_batch[None,:]
    imgcopy = img_batch.clone()
    num_images, _, row, col = img_batch.shape
    for i in range(num_images):
        number_of_pixels = random.randint(15, 25)
        for _ in range(number_of_pixels): 
            y_coord = random.randint(0, row - 2)
            x_coord = random.randint(0, col - 1) 
            imgcopy[i, :, y_coord, x_coord] = 128/255
            if random.randint(0, 1):
                imgcopy[i, :, y_coord+1, x_coord] = 128/255
                
    return imgcopy[0]

In [None]:
list_of_functions = [add_nofilter, add_snow, add_rain, add_highBeam, add_fog]

In [None]:
for index, i in enumerate(test_image_fake):
    random_function = random.sample(list_of_functions,1)[0]
    i = (i+1)/2
    numpy_image = random_function(i).to('cpu').detach().permute(1,2,0).numpy()
    numpy_image = (numpy_image-np.min(numpy_image))/(np.max(numpy_image) - np.min(numpy_image))
    plt.imsave(os.path.join('/kaggle/working/',classes[index//2800],str(index%2800)) + '.jpg', numpy_image)
    

In [None]:
for index, i in enumerate(test_image_fake):
    random_function = random.sample(list_of_functions,1)[0]
    i = (i+1)/2
    numpy_image = random_function(i).to('cpu').detach().permute(1,2,0).numpy()
    numpy_image = (numpy_image-np.min(numpy_image))/(np.max(numpy_image) - np.min(numpy_image))
    plt.imsave(os.path.join('/kaggle/working/',classes[index//2800],str(2800+index%2800)) + '.jpg', numpy_image)
    

In [None]:
for className in classes:
    classPath = os.path.join('/kaggle/input/lisadataset/croppedimages',className)
    for index, imageName in enumerate(os.listdir(classPath)):
        random_function = random.sample(list_of_functions,1)[0]
        img = cv2.imread(os.path.join(classPath, imageName))
        filtered_image = random_function(torch.tensor(img/255., device='cpu').permute(2,0,1)).permute(1,2,0).numpy()
        filtered_image = (filtered_image-np.min(filtered_image))/(np.max(filtered_image) - np.min(filtered_image))
        outputPath = os.path.join('/kaggle/working/', className, str(5600+index)) + '.jpg'
        plt.imsave(outputPath, filtered_image)

In [None]:
import shutil
shutil.make_archive('/kaggle/working/generated_images', 'zip', '/kaggle/working/')

In [None]:
s=0
for i in os.listdir('/kaggle/working/'):
    s += len(os.listdir(os.path.join('/kaggle/working/', i)))
s

# **VIT** 

In [2]:
# Install necessary modules
!pip install torch-summary
!pip install torchmetrics

Collecting torch-summary
  Downloading torch_summary-1.4.5-py3-none-any.whl (16 kB)
Installing collected packages: torch-summary
Successfully installed torch-summary-1.4.5


In [3]:
# Import Dependencies
import os
import numpy as np
import pandas as pd
import cv2
import random
from glob import glob
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import math
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
import torch
from torchsummary import summary
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from typing import Dict, List, Tuple
from tqdm import tqdm
from torchmetrics import Accuracy, F1Score, Recall, Precision



In [4]:
os.rmdir('.virtual_documents') 

In [5]:
# List of class names as considered by data loader
x = sorted(os.listdir('/kaggle/working/'))
# Convert list of class names into dictionary
classes = {key: value for key, value in enumerate(x)}



In [6]:
# Define Image Transforms
transform = transforms.Compose([
    transforms.Resize((81, 81)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create Dataset of entire data
full_dataset = datasets.ImageFolder(root='/kaggle/working', transform=transform)

In [7]:
train_size = int(0.8 * len(full_dataset)) # Split data count into training and validation splits in the ratio 80% to 20%
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size]) # Split the data

In [8]:
batch_size = 128  #Set batch size
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4) # Create train dataloader
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=4) # Create valiation dataloader

In [9]:
# Patch Embedding Class
class PatchEmbedding(nn.Module):
    '''
    Creates Patch Embedding layer to patch the input features from Xception model backbone, and flattens it.
    '''
    def __init__(self, 
                 in_channels:int=3,
                 patch_size:int=3,
                 embedding_dim:int=3*3*3):
        super().__init__()
        
        #  Layer to turn features into patches
        self.patcher = nn.Conv2d(in_channels=in_channels,
                                 out_channels=embedding_dim,
                                 kernel_size=patch_size,
                                 stride=patch_size,
                                 padding=0)

        #  Layer to flatten the patch feature maps into a single dimension
        self.flatten = nn.Flatten(start_dim=2, end_dim=3)
        
        
    def forward(self, x):
        
        x_patched = self.patcher(x)
        x_flattened = self.flatten(x_patched) 
        
        return x_flattened.permute(0, 2, 1)

In [10]:
# Multi-head Self Attention Class
class MultiheadSelfAttentionBlock(nn.Module):
    """
    Creates a multi-head self-attention block.
    """

    def __init__(self,
                 embedding_dim:int=3*3*3, 
                 num_heads:int=3, 
                 attn_dropout:float=0): 
        super().__init__()
 
        # Layer Normalization
        self.layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)
        
        # Multi-Head Attention layer
        self.multihead_attn = nn.MultiheadAttention(embed_dim=embedding_dim,
                                                    num_heads=num_heads,
                                                    dropout=attn_dropout,
                                                    batch_first=True)

    def forward(self, x):
        x = self.layer_norm(x)
        attn_output, _ = self.multihead_attn(query=x, 
                                             key=x,
                                             value=x,
                                             need_weights=False)
        return attn_output

In [11]:
# MLP (Multilayer Perceptron) Class
class MLPBlock(nn.Module):
    """Creates a Layer Normalized Multilayer Perceptron block."""
    def __init__(self,
                 embedding_dim:int=3*3*3,
                 mlp_size:int=3072,
                 dropout:float=0.1):
        super().__init__()
        
        # Norm layer 
        self.layer_norm = nn.LayerNorm(normalized_shape=embedding_dim)
        
        # MLP layer
        self.mlp = nn.Sequential(
            nn.Linear(in_features=embedding_dim,
                      out_features=mlp_size),
            nn.GELU(),
            nn.Dropout(p=dropout),
            nn.Linear(in_features=mlp_size,
                      out_features=embedding_dim), 
            nn.Dropout(p=dropout)
        )

    def forward(self, x):
        x = self.layer_norm(x)
        x = self.mlp(x)
        return x

In [12]:
# Transformer Encoder Class
class TransformerEncoderBlock(nn.Module):
    """
    Creates a Transformer Encoder block.
    """
    
    def __init__(self,
                 embedding_dim:int=3*3*3,
                 num_heads:int=3, 
                 mlp_size:int=3072,
                 mlp_dropout:float=0.1, 
                 attn_dropout:float=0): 
        super().__init__()

        # Multi-head Self Attention Block
        self.msa_block = MultiheadSelfAttentionBlock(embedding_dim=embedding_dim,
                                                     num_heads=num_heads,
                                                     attn_dropout=attn_dropout)
        
        # MLP Block
        self.mlp_block =  MLPBlock(embedding_dim=embedding_dim,
                                   mlp_size=mlp_size,
                                   dropout=mlp_dropout)

    def forward(self, x):
        
        x =  self.msa_block(x) + x 
        x = self.mlp_block(x) + x 
        
        return x

In [13]:
class SeparableConv2d(nn.Module):
    def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False):
        super(SeparableConv2d,self).__init__()

        self.conv1 = nn.Conv2d(in_channels,in_channels,kernel_size,stride,padding,dilation,groups=in_channels,bias=bias)
        self.pointwise = nn.Conv2d(in_channels,out_channels,1,1,0,1,1,bias=bias)
    
    def forward(self,x):
        x = self.conv1(x)
        x = self.pointwise(x)
        return x

In [14]:
# Xception Block Class
class Block(nn.Module):
    def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True):
        super(Block, self).__init__()

        if out_filters != in_filters or strides!=1:
            self.skip = nn.Conv2d(in_filters,out_filters,1,stride=strides, bias=False)
            self.skipbn = nn.BatchNorm2d(out_filters)
        else:
            self.skip=None
        
        self.relu = nn.ReLU(inplace=True)
        rep=[]

        filters=in_filters
        if grow_first:
            rep.append(self.relu)
            rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
            rep.append(nn.BatchNorm2d(out_filters))
            filters = out_filters

        for i in range(reps-1):
            rep.append(self.relu)
            rep.append(SeparableConv2d(filters,filters,3,stride=1,padding=1,bias=False))
            rep.append(nn.BatchNorm2d(filters))
        
        if not grow_first:
            rep.append(self.relu)
            rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
            rep.append(nn.BatchNorm2d(out_filters))

        if not start_with_relu:
            rep = rep[1:]
        else:
            rep[0] = nn.ReLU(inplace=False)

        if strides != 1:
            rep.append(nn.MaxPool2d(3,strides,1))
        self.rep = nn.Sequential(*rep)

    def forward(self,inp):
        x = self.rep(inp)

        if self.skip is not None:
            skip = self.skip(inp)
            skip = self.skipbn(skip)
        else:
            skip = inp

        x+=skip
        return x

In [15]:
class Xception(nn.Module):
    def __init__(self, num_classes=1000):
        super(Xception, self).__init__()
        
        self.num_classes = num_classes

        self.conv1 = nn.Conv2d(3, 32, 3,2, 0, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(32,64,3,bias=False)
        self.bn2 = nn.BatchNorm2d(64)

        self.block1=Block(64,128,2,2,start_with_relu=False,grow_first=True)
        self.block2=Block(128,256,2,2,start_with_relu=True,grow_first=True)
        self.block3=Block(256,728,2,2,start_with_relu=True,grow_first=True)

        self.block4=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block5=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block6=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block7=Block(728,728,3,1,start_with_relu=True,grow_first=True)

        self.block8=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block9=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block10=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block11=Block(728,728,3,1,start_with_relu=True,grow_first=True)

        self.block12=Block(728,1024,2,2,start_with_relu=True,grow_first=False)

        self.conv3 = SeparableConv2d(1024,1536,3,1,1)
        self.bn3 = nn.BatchNorm2d(1536)

        self.conv4 = SeparableConv2d(1536,2048,3,1,1)
        self.bn4 = nn.BatchNorm2d(2048)
        
        self.upsample = nn.Upsample((2, 2))
        self.convtranspose = nn.ConvTranspose2d(2048, 3, kernel_size=5, stride=4)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()


    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.block6(x)
        x = self.block7(x)
        x = self.block8(x)
        x = self.block9(x)
        x = self.block10(x)
        x = self.block11(x)
        x = self.block12(x)
        
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)
        
        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu(x)
        
        x = self.upsample(x)
        x = self.upsample(x)
        x = self.convtranspose(x)

        return x

In [16]:
class ViT(nn.Module):
    """Creates a Vision Transformer architecture with Xception Model backbone."""
    # 2. Initialize the class with hyperparameters from Table 1 and Table 3
    def __init__(self,
                 img_size:int=9, # Feature map resolution
                 in_channels:int=3, # Number of channels in input feature map
                 patch_size:int=3, # Patch size
                 num_transformer_layers:int=12,
                 embedding_dim:int=3*3*3,
                 mlp_size:int=3072, # MLP size
                 num_heads:int=3,
                 attn_dropout:float=0, # Dropout for Attention Projection
                 mlp_dropout:float=0.1, # Dropout for MLP layers 
                 embedding_dropout:float=0.1, # Dropout for Patch and Position Embeddings
                 num_classes:int= 35): # Total number of traffic sign classes 
        
        super().__init__()
        
        self.xception_model = Xception()
        
        # Calculate Number of Patches ((height * width)/(patch^2))
        self.num_patches = (img_size * img_size) // patch_size**2
                 
        # Learnable Class Embedding
        self.class_embedding = nn.Parameter(data=torch.randn(1, 1, embedding_dim),
                                            requires_grad=True)
        
        # Learnable Position Embedding
        self.position_embedding = nn.Parameter(data=torch.randn(1, self.num_patches+1, embedding_dim),
                                               requires_grad=True)
                
        # Embedding Dropout
        self.embedding_dropout = nn.Dropout(p=embedding_dropout)
        
        # Patch Embedding Layer
        self.patch_embedding = PatchEmbedding(in_channels=in_channels,
                                              patch_size=patch_size,
                                              embedding_dim=embedding_dim)
        
        # Create Transformer Encoder Blocks
        self.transformer_encoder = nn.Sequential(*[TransformerEncoderBlock(embedding_dim=embedding_dim,
                                                                            num_heads=num_heads,
                                                                            mlp_size=mlp_size,
                                                                            mlp_dropout=mlp_dropout) for _ in range(num_transformer_layers)])
       
        # Create Classifier Head
        self.classifier = nn.Sequential(
            nn.LayerNorm(normalized_shape=embedding_dim),
            nn.Linear(in_features=embedding_dim, 
                      out_features=num_classes)
        )
        
    def forward(self, x):
        
        x = self.xception_model(x)
        
        # Get Batch size
        batch_size = x.shape[0]

        class_token = self.class_embedding.expand(batch_size, -1, -1)

        x = self.patch_embedding(x)
        x = torch.cat((class_token, x), dim=1)
        x = self.position_embedding + x

        x = self.embedding_dropout(x)
        x = self.transformer_encoder(x)
        
        x = self.classifier(x[:, 0])
        
        return x   

In [17]:
def train_step(model: torch.nn.Module, 
               dataloader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer,
               device: torch.device,
               train_acc_metric,
               train_f1_metric,
               train_recall_metric,
               train_prec_metric) -> Tuple[float, float, float, float, float]:
    
    model.train()
    
    train_loss, acc, f1, recall, precision = 0, 0, 0, 0, 0

    for batch, (X, y) in tqdm(enumerate(dataloader)):
        X, y = X.to(device), y.to(device)

        # Forward pass
        y_pred = model(X)

        # Calculate and accumulate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss.item() 

        # Zero gradients
        optimizer.zero_grad()

        # Backward pass
        loss.backward()

        # Optimizer step
        optimizer.step()

        # Update metrics
        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        acc += train_acc_metric(y_pred_class, y)
        f1 += train_f1_metric(y_pred_class, y)
        recall += train_recall_metric(y_pred_class, y)
        precision += train_prec_metric(y_pred_class, y)

    # Compute metrics
    train_loss = train_loss / len(dataloader)
    train_acc = acc / len(dataloader)
    train_f1 = f1 / len(dataloader)
    train_rec = recall / len(dataloader)
    train_precision = precision / len(dataloader)

    return train_loss, train_acc, train_f1, train_rec, train_precision

In [18]:
def test_step(model: torch.nn.Module, 
              dataloader: torch.utils.data.DataLoader, 
              loss_fn: torch.nn.Module, 
              device: torch.device,
              test_f1_metric,
              test_recall_metric,
              test_acc_metric,
              test_prec_metric) -> Tuple[float, float, float, float, float]:
    
    model.eval()

    test_loss, acc, f1, recall, precision = 0, 0, 0, 0, 0

    with torch.no_grad():
        for batch, (X, y) in tqdm(enumerate(dataloader)):
            X, y = X.to(device), y.to(device)

            # Forward pass
            y_pred = model(X)

            # Calculate and accumulate loss
            loss = loss_fn(y_pred, y)
            test_loss += loss.item() 

            # Update metrics
            y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
            acc += test_acc_metric(y_pred_class, y)
            f1 += test_f1_metric(y_pred_class, y)
            recall += test_recall_metric(y_pred_class, y)
            precision += test_prec_metric(y_pred_class, y)

    # Compute metrics
    test_loss = test_loss / len(dataloader)
    test_acc = acc / len(dataloader)
    test_f1 = f1 / len(dataloader)
    test_rec = recall / len(dataloader)
    test_precision = precision / len(dataloader)

    return test_loss, test_acc, test_f1, test_rec, test_precision

In [19]:
def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:

    # Initialize metrics
    train_acc_metric = Accuracy(task="multiclass", num_classes=35).to(device)
    train_f1_metric = F1Score(task="multiclass", average='macro', num_classes=35).to(device)
    train_recall_metric = Recall(task="multiclass", average='macro', num_classes=35).to(device)
    train_prec_metric = Precision(task="multiclass", average='macro', num_classes=35).to(device)

    test_acc_metric = Accuracy(task="multiclass", num_classes=35).to(device)
    test_f1_metric = F1Score(task="multiclass", average='macro', num_classes=35).to(device)
    test_recall_metric = Recall(task="multiclass", average='macro', num_classes=35).to(device)
    test_prec_metric = Precision(task="multiclass", average='macro', num_classes=35).to(device)

    results = {"train_loss": [],
               "train_acc": [],
               "train_f1_score": [],
               "train_recall": [],
               "train_precision": [],
               "val_loss": [],
               "val_acc": [],
               "val_f1_score": [],
               "val_recall": [],
               "val_precision": []
    }
    
    model.to(device)

    for epoch in range(epochs):
       
        
        train_loss, train_acc, train_f1_score, train_recall, train_precision = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device,
                                          train_acc_metric=train_acc_metric,
                                          train_f1_metric=train_f1_metric,
                                          train_recall_metric=train_recall_metric,
                                          train_prec_metric=train_prec_metric)
                                          
        test_loss, test_acc, test_f1_score, test_recall, test_precision = test_step(model=model,
                                  dataloader=test_dataloader,
                                  loss_fn=loss_fn,
                                  device=device,
                                  test_acc_metric=test_acc_metric,
                                  test_f1_metric=test_f1_metric,
                                  test_recall_metric=test_recall_metric,
                                  test_prec_metric=test_prec_metric)

        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"train_F1_score: {train_f1_score:.4f} | "
          f"train_recall: {train_recall:.4f} | "
          f"train_precision: {train_precision:.4f} | "
          f"val_loss: {test_loss:.4f} | "
          f"val_acc: {test_acc:.4f} | "
          f"val_F1_score: {test_f1_score:.4f} | "
          f"val_recall: {test_recall:.4f} | "
          f"val_precision: {test_precision:.4f}"
        )

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["train_f1_score"].append(train_f1_score)
        results["train_recall"].append(train_recall)
        results["train_precision"].append(train_precision)
        results["val_loss"].append(test_loss)
        results["val_acc"].append(test_acc)
        results["val_f1_score"].append(test_f1_score)
        results["val_recall"].append(test_recall)
        results["val_precision"].append(test_precision)
        if epoch in [31, 63, 95,127]:
            torch.save(
                obj=model.state_dict(),
                f=f"/kaggle/working/epoch_{epoch+1}.pth"
            )

    return results


In [22]:
# Initialize Model
model = ViT()

In [23]:
# Initialize optimizer, loss and device to train on
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))
loss = nn.CrossEntropyLoss()
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Train the model and store the results in "history" variable
history = train(model=model,
       train_dataloader=train_dataloader,
       test_dataloader=val_dataloader,
       optimizer=optimizer,
       loss_fn=loss,
       epochs=128,
       device=device)

1243it [04:53,  4.24it/s]
311it [00:20, 15.25it/s]

Epoch: 1 | train_loss: 0.3561 | train_acc: 0.9194 | train_F1_score: 0.9070 | train_recall: 0.9176 | train_precision: 0.9116 | val_loss: 0.2517 | val_acc: 0.9466 | val_F1_score: 0.9350 | val_recall: 0.9439 | val_precision: 0.9461



1243it [04:47,  4.33it/s]
311it [00:20, 15.27it/s]

Epoch: 2 | train_loss: 0.0408 | train_acc: 0.9919 | train_F1_score: 0.9900 | train_recall: 0.9916 | train_precision: 0.9912 | val_loss: 0.0463 | val_acc: 0.9900 | val_F1_score: 0.9881 | val_recall: 0.9899 | val_precision: 0.9896



1243it [04:46,  4.33it/s]
311it [00:20, 15.44it/s]

Epoch: 3 | train_loss: 0.0281 | train_acc: 0.9941 | train_F1_score: 0.9926 | train_recall: 0.9937 | train_precision: 0.9934 | val_loss: 0.0854 | val_acc: 0.9712 | val_F1_score: 0.9621 | val_recall: 0.9692 | val_precision: 0.9663



1243it [04:46,  4.33it/s]
311it [00:20, 15.45it/s]

Epoch: 4 | train_loss: 0.0204 | train_acc: 0.9957 | train_F1_score: 0.9945 | train_recall: 0.9953 | train_precision: 0.9952 | val_loss: 0.0102 | val_acc: 0.9977 | val_F1_score: 0.9973 | val_recall: 0.9976 | val_precision: 0.9976



1243it [04:47,  4.33it/s]
311it [00:20, 15.40it/s]

Epoch: 5 | train_loss: 0.0156 | train_acc: 0.9963 | train_F1_score: 0.9954 | train_recall: 0.9961 | train_precision: 0.9958 | val_loss: 0.0139 | val_acc: 0.9973 | val_F1_score: 0.9967 | val_recall: 0.9971 | val_precision: 0.9971



1243it [04:46,  4.33it/s]
311it [00:20, 15.32it/s]

Epoch: 6 | train_loss: 0.0145 | train_acc: 0.9967 | train_F1_score: 0.9958 | train_recall: 0.9964 | train_precision: 0.9964 | val_loss: 0.0134 | val_acc: 0.9971 | val_F1_score: 0.9959 | val_recall: 0.9963 | val_precision: 0.9964



1243it [04:46,  4.33it/s]
311it [00:20, 15.03it/s]

Epoch: 7 | train_loss: 0.0125 | train_acc: 0.9973 | train_F1_score: 0.9966 | train_recall: 0.9972 | train_precision: 0.9970 | val_loss: 0.0138 | val_acc: 0.9966 | val_F1_score: 0.9958 | val_recall: 0.9964 | val_precision: 0.9962



1243it [04:47,  4.33it/s]
311it [00:20, 15.21it/s]

Epoch: 8 | train_loss: 0.0104 | train_acc: 0.9976 | train_F1_score: 0.9968 | train_recall: 0.9973 | train_precision: 0.9971 | val_loss: 0.0102 | val_acc: 0.9977 | val_F1_score: 0.9972 | val_recall: 0.9975 | val_precision: 0.9977



1243it [04:46,  4.34it/s]
311it [00:20, 15.42it/s]

Epoch: 9 | train_loss: 0.0104 | train_acc: 0.9976 | train_F1_score: 0.9970 | train_recall: 0.9975 | train_precision: 0.9973 | val_loss: 0.0200 | val_acc: 0.9956 | val_F1_score: 0.9944 | val_recall: 0.9952 | val_precision: 0.9949



1243it [04:46,  4.34it/s]
311it [00:20, 15.43it/s]

Epoch: 10 | train_loss: 0.0077 | train_acc: 0.9982 | train_F1_score: 0.9977 | train_recall: 0.9981 | train_precision: 0.9980 | val_loss: 0.0114 | val_acc: 0.9974 | val_F1_score: 0.9970 | val_recall: 0.9971 | val_precision: 0.9976



1243it [04:46,  4.34it/s]
311it [00:20, 15.47it/s]

Epoch: 11 | train_loss: 0.0066 | train_acc: 0.9983 | train_F1_score: 0.9979 | train_recall: 0.9982 | train_precision: 0.9980 | val_loss: 0.0136 | val_acc: 0.9973 | val_F1_score: 0.9967 | val_recall: 0.9971 | val_precision: 0.9972



1243it [04:46,  4.34it/s]
311it [00:20, 15.41it/s]

Epoch: 12 | train_loss: 0.0059 | train_acc: 0.9987 | train_F1_score: 0.9984 | train_recall: 0.9987 | train_precision: 0.9986 | val_loss: 0.0081 | val_acc: 0.9983 | val_F1_score: 0.9980 | val_recall: 0.9983 | val_precision: 0.9983



1243it [04:46,  4.33it/s]
311it [00:20, 15.54it/s]

Epoch: 13 | train_loss: 0.0057 | train_acc: 0.9987 | train_F1_score: 0.9983 | train_recall: 0.9986 | train_precision: 0.9984 | val_loss: 0.0083 | val_acc: 0.9977 | val_F1_score: 0.9972 | val_recall: 0.9975 | val_precision: 0.9976



1243it [04:46,  4.34it/s]
311it [00:20, 15.48it/s]

Epoch: 14 | train_loss: 0.0042 | train_acc: 0.9989 | train_F1_score: 0.9987 | train_recall: 0.9990 | train_precision: 0.9988 | val_loss: 0.0126 | val_acc: 0.9975 | val_F1_score: 0.9968 | val_recall: 0.9971 | val_precision: 0.9971



1243it [04:46,  4.34it/s]
311it [00:20, 15.46it/s]

Epoch: 15 | train_loss: 0.0068 | train_acc: 0.9985 | train_F1_score: 0.9981 | train_recall: 0.9983 | train_precision: 0.9983 | val_loss: 0.0058 | val_acc: 0.9987 | val_F1_score: 0.9985 | val_recall: 0.9987 | val_precision: 0.9986



1243it [04:46,  4.33it/s]
311it [00:20, 15.35it/s]

Epoch: 16 | train_loss: 0.0047 | train_acc: 0.9989 | train_F1_score: 0.9986 | train_recall: 0.9987 | train_precision: 0.9987 | val_loss: 0.0065 | val_acc: 0.9986 | val_F1_score: 0.9983 | val_recall: 0.9985 | val_precision: 0.9986



1243it [04:46,  4.33it/s]
311it [00:20, 15.30it/s]

Epoch: 17 | train_loss: 0.0053 | train_acc: 0.9989 | train_F1_score: 0.9986 | train_recall: 0.9989 | train_precision: 0.9988 | val_loss: 0.0068 | val_acc: 0.9987 | val_F1_score: 0.9983 | val_recall: 0.9985 | val_precision: 0.9985



1243it [04:46,  4.33it/s]
311it [00:20, 15.27it/s]

Epoch: 18 | train_loss: 0.0037 | train_acc: 0.9991 | train_F1_score: 0.9988 | train_recall: 0.9990 | train_precision: 0.9989 | val_loss: 0.0058 | val_acc: 0.9990 | val_F1_score: 0.9988 | val_recall: 0.9989 | val_precision: 0.9990



1243it [04:46,  4.33it/s]
311it [00:20, 15.15it/s]

Epoch: 19 | train_loss: 0.0038 | train_acc: 0.9991 | train_F1_score: 0.9988 | train_recall: 0.9991 | train_precision: 0.9989 | val_loss: 0.0080 | val_acc: 0.9982 | val_F1_score: 0.9979 | val_recall: 0.9982 | val_precision: 0.9982



1243it [04:48,  4.31it/s]
311it [00:20, 15.17it/s]

Epoch: 20 | train_loss: 0.0029 | train_acc: 0.9992 | train_F1_score: 0.9991 | train_recall: 0.9992 | train_precision: 0.9992 | val_loss: 0.0049 | val_acc: 0.9989 | val_F1_score: 0.9987 | val_recall: 0.9988 | val_precision: 0.9989



1243it [04:47,  4.32it/s]
311it [00:20, 15.22it/s]

Epoch: 21 | train_loss: 0.0027 | train_acc: 0.9994 | train_F1_score: 0.9992 | train_recall: 0.9994 | train_precision: 0.9993 | val_loss: 0.0066 | val_acc: 0.9987 | val_F1_score: 0.9982 | val_recall: 0.9984 | val_precision: 0.9984



1243it [04:48,  4.31it/s]
311it [00:20, 15.29it/s]

Epoch: 22 | train_loss: 0.0032 | train_acc: 0.9993 | train_F1_score: 0.9991 | train_recall: 0.9993 | train_precision: 0.9992 | val_loss: 0.0041 | val_acc: 0.9993 | val_F1_score: 0.9991 | val_recall: 0.9992 | val_precision: 0.9993



1243it [04:47,  4.32it/s]
311it [00:20, 15.42it/s]

Epoch: 23 | train_loss: 0.0021 | train_acc: 0.9995 | train_F1_score: 0.9992 | train_recall: 0.9993 | train_precision: 0.9993 | val_loss: 0.0062 | val_acc: 0.9989 | val_F1_score: 0.9987 | val_recall: 0.9989 | val_precision: 0.9987



1243it [04:47,  4.32it/s]
311it [00:20, 15.34it/s]

Epoch: 24 | train_loss: 0.0037 | train_acc: 0.9992 | train_F1_score: 0.9990 | train_recall: 0.9991 | train_precision: 0.9991 | val_loss: 0.0109 | val_acc: 0.9980 | val_F1_score: 0.9975 | val_recall: 0.9979 | val_precision: 0.9978



1243it [04:47,  4.33it/s]
311it [00:20, 15.27it/s]

Epoch: 25 | train_loss: 0.0025 | train_acc: 0.9994 | train_F1_score: 0.9993 | train_recall: 0.9994 | train_precision: 0.9993 | val_loss: 0.0071 | val_acc: 0.9983 | val_F1_score: 0.9978 | val_recall: 0.9982 | val_precision: 0.9979



1243it [04:47,  4.33it/s]
311it [00:20, 15.24it/s]

Epoch: 27 | train_loss: 0.0023 | train_acc: 0.9995 | train_F1_score: 0.9993 | train_recall: 0.9994 | train_precision: 0.9993 | val_loss: 0.0060 | val_acc: 0.9987 | val_F1_score: 0.9985 | val_recall: 0.9986 | val_precision: 0.9987



1243it [04:47,  4.32it/s]
311it [00:20, 15.28it/s]

Epoch: 28 | train_loss: 0.0027 | train_acc: 0.9993 | train_F1_score: 0.9992 | train_recall: 0.9993 | train_precision: 0.9993 | val_loss: 0.0042 | val_acc: 0.9992 | val_F1_score: 0.9991 | val_recall: 0.9992 | val_precision: 0.9993



1243it [04:47,  4.32it/s]
311it [00:20, 15.38it/s]

Epoch: 29 | train_loss: 0.0022 | train_acc: 0.9995 | train_F1_score: 0.9993 | train_recall: 0.9994 | train_precision: 0.9994 | val_loss: 0.0064 | val_acc: 0.9986 | val_F1_score: 0.9981 | val_recall: 0.9983 | val_precision: 0.9984



1243it [04:46,  4.34it/s]
311it [00:20, 15.54it/s]

Epoch: 30 | train_loss: 0.0018 | train_acc: 0.9996 | train_F1_score: 0.9995 | train_recall: 0.9996 | train_precision: 0.9996 | val_loss: 0.0032 | val_acc: 0.9993 | val_F1_score: 0.9992 | val_recall: 0.9993 | val_precision: 0.9994



1243it [04:46,  4.34it/s]
311it [00:20, 15.44it/s]

Epoch: 31 | train_loss: 0.0023 | train_acc: 0.9995 | train_F1_score: 0.9994 | train_recall: 0.9996 | train_precision: 0.9994 | val_loss: 0.0058 | val_acc: 0.9989 | val_F1_score: 0.9989 | val_recall: 0.9990 | val_precision: 0.9991



1243it [04:46,  4.35it/s]
311it [00:20, 15.08it/s]


Epoch: 32 | train_loss: 0.0017 | train_acc: 0.9996 | train_F1_score: 0.9996 | train_recall: 0.9996 | train_precision: 0.9997 | val_loss: 0.0056 | val_acc: 0.9989 | val_F1_score: 0.9988 | val_recall: 0.9990 | val_precision: 0.9990


1243it [04:47,  4.32it/s]
311it [00:20, 15.09it/s]

Epoch: 33 | train_loss: 0.0018 | train_acc: 0.9996 | train_F1_score: 0.9995 | train_recall: 0.9995 | train_precision: 0.9995 | val_loss: 0.0046 | val_acc: 0.9991 | val_F1_score: 0.9989 | val_recall: 0.9991 | val_precision: 0.9991



1243it [04:47,  4.32it/s]
311it [00:20, 15.25it/s]

Epoch: 34 | train_loss: 0.0011 | train_acc: 0.9997 | train_F1_score: 0.9997 | train_recall: 0.9997 | train_precision: 0.9997 | val_loss: 0.0079 | val_acc: 0.9986 | val_F1_score: 0.9983 | val_recall: 0.9984 | val_precision: 0.9986



1243it [04:48,  4.31it/s]
311it [00:20, 15.18it/s]

Epoch: 35 | train_loss: 0.0018 | train_acc: 0.9996 | train_F1_score: 0.9994 | train_recall: 0.9995 | train_precision: 0.9994 | val_loss: 0.0064 | val_acc: 0.9987 | val_F1_score: 0.9980 | val_recall: 0.9982 | val_precision: 0.9982



1243it [04:48,  4.31it/s]
311it [00:20, 15.17it/s]

Epoch: 36 | train_loss: 0.0029 | train_acc: 0.9994 | train_F1_score: 0.9993 | train_recall: 0.9994 | train_precision: 0.9993 | val_loss: 0.0055 | val_acc: 0.9989 | val_F1_score: 0.9987 | val_recall: 0.9988 | val_precision: 0.9988



1243it [04:48,  4.32it/s]
311it [00:20, 15.16it/s]

Epoch: 37 | train_loss: 0.0010 | train_acc: 0.9998 | train_F1_score: 0.9997 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0039 | val_acc: 0.9991 | val_F1_score: 0.9988 | val_recall: 0.9990 | val_precision: 0.9989



1243it [04:48,  4.31it/s]
311it [00:20, 15.15it/s]

Epoch: 38 | train_loss: 0.0015 | train_acc: 0.9996 | train_F1_score: 0.9995 | train_recall: 0.9996 | train_precision: 0.9996 | val_loss: 0.0048 | val_acc: 0.9991 | val_F1_score: 0.9989 | val_recall: 0.9990 | val_precision: 0.9991



1243it [04:47,  4.32it/s]
311it [00:20, 15.20it/s]

Epoch: 39 | train_loss: 0.0019 | train_acc: 0.9996 | train_F1_score: 0.9995 | train_recall: 0.9996 | train_precision: 0.9995 | val_loss: 0.0038 | val_acc: 0.9993 | val_F1_score: 0.9991 | val_recall: 0.9992 | val_precision: 0.9992



1243it [04:47,  4.32it/s]
311it [00:20, 15.23it/s]

Epoch: 40 | train_loss: 0.0016 | train_acc: 0.9997 | train_F1_score: 0.9996 | train_recall: 0.9996 | train_precision: 0.9996 | val_loss: 0.0070 | val_acc: 0.9987 | val_F1_score: 0.9985 | val_recall: 0.9986 | val_precision: 0.9988



1243it [04:48,  4.31it/s]
311it [00:20, 15.23it/s]

Epoch: 41 | train_loss: 0.0017 | train_acc: 0.9996 | train_F1_score: 0.9995 | train_recall: 0.9996 | train_precision: 0.9995 | val_loss: 0.0059 | val_acc: 0.9988 | val_F1_score: 0.9985 | val_recall: 0.9987 | val_precision: 0.9987



1243it [04:48,  4.31it/s]
311it [00:20, 15.06it/s]

Epoch: 42 | train_loss: 0.0006 | train_acc: 0.9999 | train_F1_score: 0.9999 | train_recall: 0.9999 | train_precision: 0.9999 | val_loss: 0.0050 | val_acc: 0.9991 | val_F1_score: 0.9989 | val_recall: 0.9991 | val_precision: 0.9991



1243it [04:48,  4.31it/s]
311it [00:20, 15.11it/s]

Epoch: 43 | train_loss: 0.0019 | train_acc: 0.9996 | train_F1_score: 0.9995 | train_recall: 0.9996 | train_precision: 0.9996 | val_loss: 0.0060 | val_acc: 0.9988 | val_F1_score: 0.9983 | val_recall: 0.9985 | val_precision: 0.9984



1243it [04:47,  4.32it/s]
311it [00:20, 15.09it/s]

Epoch: 44 | train_loss: 0.0017 | train_acc: 0.9996 | train_F1_score: 0.9996 | train_recall: 0.9997 | train_precision: 0.9996 | val_loss: 0.0052 | val_acc: 0.9990 | val_F1_score: 0.9990 | val_recall: 0.9991 | val_precision: 0.9991



1243it [04:47,  4.32it/s]
311it [00:20, 15.09it/s]

Epoch: 45 | train_loss: 0.0007 | train_acc: 0.9999 | train_F1_score: 0.9998 | train_recall: 0.9999 | train_precision: 0.9999 | val_loss: 0.0063 | val_acc: 0.9988 | val_F1_score: 0.9985 | val_recall: 0.9986 | val_precision: 0.9987



1243it [04:47,  4.32it/s]
311it [00:20, 15.12it/s]

Epoch: 46 | train_loss: 0.0017 | train_acc: 0.9997 | train_F1_score: 0.9996 | train_recall: 0.9996 | train_precision: 0.9996 | val_loss: 0.0048 | val_acc: 0.9991 | val_F1_score: 0.9988 | val_recall: 0.9989 | val_precision: 0.9990



1243it [04:47,  4.32it/s]
311it [00:20, 15.25it/s]

Epoch: 47 | train_loss: 0.0011 | train_acc: 0.9998 | train_F1_score: 0.9997 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0059 | val_acc: 0.9989 | val_F1_score: 0.9986 | val_recall: 0.9987 | val_precision: 0.9988



1243it [04:47,  4.32it/s]
311it [00:20, 15.25it/s]

Epoch: 48 | train_loss: 0.0017 | train_acc: 0.9997 | train_F1_score: 0.9997 | train_recall: 0.9997 | train_precision: 0.9997 | val_loss: 0.0067 | val_acc: 0.9988 | val_F1_score: 0.9982 | val_recall: 0.9984 | val_precision: 0.9985



1243it [04:48,  4.31it/s]
311it [00:20, 15.10it/s]

Epoch: 49 | train_loss: 0.0007 | train_acc: 0.9999 | train_F1_score: 0.9999 | train_recall: 0.9999 | train_precision: 0.9999 | val_loss: 0.0040 | val_acc: 0.9993 | val_F1_score: 0.9993 | val_recall: 0.9995 | val_precision: 0.9993



1243it [04:48,  4.31it/s]
311it [00:20, 15.16it/s]

Epoch: 50 | train_loss: 0.0001 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0042 | val_acc: 0.9993 | val_F1_score: 0.9992 | val_recall: 0.9992 | val_precision: 0.9994



1243it [04:48,  4.31it/s]
311it [00:20, 15.17it/s]

Epoch: 51 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0047 | val_acc: 0.9992 | val_F1_score: 0.9990 | val_recall: 0.9990 | val_precision: 0.9991



1243it [04:49,  4.30it/s]
311it [00:20, 15.20it/s]

Epoch: 52 | train_loss: 0.0016 | train_acc: 0.9997 | train_F1_score: 0.9996 | train_recall: 0.9997 | train_precision: 0.9997 | val_loss: 0.0110 | val_acc: 0.9978 | val_F1_score: 0.9968 | val_recall: 0.9972 | val_precision: 0.9971



1243it [04:47,  4.32it/s]
311it [00:20, 15.19it/s]

Epoch: 53 | train_loss: 0.0022 | train_acc: 0.9995 | train_F1_score: 0.9995 | train_recall: 0.9996 | train_precision: 0.9995 | val_loss: 0.0057 | val_acc: 0.9990 | val_F1_score: 0.9989 | val_recall: 0.9991 | val_precision: 0.9991



1243it [04:47,  4.32it/s]
311it [00:20, 15.22it/s]

Epoch: 54 | train_loss: 0.0007 | train_acc: 0.9999 | train_F1_score: 0.9998 | train_recall: 0.9999 | train_precision: 0.9999 | val_loss: 0.0069 | val_acc: 0.9988 | val_F1_score: 0.9984 | val_recall: 0.9987 | val_precision: 0.9985



1243it [04:48,  4.31it/s]
311it [00:20, 15.22it/s]

Epoch: 55 | train_loss: 0.0014 | train_acc: 0.9997 | train_F1_score: 0.9997 | train_recall: 0.9997 | train_precision: 0.9997 | val_loss: 0.0061 | val_acc: 0.9990 | val_F1_score: 0.9988 | val_recall: 0.9989 | val_precision: 0.9991



1243it [04:47,  4.32it/s]
311it [00:20, 15.21it/s]

Epoch: 56 | train_loss: 0.0008 | train_acc: 0.9998 | train_F1_score: 0.9998 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0051 | val_acc: 0.9993 | val_F1_score: 0.9991 | val_recall: 0.9992 | val_precision: 0.9993



1243it [04:47,  4.32it/s]
311it [00:20, 15.29it/s]

Epoch: 57 | train_loss: 0.0011 | train_acc: 0.9998 | train_F1_score: 0.9997 | train_recall: 0.9997 | train_precision: 0.9997 | val_loss: 0.0060 | val_acc: 0.9990 | val_F1_score: 0.9988 | val_recall: 0.9989 | val_precision: 0.9989



1243it [04:47,  4.32it/s]
311it [00:20, 15.32it/s]

Epoch: 58 | train_loss: 0.0014 | train_acc: 0.9996 | train_F1_score: 0.9995 | train_recall: 0.9996 | train_precision: 0.9996 | val_loss: 0.0058 | val_acc: 0.9989 | val_F1_score: 0.9986 | val_recall: 0.9988 | val_precision: 0.9986



1243it [04:47,  4.32it/s]
311it [00:20, 15.34it/s]

Epoch: 59 | train_loss: 0.0010 | train_acc: 0.9998 | train_F1_score: 0.9998 | train_recall: 0.9999 | train_precision: 0.9998 | val_loss: 0.0062 | val_acc: 0.9989 | val_F1_score: 0.9987 | val_recall: 0.9989 | val_precision: 0.9988



1243it [04:47,  4.32it/s]
311it [00:20, 15.34it/s]

Epoch: 60 | train_loss: 0.0011 | train_acc: 0.9998 | train_F1_score: 0.9998 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0064 | val_acc: 0.9991 | val_F1_score: 0.9990 | val_recall: 0.9992 | val_precision: 0.9992



1243it [04:47,  4.32it/s]
311it [00:20, 15.24it/s]

Epoch: 61 | train_loss: 0.0008 | train_acc: 0.9998 | train_F1_score: 0.9998 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0044 | val_acc: 0.9993 | val_F1_score: 0.9990 | val_recall: 0.9992 | val_precision: 0.9991



1243it [04:47,  4.32it/s]
311it [00:20, 15.32it/s]

Epoch: 62 | train_loss: 0.0010 | train_acc: 0.9997 | train_F1_score: 0.9996 | train_recall: 0.9997 | train_precision: 0.9997 | val_loss: 0.0060 | val_acc: 0.9992 | val_F1_score: 0.9989 | val_recall: 0.9991 | val_precision: 0.9991



1243it [04:47,  4.32it/s]
311it [00:20, 15.32it/s]

Epoch: 63 | train_loss: 0.0021 | train_acc: 0.9996 | train_F1_score: 0.9996 | train_recall: 0.9996 | train_precision: 0.9996 | val_loss: 0.0045 | val_acc: 0.9991 | val_F1_score: 0.9990 | val_recall: 0.9991 | val_precision: 0.9992



1243it [04:47,  4.32it/s]
311it [00:20, 15.33it/s]


Epoch: 64 | train_loss: 0.0002 | train_acc: 1.0000 | train_F1_score: 0.9999 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0039 | val_acc: 0.9993 | val_F1_score: 0.9993 | val_recall: 0.9994 | val_precision: 0.9994


1243it [04:47,  4.32it/s]
311it [00:20, 15.41it/s]

Epoch: 65 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0041 | val_acc: 0.9994 | val_F1_score: 0.9992 | val_recall: 0.9993 | val_precision: 0.9994



1243it [04:47,  4.32it/s]
311it [00:20, 15.24it/s]

Epoch: 66 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0040 | val_acc: 0.9994 | val_F1_score: 0.9993 | val_recall: 0.9995 | val_precision: 0.9994



1243it [04:47,  4.32it/s]
311it [00:20, 15.40it/s]

Epoch: 67 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0042 | val_acc: 0.9994 | val_F1_score: 0.9994 | val_recall: 0.9994 | val_precision: 0.9994



1243it [04:47,  4.32it/s]
311it [00:20, 15.38it/s]

Epoch: 68 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0042 | val_acc: 0.9994 | val_F1_score: 0.9991 | val_recall: 0.9992 | val_precision: 0.9992



1243it [04:47,  4.32it/s]
311it [00:20, 15.30it/s]

Epoch: 69 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0042 | val_acc: 0.9994 | val_F1_score: 0.9994 | val_recall: 0.9994 | val_precision: 0.9995



1243it [04:48,  4.31it/s]
311it [00:20, 15.17it/s]

Epoch: 70 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0043 | val_acc: 0.9994 | val_F1_score: 0.9994 | val_recall: 0.9995 | val_precision: 0.9994



1243it [04:48,  4.31it/s]
311it [00:20, 15.25it/s]

Epoch: 71 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0044 | val_acc: 0.9994 | val_F1_score: 0.9995 | val_recall: 0.9996 | val_precision: 0.9995



1243it [04:47,  4.32it/s]
311it [00:20, 15.33it/s]

Epoch: 72 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0044 | val_acc: 0.9994 | val_F1_score: 0.9994 | val_recall: 0.9995 | val_precision: 0.9995



1243it [04:47,  4.32it/s]
311it [00:20, 14.84it/s]

Epoch: 73 | train_loss: 0.0000 | train_acc: 1.0000 | train_F1_score: 1.0000 | train_recall: 1.0000 | train_precision: 1.0000 | val_loss: 0.0046 | val_acc: 0.9994 | val_F1_score: 0.9992 | val_recall: 0.9993 | val_precision: 0.9993



1243it [04:47,  4.32it/s]
311it [00:20, 15.33it/s]

Epoch: 74 | train_loss: 0.0028 | train_acc: 0.9995 | train_F1_score: 0.9993 | train_recall: 0.9994 | train_precision: 0.9993 | val_loss: 0.0059 | val_acc: 0.9988 | val_F1_score: 0.9985 | val_recall: 0.9986 | val_precision: 0.9987



1243it [04:47,  4.32it/s]
311it [00:20, 15.35it/s]

Epoch: 75 | train_loss: 0.0013 | train_acc: 0.9998 | train_F1_score: 0.9997 | train_recall: 0.9997 | train_precision: 0.9997 | val_loss: 0.0073 | val_acc: 0.9986 | val_F1_score: 0.9984 | val_recall: 0.9985 | val_precision: 0.9987



1243it [04:47,  4.32it/s]
311it [00:20, 15.07it/s]

Epoch: 76 | train_loss: 0.0006 | train_acc: 0.9999 | train_F1_score: 0.9999 | train_recall: 0.9999 | train_precision: 0.9999 | val_loss: 0.0062 | val_acc: 0.9989 | val_F1_score: 0.9986 | val_recall: 0.9988 | val_precision: 0.9988



1243it [04:48,  4.31it/s]
311it [00:20, 15.42it/s]

Epoch: 77 | train_loss: 0.0008 | train_acc: 0.9998 | train_F1_score: 0.9996 | train_recall: 0.9996 | train_precision: 0.9996 | val_loss: 0.0057 | val_acc: 0.9990 | val_F1_score: 0.9989 | val_recall: 0.9990 | val_precision: 0.9991



1243it [04:47,  4.33it/s]
311it [00:20, 15.42it/s]

Epoch: 78 | train_loss: 0.0013 | train_acc: 0.9998 | train_F1_score: 0.9997 | train_recall: 0.9998 | train_precision: 0.9997 | val_loss: 0.0033 | val_acc: 0.9994 | val_F1_score: 0.9992 | val_recall: 0.9994 | val_precision: 0.9993



1243it [04:47,  4.33it/s]
311it [00:20, 15.37it/s]

Epoch: 79 | train_loss: 0.0006 | train_acc: 0.9998 | train_F1_score: 0.9998 | train_recall: 0.9999 | train_precision: 0.9998 | val_loss: 0.0029 | val_acc: 0.9994 | val_F1_score: 0.9992 | val_recall: 0.9993 | val_precision: 0.9993



1243it [04:46,  4.33it/s]
311it [00:20, 15.38it/s]

Epoch: 80 | train_loss: 0.0010 | train_acc: 0.9998 | train_F1_score: 0.9998 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0043 | val_acc: 0.9992 | val_F1_score: 0.9989 | val_recall: 0.9991 | val_precision: 0.9991



1243it [04:47,  4.33it/s]
311it [00:20, 15.34it/s]

Epoch: 81 | train_loss: 0.0004 | train_acc: 0.9999 | train_F1_score: 0.9999 | train_recall: 0.9999 | train_precision: 0.9999 | val_loss: 0.0031 | val_acc: 0.9993 | val_F1_score: 0.9992 | val_recall: 0.9994 | val_precision: 0.9993



1243it [04:47,  4.32it/s]
311it [00:20, 15.32it/s]


Epoch: 82 | train_loss: 0.0020 | train_acc: 0.9996 | train_F1_score: 0.9995 | train_recall: 0.9996 | train_precision: 0.9995 | val_loss: 0.0061 | val_acc: 0.9987 | val_F1_score: 0.9985 | val_recall: 0.9986 | val_precision: 0.9987


1243it [04:47,  4.33it/s]
311it [00:20, 15.30it/s]

Epoch: 83 | train_loss: 0.0007 | train_acc: 0.9999 | train_F1_score: 0.9998 | train_recall: 0.9999 | train_precision: 0.9998 | val_loss: 0.0050 | val_acc: 0.9992 | val_F1_score: 0.9991 | val_recall: 0.9991 | val_precision: 0.9992



1243it [04:47,  4.32it/s]
311it [00:20, 15.44it/s]

Epoch: 84 | train_loss: 0.0008 | train_acc: 0.9999 | train_F1_score: 0.9998 | train_recall: 0.9999 | train_precision: 0.9998 | val_loss: 0.0045 | val_acc: 0.9991 | val_F1_score: 0.9991 | val_recall: 0.9993 | val_precision: 0.9992



1243it [04:47,  4.33it/s]
311it [00:20, 15.39it/s]

Epoch: 85 | train_loss: 0.0009 | train_acc: 0.9998 | train_F1_score: 0.9997 | train_recall: 0.9997 | train_precision: 0.9997 | val_loss: 0.0039 | val_acc: 0.9992 | val_F1_score: 0.9990 | val_recall: 0.9991 | val_precision: 0.9992



1243it [04:47,  4.32it/s]
311it [00:20, 15.32it/s]

Epoch: 86 | train_loss: 0.0006 | train_acc: 0.9999 | train_F1_score: 0.9998 | train_recall: 0.9999 | train_precision: 0.9998 | val_loss: 0.0095 | val_acc: 0.9986 | val_F1_score: 0.9985 | val_recall: 0.9987 | val_precision: 0.9987



1243it [04:47,  4.32it/s]
311it [00:20, 15.41it/s]

Epoch: 87 | train_loss: 0.0008 | train_acc: 0.9998 | train_F1_score: 0.9998 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0058 | val_acc: 0.9990 | val_F1_score: 0.9987 | val_recall: 0.9989 | val_precision: 0.9990



1243it [04:47,  4.33it/s]
311it [00:20, 15.41it/s]

Epoch: 88 | train_loss: 0.0004 | train_acc: 0.9999 | train_F1_score: 0.9999 | train_recall: 0.9999 | train_precision: 0.9999 | val_loss: 0.0093 | val_acc: 0.9984 | val_F1_score: 0.9977 | val_recall: 0.9981 | val_precision: 0.9978



1243it [04:47,  4.32it/s]
311it [00:20, 15.47it/s]

Epoch: 89 | train_loss: 0.0008 | train_acc: 0.9999 | train_F1_score: 0.9998 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0063 | val_acc: 0.9990 | val_F1_score: 0.9988 | val_recall: 0.9989 | val_precision: 0.9989



1243it [04:47,  4.33it/s]
311it [00:20, 15.44it/s]

Epoch: 90 | train_loss: 0.0006 | train_acc: 0.9999 | train_F1_score: 0.9999 | train_recall: 0.9999 | train_precision: 0.9999 | val_loss: 0.0052 | val_acc: 0.9992 | val_F1_score: 0.9990 | val_recall: 0.9992 | val_precision: 0.9991



1243it [04:47,  4.33it/s]
311it [00:20, 15.36it/s]

Epoch: 91 | train_loss: 0.0009 | train_acc: 0.9998 | train_F1_score: 0.9997 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0068 | val_acc: 0.9989 | val_F1_score: 0.9988 | val_recall: 0.9990 | val_precision: 0.9990



1243it [04:47,  4.32it/s]
311it [00:20, 15.41it/s]

Epoch: 92 | train_loss: 0.0014 | train_acc: 0.9997 | train_F1_score: 0.9996 | train_recall: 0.9997 | train_precision: 0.9996 | val_loss: 0.0061 | val_acc: 0.9989 | val_F1_score: 0.9986 | val_recall: 0.9987 | val_precision: 0.9988



1243it [04:47,  4.32it/s]
311it [00:20, 15.36it/s]

Epoch: 93 | train_loss: 0.0008 | train_acc: 0.9998 | train_F1_score: 0.9998 | train_recall: 0.9998 | train_precision: 0.9998 | val_loss: 0.0045 | val_acc: 0.9992 | val_F1_score: 0.9990 | val_recall: 0.9992 | val_precision: 0.9990



817it [03:08,  4.31it/s]

In [None]:
# Save Model
torch.save(
        obj=model.state_dict(),
        f=f"/kaggle/working/128_epochs.pth"
)