In [2]:
import torch
import argparse
import pickle
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
import time

def one_hot_encode(labels, num_classes):
    """Return the one-hot encoding of an integer label tensor.

    Thin wrapper around ``torch.nn.functional.one_hot``.

    Args:
        labels: integer (``int64``) tensor of class indices.
        num_classes: size of the trailing one-hot dimension.

    Returns:
        An ``int64`` tensor shaped ``labels.shape + (num_classes,)``.
    """
    encoded = torch.nn.functional.one_hot(labels, num_classes)
    return encoded

def load_data(data_path, batch_size, num_workers):
    """Build the train / val / test ``DataLoader`` objects for an ImageFolder tree.

    ``data_path`` must contain ``train``, ``val`` and ``test`` sub-directories,
    each laid out one folder per class (the ``ImageFolder`` convention).

    Random augmentation (flip, colour jitter, rotation, blur) is applied to the
    training split only. The validation and test splits are just resized and
    normalised, so evaluation metrics are deterministic and comparable between
    epochs.

    Args:
        data_path: root directory holding the three split folders.
        batch_size: batch size used by all three loaders.
        num_workers: number of worker processes per loader.

    Returns:
        ``(train_loader, val_loader, test_loader)``. Only the training loader
        shuffles.
    """
    # Local imports: the cell defining this function does not import these.
    import os
    import warnings

    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    # Stochastic augmentation pipeline -- training data only.
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        transforms.RandomRotation(degrees=10),
        transforms.GaussianBlur(kernel_size=3),
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        normalize,
    ])

    # Deterministic pipeline for evaluation: same size and normalisation,
    # no random perturbation.
    eval_transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        normalize,
    ])

    print("Starting to load data...")

    # Create ImageFolder datasets for train, val, and test
    train_dataset = ImageFolder(root=os.path.join(data_path, 'train'), transform=train_transform)
    val_dataset = ImageFolder(root=os.path.join(data_path, 'val'), transform=eval_transform)
    test_dataset = ImageFolder(root=os.path.join(data_path, 'test'), transform=eval_transform)

    # ImageFolder assigns indices from each split's own folder listing, so a
    # class missing from one split would silently shift that split's labels.
    for split_name, split_dataset in (('val', val_dataset), ('test', test_dataset)):
        if split_dataset.classes != train_dataset.classes:
            warnings.warn(
                f"Class folders of the '{split_name}' split differ from the 'train' split; "
                "label indices will not line up."
            )

    # Create DataLoaders for train, val, and test
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    print("\n")
    for label, class_name in enumerate(train_dataset.classes):
        print(f"{class_name} has Class Index {label}\n")

    # No pass over the loader is made here: decoding every training image only
    # to build labels that are thrown away costs a full epoch of I/O.
    print("\nData loading completed.")

    return train_loader, val_loader, test_loader

# Example usage: build the three loaders for the Birds_25 dataset.
data_path = "/kaggle/input/data-files/Birds_25"
batch_size = 32
num_workers = 4
train_loader, val_loader, test_loader = load_data(
    data_path=data_path,
    batch_size=batch_size,
    num_workers=num_workers,
)


Starting to load data...


Asian-Green-Bee-Eater has Class Index 0

Brown-Headed-Barbet has Class Index 1

Cattle-Egret has Class Index 2

Common-Kingfisher has Class Index 3

Common-Myna has Class Index 4

Common-Rosefinch has Class Index 5

Common-Tailorbird has Class Index 6

Coppersmith-Barbet has Class Index 7

Forest-Wagtail has Class Index 8

Gray-Wagtail has Class Index 9

Hoopoe has Class Index 10

House-Crow has Class Index 11

Indian-Grey-Hornbill has Class Index 12

Indian-Peacock has Class Index 13

Indian-Pitta has Class Index 14

Indian-Roller has Class Index 15

Jungle-Babbler has Class Index 16

Northern-Lapwing has Class Index 17

Red-Wattled-Lapwing has Class Index 18

Ruddy-Shelduck has Class Index 19

Rufous-Treepie has Class Index 20

Sarus-Crane has Class Index 21

White-Breasted-Kingfisher has Class Index 22

White-Breasted-Waterhen has Class Index 23

White-Wagtail has Class Index 24


Data loading completed.


In [3]:
import torch
import torch.nn as nn

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class IdentityNorm(nn.Module):
    """No-op normalisation layer with the same constructor as the other norms.

    ``forward`` returns its input unchanged. The learnable ``gamma`` / ``beta``
    parameters (shape ``(1, num_features, 1, 1)``) are created and initialised
    but are not used by ``forward``.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super().__init__()
        self.momentum = momentum
        # Placeholders only; nothing in this class updates or reads them.
        self.running_mean = 0
        self.running_var = 0
        self.eps = torch.tensor(eps)
        self.num_features = num_features

        param_shape = (1, num_features, 1, 1)
        self.gamma = nn.Parameter(torch.empty(param_shape))
        self.beta = nn.Parameter(torch.empty(param_shape))
        self._param_init()

    def _param_init(self):
        """Set the scale to one and the shift to zero."""
        nn.init.zeros_(self.beta)
        nn.init.ones_(self.gamma)

    def forward(self, x):
        """Return ``x`` untouched."""
        return x


class StandardBatchNorm(nn.Module):
    """Batch normalisation over the ``(N, H, W)`` axes of an ``(N, C, H, W)`` tensor.

    In training mode the batch statistics are used for normalisation and
    folded into exponential running averages. In eval mode the running
    averages are used instead.

    The running statistics are registered as buffers so that they
    * are included in ``state_dict()`` (a checkpoint without them cannot be
      evaluated), and
    * follow the module through ``.to(device)`` / ``.cuda()``.
    ``running_var`` starts at one, so an untrained layer in eval mode is close
    to the identity instead of dividing by ``sqrt(eps)``.

    Args:
        num_features: number of channels ``C``.
        eps: constant added to the variance for numerical stability.
        momentum: weight of the current batch in the running averages.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super(StandardBatchNorm, self).__init__()
        self.momentum = momentum
        self.eps = torch.tensor(eps)
        self.num_features = num_features
        shape = (1, self.num_features, 1, 1)
        self.gamma = nn.Parameter(torch.empty(shape))
        self.beta = nn.Parameter(torch.empty(shape))
        self.register_buffer("running_mean", torch.zeros(shape))
        self.register_buffer("running_var", torch.ones(shape))

        self._param_init()

    def _param_init(self):
        """Set the scale to one and the shift to zero."""
        nn.init.zeros_(self.beta)
        nn.init.ones_(self.gamma)

    def forward(self, x):
        """Normalise ``x`` per channel, then apply the affine ``gamma`` / ``beta``."""
        if self.training:
            # Number of values that contribute to each channel statistic.
            n = x.numel() / x.size(1)
            dimensions = (0, 2, 3)
            var = x.var(dim=dimensions, keepdim=True, unbiased=False)
            mean = x.mean(dim=dimensions, keepdim=True)

            with torch.no_grad():
                # The running variance tracks the unbiased estimate; with a
                # single value per channel there is no correction to apply.
                bessel = n / (n - 1) if n > 1 else 1.0
                # In-place updates keep the registered buffers (a plain
                # re-assignment is avoided on purpose).
                self.running_mean.copy_(
                    self.momentum * mean + (1 - self.momentum) * self.running_mean
                )
                self.running_var.copy_(
                    self.momentum * bessel * var + (1 - self.momentum) * self.running_var
                )
        else:
            mean = self.running_mean
            var = self.running_var

        x = (x - mean) / torch.sqrt(var + self.eps)
        return x * self.gamma + self.beta


class InstanceNorm(nn.Module):
    """Instance normalisation: each ``(sample, channel)`` plane is normalised alone.

    Statistics are taken over the spatial axes ``(H, W)`` of an
    ``(N, C, H, W)`` input, identically in training and eval mode. The biased
    (population) variance is used, matching the standard definition; it also
    stays finite for 1x1 feature maps, where the unbiased estimator is NaN.

    Args:
        num_features: number of channels ``C``.
        eps: constant added to the variance for numerical stability.
        momentum: accepted for signature parity with the batch-norm layers;
            stored but not used by ``forward``.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super(InstanceNorm, self).__init__()
        self.momentum = momentum
        # Kept for attribute parity with the batch-norm layers; never updated.
        self.running_mean = 0
        self.running_var = 0
        self.eps = torch.tensor(eps)
        self.num_features = num_features
        shape = (1, self.num_features, 1, 1)

        self.gamma = nn.Parameter(torch.empty(shape))
        self.beta = nn.Parameter(torch.empty(shape))

        self._param_init()

    def _param_init(self):
        """Set the scale to one and the shift to zero."""
        nn.init.zeros_(self.beta)
        nn.init.ones_(self.gamma)

    def forward(self, x):
        """Normalise every channel plane of every sample, then apply ``gamma`` / ``beta``."""
        N, C, H, W = x.shape

        assert C == self.num_features
        dimensions = (2, 3)
        mean = x.mean(dim=dimensions, keepdim=True)
        var = x.var(dim=dimensions, keepdim=True, unbiased=False)
        x = (x - mean) / torch.sqrt(var + self.eps)

        return x * self.gamma + self.beta


class LayerNorm(nn.Module):
    """Layer normalisation: each sample is normalised over all of ``(C, H, W)``.

    Statistics are computed per sample, identically in training and eval mode,
    using the biased (population) variance as in the standard definition. The
    affine ``gamma`` / ``beta`` are per channel, shape
    ``(1, num_features, 1, 1)``.

    Args:
        num_features: number of channels ``C``.
        eps: constant added to the variance for numerical stability.
        momentum: accepted for signature parity with the other norm layers;
            unused.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super(LayerNorm, self).__init__()
        self.eps = torch.tensor(eps)
        self.num_features = num_features
        shape = (1, self.num_features, 1, 1)

        self.gamma = nn.Parameter(torch.empty(shape))
        self.beta = nn.Parameter(torch.empty(shape))

        self._param_init()

    def _param_init(self):
        """Set the scale to one and the shift to zero."""
        nn.init.zeros_(self.beta)
        nn.init.ones_(self.gamma)

    def forward(self, x):
        """Normalise each sample over its channel and spatial axes, then apply ``gamma`` / ``beta``."""
        N, C, H, W = x.shape

        assert C == self.num_features
        dimensions = (1, 2, 3)
        mean = x.mean(dim=dimensions, keepdim=True)
        var = x.var(dim=dimensions, keepdim=True, unbiased=False)
        x = (x - mean) / torch.sqrt(var + self.eps)

        return x * self.gamma + self.beta



class GroupNorm(nn.Module):
    """Group normalisation over an ``(N, C, H, W)`` tensor.

    The ``C`` channels are split into ``group`` contiguous groups. Each
    ``(sample, group)`` slice is normalised over its channels and both spatial
    axes, using the biased (population) variance. Behaviour is the same in
    training and eval mode.

    Args:
        num_features: number of channels ``C``; must be divisible by ``group``.
        eps: constant added to the variance for numerical stability.
        group: number of channel groups.
    """

    def __init__(self, num_features, eps=1e-5, group=4):
        super(GroupNorm,self).__init__()
        self.eps = torch.tensor(eps)
        self.num_features = num_features
        self.group = group
        shape = (1, self.num_features, 1, 1)

        self.gamma = nn.Parameter(torch.empty(shape))
        self.beta = nn.Parameter(torch.empty(shape))

        self._param_init()

    def _param_init(self):
        """Set the scale to one and the shift to zero."""
        nn.init.zeros_(self.beta)
        nn.init.ones_(self.gamma)

    def forward(self, x):
        """Normalise each channel group of each sample, then apply ``gamma`` / ``beta``."""
        N, C, H, W = x.shape
        assert C % self.group == 0
        assert self.num_features == C

        # reshape (not view) so non-contiguous inputs are accepted as well.
        grouped = x.reshape(N, self.group, C // self.group, H, W)
        # Reduce over channels-in-group, height and width. The sample axis (0)
        # and the group axis (1) are kept, so every group has its own
        # statistics.
        dimensions = (2, 3, 4)
        mean = grouped.mean(dim=dimensions, keepdim=True)
        var = grouped.var(dim=dimensions, keepdim=True, unbiased=False)
        grouped = (grouped - mean) / torch.sqrt(var + self.eps)
        x = grouped.reshape(N, C, H, W)

        return x * self.gamma + self.beta

class BatchInstanceNorm(nn.Module):
    """Fixed-weight blend of batch normalisation and instance normalisation.

    Output is ``rho * BN(x) + (1 - rho) * IN(x)`` followed by the per-channel
    affine ``gamma`` / ``beta``. ``rho`` is a constant given at construction;
    it is not a learnable parameter in this implementation.

    The batch-norm branch uses batch statistics in training mode (and updates
    the running averages) and the running averages in eval mode. The running
    statistics are registered buffers so they are saved in ``state_dict()`` and
    moved by ``.to(device)``. The instance-norm branch always uses per-sample
    statistics with the biased (population) variance.

    Args:
        num_features: number of channels ``C``.
        momentum: weight of the current batch in the running averages.
        eps: constant added to the variances for numerical stability.
        rho: weight of the batch-norm branch.
    """

    def __init__(self, num_features, momentum = 0.1, eps=1e-5, rho=0.5):
        super(BatchInstanceNorm, self).__init__()
        self.momentum = momentum
        self.eps = torch.tensor(eps)
        self.num_features = num_features
        self.rho = rho
        shape = (1, self.num_features, 1, 1)

        self.gamma = nn.Parameter(torch.empty(shape))
        self.beta = nn.Parameter(torch.empty(shape))
        self.register_buffer("running_mean", torch.zeros(shape))
        self.register_buffer("running_var", torch.ones(shape))

        self._param_init()

    def _param_init(self):
        """Set the scale to one and the shift to zero."""
        nn.init.zeros_(self.beta)
        nn.init.ones_(self.gamma)

    def forward(self, x):
        """Blend the batch-normalised and instance-normalised versions of ``x``."""
        if self.training:
            # Number of values that contribute to each channel statistic.
            n = x.numel() / x.size(1)
            dimensions = (0, 2, 3)
            var_bn = x.var(dim=dimensions, keepdim=True, unbiased=False)
            mean_bn = x.mean(dim=dimensions, keepdim=True)

            with torch.no_grad():
                # Unbiased estimate for the running variance; no correction
                # is possible with a single value per channel.
                bessel = n / (n - 1) if n > 1 else 1.0
                self.running_mean.copy_(
                    self.momentum * mean_bn + (1 - self.momentum) * self.running_mean
                )
                self.running_var.copy_(
                    self.momentum * bessel * var_bn + (1 - self.momentum) * self.running_var
                )
        else:
            mean_bn = self.running_mean
            var_bn = self.running_var

        x_bn = (x - mean_bn) / torch.sqrt(var_bn + self.eps)

        # Instance-norm branch: per-sample, per-channel spatial statistics.
        dimensions = (2, 3)
        mean_in = x.mean(dim=dimensions, keepdim=True)
        var_in = x.var(dim=dimensions, keepdim=True, unbiased=False)
        x_in = (x - mean_in) / torch.sqrt(var_in + self.eps)

        x = self.rho * x_bn + (1 - self.rho) * x_in

        return x * self.gamma + self.beta

In [4]:
import time 
import torch 
import torch.nn as nn 
import torch.optim as optim 
from tqdm import tqdm 
from sklearn.metrics import f1_score, accuracy_score
import numpy as np
import matplotlib.pyplot as plt 
# from resnet import ResNet, Block 
import argparse
import pickle
import os 

In [5]:

# from norm import StandardBatchNorm, InstanceNorm, BatchInstanceNorm, LayerNorm, GroupNorm, IdentityNorm


class Block(nn.Module):
    """Two-convolution residual block with a selectable normalisation layer.

    Layout: ``conv3x3(stride) -> norm -> ReLU -> conv3x3 -> norm``, then the
    shortcut is added and a final ReLU applied. The block outputs
    ``intermediate_channels * expansion`` channels (``expansion`` is 2).

    Args:
        in_channels: channels of the input tensor.
        intermediate_channels: channels after the first convolution.
        identity_downsample: optional module applied to the shortcut so that it
            matches the main path's shape; ``None`` uses the input unchanged.
        stride: stride of the first convolution.
        norm_type: one of ``'bn'``, ``'in'``, ``'gn'``, ``'bin'``, ``'ln'``,
            ``'nn'``.

    Raises:
        ValueError: if ``norm_type`` is not one of the keys above.
    """

    def __init__(self, in_channels, intermediate_channels, identity_downsample=None, stride=1, norm_type='bn'):
        super().__init__()
        self.expansion = 2
        out_channels = intermediate_channels * self.expansion

        self.conv1 = nn.Conv2d(in_channels, intermediate_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.nl1 = self._build_norm(norm_type, intermediate_channels)
        self.nl2 = self._build_norm(norm_type, out_channels)
        self.conv2 = nn.Conv2d(intermediate_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample
        self.stride = stride

    @staticmethod
    def _build_norm(norm_type, num_features):
        """Instantiate the normalisation layer selected by ``norm_type``."""
        # Lambdas defer the class lookup until the chosen entry is called.
        factories = {
            'bn': lambda: StandardBatchNorm(num_features),
            'in': lambda: InstanceNorm(num_features),
            'gn': lambda: GroupNorm(num_features=num_features),  # default group count
            'bin': lambda: BatchInstanceNorm(num_features),
            'ln': lambda: LayerNorm(num_features=num_features),
            'nn': lambda: IdentityNorm(num_features),
        }
        if norm_type not in factories:
            # Fail at construction time instead of with an AttributeError on
            # the first forward pass.
            raise ValueError(
                f"Invalid normalization type {norm_type!r}. "
                "Choose from 'bn', 'in', 'gn', 'bin', 'ln', or 'nn'."
            )
        return factories[norm_type]()

    def forward(self, x):
        """Run the residual block on ``x``."""
        # No copy needed: the input tensor is never modified in place below.
        identity = x

        x = self.conv1(x)
        x = self.nl1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.nl2(x)

        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)

        x += identity
        x = self.relu(x)
        return x

class ResNet(nn.Module):
    """Three-stage residual network with a selectable normalisation layer.

    Structure: ``conv3x3(16) -> norm -> ReLU`` stem, then three stages built by
    ``_make_layer`` with 16 / 32 / 64 intermediate channels (the last two use
    stride 2), adaptive average pooling to 8x8 and a linear classifier. The
    classifier expects ``128 * 8 * 8`` inputs, i.e. the 64 * 2 output channels
    of the last stage.

    Args:
        block: residual block class; called as
            ``block(in_channels, intermediate_channels, identity_downsample,
            stride, norm_type)``.
        layers: sequence with the number of blocks in each of the three stages.
        image_channels: channels of the input image.
        num_classes: size of the output layer.
        norm_type: one of ``'bn'``, ``'in'``, ``'gn'``, ``'bin'``, ``'ln'``,
            ``'nn'``.

    Raises:
        ValueError: if ``norm_type`` is not one of the keys above.
    """

    def __init__(self, block, layers, image_channels, num_classes, norm_type):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv1 = nn.Conv2d(image_channels, 16, kernel_size=3, stride=1, padding=1, bias=False)

        self.norm = self._build_norm(norm_type, 16)
        # Aliases of the stem norm's affine parameters, kept so that existing
        # attribute access and checkpoint keys continue to work.
        self.gamma = self.norm.gamma
        self.beta = self.norm.beta
        self.relu = nn.ReLU()

        self.layer1 = self._make_layer(block, layers[0], intermediate_channels=16, stride=1, norm_type=norm_type)
        self.layer2 = self._make_layer(block, layers[1], intermediate_channels=32, stride=2, norm_type=norm_type)
        self.layer3 = self._make_layer(block, layers[2], intermediate_channels=64, stride=2, norm_type=norm_type)

        self.avgpool = nn.AdaptiveAvgPool2d((8,8))
        self.fc = nn.Linear(128 * 8*8, num_classes)

    @staticmethod
    def _build_norm(norm_type, num_features):
        """Instantiate the normalisation layer selected by ``norm_type``."""
        # Lambdas defer the class lookup until the chosen entry is called.
        factories = {
            'bn': lambda: StandardBatchNorm(num_features),
            'in': lambda: InstanceNorm(num_features),
            'gn': lambda: GroupNorm(num_features=num_features, group=4),
            'bin': lambda: BatchInstanceNorm(num_features),
            'ln': lambda: LayerNorm(num_features),
            'nn': lambda: IdentityNorm(num_features),
        }
        if norm_type not in factories:
            raise ValueError(
                f"Invalid normalization type {norm_type!r}. "
                "Choose from 'bn', 'in', 'gn', 'bin', 'ln', or 'nn'."
            )
        return factories[norm_type]()

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = self.conv1(x)
        x = self.norm(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)

        return x

    def _make_layer(self, block, num_residual_blocks, intermediate_channels, stride, norm_type):
        """Build one stage of ``num_residual_blocks`` blocks.

        Only the first block receives ``stride`` and, when the shortcut's shape
        would not match the main path, a 1x1-convolution + norm projection.
        Updates ``self.in_channels`` to the stage's output width.
        """
        out_channels = intermediate_channels * 2

        identity_downsample = None
        if stride != 1 or self.in_channels != out_channels:
            identity_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                self._build_norm(norm_type, out_channels),
            )

        layers = [block(self.in_channels, intermediate_channels, identity_downsample, stride, norm_type)]
        self.in_channels = out_channels

        for _ in range(num_residual_blocks - 1):
            layers.append(block(self.in_channels, intermediate_channels, norm_type=norm_type))

        return nn.Sequential(*layers)


In [6]:
# Bare expression: shows the training DataLoader's repr as the cell output.
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7f3689d95180>

In [7]:
# Bare expression: shows the validation DataLoader's repr as the cell output.
val_loader

<torch.utils.data.dataloader.DataLoader at 0x7f3689d95210>

In [8]:
import os
def train(model_name, n, batch_size, num_epochs, use_early_stopping, patience, num_classes, opt, lr,norm_type, num_workers,train_loader, val_loader):
    """Train a ``ResNet`` classifier and record per-epoch metrics.

    For every epoch the model is trained on ``train_loader`` and evaluated on
    ``val_loader``. Loss, accuracy, micro-F1 and macro-F1 of both passes are
    printed and appended to
    ``/kaggle/working/model_name_<model_name>/<model_name>_metrics.csv``, and
    the current weights are saved to ``<model_name>_model.pth`` in the same
    folder (overwritten each epoch). After training, four train-vs-validation
    curves are plotted and saved as PNG files in that folder.

    Args:
        model_name: run identifier used for the output folder and file names.
        n: number of residual blocks in each of the three ResNet stages.
        batch_size: unused here (the loaders are already built).
        num_epochs: maximum number of epochs.
        use_early_stopping: stop once the validation loss has failed to improve
            for ``patience`` consecutive epochs.
        patience: see ``use_early_stopping``.
        num_classes: size of the model's output layer.
        opt: optimizer name: ``"SGD"``, ``"AdaGrad"``, ``"RMSprop"`` or ``"Adam"``.
        lr: learning rate.
        norm_type: normalisation key forwarded to ``ResNet``.
        num_workers: unused here (the loaders are already built).
        train_loader: iterable of ``(images, labels)`` training batches.
        val_loader: iterable of ``(images, labels)`` validation batches.

    Returns:
        Eight per-epoch lists: train loss, train accuracy, train micro-F1,
        train macro-F1, then the same four for validation.

    Raises:
        ValueError: if ``opt`` is not a supported optimizer name.
    """
    optimizer_classes = {
        "SGD": optim.SGD,
        "AdaGrad": optim.Adagrad,
        "RMSprop": optim.RMSprop,
        "Adam": optim.Adam,
    }
    if opt not in optimizer_classes:
        # Fail before any work is done instead of with a NameError mid-run.
        raise ValueError(
            f"Unknown optimizer {opt!r}. Choose from {', '.join(optimizer_classes)}."
        )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    print(f"\nNorm in Train is: {norm_type}\n")
    model = ResNet(Block, [n, n, n], 3, num_classes, norm_type).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer_classes[opt](model.parameters(), lr=lr)

    train_losses = []
    train_accuracies = []
    train_micro_f1_scores = []
    train_macro_f1_scores = []
    val_losses = []
    val_accuracies = []
    val_micro_f1_scores = []
    val_macro_f1_scores = []

    # Early stopping state
    early_stopping_counter = 0
    best_val_loss = np.inf

    folder_name = f'/kaggle/working/model_name_{model_name}'
    os.makedirs(folder_name, exist_ok=True)

    csv_file_path = os.path.join(folder_name, f'{model_name}_metrics.csv')
    with open(csv_file_path, 'w') as f:
        f.write("Epoch,Train Loss,Train Accuracy,Train Micro F1,Train Macro F1,Val Loss,Val Accuracy,Val Micro F1,Val Macro F1\n")

    def run_epoch(loader, desc, training):
        """Run one pass over ``loader``; return (loss, accuracy, micro-F1, macro-F1, seconds)."""
        start_time = time.time()
        model.train(training)
        running_loss = 0
        all_labels = []
        all_predictions = []

        # Gradients are tracked only for the training pass.
        with torch.set_grad_enabled(training):
            for data, labels in tqdm(loader, desc=desc, leave=False):
                x = data.to(device)
                y = labels.to(device)

                if training:
                    optimizer.zero_grad()
                y_hat = model(x)
                loss = criterion(y_hat, y)
                if training:
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item()
                all_labels.extend(y.cpu().numpy())
                all_predictions.extend(torch.argmax(y_hat, dim=1).cpu().numpy())

        elapsed = time.time() - start_time
        average_loss = running_loss / len(loader)
        accuracy = accuracy_score(all_labels, all_predictions)
        micro_f1 = f1_score(all_labels, all_predictions, average='micro')
        macro_f1 = f1_score(all_labels, all_predictions, average='macro')
        return average_loss, accuracy, micro_f1, macro_f1, elapsed

    # Training loop
    total_start_time = time.time()
    for epoch in range(num_epochs):
        average_loss, accuracy, micro_f1, macro_f1, epoch_train_time = run_epoch(
            train_loader, f'Training - Epoch {epoch + 1}/{num_epochs}', training=True
        )
        print(f"Train - Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss:.4f}, Accuracy: {accuracy:.4f}, Micro F1: {micro_f1:.4f}, Macro F1: {macro_f1:.4f}, Time: {epoch_train_time:.2f} seconds")

        train_losses.append(average_loss)
        train_accuracies.append(accuracy)
        train_micro_f1_scores.append(micro_f1)
        train_macro_f1_scores.append(macro_f1)

        val_average_loss, val_accuracy, val_micro_f1, val_macro_f1, epoch_val_time = run_epoch(
            val_loader, f'Validation - Epoch {epoch + 1}/{num_epochs}', training=False
        )
        print(f"Validation - Epoch [{epoch + 1}/{num_epochs}], Loss: {val_average_loss:.4f}, Accuracy: {val_accuracy:.4f}, Micro F1: {val_micro_f1:.4f}, Macro F1: {val_macro_f1:.4f}, Time: {epoch_val_time:.2f} seconds")

        val_losses.append(val_average_loss)
        val_accuracies.append(val_accuracy)
        val_micro_f1_scores.append(val_micro_f1)
        val_macro_f1_scores.append(val_macro_f1)

        # Persist this epoch before the early-stopping decision, so the final
        # epoch of a stopped run is logged and checkpointed too.
        with open(csv_file_path, 'a') as f:
            f.write(f"{epoch + 1},{average_loss},{accuracy},{micro_f1},{macro_f1},{val_average_loss},{val_accuracy},{val_micro_f1},{val_macro_f1}\n")

        print("File created and saved successfully!!\n")

        torch.save(model.state_dict(), f'{folder_name}/{model_name}_model.pth')
        print("Saved Model !!")

        if use_early_stopping:
            if val_average_loss < best_val_loss:
                best_val_loss = val_average_loss
                early_stopping_counter = 0
            else:
                early_stopping_counter += 1

            if early_stopping_counter >= patience:
                print(f"Early stopping triggered after {epoch + 1} epochs without improvement.")
                break

    total_train_time = time.time() - total_start_time
    print(f"Total Training Time: {total_train_time / 60:.2f} minutes")

    def plot_with_grid(x, train_data, val_data, train_label, val_label, xlabel, ylabel, title, x_interval=5, folder_name=None):
        """Plot a train and a validation curve; save the figure when ``folder_name`` is given."""
        if len(x) == 0:
            # Nothing was recorded (e.g. num_epochs == 0): min()/max() below
            # would raise on an empty sequence.
            return

        plt.figure(figsize=(8, 4))
        plt.plot(x, train_data, label=train_label)
        plt.plot(x, val_data, label=val_label)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend()
        plt.xticks(np.arange(min(x), max(x)+1, x_interval))
        plt.grid(True, which='both', linestyle='--', linewidth=0.5)

        if folder_name is not None:
            os.makedirs(folder_name, exist_ok=True)
            file_path = os.path.join(folder_name, f"{title.replace(' ', '_').lower()}.png")
            plt.savefig(file_path)
        plt.show()

    # Training Loss
    plot_with_grid(range(len(train_losses)), train_losses, val_losses, 'Train', 'Validation', 'Epoch', 'Loss', 'Training and Validation Loss Curves', folder_name=folder_name)

    # Training Accuracy
    plot_with_grid(range(len(train_accuracies)), train_accuracies, val_accuracies, 'Train', 'Validation', 'Epoch', 'Accuracy', 'Training and Validation Accuracy Curves', folder_name=folder_name)

    # Micro F1 Scores
    plot_with_grid(range(len(train_micro_f1_scores)), train_micro_f1_scores, val_micro_f1_scores, 'Train Micro F1', 'Validation Micro F1', 'Epoch', 'F1 Score', 'Micro F1 Score Curves', folder_name=folder_name)

    # Macro F1 Scores
    plot_with_grid(range(len(train_macro_f1_scores)), train_macro_f1_scores, val_macro_f1_scores, 'Train Macro F1', 'Validation Macro F1', 'Epoch', 'F1 Score', 'Macro F1 Score Curves', folder_name=folder_name)

    return train_losses, train_accuracies, train_micro_f1_scores,train_macro_f1_scores, val_losses, val_accuracies,val_micro_f1_scores, val_macro_f1_scores

In [9]:
# Settings for one training run; all of them are passed to train() below.
model_name = "n2bs32ep50nc25optAdamlr-4normbnesF"  # names the output folder and files
n = 2                        # residual blocks per ResNet stage
batch_size = 32
num_epochs = 50
num_classes = 25
opt = "Adam"                 # optimizer name understood by train()
lr = 1e-4
norm_type = "bn"             # normalisation key understood by ResNet
num_workers = 4
use_early_stopping = False
patience = 2                 # only consulted when early stopping is enabled

In [None]:
# Launch the training run with the settings defined in the previous cell.
train(
    model_name=model_name,
    n=n,
    batch_size=batch_size,
    num_epochs=num_epochs,
    use_early_stopping=use_early_stopping,
    patience=patience,
    num_classes=num_classes,
    opt=opt,
    lr=lr,
    norm_type=norm_type,
    num_workers=num_workers,
    train_loader=train_loader,
    val_loader=val_loader,
)

cuda

Norm in Train is: bn



Training - Epoch 1/50:  62%|██████▏   | 574/920 [03:35<02:08,  2.69it/s]