In [1]:
# External libraries used for various tasks in later parts of the code
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import xml.etree.ElementTree as ET
from torch.utils.data import Dataset
from imutils import paths
import torch
import torch.nn as nn
import time
import numpy as np
import torch.optim as optim
from tqdm.notebook import tqdm
import os

# Check if running on Google Colab and set the data paths accordingly
is_running_on_colab = 'COLAB_GPU' in os.environ
if is_running_on_colab:
    from google.colab import drive
    drive.mount('/content/gdrive')
    data_path = '/content/gdrive/MyDrive/license_plate/data/'
    test_path = '/content/gdrive/MyDrive/license_plate/testdata/'
else:
    current_dir = os.getcwd()
    data_path = os.path.join(current_dir, 'dataset', 'license_plate', 'data')
    test_path = os.path.join(current_dir, 'dataset', 'license_plate', 'test_data')

# Function to check if directories exist
def check_directories_exist(*paths):
    for path in paths:
        assert os.path.isdir(path), f"The directory {path} does not exist."

# Check the directories
check_directories_exist(data_path, test_path)

data_link = "https://drive.google.com/open?id=1rdEsCUcIUaYOVRkx5IMTRNA7PcGMmSgc"

Mounted at /content/gdrive


In [2]:
from torch.utils.data import Dataset
import cv2
from imutils import paths
import os
import torch

class LabelFpsDataLoader(Dataset):
    def __init__(self, img_dirs, img_size, is_transform=None):
        self.img_paths = [p for img_dir in img_dirs for p in paths.list_images(img_dir)]
        self.img_size = img_size
        self.is_transform = is_transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, index):
        img_name = self.img_paths[index]
        img = cv2.imread(img_name)
        resized_image = self.transform_image(img)

        file_name = os.path.splitext(os.path.basename(img_name))[0]
        labels = self.extract_labels(file_name, img.shape)

        return resized_image, labels, img_name

    def transform_image(self, img):
        resized_image = cv2.resize(img, self.img_size).astype('float32') / 255.0
        # print(resized_image.dtype)
        # Normalization - Adjust mean and std if necessary
        mean = np.array([0.485, 0.456, 0.406], dtype=resized_image.dtype)
        std = np.array([0.229, 0.224, 0.225],dtype=resized_image.dtype)

        resized_image = (resized_image - mean) / std  # Normalize
        # print(resized_image.dtype)
        return resized_image

    def extract_labels(self, file_name, img_shape):
        parts = file_name.split('-')
        points = [self.parse_point(part) for part in parts[2].split('_')]
        ori_w, ori_h = img_shape[1], img_shape[0]
        scale_w, scale_h = self.img_size[0] / ori_w, self.img_size[1] / ori_h
        scaled_points = [(int(x * scale_w), int(y * scale_h)) for x, y in points]

        #rescale labels to 0,1
        scaled_points = [(x / self.img_size[0], y / self.img_size[1]) for x, y in scaled_points]

        flattened_labels = [coord for point in scaled_points for coord in point]
        return torch.tensor(flattened_labels, dtype=torch.float32)

    @staticmethod
    def parse_point(point_str):
        return tuple(int(coord) for coord in point_str.split('&'))


In [3]:
# Initialize your dataset
img_dirs = [data_path]  # Replace with actual paths to your image directories
img_size = (224, 224)  # Replace with your desired image size
dataset = LabelFpsDataLoader(img_dirs=img_dirs, img_size=img_size)  # Use 'img_dirs' as the correct argument name

# Test the length of the dataset
print(f"Length of dataset: {len(dataset)}")
#
# Test getting an item
try:
    # Retrieve the first item
    img, labels, img_name = dataset[0]

    # Check the shapes and types
    print(f"Image shape: {img.shape}")
    print(f"Labels: {labels}")
    print(f"Image name: {img_name}")

    # If the code reaches this point, the item has been retrieved successfully
    print("Item retrieval successful.")
except Exception as e:
    # If there is any error, print it out
    print(f"An error occurred: {e}")
finally:
    print("Got out here")


Length of dataset: 5769
Image shape: (224, 224, 3)
Labels: tensor([0.2321, 0.4330, 0.6250, 0.5045])
Image name: /content/gdrive/MyDrive/license_plate/data/train/0236447482639-90_96-168&506_451&590-444&590_171&587_168&506_451&513-0_0_3_24_30_26_30_33-114-166.jpg
Item retrieval successful.
Got out here


In [4]:
import numpy as np
import cv2
import matplotlib.pyplot as plt

def plot_img_and_boundingbox(image, labels):
    # Assuming image is a PyTorch tensor, we need to first move it to CPU if it's on GPU,
    # then detach it from the current graph, convert to numpy, and finally transpose it
    # from (channels, height, width) to (height, width, channels)
    numpy_array = image.cpu().detach().numpy()

    # Convert from BGR to RGB format if the image was read by OpenCV
    rgb_image = cv2.cvtColor(numpy_array, cv2.COLOR_BGR2RGB)

    # Extract numerical values from tensors and convert to integers
    left_up = (int(labels[0]), int(labels[1]))
    right_down = (int(labels[2]), int(labels[3]))

    # Draw rectangle on the image
    new_image = cv2.rectangle(rgb_image, left_up, right_down, (255, 0, 0), 2)

    # Display the image
    plt.imshow(new_image)
    plt.axis('off')  # Hide the axis
    plt.show()

# Example usage:
# img, labels, _ = dataset[0]  # Assuming this is how you get an image and labels from your dataset
# plot_img_and_boundingbox(img, labels)


In [5]:
import torch
import torch.nn as nn


# Function to create the modified classifier
def create_custom_classifier(input_features, hidden_units, output_features):
    classifier = nn.Sequential(
        nn.Linear(input_features, hidden_units),
        nn.ReLU(),
        nn.Dropout(0.0),
        #nn.BatchNorm1d(hidden_units),
        nn.Linear(hidden_units, output_features),
    )
    return classifier

# Load a pre-trained MobileNetV3 model
model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v3_large', pretrained=True)

# Replace the classifier with a new one - adjust the input_features to match the last layer of MobileNetV3
model.classifier = create_custom_classifier(input_features=960, hidden_units=100, output_features=4)
#Sets model to training mode,e this is important for layers that have different functionalities depending on training or eval. for example BatchNorm
model.train()
def get_new_model(n_hidden=100):
    model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v3_large', pretrained=True)
    model.classifier = create_custom_classifier(input_features=960, hidden_units=n_hidden, output_features=4)
    model.train()
    # Number of layers you want to fine-tune
    layers_of_classifier = len(model.classifier)

    # Get all the parameters from the model as a list
    parameters = list(model.parameters())

    # The total number of layers is the length of the parameters list
    number_of_layers = len(parameters)
    print(f"Total number of layers is {number_of_layers}")

    # Calculate the number of pretrained layers (all except the last 'layers_of_classifier')
    pretrained_layers = number_of_layers - layers_of_classifier
    print(f"Number of pretrained base layers is {pretrained_layers}")

    # Freeze the parameters of the pretrained base layers
    for param in parameters[:-layers_of_classifier]:
        param.requires_grad = False

    # Unfreeze the parameters of the last 'layers_of_classifier' layers
    for param in parameters[-layers_of_classifier:]:
        param.requires_grad = True
    return model



Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_large-8738ca79.pth
100%|██████████| 21.1M/21.1M [00:00<00:00, 139MB/s] 


In [6]:
# Number of layers you want to fine-tune
layers_of_classifier = len(model.classifier)

# Get all the parameters from the model as a list
parameters = list(model.parameters())

# The total number of layers is the length of the parameters list
number_of_layers = len(parameters)
print(f"Total number of layers is {number_of_layers}")

# Calculate the number of pretrained layers (all except the last 'layers_of_classifier')
pretrained_layers = number_of_layers - layers_of_classifier
print(f"Number of pretrained base layers is {pretrained_layers}")

# Freeze the parameters of the pretrained base layers
for param in parameters[:-layers_of_classifier]:
    param.requires_grad = False

# Unfreeze the parameters of the last 'layers_of_classifier' layers
for param in parameters[-layers_of_classifier:]:
    param.requires_grad = True

Total number of layers is 174
Number of pretrained base layers is 170


In [7]:
def check_requires_grad():
  for name, param in model.named_parameters(): # Just to check
    if name.startswith('classifier'):
        print(f'Layer {name} - requires_grad: {param.requires_grad}')
    if name.startswith('features'):
        print(f'Layer {name} - requires_grad: {param.requires_grad}')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# check_requires_grad()

In [8]:
# from datetime import datetime
# def bb_intersection_over_union(boxA, boxB): # https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
# 	# determine the (x, y)-coordinates of the intersection rectangle
# 	xA = max(boxA[0], boxB[0])
# 	yA = max(boxA[1], boxB[1])
# 	xB = min(boxA[2], boxB[2])
# 	yB = min(boxA[3], boxB[3])
# 	# compute the area of intersection rectangle
# 	interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
# 	# compute the area of both the prediction and ground-truth
# 	# rectangles
# 	boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
# 	boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
# 	# compute the intersection over union by taking the intersection
# 	# area and dividing it by the sum of prediction + ground-truth
# 	# areas - the interesection area
# 	iou = interArea / float(boxAArea + boxBArea - interArea)
# 	# return the intersection over union value
# 	return iou

# def calculate_true_positives_in_batch(y_pred, y_batch):
#   threshold = 0.5
#   true_positive_amount = 0
#   for i in range(y_pred.shape[0]):
#     y_pred_numpy = y_pred[i].cpu().detach().numpy()
#     y_batch_numpy = y_batch[i].cpu().detach().numpy()
#     iou = bb_intersection_over_union(y_pred_numpy, y_batch_numpy)
#     if iou > threshold:
#       true_positive_amount +=1
#   return true_positive_amount

# #wrapper of tqdm to enable toggling it off easily
# def toggle_tqdm(iterable, use_tqdm=True):
#   if use_tqdm:
#     return tqdm(iterable)
#   else:
#     return iterable

# def calculate_loss_and_accuracy(loader):
#   total_loss = 0
#   total_true_positives = 0
#   total_samples = 0

#   for X_batch_, y_batch, img_name in loader:
#       X_batch = X_batch_.permute(0,3,1,2).to(device)
#       y_batch = y_batch.to(device)
#       y_pred = model(X_batch)

#       total_true_positives += calculate_true_positives_in_batch(y_pred, y_batch)

#       loss = loss_fn(y_pred, y_batch)
#       total_loss += loss.item() * len(y_batch)  # Accumulate scaled loss
#       total_samples += len(y_batch)  # Accumulate number of samples
#   sample_loss = total_loss / total_samples  # Calculate average loss
#   accuracy = total_true_positives / total_samples
#   return sample_loss, accuracy

# def save_model(model):
#   # Get the current time
#   current_time = datetime.now()

#   # Format the time in a 'YearMonthDay_HourMinuteSecond' format for the filename
#   timestamp = current_time.strftime('%Y%m%d_%H%M')
#   model_path = f'/content/gdrive/MyDrive/license_plate/models/model_weights_{timestamp}.pth'
#   torch.save(model.state_dict(), model_path)

# def load_model(path):
#   model.load_state_dict(torch.load(path))


In [9]:
def save_model(model):
  # Get the current time
  current_time = datetime.now()

  # Format the time in a 'YearMonthDay_HourMinuteSecond' format for the filename
  timestamp = current_time.strftime('%Y%m%d_%H%M')
  model_path = f'/content/gdrive/MyDrive/license_plate/models/model_weights_{timestamp}.pth'
  torch.save(model.state_dict(), model_path)

def load_model(path):
  model.load_state_dict(torch.load(path))

In [10]:
def bb_intersection_over_union(boxA, boxB):
    # Compute the intersection over union on GPU
    # Assumes boxA and boxB are tensors
    xA = torch.max(boxA[..., 0], boxB[..., 0])
    yA = torch.max(boxA[..., 1], boxB[..., 1])
    xB = torch.min(boxA[..., 2], boxB[..., 2])
    yB = torch.min(boxA[..., 3], boxB[..., 3])

    interArea = torch.clamp(xB - xA, min=0) * torch.clamp(yB - yA, min=0)

    boxAArea = (boxA[..., 2] - boxA[..., 0]) * (boxA[..., 3] - boxA[..., 1])
    boxBArea = (boxB[..., 2] - boxB[..., 0]) * (boxB[..., 3] - boxB[..., 1])

    iou = interArea / (boxAArea + boxBArea - interArea)
    return iou

def calculate_true_positives_in_batch(y_pred, y_batch, threshold=0.5):
    # Calculate IoU for the batch
    iou = bb_intersection_over_union(y_pred, y_batch)

    # Count true positives in the batch
    true_positives = torch.sum(iou > threshold).item()
    return true_positives

def calculate_loss_and_accuracy(loader, model, loss_fn, device):
    model.eval()  # Set model to evaluation mode
    total_loss = 0
    total_true_positives = 0
    total_samples = 0

    with torch.no_grad():  # Disable gradient computation
        for X_batch, y_batch, _ in loader:
            X_batch = X_batch.permute(0,3,1,2).to(device)
            y_batch = y_batch.to(device)
            y_pred = model(X_batch)

            total_true_positives += calculate_true_positives_in_batch(y_pred, y_batch)

            loss = loss_fn(y_pred, y_batch)
            total_loss += loss.item() * X_batch.size(0)  # Accumulate scaled loss
            total_samples += X_batch.size(0)  # Accumulate number of samples

    sample_loss = total_loss / total_samples  # Calculate average loss
    accuracy = total_true_positives / total_samples  # Calculate accuracy
    return sample_loss, accuracy


In [11]:
# load_model('/content/gdrive/MyDrive/license_plate/models/model_weights_20231106_1410.pth')


In [12]:
import torch
from torch.utils.data import DataLoader, Subset
from datetime import datetime
from tqdm import tqdm

# Constants
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DATA_PATH = [data_path]
TEST_PATH = [test_path]
BATCH_SIZE = 2
VAL_BATCH_SIZE = 2
SUBSET_INDICES = 16
SUBSET_VAL_INDICES = 16
DECODER_EPOCHS = 50
N_EPOCHS = 100
USE_TQDM = True
USE_SUBSET = True
PRINT_EVERY = 5

# Define model, loss function, optimizers, and schedulers
model.to(DEVICE)
loss_fn = nn.MSELoss()
optimizer_decoder = optim.AdamW(model.parameters(), lr=1e-5)
# scheduler_decoder = optim.lr_scheduler.OneCycleLR(optimizer_decoder, 1e-2, total_steps=DECODER_EPOCHS)
scheduler_decoder = optim.lr_scheduler.CyclicLR(optimizer_decoder, base_lr=1e-5, max_lr=1e-3, cycle_momentum=False, step_size_up=DECODER_EPOCHS/6)
optimizer_full = optim.AdamW(model.parameters(), lr=5e-6)
scheduler_full = optim.lr_scheduler.CyclicLR(optimizer_decoder, base_lr=1e-5, max_lr=1e-4, cycle_momentum=False, step_size_up=N_EPOCHS/6)

# Define dataloaders
train_dataset = LabelFpsDataLoader(DATA_PATH, (500, 1000))
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_dataset = LabelFpsDataLoader(TEST_PATH, (500, 1000))
validation_loader = DataLoader(val_dataset, batch_size=VAL_BATCH_SIZE, shuffle=True, num_workers=2)

# Use subsets for faster iterations during development
if USE_SUBSET:
    train_dataset = Subset(train_dataset, indices=range(SUBSET_INDICES))
    val_dataset = Subset(val_dataset, indices=range(SUBSET_VAL_INDICES))
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
    validation_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

# Training functions
def train_epoch(dataloader, optimizer, model, loss_fn, scaler):
    for X_batch, y_batch, _ in dataloader:
        optimizer.zero_grad()
        X_batch = X_batch.permute(0, 3, 1, 2).to(DEVICE)
        y_batch = y_batch.to(DEVICE)
        with torch.cuda.amp.autocast():
            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

def train_model(n_epochs, optimizer, scheduler, dataloader, val_loader, model, loss_fn):
    train_loss_history = []
    val_loss_history = []
    train_accuracy_history = []
    val_accuracy_history = []
    scaler = torch.cuda.amp.GradScaler()

    for epoch in tqdm(range(n_epochs)):
        model.train()
        # print(f"epoch: {epoch}")
        train_epoch(dataloader, optimizer, model, loss_fn, scaler)
        scheduler.step()

        if epoch % PRINT_EVERY == 0:
            model.eval()
            with torch.no_grad():
                # In-sample
                in_sample_loss, in_sample_accuracy = calculate_loss_and_accuracy(dataloader, model, loss_fn, device)

                train_loss_history.append(in_sample_loss)
                train_accuracy_history.append(in_sample_accuracy)

                # Out-sample
                out_sample_loss, out_sample_accuracy = calculate_loss_and_accuracy(val_loader, model, loss_fn, device)
                val_loss_history.append(out_sample_loss)
                val_accuracy_history.append(out_sample_accuracy)

                if USE_TQDM:
                    tqdm.write(f'Epoch {epoch + 1}, In-sample Loss: {in_sample_loss:.4f}, In-sample Accuracy: {in_sample_accuracy:.4f}')
                    tqdm.write(f'Epoch {epoch + 1}, Out-sample Loss: {out_sample_loss:.4f}, Out-sample Accuracy: {out_sample_accuracy:.4f}')

    return train_loss_history, val_loss_history, train_accuracy_history, val_accuracy_history





In [13]:
# Training and validation functions are assumed to be defined above this script

# Train the model with the decoder first
# print("Starting training with decoder...")
#  decoder_loss_history, decoder_val_loss_history, decoder_test_accuracy_history, decoder_val_accuracy_history = train_model(
#     n_epochs=DECODER_EPOCHS,
#     optimizer=optimizer_decoder,
#     scheduler=scheduler_decoder,
#     dataloader=train_loader,
#     val_loader=validation_loader,
#     model=model,
#     loss_fn=loss_fn
# )
# save_model(model)
# # print("Decoder training completed.")

# # Unfreeze model layers for full model training
# for param in model.parameters():
#     param.requires_grad = True

# # Now train the full model
# print("Starting full model training...")
# full_loss_history, full_val_loss_history, full_test_accuracy_history, full_val_accuracy_history = train_model(
#     n_epochs=N_EPOCHS,
#     optimizer=optimizer_full,
#     scheduler=scheduler_full,
#     dataloader=train_loader,  # Assuming train_loader_finetuning is the same as train_loader
#     val_loader=validation_loader,
#     model=model,
#     loss_fn=loss_fn
# )
# save_model(model)
# print("Full model training completed.")


In [14]:
lr_to_test = [(1e-4, 1e-2),(1e-4, 1e-1),(1e-3, 1e-2),(1e-3,1)]
decoder_loss_result = []
full_loss_result = []
DECODER_EPOCHS = 80
N_EPOCHS = 80

for lr_base, lr_max in lr_to_test:
  model = get_new_model()
  model.to(DEVICE)
  optimizer_decoder = optim.AdamW(model.parameters(), lr=lr_base)
  # scheduler_decoder = optim.lr_scheduler.OneCycleLR(optimizer_decoder, 1e-2, total_steps=DECODER_EPOCHS)
  scheduler_decoder = optim.lr_scheduler.CyclicLR(optimizer_decoder, base_lr=lr_base, max_lr=lr_max, cycle_momentum=False, step_size_up=DECODER_EPOCHS/6)
  optimizer_full = optim.AdamW(model.parameters(), lr=lr_base)
  scheduler_full = optim.lr_scheduler.CyclicLR(optimizer_decoder, base_lr=lr_base*1e-1, max_lr=lr_max*1e-1, cycle_momentum=False, step_size_up=N_EPOCHS/6)
  decoder_loss_history, decoder_val_loss_history, decoder_test_accuracy_history, decoder_val_accuracy_history = train_model(
    n_epochs=DECODER_EPOCHS,
    optimizer=optimizer_decoder,
    scheduler=scheduler_decoder,
    dataloader=train_loader,
    val_loader=validation_loader,
    model=model,
    loss_fn=loss_fn
  )
  decoder_loss_result.append(decoder_loss_history)



  # Unfreeze model layers for full model training
  for param in model.parameters():
    param.requires_grad = True



  # Now train the full model
  print("Starting full model training...")
  full_loss_history, full_val_loss_history, full_test_accuracy_history, full_val_accuracy_history = train_model(
      n_epochs=N_EPOCHS,
      optimizer=optimizer_full,
      scheduler=scheduler_full,
      dataloader=train_loader,  # Assuming train_loader_finetuning is the same as train_loader
      val_loader=validation_loader,
      model=model,
      loss_fn=loss_fn
  )
  full_loss_result.append(full_loss_history)
  print("Full model training completed.")
plot_loss(lr_to_test, decoder_loss_result, full_loss_result)


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Total number of layers is 174
Number of pretrained base layers is 170


  1%|▏         | 1/80 [00:12<15:51, 12.05s/it]

Epoch 1, In-sample Loss: 0.0745, In-sample Accuracy: 0.0000
Epoch 1, Out-sample Loss: 0.0546, Out-sample Accuracy: 0.0000


  8%|▊         | 6/80 [00:16<02:03,  1.67s/it]

Epoch 6, In-sample Loss: 0.0385, In-sample Accuracy: 0.0000
Epoch 6, Out-sample Loss: 0.0341, Out-sample Accuracy: 0.0000


 14%|█▍        | 11/80 [00:21<01:47,  1.55s/it]

Epoch 11, In-sample Loss: 0.0298, In-sample Accuracy: 0.0000
Epoch 11, Out-sample Loss: 0.0358, Out-sample Accuracy: 0.0000


 20%|██        | 16/80 [00:27<01:22,  1.30s/it]

Epoch 16, In-sample Loss: 0.0244, In-sample Accuracy: 0.0000
Epoch 16, Out-sample Loss: 0.0334, Out-sample Accuracy: 0.0000


 26%|██▋       | 21/80 [00:31<01:03,  1.07s/it]

Epoch 21, In-sample Loss: 0.0078, In-sample Accuracy: 0.1875
Epoch 21, Out-sample Loss: 0.0153, Out-sample Accuracy: 0.0000


 32%|███▎      | 26/80 [00:36<00:59,  1.10s/it]

Epoch 26, In-sample Loss: 0.0097, In-sample Accuracy: 0.1250
Epoch 26, Out-sample Loss: 0.0144, Out-sample Accuracy: 0.0000


 39%|███▉      | 31/80 [00:43<01:14,  1.51s/it]

Epoch 31, In-sample Loss: 0.0058, In-sample Accuracy: 0.1250
Epoch 31, Out-sample Loss: 0.0126, Out-sample Accuracy: 0.0000


 45%|████▌     | 36/80 [00:47<00:49,  1.12s/it]

Epoch 36, In-sample Loss: 0.0118, In-sample Accuracy: 0.0625
Epoch 36, Out-sample Loss: 0.0143, Out-sample Accuracy: 0.0000


 51%|█████▏    | 41/80 [00:51<00:42,  1.08s/it]

Epoch 41, In-sample Loss: 0.0036, In-sample Accuracy: 0.2500
Epoch 41, Out-sample Loss: 0.0117, Out-sample Accuracy: 0.0000


 57%|█████▊    | 46/80 [00:57<00:52,  1.55s/it]

Epoch 46, In-sample Loss: 0.0062, In-sample Accuracy: 0.1875
Epoch 46, Out-sample Loss: 0.0088, Out-sample Accuracy: 0.0000


 64%|██████▍   | 51/80 [01:02<00:34,  1.18s/it]

Epoch 51, In-sample Loss: 0.0055, In-sample Accuracy: 0.0625
Epoch 51, Out-sample Loss: 0.0118, Out-sample Accuracy: 0.0000


 70%|███████   | 56/80 [01:06<00:25,  1.04s/it]

Epoch 56, In-sample Loss: 0.0029, In-sample Accuracy: 0.3125
Epoch 56, Out-sample Loss: 0.0091, Out-sample Accuracy: 0.0000


 76%|███████▋  | 61/80 [01:12<00:25,  1.37s/it]

Epoch 61, In-sample Loss: 0.0033, In-sample Accuracy: 0.0625
Epoch 61, Out-sample Loss: 0.0107, Out-sample Accuracy: 0.0000


 82%|████████▎ | 66/80 [01:18<00:19,  1.36s/it]

Epoch 66, In-sample Loss: 0.0060, In-sample Accuracy: 0.0625
Epoch 66, Out-sample Loss: 0.0132, Out-sample Accuracy: 0.0000


 89%|████████▉ | 71/80 [01:22<00:09,  1.11s/it]

Epoch 71, In-sample Loss: 0.0038, In-sample Accuracy: 0.4375
Epoch 71, Out-sample Loss: 0.0132, Out-sample Accuracy: 0.0000


 95%|█████████▌| 76/80 [01:27<00:04,  1.07s/it]

Epoch 76, In-sample Loss: 0.0027, In-sample Accuracy: 0.4375
Epoch 76, Out-sample Loss: 0.0111, Out-sample Accuracy: 0.0000


100%|██████████| 80/80 [01:31<00:00,  1.14s/it]


Starting full model training...


  1%|▏         | 1/80 [00:03<04:27,  3.38s/it]

Epoch 1, In-sample Loss: 0.0033, In-sample Accuracy: 0.4375
Epoch 1, Out-sample Loss: 0.0096, Out-sample Accuracy: 0.0000


  8%|▊         | 6/80 [00:11<02:34,  2.09s/it]

Epoch 6, In-sample Loss: 0.0023, In-sample Accuracy: 0.3750
Epoch 6, Out-sample Loss: 0.0084, Out-sample Accuracy: 0.0000


 14%|█▍        | 11/80 [00:21<02:24,  2.09s/it]

Epoch 11, In-sample Loss: 0.0019, In-sample Accuracy: 0.4375
Epoch 11, Out-sample Loss: 0.0085, Out-sample Accuracy: 0.0000


 20%|██        | 16/80 [00:26<01:26,  1.35s/it]

Epoch 16, In-sample Loss: 0.0017, In-sample Accuracy: 0.5000
Epoch 16, Out-sample Loss: 0.0073, Out-sample Accuracy: 0.0000


 26%|██▋       | 21/80 [00:33<01:38,  1.67s/it]

Epoch 21, In-sample Loss: 0.0013, In-sample Accuracy: 0.6250
Epoch 21, Out-sample Loss: 0.0070, Out-sample Accuracy: 0.0000


 32%|███▎      | 26/80 [00:39<01:17,  1.44s/it]

Epoch 26, In-sample Loss: 0.0010, In-sample Accuracy: 0.8125
Epoch 26, Out-sample Loss: 0.0068, Out-sample Accuracy: 0.0000


 39%|███▉      | 31/80 [00:45<01:01,  1.26s/it]

Epoch 31, In-sample Loss: 0.0009, In-sample Accuracy: 0.6875
Epoch 31, Out-sample Loss: 0.0063, Out-sample Accuracy: 0.0000


 45%|████▌     | 36/80 [00:52<01:19,  1.80s/it]

Epoch 36, In-sample Loss: 0.0007, In-sample Accuracy: 0.7500
Epoch 36, Out-sample Loss: 0.0062, Out-sample Accuracy: 0.0000


 51%|█████▏    | 41/80 [00:58<00:53,  1.37s/it]

Epoch 41, In-sample Loss: 0.0006, In-sample Accuracy: 0.8125
Epoch 41, Out-sample Loss: 0.0058, Out-sample Accuracy: 0.0000


 57%|█████▊    | 46/80 [01:03<00:42,  1.25s/it]

Epoch 46, In-sample Loss: 0.0007, In-sample Accuracy: 0.7500
Epoch 46, Out-sample Loss: 0.0061, Out-sample Accuracy: 0.0000


 64%|██████▍   | 51/80 [01:11<00:51,  1.78s/it]

Epoch 51, In-sample Loss: 0.0010, In-sample Accuracy: 0.6875
Epoch 51, Out-sample Loss: 0.0062, Out-sample Accuracy: 0.0000


 70%|███████   | 56/80 [01:16<00:31,  1.33s/it]

Epoch 56, In-sample Loss: 0.0006, In-sample Accuracy: 0.7500
Epoch 56, Out-sample Loss: 0.0059, Out-sample Accuracy: 0.0000


 76%|███████▋  | 61/80 [01:21<00:26,  1.37s/it]

Epoch 61, In-sample Loss: 0.0008, In-sample Accuracy: 0.6875
Epoch 61, Out-sample Loss: 0.0054, Out-sample Accuracy: 0.0000


 82%|████████▎ | 66/80 [01:29<00:22,  1.59s/it]

Epoch 66, In-sample Loss: 0.0005, In-sample Accuracy: 0.7500
Epoch 66, Out-sample Loss: 0.0059, Out-sample Accuracy: 0.0000


 89%|████████▉ | 71/80 [01:34<00:11,  1.28s/it]

Epoch 71, In-sample Loss: 0.0008, In-sample Accuracy: 0.6875
Epoch 71, Out-sample Loss: 0.0064, Out-sample Accuracy: 0.0000


 95%|█████████▌| 76/80 [01:41<00:06,  1.62s/it]

Epoch 76, In-sample Loss: 0.0008, In-sample Accuracy: 0.6250
Epoch 76, Out-sample Loss: 0.0061, Out-sample Accuracy: 0.0000


100%|██████████| 80/80 [01:45<00:00,  1.32s/it]
Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Full model training completed.
Total number of layers is 174
Number of pretrained base layers is 170


  1%|▏         | 1/80 [00:02<02:54,  2.21s/it]

Epoch 1, In-sample Loss: 0.0812, In-sample Accuracy: 0.0000
Epoch 1, Out-sample Loss: 0.0839, Out-sample Accuracy: 0.0000


  8%|▊         | 6/80 [00:06<01:27,  1.18s/it]

Epoch 6, In-sample Loss: 0.0368, In-sample Accuracy: 0.0000
Epoch 6, Out-sample Loss: 0.0435, Out-sample Accuracy: 0.0000


 14%|█▍        | 11/80 [00:14<02:15,  1.97s/it]

Epoch 11, In-sample Loss: 0.0041, In-sample Accuracy: 0.1250
Epoch 11, Out-sample Loss: 0.0089, Out-sample Accuracy: 0.0000


 20%|██        | 16/80 [00:19<01:22,  1.29s/it]

Epoch 16, In-sample Loss: 0.0028, In-sample Accuracy: 0.3125
Epoch 16, Out-sample Loss: 0.0095, Out-sample Accuracy: 0.0000


 26%|██▋       | 21/80 [00:23<01:06,  1.12s/it]

Epoch 21, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 21, Out-sample Loss: 0.0092, Out-sample Accuracy: 0.0000


 32%|███▎      | 26/80 [00:29<01:25,  1.58s/it]

Epoch 26, In-sample Loss: 0.0028, In-sample Accuracy: 0.3125
Epoch 26, Out-sample Loss: 0.0101, Out-sample Accuracy: 0.0000


 39%|███▉      | 31/80 [00:35<01:04,  1.33s/it]

Epoch 31, In-sample Loss: 0.0028, In-sample Accuracy: 0.2500
Epoch 31, Out-sample Loss: 0.0101, Out-sample Accuracy: 0.0000


 45%|████▌     | 36/80 [00:40<00:50,  1.14s/it]

Epoch 36, In-sample Loss: 0.0028, In-sample Accuracy: 0.2500
Epoch 36, Out-sample Loss: 0.0092, Out-sample Accuracy: 0.0000


 51%|█████▏    | 41/80 [00:45<00:57,  1.47s/it]

Epoch 41, In-sample Loss: 0.0030, In-sample Accuracy: 0.1875
Epoch 41, Out-sample Loss: 0.0085, Out-sample Accuracy: 0.0000


 57%|█████▊    | 46/80 [00:51<00:46,  1.38s/it]

Epoch 46, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 46, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


 64%|██████▍   | 51/80 [00:56<00:33,  1.15s/it]

Epoch 51, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 51, Out-sample Loss: 0.0095, Out-sample Accuracy: 0.0000


 70%|███████   | 56/80 [01:01<00:30,  1.28s/it]

Epoch 56, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 56, Out-sample Loss: 0.0103, Out-sample Accuracy: 0.0000


 76%|███████▋  | 61/80 [01:08<00:28,  1.51s/it]

Epoch 61, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 61, Out-sample Loss: 0.0087, Out-sample Accuracy: 0.0000


 82%|████████▎ | 66/80 [01:12<00:16,  1.17s/it]

Epoch 66, In-sample Loss: 0.0029, In-sample Accuracy: 0.3125
Epoch 66, Out-sample Loss: 0.0119, Out-sample Accuracy: 0.0000


 89%|████████▉ | 71/80 [01:17<00:10,  1.14s/it]

Epoch 71, In-sample Loss: 0.0028, In-sample Accuracy: 0.3125
Epoch 71, Out-sample Loss: 0.0108, Out-sample Accuracy: 0.0000


 95%|█████████▌| 76/80 [01:24<00:06,  1.64s/it]

Epoch 76, In-sample Loss: 0.0029, In-sample Accuracy: 0.2500
Epoch 76, Out-sample Loss: 0.0115, Out-sample Accuracy: 0.0000


100%|██████████| 80/80 [01:27<00:00,  1.10s/it]


Starting full model training...


  1%|▏         | 1/80 [00:02<02:51,  2.18s/it]

Epoch 1, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 1, Out-sample Loss: 0.0096, Out-sample Accuracy: 0.0000


  8%|▊         | 6/80 [00:08<01:54,  1.54s/it]

Epoch 6, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 6, Out-sample Loss: 0.0097, Out-sample Accuracy: 0.0000


 14%|█▍        | 11/80 [00:15<01:50,  1.61s/it]

Epoch 11, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 11, Out-sample Loss: 0.0097, Out-sample Accuracy: 0.0000


 20%|██        | 16/80 [00:21<01:25,  1.33s/it]

Epoch 16, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 16, Out-sample Loss: 0.0097, Out-sample Accuracy: 0.0000


 26%|██▋       | 21/80 [00:28<01:47,  1.83s/it]

Epoch 21, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 21, Out-sample Loss: 0.0097, Out-sample Accuracy: 0.0000


 32%|███▎      | 26/80 [00:34<01:18,  1.45s/it]

Epoch 26, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 26, Out-sample Loss: 0.0098, Out-sample Accuracy: 0.0000


 39%|███▉      | 31/80 [00:40<01:05,  1.33s/it]

Epoch 31, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 31, Out-sample Loss: 0.0098, Out-sample Accuracy: 0.0000


 45%|████▌     | 36/80 [00:48<01:20,  1.83s/it]

Epoch 36, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 36, Out-sample Loss: 0.0098, Out-sample Accuracy: 0.0000


 51%|█████▏    | 41/80 [00:53<00:52,  1.36s/it]

Epoch 41, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 41, Out-sample Loss: 0.0098, Out-sample Accuracy: 0.0000


 57%|█████▊    | 46/80 [01:00<00:55,  1.62s/it]

Epoch 46, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 46, Out-sample Loss: 0.0098, Out-sample Accuracy: 0.0000


 64%|██████▍   | 51/80 [01:07<00:43,  1.52s/it]

Epoch 51, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 51, Out-sample Loss: 0.0098, Out-sample Accuracy: 0.0000


 70%|███████   | 56/80 [01:12<00:31,  1.31s/it]

Epoch 56, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 56, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


 76%|███████▋  | 61/80 [01:20<00:36,  1.92s/it]

Epoch 61, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 61, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


 82%|████████▎ | 66/80 [01:26<00:19,  1.41s/it]

Epoch 66, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 66, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


 89%|████████▉ | 71/80 [01:32<00:12,  1.35s/it]

Epoch 71, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 71, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


 95%|█████████▌| 76/80 [01:40<00:06,  1.69s/it]

Epoch 76, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 76, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


100%|██████████| 80/80 [01:43<00:00,  1.29s/it]
Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Full model training completed.
Total number of layers is 174
Number of pretrained base layers is 170


  1%|▏         | 1/80 [00:01<02:33,  1.94s/it]

Epoch 1, In-sample Loss: 0.0489, In-sample Accuracy: 0.0000
Epoch 1, Out-sample Loss: 0.0492, Out-sample Accuracy: 0.0000


  8%|▊         | 6/80 [00:07<01:55,  1.56s/it]

Epoch 6, In-sample Loss: 0.0390, In-sample Accuracy: 0.0000
Epoch 6, Out-sample Loss: 0.0554, Out-sample Accuracy: 0.0000


 14%|█▍        | 11/80 [00:14<01:38,  1.42s/it]

Epoch 11, In-sample Loss: 0.0356, In-sample Accuracy: 0.0000
Epoch 11, Out-sample Loss: 0.0411, Out-sample Accuracy: 0.0000


 20%|██        | 16/80 [00:18<01:15,  1.17s/it]

Epoch 16, In-sample Loss: 0.0218, In-sample Accuracy: 0.0000
Epoch 16, Out-sample Loss: 0.0291, Out-sample Accuracy: 0.0000


 26%|██▋       | 21/80 [00:24<01:23,  1.41s/it]

Epoch 21, In-sample Loss: 0.0125, In-sample Accuracy: 0.0625
Epoch 21, Out-sample Loss: 0.0184, Out-sample Accuracy: 0.0000


 32%|███▎      | 26/80 [00:31<01:22,  1.53s/it]

Epoch 26, In-sample Loss: 0.0114, In-sample Accuracy: 0.0625
Epoch 26, Out-sample Loss: 0.0163, Out-sample Accuracy: 0.0000


 39%|███▉      | 31/80 [00:35<00:58,  1.19s/it]

Epoch 31, In-sample Loss: 0.0080, In-sample Accuracy: 0.1250
Epoch 31, Out-sample Loss: 0.0111, Out-sample Accuracy: 0.0000


 45%|████▌     | 36/80 [00:40<00:53,  1.23s/it]

Epoch 36, In-sample Loss: 0.0052, In-sample Accuracy: 0.0000
Epoch 36, Out-sample Loss: 0.0105, Out-sample Accuracy: 0.0000


 51%|█████▏    | 41/80 [00:47<01:00,  1.55s/it]

Epoch 41, In-sample Loss: 0.0027, In-sample Accuracy: 0.4375
Epoch 41, Out-sample Loss: 0.0098, Out-sample Accuracy: 0.0000


 57%|█████▊    | 46/80 [00:52<00:40,  1.20s/it]

Epoch 46, In-sample Loss: 0.0066, In-sample Accuracy: 0.0625
Epoch 46, Out-sample Loss: 0.0082, Out-sample Accuracy: 0.0000


 64%|██████▍   | 51/80 [00:56<00:33,  1.17s/it]

Epoch 51, In-sample Loss: 0.0081, In-sample Accuracy: 0.1250
Epoch 51, Out-sample Loss: 0.0120, Out-sample Accuracy: 0.0000


 70%|███████   | 56/80 [01:04<00:39,  1.63s/it]

Epoch 56, In-sample Loss: 0.0040, In-sample Accuracy: 0.3750
Epoch 56, Out-sample Loss: 0.0070, Out-sample Accuracy: 0.0000


 76%|███████▋  | 61/80 [01:08<00:23,  1.21s/it]

Epoch 61, In-sample Loss: 0.0037, In-sample Accuracy: 0.3125
Epoch 61, Out-sample Loss: 0.0078, Out-sample Accuracy: 0.0000


 82%|████████▎ | 66/80 [01:13<00:16,  1.17s/it]

Epoch 66, In-sample Loss: 0.0047, In-sample Accuracy: 0.1250
Epoch 66, Out-sample Loss: 0.0190, Out-sample Accuracy: 0.0000


 89%|████████▉ | 71/80 [01:21<00:15,  1.70s/it]

Epoch 71, In-sample Loss: 0.0062, In-sample Accuracy: 0.3125
Epoch 71, Out-sample Loss: 0.0102, Out-sample Accuracy: 0.0000


 95%|█████████▌| 76/80 [01:25<00:04,  1.25s/it]

Epoch 76, In-sample Loss: 0.0070, In-sample Accuracy: 0.0625
Epoch 76, Out-sample Loss: 0.0101, Out-sample Accuracy: 0.0000


100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


Starting full model training...


  1%|▏         | 1/80 [00:02<02:46,  2.11s/it]

Epoch 1, In-sample Loss: 0.0846, In-sample Accuracy: 0.1250
Epoch 1, Out-sample Loss: 0.0143, Out-sample Accuracy: 0.0000


  8%|▊         | 6/80 [00:10<02:22,  1.93s/it]

Epoch 6, In-sample Loss: 0.0282, In-sample Accuracy: 0.0000
Epoch 6, Out-sample Loss: 0.0480, Out-sample Accuracy: 0.0000


 14%|█▍        | 11/80 [00:15<01:35,  1.39s/it]

Epoch 11, In-sample Loss: 0.0149, In-sample Accuracy: 0.0000
Epoch 11, Out-sample Loss: 0.0189, Out-sample Accuracy: 0.0000


 20%|██        | 16/80 [00:22<01:49,  1.71s/it]

Epoch 16, In-sample Loss: 0.0082, In-sample Accuracy: 0.0000
Epoch 16, Out-sample Loss: 0.0134, Out-sample Accuracy: 0.0000


 26%|██▋       | 21/80 [00:29<01:30,  1.54s/it]

Epoch 21, In-sample Loss: 0.0035, In-sample Accuracy: 0.3750
Epoch 21, Out-sample Loss: 0.0089, Out-sample Accuracy: 0.0000


 32%|███▎      | 26/80 [00:35<01:11,  1.33s/it]

Epoch 26, In-sample Loss: 0.0017, In-sample Accuracy: 0.2500
Epoch 26, Out-sample Loss: 0.0056, Out-sample Accuracy: 0.0000


 39%|███▉      | 31/80 [00:43<01:35,  1.94s/it]

Epoch 31, In-sample Loss: 0.0012, In-sample Accuracy: 0.4375
Epoch 31, Out-sample Loss: 0.0053, Out-sample Accuracy: 0.0000


 45%|████▌     | 36/80 [00:49<01:04,  1.47s/it]

Epoch 36, In-sample Loss: 0.0006, In-sample Accuracy: 0.7500
Epoch 36, Out-sample Loss: 0.0056, Out-sample Accuracy: 0.0000


 51%|█████▏    | 41/80 [01:00<01:34,  2.43s/it]

Epoch 41, In-sample Loss: 0.0003, In-sample Accuracy: 0.9375
Epoch 41, Out-sample Loss: 0.0054, Out-sample Accuracy: 0.0000


 57%|█████▊    | 46/80 [01:05<00:51,  1.50s/it]

Epoch 46, In-sample Loss: 0.0005, In-sample Accuracy: 0.5625
Epoch 46, Out-sample Loss: 0.0059, Out-sample Accuracy: 0.0000


 64%|██████▍   | 51/80 [01:12<00:43,  1.50s/it]

Epoch 51, In-sample Loss: 0.0006, In-sample Accuracy: 0.6250
Epoch 51, Out-sample Loss: 0.0050, Out-sample Accuracy: 0.0000


 70%|███████   | 56/80 [01:19<00:38,  1.62s/it]

Epoch 56, In-sample Loss: 0.0007, In-sample Accuracy: 0.6875
Epoch 56, Out-sample Loss: 0.0051, Out-sample Accuracy: 0.0000


 76%|███████▋  | 61/80 [01:25<00:25,  1.36s/it]

Epoch 61, In-sample Loss: 0.0005, In-sample Accuracy: 0.7500
Epoch 61, Out-sample Loss: 0.0061, Out-sample Accuracy: 0.0000


 82%|████████▎ | 66/80 [01:33<00:26,  1.89s/it]

Epoch 66, In-sample Loss: 0.0007, In-sample Accuracy: 0.8125
Epoch 66, Out-sample Loss: 0.0049, Out-sample Accuracy: 0.0000


 89%|████████▉ | 71/80 [01:39<00:13,  1.44s/it]

Epoch 71, In-sample Loss: 0.0010, In-sample Accuracy: 0.6250
Epoch 71, Out-sample Loss: 0.0065, Out-sample Accuracy: 0.0000


 95%|█████████▌| 76/80 [01:44<00:05,  1.34s/it]

Epoch 76, In-sample Loss: 0.0009, In-sample Accuracy: 0.5625
Epoch 76, Out-sample Loss: 0.0074, Out-sample Accuracy: 0.0000


100%|██████████| 80/80 [01:50<00:00,  1.38s/it]
Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Full model training completed.
Total number of layers is 174
Number of pretrained base layers is 170


  1%|▏         | 1/80 [00:02<03:01,  2.30s/it]

Epoch 1, In-sample Loss: 0.0727, In-sample Accuracy: 0.0000
Epoch 1, Out-sample Loss: 0.0650, Out-sample Accuracy: 0.0000


  8%|▊         | 6/80 [00:07<01:30,  1.22s/it]

Epoch 6, In-sample Loss: 0.4625, In-sample Accuracy: 0.0000
Epoch 6, Out-sample Loss: 0.4314, Out-sample Accuracy: 0.0000


 14%|█▍        | 11/80 [00:12<01:34,  1.37s/it]

Epoch 11, In-sample Loss: 0.0062, In-sample Accuracy: 0.0000
Epoch 11, Out-sample Loss: 0.0175, Out-sample Accuracy: 0.0000


 20%|██        | 16/80 [00:19<01:37,  1.53s/it]

Epoch 16, In-sample Loss: 0.0030, In-sample Accuracy: 0.1875
Epoch 16, Out-sample Loss: 0.0090, Out-sample Accuracy: 0.0000


 26%|██▋       | 21/80 [00:24<01:12,  1.22s/it]

Epoch 21, In-sample Loss: 0.0028, In-sample Accuracy: 0.3125
Epoch 21, Out-sample Loss: 0.0109, Out-sample Accuracy: 0.0000


 32%|███▎      | 26/80 [00:29<01:12,  1.34s/it]

Epoch 26, In-sample Loss: 0.0028, In-sample Accuracy: 0.2500
Epoch 26, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 39%|███▉      | 31/80 [00:36<01:22,  1.68s/it]

Epoch 31, In-sample Loss: 0.0028, In-sample Accuracy: 0.3125
Epoch 31, Out-sample Loss: 0.0101, Out-sample Accuracy: 0.0000


 45%|████▌     | 36/80 [00:41<00:54,  1.23s/it]

Epoch 36, In-sample Loss: 0.0030, In-sample Accuracy: 0.1250
Epoch 36, Out-sample Loss: 0.0125, Out-sample Accuracy: 0.0000


 51%|█████▏    | 41/80 [00:46<00:48,  1.25s/it]

Epoch 41, In-sample Loss: 0.0032, In-sample Accuracy: 0.3125
Epoch 41, Out-sample Loss: 0.0110, Out-sample Accuracy: 0.0000


 57%|█████▊    | 46/80 [00:54<00:57,  1.69s/it]

Epoch 46, In-sample Loss: 0.0035, In-sample Accuracy: 0.1875
Epoch 46, Out-sample Loss: 0.0095, Out-sample Accuracy: 0.0000


 64%|██████▍   | 51/80 [00:58<00:35,  1.23s/it]

Epoch 51, In-sample Loss: 0.0029, In-sample Accuracy: 0.2500
Epoch 51, Out-sample Loss: 0.0108, Out-sample Accuracy: 0.0000


 70%|███████   | 56/80 [01:04<00:30,  1.28s/it]

Epoch 56, In-sample Loss: 0.0028, In-sample Accuracy: 0.2500
Epoch 56, Out-sample Loss: 0.0102, Out-sample Accuracy: 0.0000


 76%|███████▋  | 61/80 [01:11<00:30,  1.61s/it]

Epoch 61, In-sample Loss: 0.0029, In-sample Accuracy: 0.2500
Epoch 61, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 82%|████████▎ | 66/80 [01:16<00:17,  1.22s/it]

Epoch 66, In-sample Loss: 0.0047, In-sample Accuracy: 0.1250
Epoch 66, Out-sample Loss: 0.0175, Out-sample Accuracy: 0.0000


 89%|████████▉ | 71/80 [01:21<00:12,  1.35s/it]

Epoch 71, In-sample Loss: 0.0037, In-sample Accuracy: 0.1875
Epoch 71, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


 95%|█████████▌| 76/80 [01:28<00:06,  1.56s/it]

Epoch 76, In-sample Loss: 0.0031, In-sample Accuracy: 0.3125
Epoch 76, Out-sample Loss: 0.0097, Out-sample Accuracy: 0.0000


100%|██████████| 80/80 [01:31<00:00,  1.14s/it]


Starting full model training...


  1%|▏         | 1/80 [00:02<02:50,  2.16s/it]

Epoch 1, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 1, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


  8%|▊         | 6/80 [00:08<02:09,  1.75s/it]

Epoch 6, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 6, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 14%|█▍        | 11/80 [00:15<01:47,  1.55s/it]

Epoch 11, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 11, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 20%|██        | 16/80 [00:21<01:26,  1.36s/it]

Epoch 16, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 16, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 26%|██▋       | 21/80 [00:29<01:55,  1.96s/it]

Epoch 21, In-sample Loss: 0.0028, In-sample Accuracy: 0.2500
Epoch 21, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


 32%|███▎      | 26/80 [00:35<01:17,  1.44s/it]

Epoch 26, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 26, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 39%|███▉      | 31/80 [00:41<01:17,  1.59s/it]

Epoch 31, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 31, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 45%|████▌     | 36/80 [00:49<01:10,  1.61s/it]

Epoch 36, In-sample Loss: 0.0028, In-sample Accuracy: 0.2500
Epoch 36, Out-sample Loss: 0.0101, Out-sample Accuracy: 0.0000


 51%|█████▏    | 41/80 [00:55<00:53,  1.38s/it]

Epoch 41, In-sample Loss: 0.0028, In-sample Accuracy: 0.2500
Epoch 41, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


 57%|█████▊    | 46/80 [01:03<01:06,  1.95s/it]

Epoch 46, In-sample Loss: 0.0028, In-sample Accuracy: 0.3125
Epoch 46, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 64%|██████▍   | 51/80 [01:09<00:41,  1.45s/it]

Epoch 51, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 51, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


 70%|███████   | 56/80 [01:14<00:33,  1.39s/it]

Epoch 56, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 56, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 76%|███████▋  | 61/80 [01:22<00:29,  1.57s/it]

Epoch 61, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 61, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 82%|████████▎ | 66/80 [01:28<00:19,  1.37s/it]

Epoch 66, In-sample Loss: 0.0028, In-sample Accuracy: 0.1875
Epoch 66, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 89%|████████▉ | 71/80 [01:35<00:16,  1.87s/it]

Epoch 71, In-sample Loss: 0.0028, In-sample Accuracy: 0.2500
Epoch 71, Out-sample Loss: 0.0100, Out-sample Accuracy: 0.0000


 95%|█████████▌| 76/80 [01:41<00:05,  1.41s/it]

Epoch 76, In-sample Loss: 0.0028, In-sample Accuracy: 0.2500
Epoch 76, Out-sample Loss: 0.0099, Out-sample Accuracy: 0.0000


100%|██████████| 80/80 [01:44<00:00,  1.31s/it]

Full model training completed.





NameError: ignored

In [None]:
import matplotlib.pyplot as plt

def plot_loss(values_to_test,decoder_loss_result,full_loss_result, label="n_hidden"):
# Plot decoder loss
  plt.figure(figsize=(12, 6))
  for i, value_tested in enumerate(values_to_test):
      plt.plot(range(1, len(decoder_loss_result[0]) + 1), decoder_loss_result[i], label=f'{label}={value_tested}')

  plt.title('Decoder Training Loss')
  plt.xlabel('Epoch')
  plt.yscale('log')
  plt.ylabel('Loss')
  plt.legend()
  plt.show()

  # Plot full model loss
  plt.figure(figsize=(12, 6))
  for i, value_tested in enumerate(values_to_test):
      plt.plot(range(1, len(full_loss_result[0]) + 1), full_loss_result[i], label=f'Full Model {label}={value_tested}')

  plt.title('Full Model Training Loss')
  plt.xlabel('Epoch')
  plt.yscale('log')
  plt.ylabel('Loss')
  plt.legend()
  plt.show()



In [None]:
#Code to investigate number of neurons in hidden layer
decoder_loss_result = []
full_loss_result = []
DECODER_EPOCHS = 20
N_EPOCHS = 20
n_hidden_to_test = [4,100]
for n_hidden in n_hidden_to_test:
  model = get_new_model(n_hidden = n_hidden)
  model.to(DEVICE)
  optimizer_decoder = optim.AdamW(model.parameters(), lr=1e-4)
  # scheduler_decoder = optim.lr_scheduler.OneCycleLR(optimizer_decoder, 1e-2, total_steps=DECODER_EPOCHS)
  scheduler_decoder = optim.lr_scheduler.CyclicLR(optimizer_decoder, base_lr=1e-4, max_lr=1e-1, cycle_momentum=False, step_size_up=DECODER_EPOCHS/6)
  optimizer_full = optim.AdamW(model.parameters(), lr=1e-4)
  scheduler_full = optim.lr_scheduler.CyclicLR(optimizer_decoder, base_lr=1e-2, max_lr=1e-1, cycle_momentum=False, step_size_up=N_EPOCHS/6)
  decoder_loss_history, decoder_val_loss_history, decoder_test_accuracy_history, decoder_val_accuracy_history = train_model(
    n_epochs=DECODER_EPOCHS,
    optimizer=optimizer_decoder,
    scheduler=scheduler_decoder,
    dataloader=train_loader,
    val_loader=validation_loader,
    model=model,
    loss_fn=loss_fn
  )
  decoder_loss_result.append(decoder_loss_history)



  # Unfreeze model layers for full model training
  for param in model.parameters():
    param.requires_grad = True



  # Now train the full model
  print("Starting full model training...")
  full_loss_history, full_val_loss_history, full_test_accuracy_history, full_val_accuracy_history = train_model(
      n_epochs=N_EPOCHS,
      optimizer=optimizer_full,
      scheduler=scheduler_full,
      dataloader=train_loader,  # Assuming train_loader_finetuning is the same as train_loader
      val_loader=validation_loader,
      model=model,
      loss_fn=loss_fn
  )
  full_loss_result.append(full_loss_history)
  print("Full model training completed.")



In [None]:
plot_loss(n_hidden_to_test, decoder_loss_result, full_loss_result)


In [None]:
def get_n_best_and_worst(model, dataloader, n, device):
    best_losses = []
    model.eval()
    best_idx = []
    worst_losses = []
    worst_idx = []
    loss_fn =loss_fn = nn.MSELoss(reduction='none')
    for X_batch, y_batch,_ in dataloader:
        X_batch = X_batch.permute(0, 3, 1, 2).to(device)  # Needs to have shape [batch_size, channels, height, width]
        y_batch = y_batch.to(device)
        y_pred = model(X_batch)
        losses = loss_fn(y_pred, y_batch)  # Calculate loss for each item in the batch
        for i, loss_val in enumerate(losses):
            # print(loss_val)
            loss_val = loss_val.sum()  # Convert to Python scalar¨
            # Update best losses
            if len(best_losses) < n:
                best_losses.append(loss_val)
                best_idx.append(i)
            elif loss_val < max(best_losses):
                max_idx = best_losses.index(max(best_losses))
                best_losses[max_idx] = loss_val
                best_idx[max_idx] = i

            # Update worst losses
            if len(worst_losses) < n:
                worst_losses.append(loss_val)
                worst_idx.append(i)
            elif loss_val > min(worst_losses):
                min_idx = worst_losses.index(min(worst_losses))
                worst_losses[min_idx] = loss_val
                worst_idx[min_idx] = i

    # Code for plotting the best and worst predictions
    # ...

    return best_losses, best_idx, worst_losses, worst_idx

def plot_list_of_idx(idxs, dataloader):
  for idx in idxs:
    img, true_label = dataloader[idx]
    pred = model(img)
    true_label_scaled_up =
    plot_img_and_boundingbox(image, pred)
    plot_img_and_boundingbox(image, true_label)
  dataloader[]

best_losses, best_idx, worst_losses, worst_idx = get_n_best_and_worst(model, train_loader, 5,'cuda')
print(best_losses)



In [None]:
initial_lr = 0.00001
lr_epochs = 100
optimizer = optim.SGD(model.parameters(), lr=initial_lr)


lr_hist = []
train_loss_history = []

def train_model(n_epochs):
  i = 0
  lr = initial_lr
  for epoch in tqdm(range(n_epochs)):
    # print("epoch:", i)
    for X_batch, y_batch, img_name in train_loader:
      agg_loss = 0
      X_batch = X_batch.permute(0,3,1,2).to(device) # Needs to have shape [batch_size, channels, height, width]
      y_batch = y_batch.to(device)
      y_pred = model(X_batch)
      loss = loss_fn(y_pred,y_batch)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      agg_loss += loss.item()
      train_loss_history.append(agg_loss)

      lr_hist.append(lr)

      lr = 1.1*lr
      for g in optimizer.param_groups:
        g['lr'] = lr
      if lr >= 0.0005:
        break

train_model(3)




In [19]:
#Code to investigate number of neurons in hidden layer
decoder_loss_result = []
full_loss_result = []
loss_hist = []
acc_hist = []
DECODER_EPOCHS = 10
N_EPOCHS_CYCLIC = 10
N_EPOCHS_LINEAR = 10

n_hidden_to_test = [100]
for n_hidden in n_hidden_to_test:
  model = get_new_model(n_hidden = n_hidden)
  model.to(DEVICE)
  # scheduler_decoder = optim.lr_scheduler.OneCycleLR(optimizer_decoder, 1e-2, total_steps=DECODER_EPOCHS)
  optimizer_decoder = optim.AdamW(model.parameters(), lr=1e-4)
  scheduler_decoder = optim.lr_scheduler.CyclicLR(optimizer_decoder, base_lr=1e-4, max_lr=1e-1, cycle_momentum=False, step_size_up=DECODER_EPOCHS/6, verbose=True)
  decoder_loss_history, decoder_val_loss_history, decoder_test_accuracy_history, decoder_val_accuracy_history = train_model(
    n_epochs=DECODER_EPOCHS,
    optimizer=optimizer_decoder,
    scheduler=scheduler_decoder,
    dataloader=train_loader,
    val_loader=validation_loader,
    model=model,
    loss_fn=loss_fn
  )
  decoder_loss_result.append(decoder_loss_history)
  loss_hist.append(decoder_loss_history)
  acc_hist.append(decoder_test_accuracy_history)


  # Unfreeze model layers for full model training
  for param in model.parameters():
    param.requires_grad = True

  # Now train the full cyclic
  optimizer_full = optim.AdamW(model.parameters(), lr=1e-4)
  scheduler_full = optim.lr_scheduler.CyclicLR(optimizer_decoder, base_lr=1e-4, max_lr=1e-2, cycle_momentum=False, step_size_up=N_EPOCHS/6,verbose=True)
  print("Starting full model cyclic training...")
  full_loss_history, full_val_loss_history, full_test_accuracy_history, full_val_accuracy_history = train_model(
      n_epochs=N_EPOCHS_CYCLIC,
      optimizer=optimizer_full,
      scheduler=scheduler_full,
      dataloader=train_loader,  # Assuming train_loader_finetuning is the same as train_loader
      val_loader=validation_loader,
      model=model,
      loss_fn=loss_fn
  )
  loss_hist.append(full_loss_history)
  acc_hist.append(full_test_accuracy_history)

  print("Starting linear training")
  optimizer_linear = optim.AdamW(model.parameters(), lr=1e-2)
  scheduler_linear = optim.lr_scheduler.ReduceLROnPlateau(optimizer_linear, patience=3,verbose=True)
  full_loss_history, full_val_loss_history, full_test_accuracy_history, full_val_accuracy_history = train_model(
      n_epochs=N_EPOCHS_LINEAR,
      optimizer=optimizer_full,
      scheduler=scheduler_linear,
      dataloader=train_loader,  # Assuming train_loader_finetuning is the same as train_loader
      val_loader=validation_loader,
      model=model,
      loss_fn=loss_fn
  )
  loss_hist.append(full_loss_history)
  acc_hist.append(full_test_accuracy_history)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Total number of layers is 174
Number of pretrained base layers is 170
Adjusting learning rate of group 0 to 1.0000e-04.


  0%|          | 0/10 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 6.0040e-02.


 10%|█         | 1/10 [00:02<00:25,  2.78s/it]

Epoch 1, In-sample Loss: 0.1946, In-sample Accuracy: 0.0000
Epoch 1, Out-sample Loss: 0.1848, Out-sample Accuracy: 0.0000


 20%|██        | 2/10 [00:03<00:12,  1.54s/it]

Adjusting learning rate of group 0 to 8.0020e-02.


 30%|███       | 3/10 [00:04<00:08,  1.15s/it]

Adjusting learning rate of group 0 to 2.0080e-02.


 40%|████      | 4/10 [00:04<00:05,  1.05it/s]

Adjusting learning rate of group 0 to 4.0060e-02.


 50%|█████     | 5/10 [00:05<00:04,  1.16it/s]

Adjusting learning rate of group 0 to 1.0000e-01.
Adjusting learning rate of group 0 to 4.0060e-02.


 60%|██████    | 6/10 [00:07<00:05,  1.27s/it]

Epoch 6, In-sample Loss: 0.4230, In-sample Accuracy: 0.0000
Epoch 6, Out-sample Loss: 0.4249, Out-sample Accuracy: 0.0000


 70%|███████   | 7/10 [00:08<00:03,  1.08s/it]

Adjusting learning rate of group 0 to 2.0080e-02.


 80%|████████  | 8/10 [00:08<00:01,  1.05it/s]

Adjusting learning rate of group 0 to 8.0020e-02.


 90%|█████████ | 9/10 [00:09<00:00,  1.03it/s]

Adjusting learning rate of group 0 to 6.0040e-02.


100%|██████████| 10/10 [00:11<00:00,  1.16s/it]


Adjusting learning rate of group 0 to 1.0000e-04.
Adjusting learning rate of group 0 to 1.0000e-04.
Starting full model cyclic training...


  0%|          | 0/10 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 8.4250e-04.


 10%|█         | 1/10 [00:03<00:33,  3.72s/it]

Epoch 1, In-sample Loss: 0.2235, In-sample Accuracy: 0.0000
Epoch 1, Out-sample Loss: 0.2256, Out-sample Accuracy: 0.0000


 20%|██        | 2/10 [00:04<00:17,  2.22s/it]

Adjusting learning rate of group 0 to 1.5850e-03.


 30%|███       | 3/10 [00:05<00:11,  1.60s/it]

Adjusting learning rate of group 0 to 2.3275e-03.


 40%|████      | 4/10 [00:06<00:07,  1.30s/it]

Adjusting learning rate of group 0 to 3.0700e-03.


 50%|█████     | 5/10 [00:07<00:05,  1.14s/it]

Adjusting learning rate of group 0 to 3.8125e-03.
Adjusting learning rate of group 0 to 4.5550e-03.


 60%|██████    | 6/10 [00:09<00:05,  1.47s/it]

Epoch 6, In-sample Loss: 0.2190, In-sample Accuracy: 0.0000
Epoch 6, Out-sample Loss: 0.2227, Out-sample Accuracy: 0.0000


 70%|███████   | 7/10 [00:10<00:03,  1.31s/it]

Adjusting learning rate of group 0 to 5.2975e-03.


 80%|████████  | 8/10 [00:11<00:02,  1.19s/it]

Adjusting learning rate of group 0 to 6.0400e-03.


 90%|█████████ | 9/10 [00:12<00:01,  1.09s/it]

Adjusting learning rate of group 0 to 6.7825e-03.


100%|██████████| 10/10 [00:13<00:00,  1.32s/it]


Adjusting learning rate of group 0 to 7.5250e-03.
Starting linear training


  0%|          | 0/10 [00:00<?, ?it/s]


TypeError: ignored

In [None]:
plt.figure(figsize=(10, 5))
plt.xscale('log')  # Set the y-axis to a logarithmic scale

plt.plot(lr_hist,train_loss_history, label='Train Loss')
plt.xlabel('LR')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()  # Add a legend to differentiate between train and validation loss
plt.show()
print(train_loss_history)
print(lr_hist)

In [None]:
plt.figure(figsize=(10, 5))
#plt.yscale('log')  # Set the y-axis to a logarithmic scale
plt.plot(test_loss_history, label='Train Loss')
plt.plot(val_loss_history, label='Validation Loss')
plt.title('Loss Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()  # Add a legend to differentiate between train and validation loss
plt.show()
print(val_accuracy_history)

In [None]:
#print a trainset with predicted label
# train_loader = DataLoader(data_loader, batch_size=1, shuffle=True, num_workers=1)

train_iter = iter(train_loader)

batch = next(train_iter)

X_batch, y_batch, _ = batch

image = batch[0][0]
labels = batch[1][0]

# Get predicted labels
X_batch = X_batch.permute(0,3,1,2).to(device)
pred = model(X_batch)
print(image.shape)
plot_img_and_boundingbox(image, labels)
plot_img_and_boundingbox(image, pred[0])

In [None]:
# Print a validation image with its predicted labels

validationloader = DataLoader(test_loader, batch_size=1, shuffle=True, num_workers=1)

validationloader_iter = iter(validationloader)

batch = next(validationloader_iter)

X_batch, y_batch, _ = batch
print(X_batch.shape)
print(y_batch.shape)

image = batch[0][0]
labels = batch[1][0]

# Get predicted labels
X_batch = X_batch.permute(0,3,1,2).to(device)
print(X_batch.shape)
pred = model(X_batch)

plot_img_and_boundingbox(image, labels)

In [None]:
plot_img_and_boundingbox(image,pred[0])
print(pred)
print(calculate_true_positives_in_batch(labels.view(1,4),pred[0].view(1,4))) # Need to change view since we dont have batch