In [None]:
import torch
from classifier import Darknet53

def test_output_shape(num_classes=1000, image_size=256, batch_size=10):
    """Check that Darknet53 returns one score vector per input image.

    A single random image and a random batch are passed through a freshly
    constructed classifier, and each output shape is compared against
    (number_of_images, num_classes).

    Args:
        num_classes: Expected size of the class dimension (1000 for ImageNet-1k).
        image_size: Height and width of the square random test images.
        batch_size: Number of images in the batched input.

    Raises:
        AssertionError: If either output shape differs from the expected one.
    """
    # Random inputs laid out as (batch_size, channels, height, width).
    input_tensor = torch.randn(1, 3, image_size, image_size)
    input_tensor_batch = torch.randn(batch_size, 3, image_size, image_size)

    classifier = Darknet53()

    # Only the shapes are inspected, so skip autograd bookkeeping for the forward passes.
    with torch.no_grad():
        output = classifier(input_tensor)
        output_batch = classifier(input_tensor_batch)

    assert output.shape == (1, num_classes), (
        f'single-image output has shape {tuple(output.shape)}, expected {(1, num_classes)}'
    )
    assert output_batch.shape == (batch_size, num_classes), (
        f'batch output has shape {tuple(output_batch.shape)}, expected {(batch_size, num_classes)}'
    )


test_output_shape()

Test single and batch inputs. The output should have shape (batch_size, 1000): 1x1000 for a single image and 10x1000 for the batch of 10, with one score for each of the 1000 classes in the ImageNet-1k dataset.

In [None]:
# Smoke test: after one forward/backward pass every parameter must hold a gradient.

# Random image batch; assumed layout is (batch_size, channels, height, width)
input_tensor = torch.randn(1, 3, 256, 256)

# Random target; assumed layout is (batch_size, num_classes)
target_tensor = torch.randn(1, 1000)

# Fresh classifier and a single forward pass
classifier = Darknet53()
output = classifier(input_tensor)

# Mean-squared-error loss against the random target, then backpropagate
loss = torch.nn.MSELoss()(output, target_tensor)
loss.backward()

# Every parameter should have had its .grad populated by the backward pass
assert all(weight.grad is not None for weight in classifier.parameters())
    

Test that the dataset objects load the ImageNet training and validation directories correctly.

In [None]:
from imagenet import ImageNetDataset
import matplotlib.pyplot as plt

def transform_image(image, size=(256, 256)):
    """Resize an image to a fixed resolution.

    Args:
        image: Object exposing ``resize(size)`` (presumably a PIL image;
            confirm against what ImageNetDataset passes in).
        size: Target size handed straight to ``image.resize``. Defaults to
            (256, 256), the resolution used so far in this notebook.

    Returns:
        Whatever ``image.resize(size)`` returns.
    """
    return image.resize(size)

# Locations of the ImageNet-1k images and their label files on the local machine.
TRAINING_IMAGE_PATH = "C:\\Repos\\imagenet-1k\\training\\"
TRAINING_ANNOTATION_PATH = "C:\\Repos\\imagenet-1k\\labels\\synsets.txt"

VALIDATION_IMAGE_PATH = "C:\\Repos\\imagenet-1k\\validation\\"
VALIDATION_ANNOTATION_PATH = "C:\\Repos\\imagenet-1k\\labels\\val_labels.txt"

training_dataset = ImageNetDataset(TRAINING_IMAGE_PATH, TRAINING_ANNOTATION_PATH, transform_image)
validation_dataset = ImageNetDataset(VALIDATION_IMAGE_PATH, VALIDATION_ANNOTATION_PATH, transform_image)

# Index each dataset once and reuse the sample for both the plot and the label.
training_sample = training_dataset[0]
validation_sample = validation_dataset[0]

# One subplot per image: two imshow calls on the same axes would leave only
# the second image visible.
fig, (training_ax, validation_ax) = plt.subplots(1, 2, figsize=(10, 5))

# permute(1, 2, 0) moves the first axis last (assumes a channels-first image
# tensor), which is the layout imshow expects.
training_ax.imshow(training_sample[0].permute(1, 2, 0).int())
training_ax.set_title('Training sample 0')
print(f'First image label: {training_sample[1]}')

validation_ax.imshow(validation_sample[0].permute(1, 2, 0).int())
validation_ax.set_title('Validation sample 0')
print(f'First image label: {validation_sample[1]}')



Test saving the model weights to a file.

In [None]:
from classifier import Darknet53

# The backslash is doubled like the other Windows paths in this notebook:
# '\c' is not a recognised escape sequence and newer Python versions warn
# about it. The resulting string is unchanged.
WEIGHTS_PATH = '.\\classifer.weights'

model = Darknet53(weights_path=WEIGHTS_PATH)

# Persist the model's current weights (destination is presumably WEIGHTS_PATH;
# confirm in classifier.py).
model.save_weights()

Test loading the model weights from a file.

In [None]:
from classifier import Darknet53

# The backslash is doubled like the other Windows paths in this notebook:
# '\c' is not a recognised escape sequence and newer Python versions warn
# about it. The resulting string is unchanged.
WEIGHTS_PATH = '.\\classifer.weights'

model = Darknet53(weights_path=WEIGHTS_PATH)

# Read previously saved weights back into the model (source is presumably
# WEIGHTS_PATH; confirm in classifier.py).
model.load_weights()

Convert the validation localization file (LOC_val_solution.csv) into direct class-index labels.

In [None]:
import pandas as pd

PATH = 'C:\\Repos\\imagenet-1k\\labels\\LOC_val_solution.csv'
LABELS = 'C:\\Repos\\imagenet-1k\\labels\\synsets.txt'

df = pd.read_csv(PATH)

with open(LABELS, 'r') as f:
    labels = f.readlines()

# Map each synset id to its line number in the synsets file. Stripping the
# lines means a missing trailing newline on the last line cannot break the
# lookup, and the dict replaces a linear list search per row. setdefault keeps
# the first occurrence, matching what list.index would return.
synset_to_index = {}
for line_number, line in enumerate(labels):
    synset_to_index.setdefault(line.strip(), line_number)

# Sort rows by image id
df = df.sort_values('ImageId')

# Keep only the first whitespace-separated token of each prediction string
# (the synset id; the remaining tokens are discarded) and replace it with its
# class index. An id missing from the synsets file raises KeyError.
df["PredictionString"] = df["PredictionString"].apply(
    lambda prediction: synset_to_index[prediction.split()[0]]
)

# Export left disabled. NOTE(review): this writes 'val_labels.csv' while other
# cells read 'val_labels.txt'; confirm the intended file name before enabling.
#df['PredictionString'].to_csv('val_labels.csv', index=False, header=False)

df.head(20)

Evaluate the model on the validation set after 5 epochs of training.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

from imagenet import ImageNetDataset
from classifier import Darknet53
from utils import scale
import matplotlib.pyplot as plt
from labels import LABELS

VALIDATION_IMAGE_PATH = "C:\\Repos\\imagenet-1k\\validation\\"
VALIDATION_ANNOTATION_PATH = "C:\\Repos\\imagenet-1k\\labels\\val_labels.txt"

# The backslash is doubled like the paths above: '\c' is not a recognised
# escape sequence and newer Python versions warn about it. Same string value.
WEIGHTS_PATH = '.\\classifer.weights'

# Use the GPU when one is available; otherwise fall back to the CPU so the
# cell still runs (slowly) instead of failing on the device transfer.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

validation_dataset = ImageNetDataset(VALIDATION_IMAGE_PATH, VALIDATION_ANNOTATION_PATH, scale)
validation = torch.utils.data.DataLoader(validation_dataset, batch_size=128, shuffle=False)

# NOTE(review): this cell never calls model.load_weights(). Confirm that the
# constructor loads from weights_path; otherwise the accuracy below is that
# of an untrained network.
model = Darknet53(weights_path=WEIGHTS_PATH)
model.to(DEVICE)

# Set the classifier to evaluation mode
model.eval()

correct = 0
total = 0

# Evaluate the classifier on the validation set; no gradients are needed.
with torch.no_grad():
    for images, labels in validation:

        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        outputs = model(images)
        # Top-1 prediction: index of the highest score in each row.
        predicted = outputs.argmax(dim=1)

        # Count the number of correct predictions
        correct += (predicted == labels.view(-1)).sum().item()
        total += labels.size(0)

        # Running accuracy over all batches seen so far.
        print(f'Accuracy of the network on the {total} test images: {100 * correct / total}%')

if total == 0:
    # Without this guard an empty loader fails with a bare ZeroDivisionError.
    raise RuntimeError('The validation loader produced no samples; check the validation paths.')

accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')

Trained at 256x256 input resolution for 20 epochs with a top-1 accuracy of 65% and top-5 accuracy of 85% on the validation set.

Next step: increase the input resolution to 416x416 and fine-tune for ~10 epochs, as suggested by the original authors of the YOLOv3 paper.

The batch size had to be reduced to 16 so that a full batch fits in VRAM.

In [None]:
from classifier import Darknet53

# The backslash is doubled like the other Windows paths in this notebook:
# '\c' is not a recognised escape sequence and newer Python versions warn
# about it. The resulting string is unchanged.
WEIGHTS_PATH = '.\\classifer.weights'

# Smoke check: the model can be constructed with a weights path.
model = Darknet53(weights_path=WEIGHTS_PATH)

# Marker output showing that the cell ran to completion.
print("hi")