In [1]:
import cv2
import numpy as np
import pandas as pd
import os

# The function below dumps all the images from the parquet file into the filesystem.
def dump_images(df, output_dir='output_images'):
    # Create the output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for i, row in df.iterrows():
        # Decode the image from the buffer
        image = cv2.imdecode(np.frombuffer(row['image'], np.uint8), cv2.IMREAD_COLOR)

        # Construct the file name, assuming you have some identifier in the row (e.g., 'filename' column)
        image_name = row['filename'] if 'filename' in row else f'image_{i}.jpg'
        image_path = os.path.join(output_dir, image_name)

        # Save the image to the filesystem
        cv2.imwrite(image_path, image)

df = pd.read_parquet('./images.parquet', engine='fastparquet')
dump_images(df)


In [76]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchsummary import summary
import config

device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

slope = 0.1

class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        # self.depth = config.B * (5 + config.C)
        self.model = nn.Sequential(
            # layer 1
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), #  adding padding=3 to control output dimension size. (3-7+2*3)/2 + 1 = 224 to yield 112 height and width after 1st maxpool layer as per yolo paper
            nn.LeakyReLU(negative_slope=slope),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # layer 2
            nn.Conv2d(64, 192, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # layer 3
            nn.Conv2d(192, 128, kernel_size=1, stride=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(256, 256, kernel_size=1, stride=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.MaxPool2d(kernel_size=2, stride=2),


            # layer 4
            nn.Conv2d(512, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(512, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(512, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(512, 256, kernel_size=1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(512, 512, kernel_size=1),
            # nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # layer 5
            nn.Conv2d(1024, 512, kernel_size=1),
            # nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(1024, 512, kernel_size=1),
            # nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),

            nn.Conv2d(1024, 1024, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(1024, 1024, kernel_size=3,stride=2,padding=1),#  STRIDE 2, why?
            nn.LeakyReLU(negative_slope=slope),
            # nn.MaxPool2d(kernel_size=3, stride=2),

            # layer 6
            nn.Conv2d(1024, 1024, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),
            nn.Conv2d(1024, 1024, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=slope),

            # connected layer
            nn.Flatten(),
            nn.Linear(config.S * config.S * 1024, 4096),                            # Linear 1
            nn.Dropout(),
            nn.LeakyReLU(negative_slope=slope),
            # final connected layer
            nn.Linear(4096, config.S * config.S * config.B * (5+config.C)),                      # Linear 2
            # nn.Linear(4096, 7*7*30),  # S x S x (B*5 + C) where S=7, B=2, C=20
        )
    
    
    def forward(self, x):
        logits = self.model(x)
        batch_size = x.size(dim=0)
        print(batch_size)
        final_output = torch.reshape(logits, (batch_size, config.S, config.S, config.B, 5 + config.C))
        return final_output

neural_network = NeuralNetwork()
summary(neural_network.model, input_size=(3, 448, 448))




Using mps device
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           9,472
         LeakyReLU-2         [-1, 64, 224, 224]               0
         MaxPool2d-3         [-1, 64, 112, 112]               0
            Conv2d-4        [-1, 192, 112, 112]         110,784
         LeakyReLU-5        [-1, 192, 112, 112]               0
         MaxPool2d-6          [-1, 192, 56, 56]               0
            Conv2d-7          [-1, 128, 56, 56]          24,704
         LeakyReLU-8          [-1, 128, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         295,168
        LeakyReLU-10          [-1, 256, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          65,792
        LeakyReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 512, 56, 56]       1,180,160
        LeakyReLU-14  

In [49]:
df_labels = pd.read_parquet('./labels.parquet', engine='fastparquet')

df_labels

Unnamed: 0,image_id,x,y,orientation,radius,class
0,0,904,386,0.000000,15,0
1,0,905,232,4.171337,43,1
2,0,197,449,3.630285,39,1
3,0,749,355,0.523599,40,1
4,0,207,188,1.692969,48,1
...,...,...,...,...,...,...
22995,999,788,238,4.799655,41,2
22996,999,550,230,0.558505,38,2
22997,999,435,74,3.211406,41,2
22998,999,688,309,5.410521,46,2


In [58]:
df

Unnamed: 0,id,image
0,0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
1,1,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
2,2,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
3,3,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4,4,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
...,...,...
995,995,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
996,996,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
997,997,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
998,998,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [61]:
# Try to annotate the image with the data

def annotate_image(image, label):
    x, y, radius = int(label['x']), int(label['y']), int(label['radius'])
    cv2.circle(image, (x, y), radius, (0, 255, 0), 2)
    return image

test_image = annotate_image(cv2.imread('output_images/image_0.jpg'), df_labels.iloc[1])
cv2.imwrite('image_0_annotated.jpg', test_image)

True

In [65]:
df_labels[df_labels['image_id'] == 1]

Unnamed: 0,image_id,x,y,orientation,radius,class
23,1,285,117,0.0,18,0
24,1,333,81,3.822271,40,1
25,1,709,256,4.014257,43,1
26,1,748,121,4.468043,38,1
27,1,209,179,6.126106,41,1
28,1,355,444,6.248279,38,1
29,1,543,351,0.139626,41,1
30,1,125,298,3.490659,44,1
31,1,467,421,3.961897,45,1
32,1,897,299,3.281219,43,1


In [69]:
from sklearn.model_selection import train_test_split

train_image_df, test_image_df = train_test_split(df, test_size=0.2, random_state=42)
train_image_df


Unnamed: 0,id,image
29,29,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
535,535,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
695,695,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
557,557,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
836,836,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
...,...,...
106,106,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
270,270,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
860,860,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
435,435,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [94]:
train_image_df.iloc[0]['image']

b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x01\x01\x01\x01\x02\x01\x01\x01\x02\x02\x02\x02\x02\x04\x03\x02\x02\x02\x02\x05\x04\x04\x03\x04\x06\x05\x06\x06\x06\x05\x06\x06\x06\x07\t\x08\x06\x07\t\x07\x06\x06\x08\x0b\x08\t\n\n\n\n\n\x06\x08\x0b\x0c\x0b\n\x0c\t\n\n\n\xff\xdb\x00C\x01\x02\x02\x02\x02\x02\x02\x05\x03\x03\x05\n\x07\x06\x07\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\xff\xc0\x00\x11\x08\x02\x00\x04\x00\x03\x01"\x00\x02\x11\x01\x03\x11\x01\xff\xc4\x00\x1f\x00\x00\x01\x05\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\xff\xc4\x00\xb5\x10\x00\x02\x01\x03\x03\x02\x04\x03\x05\x05\x04\x04\x00\x00\x01}\x01\x02\x03\x00\x04\x11\x05\x12!1A\x06\x13Qa\x07"q\x142\x81\x91\xa1\x08#B\xb1\xc1\x15R\xd1\xf0$3br\x82\t\n\x16\x17\x18\x19\x1a%&\'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz\x83\x84\x85\x86\x87\x88\x89\x8a\x92\x93\x94\x95\x96\x97\

In [103]:
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import io
from torchvision import transforms
from PIL import Image
 

optimizer = torch.optim.Adam(neural_network.parameters(), lr=config.LEARNING_RATE)

class CustomDataset(Dataset):
    def __init__(self, image_df, label_df):
        self.image_df = image_df
        self.label_df = label_df

    def __len__(self):
        return len(self.image_df)

    def __getitem__(self, idx):
        image = self.image_df.iloc[idx]
        image_id = image['id']
        image_bytes = image['image'] # x
        image = Image.open(io.BytesIO(image_bytes))
        transform = transforms.ToTensor()
        image_tensor = transform(image)

        labels = self.label_df[self.label_df['image_id'] == image_id] # y
        
        labels_obj = []
        labels_obj = torch.zeros(config.S, config.S, config.B, 5 + config.C)

        # find grid cell for S
        max_x = 1024
        max_y = 512
        box_index = 0
        for index, label in labels.iterrows():
            x, y = int(label['x']), int(label['y'])
            grid_x = int(x / (max_x / config.S))
            grid_y = int(y / (max_y / config.S))
            
            for box_index in range(config.B):
                if labels_obj[grid_x, grid_y, box_index, 4] == 0: # put parameters in the box that has confidence is 0
                    labels_obj[grid_x, grid_y, box_index, 0] = int(label['x'])
                    labels_obj[grid_x, grid_y, box_index, 1] = int(label['y'])
                    labels_obj[grid_x, grid_y, box_index, 2] = int(label['radius'])
                    labels_obj[grid_x, grid_y, box_index, 3] = int(label['orientation'])
                    labels_obj[grid_x, grid_y, box_index, 4] = 1 # confidence
                    labels_obj[ grid_x, grid_y, box_index, 5] = int(int(label['class']) == 0)
                    labels_obj[grid_x, grid_y, box_index, 6] = int(int(label['class']) == 1)
                    labels_obj[grid_x, grid_y, box_index, 7] = int(int(label['class']) == 2)
                    break

        
        return image_tensor, labels_obj

# Load the data
def load_data(image_df, label_df):
    # Split the data into training and validation sets
    dataset = CustomDataset(image_df=image_df, label_df=label_df)

    # Split image_df into 80% training and 20% testing
    train_image_df, test_image_df = train_test_split(image_df, test_size=0.2, random_state=42)
   
    # Load the dataset
    train_set = CustomDataset(train_image_df, label_df)
    test_set = CustomDataset(test_image_df, label_df)

    train_loader = DataLoader(
        train_set,
        batch_size=config.BATCH_SIZE,
        shuffle=False
    )
    test_loader = DataLoader(
        test_set,
        batch_size=config.BATCH_SIZE,
        shuffle=False
    )

    return train_loader, test_loader

train_loader, test_loader = load_data(image_df=df, label_df=df_labels)

In [102]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # print(X)
        # print(y)
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        print(pred)

        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

class LossFunction():
    def __init__(self):
        self.mse = nn.MSELoss()
    
    def __call__(self, pred, target):
        # First Term
        # final_output = torch.reshape(logits, (batch_size, config.S, config.S, config.B, 5 + config.C))
        batch_size = pred.size(dim=0)

        for b in range(batch_size):
            for i in range(config.S):  # Loop over rows
                for j in range(config.S):  # Loop over columns
                    for box in range(config.B):
                        # x, y, r, o, c
                        x, y, r, o, c = pred[b,i,j,box,:5]
                        class_probabilities = pred[b,i,j,box,5:]
                        
                        target_x, target_y, target_r, target_o, target_c = target[b,i,j,box,:5]
                        
                    
                    
        

loss_fn = LossFunction()
train(train_loader, neural_network, loss_fn, optimizer)

tensor([[[[0.2235, 0.2275, 0.2275,  ..., 0.2941, 0.2941, 0.2902],
          [0.2314, 0.2275, 0.2235,  ..., 0.2902, 0.2902, 0.2863],
          [0.2353, 0.2314, 0.2235,  ..., 0.2902, 0.2941, 0.2902],
          ...,
          [0.1686, 0.1765, 0.1725,  ..., 0.2235, 0.2196, 0.2314],
          [0.1686, 0.1725, 0.1725,  ..., 0.2196, 0.2196, 0.2314],
          [0.1608, 0.1686, 0.1647,  ..., 0.2157, 0.2157, 0.2314]],

         [[0.4824, 0.4863, 0.4863,  ..., 0.4941, 0.4941, 0.4902],
          [0.4902, 0.4863, 0.4824,  ..., 0.4902, 0.4902, 0.4863],
          [0.4941, 0.4902, 0.4824,  ..., 0.4902, 0.4941, 0.4902],
          ...,
          [0.4745, 0.4824, 0.4784,  ..., 0.4824, 0.4784, 0.4902],
          [0.4745, 0.4784, 0.4784,  ..., 0.4784, 0.4784, 0.4902],
          [0.4667, 0.4745, 0.4706,  ..., 0.4745, 0.4745, 0.4902]],

         [[0.0667, 0.0706, 0.0706,  ..., 0.0980, 0.0980, 0.0941],
          [0.0745, 0.0706, 0.0667,  ..., 0.0941, 0.0941, 0.0902],
          [0.0784, 0.0745, 0.0667,  ..., 0

AttributeError: 'list' object has no attribute 'to'

In [31]:
import config

class YOLOv1(nn.Module):
    def __init__(self):
        super().__init__()
        self.depth = config.B * 5 + config.C

        layers = [
            # Probe(0, forward=lambda x: print('#' * 5 + ' Start ' + '#' * 5)),
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),                   # Conv 1
            nn.LeakyReLU(negative_slope=0.1),
            # Probe('conv1', forward=probe_dist),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 192, kernel_size=3, padding=1),                           # Conv 2
            nn.LeakyReLU(negative_slope=0.1),
            # Probe('conv2', forward=probe_dist),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(192, 128, kernel_size=1),                                     # Conv 3
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(256, 256, kernel_size=1),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.1),
            # Probe('conv3', forward=probe_dist),
            nn.MaxPool2d(kernel_size=2, stride=2)
        ]

        for i in range(4):                                                          # Conv 4
            layers += [
                nn.Conv2d(512, 256, kernel_size=1),
                nn.Conv2d(256, 512, kernel_size=3, padding=1),
                nn.LeakyReLU(negative_slope=0.1)
            ]
        layers += [
            nn.Conv2d(512, 512, kernel_size=1),
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.1),
            # Probe('conv4', forward=probe_dist),
            nn.MaxPool2d(kernel_size=2, stride=2)
        ]

        for i in range(2):                                                          # Conv 5
            layers += [
                nn.Conv2d(1024, 512, kernel_size=1),
                nn.Conv2d(512, 1024, kernel_size=3, padding=1),
                nn.LeakyReLU(negative_slope=0.1)
            ]
        layers += [
            nn.Conv2d(1024, 1024, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.1),
            nn.Conv2d(1024, 1024, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(negative_slope=0.1),
            # Probe('conv5', forward=probe_dist),
        ]

        for _ in range(2):                                                          # Conv 6
            layers += [
                nn.Conv2d(1024, 1024, kernel_size=3, padding=1),
                nn.LeakyReLU(negative_slope=0.1)
            ]
        # layers.append(Probe('conv6', forward=probe_dist))

        layers += [
            nn.Flatten(),
            nn.Linear(config.S * config.S * 1024, 4096),                            # Linear 1
            nn.Dropout(),
            nn.LeakyReLU(negative_slope=0.1),
            # Probe('linear1', forward=probe_dist),
            nn.Linear(4096, config.S * config.S * self.depth),                      # Linear 2
            # Probe('linear2', forward=probe_dist),
        ]

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return torch.reshape(
            self.model.forward(x),
            (x.size(dim=0), config.S, config.S, self.depth)
        )

summary(YOLOv1().model, input_size=(3, 448, 448))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           9,472
         LeakyReLU-2         [-1, 64, 224, 224]               0
         MaxPool2d-3         [-1, 64, 112, 112]               0
            Conv2d-4        [-1, 192, 112, 112]         110,784
         LeakyReLU-5        [-1, 192, 112, 112]               0
         MaxPool2d-6          [-1, 192, 56, 56]               0
            Conv2d-7          [-1, 128, 56, 56]          24,704
         LeakyReLU-8          [-1, 128, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         295,168
        LeakyReLU-10          [-1, 256, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          65,792
        LeakyReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 512, 56, 56]       1,180,160
        LeakyReLU-14          [-1, 512,

In [46]:
from unittest import TestCase

def test_shape():
    batch_size = 64
    test_model = YOLOv1().to(device)
    test_tensor = torch.rand((batch_size, 3, config.IMAGE_SIZE[0], config.IMAGE_SIZE[1])).to(device)
    print(test_tensor.size())
    result = test_model.forward(test_tensor)
    print(tuple(result.size()))
    print((128, config.S, config.S, test_model.depth))

test_shape()


torch.Size([64, 3, 448, 448])
(64, 7, 7, 30)
(128, 7, 7, 30)
