In [35]:
import shutil
import os
import pathlib
import numpy as np
import pandas as pd
import plotly.express as px
import kaggle

import tensorflow as tf

# Implementation Outline
- Do pretraining for image classification to learn features
- Convert model to object detection
- Make custom loss function
- Get object detection dataset
- Train according to paper specifications
- Test model

## Pretraining

For pretraining I will use a smaller version of the ImageNet dataset than the one used in the paper.

In [2]:
## Load pretraining data
# Authenticate the Kaggle API client before any dataset call is made.
kaggle.api.authenticate()
# Download of the ImageNet-mini dataset (unzipped into the pretraining image folder).
# Left commented out -- presumably because the data is already on disk; uncomment to fetch it again.
# kaggle.api.dataset_download_files('ifigotin/imagenetmini-1000',
#                                 path='/home/misha/Desktop/data/yolo_paper/pretrain_data/image_data/',
#                                 unzip=True)



### Data Preprocessing

In [3]:
# Root folder of the pretraining data.
# Defaults to the original local location, but can be redirected by setting the
# YOLO_PRETRAIN_DATA_PATH environment variable so the notebook runs on other machines.
# Joining with '' guarantees a trailing separator, which the rest of the notebook
# relies on when it builds paths via `PRETRAIN_DATA_PATH + '...'`.
PRETRAIN_DATA_PATH = os.path.join(
    os.environ.get('YOLO_PRETRAIN_DATA_PATH', '/home/misha/Desktop/data/yolo_paper/pretrain_data/'),
    '',
)
# Tab-separated file mapping class codes to human-readable labels.
labels_txt = PRETRAIN_DATA_PATH + 'words.txt'

In [4]:
# words.txt is tab-separated; the resulting frame has the columns
# 'Unnamed: 0', 'code' and 'object' (class code -> label text).
labels = pd.read_csv(labels_txt, sep='\t')

In [5]:
labels

Unnamed: 0,code,object
0,n00001740,entity
1,n00001930,physical entity
2,n00002137,"abstraction, abstract entity"
3,n00002452,thing
4,n00002684,"object, physical object"
...,...,...
82110,n15299225,study hall
82111,n15299367,"Transfiguration, Transfiguration Day, August 6"
82112,n15299585,usance
82113,n15299783,window


In [7]:
# labels[labels['code'] == 'n03485794214']['object'].values[0]

In [8]:
# Number of entries in the training folder; every entry is counted as one class.
n_classes = len(os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/train/'))

In [9]:
n_classes

999

In [10]:
len(os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/val/'))

999

In [11]:
# Check that the validation and training splits contain the same class folders.
# os.listdir returns entries in arbitrary order, so both listings are sorted first;
# otherwise identical folder sets could compare as unequal.
(
    sorted(os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/val/'))
    == sorted(os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/train/'))
)

True

In [12]:
os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/')

['val', 'train']

#### Rename the folders to their label instead of the code

In [13]:
# for split in os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/'):
#     for folder in os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/' + split):
#         try:
#             label = labels[labels['code'] == folder]['object'].values[0]
#         except IndexError as err:
#             print(f"No label found for Split: {split} Folder: {folder}")
#             continue

#         ## Rename folder
#         source_path = PRETRAIN_DATA_PATH + f'image_data/imagenet-mini/{split}/{folder}'
#         destination_path = PRETRAIN_DATA_PATH + f'image_data/imagenet-mini/{split}/{label}'

#         try:
#             shutil.move(source_path, destination_path)
#         except OSError as err:
#             print(f"Error moving folder({folder}): {err}")

In [14]:
# Re-check that both splits still contain the same folder names.
# os.listdir returns entries in arbitrary order, so both listings are sorted first;
# otherwise identical folder sets could compare as unequal.
(
    sorted(os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/val/'))
    == sorted(os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/train/'))
)

True

#### Load data into TensorFlow datasets.

In [15]:
# Folder holding the ImageNet-mini data; each split lives in its own sub-folder.
imagenet_mini_root = pathlib.Path(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/')
train_dir = imagenet_mini_root / 'train'
val_dir = imagenet_mini_root / 'val'

In [16]:
# Both splits are loaded with exactly the same settings; only the source folder differs.
dataset_options = dict(
    labels='inferred',
    color_mode='rgb',
    batch_size=32,
    label_mode='categorical',   ## Vector Representation (Use categorical_crossentropy loss)
    image_size=(224, 224),
    crop_to_aspect_ratio=True,
    seed=1,
    shuffle=True,
)
train_df = tf.keras.utils.image_dataset_from_directory(train_dir, **dataset_options)
val_df = tf.keras.utils.image_dataset_from_directory(val_dir, **dataset_options)

Found 34745 files belonging to 999 classes.


I0000 00:00:1737638447.138351    4124 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9716 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:01:00.0, compute capability: 8.6


Found 3923 files belonging to 999 classes.


#### Data augmentation

In [17]:
# Augmentation pipeline: a random horizontal flip followed by a random rotation (factor 0.2).
random_flip = tf.keras.layers.RandomFlip(mode='horizontal', seed=1)
random_rotation = tf.keras.layers.RandomRotation(factor=0.2, seed=1)
data_augmentation = tf.keras.Sequential(layers=[random_flip, random_rotation])

In [18]:
def _augment_batch(images, targets):
    """Pass the images of one batch through `data_augmentation`; targets are returned unchanged."""
    return data_augmentation(images), targets


train_aug = train_df.map(_augment_batch)
# NOTE(review): the validation split is augmented as well -- confirm this is intended.
val_aug = val_df.map(_augment_batch)

In [19]:
# Append the augmented copy after the original batches, then prefetch one batch ahead.
# NOTE(review): this cell reassigns train_df / val_df from themselves, so running it
# a second time concatenates the augmented data again.
train_df = train_df.concatenate(train_aug)
train_df = train_df.prefetch(buffer_size=1)
val_df = val_df.concatenate(val_aug)
val_df = val_df.prefetch(buffer_size=1)

In [21]:
# Upper bound on the number of training images: len() counts batches, and a
# final batch may hold fewer than 32 images.
len(train_df) * 32

69504

In [22]:
# Upper bound on the number of validation images: len() counts batches, and a
# final batch may hold fewer than 32 images.
len(val_df) * 32

7872

### Pretraining

In [1]:
# class FullConvLayer(tf.keras.layers.Layer):
#     """
#     Custom layer to hold the full convolution block. This is done to reduce copy and paste, and to make code simple.
#     Always follows this simple structure:
    
#     Convolutional layers
#     Maxpool layer
#     """
#     def __init__(self, conv_map:dict, **kwargs):
#         """
#         Takes a dictionary/map where each key represents one convolution layer, and the value pair is a map 
#         with parameter values (i.e. {'filters': 10, 'kernel_size': (2,3), etc...})
#         """
#         super().__init__(**kwargs)
#         self.layers = []
#         for layer, param_map in list(conv_map.items()):
#             self.layers.append(tf.keras.layers.Conv2D(**param_map))

#         ## Append maxpool layer to block
#         self.layers.append(tf.keras.layers.MaxPool2D(pool_size=(2,2), strides=2))

                           
#     def call(self, inputs):
#         for i in range(self.layers):
            
            

# Focus on general architecture

In [36]:
class PreTrainingModel(tf.keras.Model):
    """
    This is the model that will be used for the pretrained layers in the YOLO network
    Will have a export method that will export the layers needed for the full YOLO network
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.input_layer = tf.keras.layers.Input(shape=(224, 224, 3), name='input_layer') ## Standard according to paper

        ## First 20 convolution layers of the network
        ## All layers done according to paper standards
        