In [59]:
import shutil
import os
import pathlib
import numpy as np
import pandas as pd
import plotly.express as px
import tensorflow as tf
import kaggle

# Implmentation Outline
- Do pretraining for image classification to learn features
- Convert model to object detection
- Make custom loss function
- Get object detection dataset
- Train according to paper specifications
- Test model

## Pretraining

For pretraining I will use a smaller version of the ImageNet dataset then the one used in the paper.

In [50]:
## Load pretraining data
kaggle.api.authenticate()
kaggle.api.dataset_download_files('ifigotin/imagenetmini-1000',
                                path='/home/misha/Desktop/data/yolo_paper/pretrain_data/image_data/',
                                unzip=True)



### Data Preprocessing

In [51]:
PRETRAIN_DATA_PATH = '/home/misha/Desktop/data/yolo_paper/pretrain_data/'
labels_txt = PRETRAIN_DATA_PATH + 'words.txt'

In [52]:
labels = pd.read_csv(labels_txt, sep='\t')

In [53]:
labels

Unnamed: 0,code,object
0,n00001740,entity
1,n00001930,physical entity
2,n00002137,"abstraction, abstract entity"
3,n00002452,thing
4,n00002684,"object, physical object"
...,...,...
82110,n15299225,study hall
82111,n15299367,"Transfiguration, Transfiguration Day, August 6"
82112,n15299585,usance
82113,n15299783,window


In [54]:
labels[labels['code'] == 'n03485794']['object'].values[0]

'handkerchief, hankie, hanky, hankey'

In [62]:
n_classes = len(os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/train/'))

In [63]:
n_classes

999

In [64]:
len(os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/val/'))

122

In [57]:
os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/')

['val', 'train']

#### Rename the folders to their label instead of the code

In [60]:
for split in os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/'):
    for folder in os.listdir(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/' + split):
        try:
            label = labels[labels['code'] == folder]['object'].values[0]
        except Exception as err:
            print(f"Split: {split}  Folder:{folder} Error:{err}")
            continue

        source_path = PRETRAIN_DATA_PATH + f'image_data/imagenet-mini/{split}/{folder}'
        destination_path = PRETRAIN_DATA_PATH + f'image_data/imagenet-mini/{split}/{label}'
        
        try:
            # Move the folder and its contents
            shutil.move(source_path, destination_path)  
            
            # Remove the original (now empty) folder
            os.rmdir(source_path)  
        except OSError as e:
            print(f"Error moving or deleting folder: {e}")

Error moving or deleting folder: [Errno 2] No such file or directory: '/home/misha/Desktop/data/yolo_paper/pretrain_data/image_data/imagenet-mini/val/n03126707'
Error moving or deleting folder: [Errno 2] No such file or directory: '/home/misha/Desktop/data/yolo_paper/pretrain_data/image_data/imagenet-mini/val/n04429376'
Error moving or deleting folder: [Errno 2] No such file or directory: '/home/misha/Desktop/data/yolo_paper/pretrain_data/image_data/imagenet-mini/val/n02101556'
Error moving or deleting folder: [Errno 2] No such file or directory: '/home/misha/Desktop/data/yolo_paper/pretrain_data/image_data/imagenet-mini/val/n03599486'
Error moving or deleting folder: [Errno 2] No such file or directory: '/home/misha/Desktop/data/yolo_paper/pretrain_data/image_data/imagenet-mini/val/n01675722'
Error moving or deleting folder: [Errno 2] No such file or directory: '/home/misha/Desktop/data/yolo_paper/pretrain_data/image_data/imagenet-mini/val/n03950228'
Error moving or deleting folder: [

#### Move load data into Tensorflow datasets.

In [15]:
train_dir = pathlib.Path(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/train/')
val_dir = pathlib.Path(PRETRAIN_DATA_PATH + 'image_data/imagenet-mini/val/')

In [24]:
train_df = tf.keras.utils.image_dataset_from_directory(
    train_dir, 
    labels='inferred',
    color_mode='rgb',
    batch_size=32, 
    label_mode='int',
    image_size=(224, 224),
    crop_to_aspect_ratio=True,
    seed=1, 
    shuffle=True
)
val_df = tf.keras.utils.image_dataset_from_directory(
    val_dir, 
    labels='inferred',
    color_mode='rgb',
    batch_size=32, 
    label_mode='int',
    image_size=(224, 224),
    crop_to_aspect_ratio=True,
    seed=1, 
    shuffle=True
)

Found 34745 files belonging to 999 classes.
Found 461 files belonging to 122 classes.


In [25]:
len(os.listdir(val_dir))

122