# Build YOLOv5 Dataset structure

Before we can train the model, we need to apply some coordinate transformations to the bounding-box annotations/tags and also create the required folder structure.

In [4]:
import pandas as pd

# System 
import sys
sys.path.append('../model')
# sys.path.remove('yolov5\\')
import os

# Self-made libraries
from utils_eda import bboxes
from utils_eda import util_funcs as uf

# Reload to pick last changes
import importlib
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Getting the yolov5 coordinates

In [5]:
# Load the training annotations.
# The CSV is read in chunks of 50,000 rows; the chunks are gathered first and
# combined with ONE concat. Concatenating inside the loop re-copies every row
# collected so far on each iteration, which grows quadratically with the
# number of chunks.
# NOTE(review): if read_csv_chunks yields nothing, pd.concat raises ValueError
# here (the loop version would instead fail on set_index) — confirm that an
# empty annotation file is not an expected input.
train_tags_df = pd.concat(
    list(uf.read_csv_chunks(chunksize=50000)),
    ignore_index=True,
).set_index('img_name')  # one row per bounding box, indexed by image file name

train_tags_df

Unnamed: 0_level_0,x1,y1,x2,y2,type,total_width,total_height
img_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
train_0.jpg,208,537,422,814,object,3024,3024
train_0.jpg,1268,1923,1365,2209,object,3024,3024
train_0.jpg,1135,2074,1261,2166,object,3024,3024
train_0.jpg,1045,2085,1122,2258,object,3024,3024
train_0.jpg,976,2036,1040,2177,object,3024,3024
...,...,...,...,...,...,...,...
train_999.jpg,422,2386,675,2542,object,2336,4160
train_999.jpg,427,2581,667,2715,object,2336,4160
train_999.jpg,699,2365,823,2474,object,2336,4160
train_999.jpg,1849,1678,2108,1769,object,2336,4160


In [6]:
# Convert the corner-style training annotations (x1, y1, x2, y2 plus the image
# width/height columns) into YOLOv5 label rows. As the displayed result shows,
# the output has class_id, center_x, center_y, width_bb and height_bb columns,
# with the box values expressed as fractions of the image size.
# NOTE(review): the exact rounding/normalisation is done inside
# to_yolov5_coords, which is not visible here — confirm before relying on it.
yolo_labels_df = uf.to_yolov5_coords(train_tags_df)
yolo_labels_df

Unnamed: 0_level_0,class_id,center_x,center_y,width_bb,height_bb
img_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
train_0.jpg,1,0.104167,0.223214,0.070767,0.091601
train_0.jpg,1,0.435185,0.683201,0.032077,0.094577
train_0.jpg,1,0.396164,0.701058,0.041667,0.030423
train_0.jpg,1,0.358135,0.717923,0.025463,0.057209
train_0.jpg,1,0.333333,0.696429,0.021164,0.046627
...,...,...,...,...,...
train_999.jpg,1,0.234589,0.592308,0.108305,0.037500
train_999.jpg,1,0.234161,0.636538,0.102740,0.032212
train_999.jpg,1,0.325771,0.581490,0.053082,0.026202
train_999.jpg,1,0.846747,0.414183,0.110873,0.021875


We save these labels into one text file per image, to feed to the model later:

In [7]:
# Write the training labels to disk. Per the cell output, one
# "<image name>.txt saved" line is printed for each file written.
# NOTE(review): the destination folder is chosen inside labels_to_txt, which is
# not visible here — confirm it matches the layout YOLOv5 expects.
uf.labels_to_txt(yolo_labels_df)

 train_0.txt saved
 train_1.txt saved
 train_10.txt saved
 train_100.txt saved
 train_1000.txt saved
 train_1001.txt saved
 train_1002.txt saved
 train_1003.txt saved
 train_1004.txt saved
 train_1005.txt saved
 train_1006.txt saved
 train_1007.txt saved
 train_1008.txt saved
 train_1009.txt saved
 train_101.txt saved
 train_1010.txt saved
 train_1011.txt saved
 train_1012.txt saved
 train_1013.txt saved
 train_1014.txt saved
 train_1015.txt saved
 train_1016.txt saved
 train_1017.txt saved
 train_1018.txt saved
 train_1019.txt saved
 train_102.txt saved
 train_1020.txt saved
 train_1021.txt saved
 train_1022.txt saved
 train_1023.txt saved
 train_1025.txt saved
 train_1026.txt saved
 train_1027.txt saved
 train_1028.txt saved
 train_1029.txt saved
 train_103.txt saved
 train_1030.txt saved
 train_1031.txt saved
 train_1032.txt saved
 train_1033.txt saved
 train_1034.txt saved
 train_1035.txt saved
 train_1036.txt saved
 train_1037.txt saved
 train_1038.txt saved
 train_1039.txt saved


Repeat the same steps for the validation and test sets.

Validation set

In [8]:
# Load the validation annotations.
# Chunks are gathered and combined with a single concat instead of growing the
# frame inside the loop, which would re-copy all accumulated rows per chunk.
# NOTE(review): an empty chunk stream makes pd.concat raise ValueError —
# confirm that an empty annotation file is not an expected input.
val_tags_df = pd.concat(
    list(uf.read_csv_chunks(img_set='val', chunksize=50000)),
    ignore_index=True,
).set_index('img_name')  # one row per bounding box, indexed by image file name

# Convert the boxes to YOLOv5 label rows.
# NOTE: this rebinds yolo_labels_df, replacing the training labels produced by
# the earlier cell; re-run that cell if the training labels are needed again.
yolo_labels_df = uf.to_yolov5_coords(val_tags_df)

# Write one label file per validation image (the cell output lists each
# "<image name>.txt saved").
uf.labels_to_txt(yolo_labels_df)

 val_0.txt saved
 val_1.txt saved
 val_10.txt saved
 val_100.txt saved
 val_101.txt saved
 val_102.txt saved
 val_103.txt saved
 val_104.txt saved
 val_105.txt saved
 val_106.txt saved
 val_107.txt saved
 val_108.txt saved
 val_109.txt saved
 val_11.txt saved
 val_110.txt saved
 val_111.txt saved
 val_112.txt saved
 val_113.txt saved
 val_114.txt saved
 val_115.txt saved
 val_116.txt saved
 val_117.txt saved
 val_118.txt saved
 val_119.txt saved
 val_12.txt saved
 val_120.txt saved
 val_121.txt saved
 val_122.txt saved
 val_123.txt saved
 val_124.txt saved
 val_125.txt saved
 val_126.txt saved
 val_127.txt saved
 val_128.txt saved
 val_129.txt saved
 val_13.txt saved
 val_130.txt saved
 val_131.txt saved
 val_132.txt saved
 val_133.txt saved
 val_134.txt saved
 val_135.txt saved
 val_136.txt saved
 val_137.txt saved
 val_138.txt saved
 val_139.txt saved
 val_14.txt saved
 val_140.txt saved
 val_141.txt saved
 val_142.txt saved
 val_143.txt saved
 val_144.txt saved
 val_145.txt saved
 v

Test set

In [9]:
# Load the test annotations.
# Chunks are gathered and combined with a single concat instead of growing the
# frame inside the loop, which would re-copy all accumulated rows per chunk.
# NOTE(review): an empty chunk stream makes pd.concat raise ValueError —
# confirm that an empty annotation file is not an expected input.
test_tags_df = pd.concat(
    list(uf.read_csv_chunks(img_set='test', chunksize=50000)),
    ignore_index=True,
).set_index('img_name')  # one row per bounding box, indexed by image file name

# Convert the boxes to YOLOv5 label rows.
# NOTE: this rebinds yolo_labels_df, replacing the labels produced by the
# previous cell; after this cell the name refers to the test-set labels.
yolo_labels_df = uf.to_yolov5_coords(test_tags_df)

# Write one label file per test image (the cell output lists each
# "<image name>.txt saved").
uf.labels_to_txt(yolo_labels_df)

 test_0.txt saved
 test_1.txt saved
 test_10.txt saved
 test_100.txt saved
 test_1000.txt saved
 test_1001.txt saved
 test_1002.txt saved
 test_1003.txt saved
 test_1004.txt saved
 test_1005.txt saved
 test_1006.txt saved
 test_1007.txt saved
 test_1008.txt saved
 test_1009.txt saved
 test_101.txt saved
 test_1010.txt saved
 test_1011.txt saved
 test_1012.txt saved
 test_1013.txt saved
 test_1014.txt saved
 test_1015.txt saved
 test_1016.txt saved
 test_1017.txt saved
 test_1018.txt saved
 test_1019.txt saved
 test_102.txt saved
 test_1020.txt saved
 test_1021.txt saved
 test_1022.txt saved
 test_1023.txt saved
 test_1024.txt saved
 test_1025.txt saved
 test_1026.txt saved
 test_1027.txt saved
 test_1028.txt saved
 test_1029.txt saved
 test_103.txt saved
 test_1030.txt saved
 test_1031.txt saved
 test_1032.txt saved
 test_1033.txt saved
 test_1034.txt saved
 test_1035.txt saved
 test_1036.txt saved
 test_1037.txt saved
 test_1038.txt saved
 test_1039.txt saved
 test_104.txt saved
 test

##### Pick random images for labeling (Roboflow)

In [10]:
# Disabled on purpose: one-off sampling of images for manual labelling in
# Roboflow. Uncomment to draw a new sample.
# NOTE(review): the meaning of the first two arguments (3276, 4912) is not
# visible from this notebook — confirm against pick_random_imgs before reuse.
# uf.pick_random_imgs(3276, 4912, size= 100)