# YOLOv5

### Setup: run `!git clone https://github.com/ultralytics/yolov5` once to fetch YOLOv5

In [11]:
import pandas as pd
import numpy as np
import os
import shutil

In [12]:
from pathlib import Path

In [13]:
# Source image folders; the next cell exposes them to the training layout via symlinks.
train_imgs, val_imgs = (
    Path('/home/jovyan/bpjt-road-data/dataset/train/images_balanced/'),
    Path('/home/jovyan/bpjt-road-data/dataset/validation/images/'),
)
train_imgs

PosixPath('/home/jovyan/bpjt-road-data/dataset/train/images_balanced')

In [14]:
# Staging directory: holds symlinks to the real image folders plus the label
# folders generated further down.
path = Path('sym_data/wo_others')

if path.exists():
    print('WARNING: PATH EXISTS!!! removing folders/files on the existing path...')
    # Unlink the image symlinks first so the recursive delete below only ever
    # sees the links' parent folders. After an interrupted run a link may be
    # missing, so only unlink what is really there instead of raising.
    for link in (path/'train/images', path/'val/images'):
        if link.is_symlink():
            link.unlink()
    shutil.rmtree(path)

# parents=True: the parent folder 'sym_data' may not exist yet on a fresh run.
for split in ('train', 'val'):
    (path/split).mkdir(parents=True)

os.symlink(train_imgs, path/'train/images')
os.symlink(val_imgs, path/'val/images')

#     print(os.listdir(path))



# New box format transform

In [15]:
# Bounding-box CSVs for the two splits.
train_box = '/home/jovyan/bpjt-road-data/dataset/train/box_balanced/box_balanced.csv'
val_box = '/home/jovyan/bpjt-road-data/dataset/validation/box_balanced/box_balanced.csv'

# Columns kept from the CSV, in the order they are written to the label files
# (file_name is used only to name the file).
columns = [
    'file_name',
    'label_name',
    'xmid',
    'ymid',
    'width',
    'height',
    'area',
]
columns

['file_name', 'label_name', 'xmid', 'ymid', 'width', 'height', 'area']

In [16]:
# Write one space-separated label file per image for each split.
# The split folder is paired with its CSV explicitly instead of being guessed
# from a 'train' substring in the file path.
for folder, bbox in (('train', train_box), ('val', val_box)):
    # .copy() gives an independent frame, so the in-place scaling below does
    # not act on a slice of the frame returned by read_csv.
    df = pd.read_csv(bbox)[columns].copy()
    max_area = df['area'].max()
    print('max area:', max_area)
    # NOTE(review): area is scaled by each split's own maximum, so train and
    # val end up on slightly different scales - confirm this is intended.
    df['area'] /= max_area

    imgs = df.file_name.unique()

    # Recreate the label folder from scratch so stale files never survive.
    label_path = path/f'{folder}/labels'
    if label_path.exists(): shutil.rmtree(label_path)
    label_path.mkdir(parents=True)

    # A single groupby pass replaces one full-frame boolean scan per image.
    # sort=False keeps the images in their order of first appearance.
    for img, val in df.groupby('file_name', sort=False):
        val = val[df.columns[1:]]
        val.round(6).to_csv(label_path/f'{img}.txt',sep=' ',index=False,header=False)

max area: 2953177.0
max area: 2939200.0


## Train

In [10]:
df.area.std()

0.4310913322259625

# OLD box format transform

In [40]:
# Image width / height used to normalise pixel coordinates.
w, h = 2704, 1520
def map_posxy(p, img_w=None, img_h=None):
    """Convert an '(xmin, ymin, xmax, ymax)' string to a normalised centre box.

    Args:
        p: Box corners as text, e.g. '(158, 90, 2393, 1490)'. Whitespace
            around the commas is optional.
        img_w: Width to normalise by; defaults to the module-level ``w``.
        img_h: Height to normalise by; defaults to the module-level ``h``.

    Returns:
        Tuple ``(x_mid, y_mid, width, height)``, each divided by the image
        width or height respectively.

    Raises:
        ValueError: If ``p`` does not contain exactly four numbers.
    """
    # Resolve the defaults at call time so a later change of w / h is honoured.
    img_w = w if img_w is None else img_w
    img_h = h if img_h is None else img_h

    # float() ignores surrounding blanks, so splitting on ',' alone accepts
    # both '1, 2' and '1,2'.
    xmin, ymin, xmax, ymax = (float(v) for v in p.strip().strip('()').split(','))
    width = xmax - xmin
    height = ymax - ymin
    mid = (xmin + width/2, ymin + height/2)
    return mid[0]/img_w, mid[1]/img_h, width/img_w, height/img_h


# Old CSV layout: the third column holds the box as an
# '(xmin, ymin, xmax, ymax)' string, which map_posxy normalises.
# The split folder is paired with its CSV explicitly instead of being guessed
# from a 'train' substring in the file path.
for folder, bbox in (('train', train_box), ('val', val_box)):
    df = pd.read_csv(bbox)
    pos = df[df.columns[2]].apply(map_posxy)
    # Expand the tuples into four columns and clamp them to the [0, 1] range.
    pos = pd.DataFrame(pos.to_list(),index=pos.index).clip(0,1)
    df = pd.concat([df[df.columns[:2]], pos], axis=1)

    imgs = df.file_name.unique()

    # `path` is a pathlib.Path in this notebook; `path + '...'` raises
    # TypeError, so the folder is built with the / operator instead.
    label_path = Path(path)/folder/'labels'
    if label_path.exists(): shutil.rmtree(label_path)
    label_path.mkdir(parents=True)

    # A single groupby pass replaces one full-frame boolean scan per image.
    # sort=False keeps the images in their order of first appearance.
    for img, val in df.groupby('file_name', sort=False):
        val = val[df.columns[1:]]
        val.round(6).to_csv(label_path/f'{img}.txt',sep=' ',index=False,header=False)
    

# Formatting Experiment Here

In [25]:
df_train = pd.read_csv(train_box)
df_train

Unnamed: 0,file_name,label_name,"(xmin, ymin, xmax, ymax)",area,shape_type,ratio
0,A100_0,3,"(158, 90, 2393, 1490)",2047382.5,polygon,"(2235, 1400)"
1,A100_1,3,"(158, 90, 2393, 1490)",2047382.5,polygon,"(2235, 1400)"
2,A100_10,3,"(158, 90, 2393, 1490)",2047382.5,polygon,"(2235, 1400)"
3,A100_11,3,"(158, 90, 2393, 1490)",2047382.5,polygon,"(2235, 1400)"
4,A100_12,3,"(158, 90, 2393, 1490)",2047382.5,polygon,"(2235, 1400)"
...,...,...,...,...,...,...
8430,e-7_5,2,"(1696, 1168, 1878, 1407)",26074.0,polygon,"(182, 239)"
8431,e-7_5,0,"(15, 676, 720, 1060)",41823.5,polygon,"(705, 384)"
8432,e-7_6,0,"(15, 676, 720, 1060)",41823.5,polygon,"(705, 384)"
8433,e-7_6,2,"(1696, 1168, 1878, 1407)",26074.0,polygon,"(182, 239)"


In [26]:
# Image width / height used to normalise pixel coordinates.
w, h = 2704, 1520
def map_posxy(p, img_w=None, img_h=None):
    """Convert an '(xmin, ymin, xmax, ymax)' string to a normalised centre box.

    Args:
        p: Box corners as text, e.g. '(158, 90, 2393, 1490)'. Whitespace
            around the commas is optional.
        img_w: Width to normalise by; defaults to the module-level ``w``.
        img_h: Height to normalise by; defaults to the module-level ``h``.

    Returns:
        Tuple ``(x_mid, y_mid, width, height)``, each divided by the image
        width or height respectively.

    Raises:
        ValueError: If ``p`` does not contain exactly four numbers.
    """
    # Resolve the defaults at call time so a later change of w / h is honoured.
    img_w = w if img_w is None else img_w
    img_h = h if img_h is None else img_h

    # float() ignores surrounding blanks, so splitting on ',' alone accepts
    # both '1, 2' and '1,2'.
    xmin, ymin, xmax, ymax = (float(v) for v in p.strip().strip('()').split(','))
    width = xmax - xmin
    height = ymax - ymin
    mid = (xmin + width/2, ymin + height/2)
    return mid[0]/img_w, mid[1]/img_h, width/img_w, height/img_h

In [28]:
pos = df_train[df_train.columns[2]].apply(map_posxy)
pos = pd.DataFrame(pos.to_list(),index=pos.index).clip(0,1)

In [31]:
df_cat = pd.concat([df_train[df_train.columns[:2]], pos], axis=1)
df_cat

Unnamed: 0,file_name,label_name,0,1,2,3
0,A100_0,3,0.471709,0.519737,0.826553,0.921053
1,A100_1,3,0.471709,0.519737,0.826553,0.921053
2,A100_10,3,0.471709,0.519737,0.826553,0.921053
3,A100_11,3,0.471709,0.519737,0.826553,0.921053
4,A100_12,3,0.471709,0.519737,0.826553,0.921053
...,...,...,...,...,...,...
8430,e-7_5,2,0.660873,0.847039,0.067308,0.157237
8431,e-7_5,0,0.135910,0.571053,0.260725,0.252632
8432,e-7_6,0,0.135910,0.571053,0.260725,0.252632
8433,e-7_6,2,0.660873,0.847039,0.067308,0.157237


In [34]:
imgs = df_cat.file_name.unique()
# `path` is a pathlib.Path, so the folder is built with / rather than string
# concatenation. exist_ok lets the cell be re-run without failing.
label_dir = Path(path)/'train'/'labels'
label_dir.mkdir(parents=True, exist_ok=True)
# len(imgs)
# Write from df_cat (the frame built just above) and into the folder created
# here; the earlier version read `df` / `c`, which only exist after later cells
# have run, and wrote to a different location than the one it created.
for img, val in df_cat.groupby('file_name', sort=False):
    val = val[df_cat.columns[1:]]
    val.round(6).to_csv(label_dir/f'{img}.txt',sep=' ',index=False,header=False)

4011

In [25]:
df_train[df_train.file_name =='D-120']
# y,x,y,x

Unnamed: 0,file_name,label_name,"(top, left, bottom, right)",area,shape_type,ratio,file_name_crop
2389,D-120,K2 - Retak Melintang,"(531, 291, 671, 2699)",107141.0,polygon,"(2408, 140)",D-120_0.jpg
2390,D-120,K1 - Retak Memanjang,"(602, 762, 1507, 1101)",37536.5,polygon,"(339, 905)",D-120_1.jpg
2391,D-120,Ket - Bayangan,"(655, 15, 1519, 1970)",239439.5,polygon,"(1955, 864)",D-120_2.jpg
2392,D-120,Ket - Kerb,"(0, 0, 1519, 175)",253996.5,polygon,"(175, 1519)",D-120_3.jpg
2393,D-120,Ket - Marka,"(0, 294, 1519, 377)",124355.5,polygon,"(83, 1519)",D-120_4.jpg
2394,D-120,K2 - Retak Melintang,"(551, 292, 646, 2702)",23328.0,polygon,"(2410, 95)",D-120_5.jpg
2395,D-120,K1 - Retak Memanjang,"(604, 777, 1519, 1092)",5522.5,polygon,"(315, 915)",D-120_6.jpg


In [26]:
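# Class ids as returned by map_label (0-based, in the order of its `labels` list):
# L1/L2/L3/L4/K1/K2/Ko1/Ko2/Ko4 - Retak: 0
# L5/Ko5 - Retak Kulit Buaya: 1
# L7/Ko7 - Lubang: 2
# K4 - Retak Sudut: 3
# K3 - Gumpal pada Sambungan: 4
# K5 - Pumping: currently not mapped (map_label returns -1)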

# Damage-code prefixes, grouped by target class.
retak = 'L1/L2/L3/L4/K1/K2/Ko1/Ko2/Ko4'.split('/')
buaya = 'L5/Ko5'.split('/')
lubang = 'L7/Ko7'.split('/')
sudut = ['K4']
gumpal = ['K3']
pumping = ['K5'] #others
# One entry per line: the names contain spaces, so a plain .split() would
# break them into single words.
ket = '''Ket - Joint/Sambungan (Kaku)
Ket - Expansion Joint
Ket - Marka
Ket - Bekas Kecelakaan
Ket - Sampah Plastik
Ket - Kayu
Ket - Tumpahan Minyak
Ket - Median/Separator
Ket - Patch
Ket - Sealant
Ket - Kerb
Ket - Bekas Ban
Ket - Lain-lain'''.splitlines()


def map_label(s):
    """Map a raw label such as 'Ko5 - Retak Kulit Buaya' to a class id.

    Args:
        s: Label text that starts with a damage code followed by a space.

    Returns:
        0 for retak, 1 for buaya, 2 for lubang, 3 for sudut, 4 for gumpal,
        and -1 for everything else (including 'Ket - ...' notes, the
        `pumping` code and non-string values such as NaN).
    """
    if not isinstance(s, str):
        return -1
    labels = [retak,buaya,lubang,sudut,gumpal]
    for i, codes in enumerate(labels):
        # The trailing space stops a code from matching a longer code that
        # merely shares its prefix.
        if s.startswith(tuple(code + ' ' for code in codes)):
            return i
    # Disabled 'others' class, kept for reference:
#     if not s.startswith('Ket'): return len(labels) #others

    return -1
    
print(df_train['label_name'][0])
# map_label(df_train['label_name'][2474])

Ko5 - Retak Kulit Buaya


In [27]:
lb = df_train['label_name'].apply(map_label)
lb

0       1
1      -1
2      -1
3       4
4      -1
       ..
2401    0
2402   -1
2403   -1
2404   -1
2405    3
Name: label_name, Length: 2406, dtype: int64

In [28]:
(lb>=0).sum()

1000

In [29]:
w, h = 2704, 1520
def map_pos(p):
    """Turn a '(top, left, bottom, right)' string into a normalised centre box.

    Returns a tuple (x_mid, y_mid, width, height); the horizontal values are
    divided by the global ``w`` and the vertical values by the global ``h``.
    """
    coords = list(map(float, p.strip('()').split(', ')))
    top, left = coords[0], coords[1]
    bottom, right = coords[2], coords[3]

    box_w = right - left
    box_h = bottom - top

    x_mid = left + box_w/2
    y_mid = top + box_h/2
    return x_mid/w, y_mid/h, box_w/w, box_h/h

In [30]:
map_pos(df_train['(top, left, bottom, right)'][0])

(0.04400887573964497,
 0.49967105263157896,
 0.08801775147928995,
 0.9993421052631579)

In [31]:
pos = df_train['(top, left, bottom, right)'].apply(map_pos)
pos

0       (0.04400887573964497, 0.49967105263157896, 0.0...
1       (0.07525887573964497, 0.5009868421052631, 0.00...
2       (0.5012943786982249, 0.5023026315789474, 0.996...
3       (0.7609097633136095, 0.5411184210526315, 0.477...
4       (0.2618343195266272, 0.5694078947368421, 0.523...
                              ...                        
2401    (0.38775887573964496, 0.7763157894736842, 0.07...
2402    (0.3947855029585799, 0.7644736842105263, 0.546...
2403    (0.7919748520710059, 0.5019736842105263, 0.001...
2404    (0.5001849112426036, 0.8690789473684211, 0.998...
2405    (0.5730399408284024, 0.5976973684210526, 0.438...
Name: (top, left, bottom, right), Length: 2406, dtype: object

In [32]:
posl = pd.DataFrame(pos.to_list(),index=pos.index).clip(0,1)
posl

Unnamed: 0,0,1,2,3
0,0.044009,0.499671,0.088018,0.999342
1,0.075259,0.500987,0.009985,0.994079
2,0.501294,0.502303,0.996672,0.017763
3,0.760910,0.541118,0.477441,0.090132
4,0.261834,0.569408,0.523669,0.859868
...,...,...,...,...
2401,0.387759,0.776316,0.072115,0.431579
2402,0.394786,0.764474,0.546967,0.469737
2403,0.791975,0.501974,0.001109,0.994737
2404,0.500185,0.869079,0.998891,0.086842


In [30]:
posl.describe()

Unnamed: 0,0,1,2,3
count,8435.0,8435.0,8435.0,8435.0
mean,0.537925,0.541773,0.152172,0.302842
std,0.285762,0.20527,0.201025,0.304811
min,0.005362,0.025,0.004068,0.005921
25%,0.293177,0.420066,0.031435,0.070395
50%,0.566198,0.532895,0.069527,0.165789
75%,0.792345,0.693421,0.177145,0.456579
max,0.993343,0.990461,0.99963,0.999342


In [34]:
df_train['lb'] = lb
df_train[['x','y','w','h']] = posl


In [35]:
df_train[df_train.h>1].iloc[:,[2]].values


array([], shape=(0, 1), dtype=object)

In [36]:
c = ['file_name','lb','x','y','w','h']
df = df_train[c]
df

Unnamed: 0,file_name,lb,x,y,w,h
0,F-91,1,0.044009,0.499671,0.088018,0.999342
1,F-9,-1,0.075259,0.500987,0.009985,0.994079
2,F-9,-1,0.501294,0.502303,0.996672,0.017763
3,F-9,4,0.760910,0.541118,0.477441,0.090132
4,C-73,-1,0.261834,0.569408,0.523669,0.859868
...,...,...,...,...,...,...
2401,C-96,0,0.387759,0.776316,0.072115,0.431579
2402,C-164,-1,0.394786,0.764474,0.546967,0.469737
2403,C-164,-1,0.791975,0.501974,0.001109,0.994737
2404,C-164,-1,0.500185,0.869079,0.998891,0.086842


In [37]:
df=df[df.lb>=0]
df

Unnamed: 0,file_name,lb,x,y,w,h
0,F-91,1,0.044009,0.499671,0.088018,0.999342
3,F-9,4,0.760910,0.541118,0.477441,0.090132
8,C-73,4,0.892567,0.303947,0.038831,0.607895
9,C-73,4,0.911058,0.878289,0.011464,0.090789
19,B-3,0,0.943417,0.737829,0.015533,0.265132
...,...,...,...,...,...,...
2394,D-120,0,0.553624,0.393750,0.891272,0.062500
2395,D-120,0,0.345599,0.698355,0.116494,0.601974
2398,D-112,0,0.754438,0.499342,0.039201,0.997368
2401,C-96,0,0.387759,0.776316,0.072115,0.431579


In [38]:
for i in range(6):
    print((df.lb==i).sum())

610
77
121
65
127
0


In [39]:
imgs = df.file_name.unique()

In [40]:
len(imgs)

551

In [41]:
# `path` is a pathlib.Path, so `path + '...'` raises TypeError; even as a
# string the old target lacked the separator between folder and 'labels'.
# NOTE(review): df comes from the train CSV, so the files are written to
# <path>/train/labels - confirm this is the intended destination.
label_dir = Path(path)/'train'/'labels'
label_dir.mkdir(parents=True, exist_ok=True)
for img in imgs:
    val = df[df.file_name == img][c[1:]]
    val.round(6).to_csv(label_dir/f'{img}.txt',sep=' ',index=False,header=False)

In [42]:
# %load_ext tensorboard
# %tensorboard --logdir yolov5/runs --host 127.0.0.1

In [6]:
import pandas as pd
d = pd.read_csv('yolov5/results/wo_other_finetn_unf/csv/box.csv')


In [9]:
d.label_name.unique()

array(['lubang', 'retak-L1/L2/L3/L4/K1/K2/Ko1/Ko2/Ko4', 'retak sudut',
       'retak kulit buaya'], dtype=object)

In [8]:
# Short label -> long label used in the exported CSV.
update_name={'lubang':'lubang','retak':'retak-L1/L2/L3/L4/K1/K2/Ko1/Ko2/Ko4', 'retak sudut':'retak sudut', 'retak buaya':'retak kulit buaya'}
# Labels that are not keys (unknown ones, or ones already renamed by an earlier
# run of this cell) are kept unchanged. Series.map(dict) would replace them
# with NaN, so running the cell twice used to wipe the renamed labels.
d['label_name'] = d['label_name'].map(lambda name: update_name.get(name, name))

In [14]:
d.to_csv('yolov5/results/wo_other_finetn_unf/csv/box2.csv',index=False)

In [13]:
d.head()

Unnamed: 0,damage_id,img_name,label_name,"(left, top, right, bottom)",area,shape_type,latitude,longitude
0,264_0,264.jpg,lubang,"(1066, 1112, 1117, 1157)",3.394305,yolo_bounding_box,-6.202551,106.503365
1,568_0,568.jpg,retak-L1/L2/L3/L4/K1/K2/Ko1/Ko2/Ko4,"(0, 0, 106, 1499)",235.004226,yolo_bounding_box,-6.202456,106.502945
2,576_0,576.jpg,retak-L1/L2/L3/L4/K1/K2/Ko1/Ko2/Ko4,"(0, 0, 89, 1495)",196.788345,yolo_bounding_box,-6.202454,106.502936
3,584_0,584.jpg,retak-L1/L2/L3/L4/K1/K2/Ko1/Ko2/Ko4,"(0, 0, 89, 1505)",198.104655,yolo_bounding_box,-6.202452,106.502927
4,736_0,736.jpg,lubang,"(423, 559, 561, 701)",28.982484,yolo_bounding_box,-6.202405,106.502723
