# Add Box Label on image by using YOLOV5 Format 

#### Convert Yolov5 Format to (x1, y1) (x2, y2) 

#### Convert Yolov5 polygon Format to (x1, y1) (x2, y2)


In [29]:
from glob import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt

def convert_y2xy(l, im_w, im_h):
    """Convert one YOLO box label line into pixel corner points.

    Args:
        l: Label line of the form ``"class xc yc w h"``. The four box values
            are scaled by the image size, i.e. they are treated as fractions
            of the image width/height.
        im_w: Image width in pixels.
        im_h: Image height in pixels.

    Returns:
        ``(c, (x1, y1), (x2, y2))`` as ``np.int32`` values: the class index,
        the corner at centre minus half-size and the corner at centre plus
        half-size (fractional pixels are truncated).

    Raises:
        ValueError: If the line does not hold exactly five numeric fields.
    """
    # split() with no argument tolerates repeated spaces, tabs and the
    # trailing newline that readlines() leaves on every line.
    c, xc, yc, w, h = np.array(l.split(), np.float32)
    half_w, half_h = w / 2, h / 2
    c, x1, y1, x2, y2 = np.array(
        [
            c,
            (xc - half_w) * im_w,
            (yc - half_h) * im_h,
            (xc + half_w) * im_w,
            (yc + half_h) * im_h,
        ],
        np.int32,
    )
    return c, (x1, y1), (x2, y2)

def conv_poloygn_to_box(l, im_w, im_h):
    """Convert one YOLO polygon label line into its enclosing pixel box.

    Args:
        l: Label line of the form ``"class x1 y1 x2 y2 ..."``. The x/y values
            are scaled by the image size, i.e. they are treated as fractions
            of the image width/height.
        im_w: Image width in pixels.
        im_h: Image height in pixels.

    Returns:
        ``(c, (x_min, y_min), (x_max, y_max))`` as ``np.int32`` values
        (fractional pixels are truncated).

    Raises:
        ValueError: If the line has no class index followed by at least one
            x/y pair, or if a field is not numeric.
    """
    # split() with no argument tolerates repeated spaces, tabs and the
    # trailing newline that readlines() leaves on every line.
    values = np.array(l.split(), np.float32)
    if values.size < 3:
        raise ValueError(f'expected "class x y [x y ...]", got: {l!r}')

    c, xy = values[0], values[1:]
    # Coordinates alternate x, y, x, y, ... so strided slices separate them.
    xs, ys = xy[0::2], xy[1::2]

    c, x1, y1, x2, y2 = np.array(
        [c, xs.min() * im_w, ys.min() * im_h, xs.max() * im_w, ys.max() * im_h],
        np.int32,
    )
    return c, (x1, y1), (x2, y2)
    

def show_img_yolov5_format(img_name='name_of_img.jpg', path_folder='dataset/test'):
    """Draw the bounding box of every polygon label on an image and show it.

    The image is read from ``<path_folder>/images/<img_name>`` and its labels
    from ``<path_folder>/labels/<img_name without extension>.txt``.

    Args:
        img_name: File name of the image inside the ``images`` folder.
        path_folder: Folder that contains the ``images`` and ``labels``
            sub-folders.

    Returns:
        The image array with the rectangles drawn on it.
    """
    img_path = path_folder + '/images/' + img_name
    # Drop only the final extension so names such as "photo.jpeg" work too.
    label_path = path_folder + '/labels/' + img_name.rsplit('.', 1)[0] + '.txt'

    # Work on a copy so cv2 can draw in place.
    # NOTE(review): imread may hand back a read-only array — confirm.
    img = plt.imread(img_path).copy()
    # shape[:2] is (height, width) for both colour and grayscale arrays.
    im_h, im_w = img.shape[:2]

    with open(label_path, 'r') as f:
        labels = f.readlines()

    for l in labels:
        if not l.strip():
            # Skip blank lines (e.g. a trailing empty line in the label file).
            continue
        # conv_poloygn_to_box expects (line, width, height) in that order.
        c, pt1, pt2 = conv_poloygn_to_box(l, im_w, im_h)
        print(c, pt1, pt2)
        # Pass plain Python ints to OpenCV for the corner points.
        cv2.rectangle(img, tuple(map(int, pt1)), tuple(map(int, pt2)), (255, 0, 0), 5)

    plt.figure(figsize=(10, 5))
    plt.imshow(img)
    plt.show()

    return img

# Count the number of instances of each label

In [1]:
import numpy as np
from glob import glob

def count_label_instances(data_path = 'dataset_path', project='train', all = True, plot=True):
    """Count how many label lines use each class index in a YOLO dataset.

    Args:
        data_path: Dataset root folder.
        project: Sub-folder (split) to scan when ``all`` is false.
        all: When true, scan ``<data_path>/*/labels/*.txt``; otherwise only
            ``<data_path>/<project>/labels/*.txt``.
        plot: When true, also draw a bar chart of the counts.

    Returns:
        A dict mapping class index to instance count, ordered from the most
        to the least frequent class. It is returned whether or not the chart
        is drawn.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
    """
    pattern = '/*/labels/*.txt' if all else f'/{project}/labels/*.txt'

    l_index = {}
    for label_file in glob(data_path + pattern):
        with open(label_file, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines carry no annotation.
                    continue
                obj_no = int(fields[0])
                l_index[obj_no] = l_index.get(obj_no, 0) + 1

    keys = list(l_index)
    values = list(l_index.values())
    # argsort is ascending; reverse it to list the largest count first.
    descending = np.argsort(values)[::-1]
    sorted_dict = {keys[i]: values[i] for i in descending}

    if plot:
        # Imported here so the function does not depend on another cell
        # having imported pyplot first.
        import matplotlib.pyplot as plt

        plt.figure(figsize = (10,10))
        plt.bar(list(sorted_dict.keys()), list(sorted_dict.values()))

    return sorted_dict
                

# Removing unnecessary label from YOLO format

In [None]:
import os
from glob import glob

def remove_unlisted_label(selected_labels, data_path = 'dataset_path', project='train', all = True, add=0):
    """Keep only the selected classes in a YOLO dataset, re-indexing them.

    WARNING: this edits the dataset in place. Label files are rewritten, and
    images whose label file is missing or ends up empty are deleted together
    with that label file.

    Args:
        selected_labels: Sequence of class indices to keep. A kept class is
            renumbered to its position in this sequence plus ``add``.
        data_path: Dataset root folder.
        project: Sub-folder (split) to process when ``all`` is false.
        all: When true, process ``<data_path>/*/images/*``; otherwise only
            ``<data_path>/<project>/images/*``.
        add: Offset added to every new class index.

    Raises:
        ValueError: If the first field of a non-blank label line is not an
            integer.
    """
    if all:
        imgs = glob(data_path + '/*/images/*')
    else:
        imgs = glob(data_path + f'/{project}/images/*')

    # Old class index -> new class index. setdefault keeps the first position
    # of a repeated entry, matching list.index().
    new_index = {}
    for position, class_id in enumerate(selected_labels):
        new_index.setdefault(class_id, position + add)

    count_rm = 0
    count_new = 0

    for i, img in enumerate(imgs):
        img_dir, img_file = os.path.split(img)

        print('We in Image no. : {:<6} $$$ Name : {:<100} $$$'.format(i, img_file),  end= ' & ')
        img_id = os.path.splitext(img_file)[0]
        # The labels folder sits next to the images folder.
        label = os.path.join(os.path.dirname(img_dir), 'labels', img_id + '.txt')

        # Only the read is guarded, so a failing os.remove further down is
        # not mistaken for a missing label file.
        try:
            with open(label, 'r') as f:
                lines = f.readlines()
        except FileNotFoundError:
            count_rm +=1
            os.remove(img)
            print('removd this image, it\'s not have label ')
            continue

        new_lines = []
        for line in lines:
            fields = line.split()
            if not fields:
                # Blank lines carry no annotation.
                continue
            obj_no = int(fields[0])
            if obj_no in new_index:
                fields[0] = str(new_index[obj_no])
                new_lines.append(' '.join(fields) + '\n')

        if new_lines:
            with open(label, 'w') as f:
                f.writelines(new_lines)
            count_new +=1
            print(' Done With this :)')
        else:
            os.remove(label)
            os.remove(img)
            count_rm +=1
            print(' The Label file is empty, we deleted it  :( ')

    print('no. img that removed: ', count_rm, '\nno. img that added', count_new)


# Creating the data.yaml file from a list of label names

In [None]:
def create_data_yaml_file(data_folder='Dataset_folder', out_file_ph ='data.yaml', seleced_names = ['l1, l2']):
    """Write a YOLO-style ``data.yaml`` describing the dataset and its classes.

    Args:
        data_folder: Dataset folder name, written as ``path: ../<data_folder>``.
        out_file_ph: Path of the YAML file to create (overwritten if present).
        seleced_names: Class names; each one is written under ``names:`` as
            ``<position>: <name>``. The default list is only read, never
            modified.
    """
    # The text is built line by line so the top-level keys start at column 0.
    # Writing an indented triple-quoted string verbatim left the keys indented
    # deeper than the entries under ``names:``, which is not valid YAML.
    lines = [
        '# Mohamed Mosa',
        f'path: ../{data_folder}  # dataset root dir',
        'train: train/images  # train images',
        'val: valid/images  # val images',
        'test: test/images # test images (optional)',
        '',
        'names:',
    ]
    lines.extend(f'  {i}: {label}' for i, label in enumerate(seleced_names))

    with open(out_file_ph, 'w') as f:
        f.write('\n'.join(lines) + '\n')

# Split Train, Valid data with YOLO Format

In [None]:
from glob import glob
import shutil
import random
import os

# Split the images in imgs_path (and their same-named .txt labels) into
# train/valid folders under parent_path using a seeded 80/20 shuffle.

# "~" is only expanded by the shell; expand it explicitly so the folders are
# created in the home directory rather than in a local folder named "~".
parent_path = os.path.expanduser('~/dataset')

for split in ('train', 'valid'):
    for kind in ('images', 'labels'):
        # exist_ok lets the cell be re-run without failing.
        os.makedirs(os.path.join(parent_path, split, kind), exist_ok=True)


imgs_path = os.path.expanduser('~/dataset/obj_train_data')
# glob returns files in arbitrary order; sort first so the seeded shuffle
# yields the same split on every run.
all_images = sorted(glob(os.path.join(imgs_path, '*.jpg')))

random.Random(22).shuffle(all_images)

# The first 80% of the shuffled images go to train, the rest to valid.
split_at = int(len(all_images)*0.8)

for split, subset in (('train', all_images[:split_at]), ('valid', all_images[split_at:])):
    for img in subset:
        # The label file sits next to the image and shares its base name.
        label = os.path.splitext(img)[0] + '.txt'

        shutil.copy(label, os.path.join(parent_path, split, 'labels'))
        shutil.copy(img, os.path.join(parent_path, split, 'images'))

# IPython shell escapes: print the number of files in each split folder,
# each count followed by a line naming what was counted.
# NOTE(review): these paths are relative to the current working directory
# ("dataset/..."), while the split code above builds its paths from
# parent_path — confirm both refer to the same folder.
!ls dataset/train/images/* | wc -l 
!echo 'train img'
!ls dataset/train/labels/* | wc -l 
!echo 'train label'
!ls dataset/valid/images/* | wc -l 
!echo 'valid img'
!ls dataset/valid/labels/* | wc -l 
!echo 'valid label'

# Plot image

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import cv2

def show_labels(img_path, edit_label = 2, csv_path='/content/dataset/train.csv'):
    """Draw the CSV-listed boxes of one image in random colours and show it.

    Args:
        img_path: Path of the image to display. Its file name is matched
            against the ``image_path`` column of the CSV.
        edit_label: Factor every box coordinate is multiplied by before
            drawing.
        csv_path: CSV file with the columns ``image_path``, ``xmin``,
            ``ymin``, ``xmax`` and ``ymax``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    # Imported here so the function does not depend on another cell having
    # imported random first.
    import random

    labels_df = pd.read_csv(csv_path)
    img_name = img_path.split('/')[-1]
    boxes = labels_df.loc[labels_df['image_path'] == img_name, ['xmin', 'ymin', 'xmax', 'ymax']]

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {img_path}')

    for xmin, ymin, xmax, ymax in boxes.itertuples(index=False, name=None):
        pt1 = (abs(int(xmin * edit_label)), abs(int(ymin * edit_label)))
        pt2 = (abs(int(xmax * edit_label)), abs(int(ymax * edit_label)))
        color = (random.randint(0,255), random.randint(0,255), random.randint(150,255))
        cv2.rectangle(img, pt1, pt2, color, 5)

    # OpenCV loads images as BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(15,10))
    plt.imshow(img)
    plt.show()
        



# Change Label Index

In [None]:
def change_label_index(index_label, path= '/content/test_data/FADED2/obj_train_data/*.txt'):
    """Overwrite the class index of every annotation in the matching files.

    Each file matched by ``path`` is rewritten in place with the first field
    of every non-blank line replaced by ``index_label``. The path of each
    processed file is printed.

    Args:
        index_label: New class index written at the start of every
            annotation line.
        path: Glob pattern selecting the label files to rewrite.
    """
    for label in glob(path):
        with open(label, 'r') as f:
            lines = f.readlines()

        new_lines = []
        for line in lines:
            # Keep the line ending apart so it survives even when the line
            # holds a single field.
            body = line.rstrip('\r\n')
            ending = line[len(body):]
            fields = body.split()
            if not fields:
                # Leave blank lines untouched. Replacing their only "field"
                # would drop the newline and glue the new index onto the
                # following line.
                new_lines.append(line)
                continue
            fields[0] = str(index_label)
            new_lines.append(' '.join(fields) + ending)

        with open(label, 'w') as writer:
            writer.writelines(new_lines)

        print(label)

# Merge datasets and update labels

In [None]:
from glob import glob 
import yaml
from yaml.loader import SafeLoader
import shutil


# Collect the data YAML of every dataset folder under /content.
yaml_files = glob('/content/dataset*/*.yaml')
yaml_files

# Merged class index -> source class names that map onto it. The index is the
# position of each alias list below.
true_labels = dict(enumerate([
    ['plaque', 'dental palque'],
    ['gum_inflammation'],  # bbox
    ['misaligned'],  # bbox
    ['Cavities', 'decaycavity'],
    ['earlydecay'],
    ['gingevitis', 'Gingivitis'],
    ['gum_swelling', 'gum-swelling'],
    ['Abnormal'],  # bbox
]))

def merge_dataset(datasets='the path to datasets', output_path='the path to output dataset', cls_output={0:'class one' }):
    """Merge several YOLO datasets into one, remapping their class indices.

    For every dataset YAML, each image in its ``train``/``valid``/``test``
    ``images`` folder is looked up together with its label file. Each
    annotation's class index is translated to a class name through the YAML's
    ``names`` entry and then to a merged index through ``cls_output``.
    Annotations whose class name appears in no ``cls_output`` entry are
    dropped. Images that keep at least one annotation are copied to
    ``<output_path>/<mode>/images`` and their remapped labels are written to
    ``<output_path>/<mode>/labels``. Running totals are printed after each
    mode.

    Args:
        datasets: Iterable of paths to dataset YAML files. Each YAML must
            have a ``names`` entry and sit in its dataset's root folder.
        output_path: Root folder of the merged dataset.
        cls_output: Mapping from merged class index to a collection of source
            class names. The default dict is only read, never modified.

    Raises:
        ValueError: If the first field of a non-blank label line is not an
            integer.
    """
    # Imported here because this cell does not import os itself.
    import os

    count_rm = 0
    count_new = 0

    for data in datasets:
        with open(data) as yaml_f:
            d = yaml.load(yaml_f, Loader=SafeLoader)
        local_classes = d['names']
        data_path = os.path.dirname(data)

        for mode in ['train', 'valid', 'test']:
            out_imgs_path = os.path.join(output_path, mode, 'images')
            out_labels_path = os.path.join(output_path, mode, 'labels')
            imgs = glob(os.path.join(data_path, mode, 'images', '*'))

            for i, img in enumerate(imgs):
                img_file = os.path.basename(img)

                print('We in Image no. : {:<6} $$$ Name : {:<100} $$$'.format(i, img_file),  end= ' & ')
                img_id = os.path.splitext(img_file)[0]
                label = os.path.join(data_path, mode, 'labels', img_id + '.txt')
                out_label = os.path.join(out_labels_path, img_id + '.txt')

                # "except FileNotFoundError and IndexError" evaluates to just
                # IndexError, so a missing label file used to abort the whole
                # merge; a tuple catches every intended case. KeyError covers
                # a ``names`` entry given as a mapping rather than a list.
                try:
                    with open(label, 'r') as l_f:
                        lines = l_f.readlines()

                    new_lines = []
                    for line in lines:
                        fields = line.split()
                        if not fields:
                            # Blank lines carry no annotation.
                            continue
                        cls_name = local_classes[int(fields[0])]
                        for k, v in cls_output.items():
                            if cls_name in v:
                                fields[0] = str(k)
                                new_lines.append(' '.join(fields) + '\n')
                except (FileNotFoundError, IndexError, KeyError):
                    count_rm +=1
                    print('don\'t copy this image, it\'s not have label ')
                    continue

                if new_lines:
                    # Create the output folders on demand. Without them the
                    # label write fails, and shutil.copy would create a file
                    # literally named "images".
                    os.makedirs(out_imgs_path, exist_ok=True)
                    os.makedirs(out_labels_path, exist_ok=True)
                    with open(out_label, 'w') as f:
                        f.writelines(new_lines)
                    shutil.copy(img, out_imgs_path)
                    count_new +=1

                    print(' Done With this :)')
                else:
                    count_rm +=1
                    print(' The Label file is empty, we don\'t copy it  :( ')

            print('no. img that we don\'t copy it: ', count_rm, '\nno. img that added', count_new)
            
# Merge every dataset found in yaml_files into /content/dataset, remapping
# class names to the merged indices defined in true_labels.
# NOTE(review): '/content/dataset' also matches the '/content/dataset*' glob
# used to build yaml_files — confirm no YAML file lives in the output folder.
merge_dataset(datasets=yaml_files, output_path='/content/dataset', cls_output=true_labels)