In [7]:
import glob
import os
import pickle
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET

# Copying .xml and .jpg files to the same folder

In [5]:
# Source dataset root and the flat folder that receives each image together
# with its annotation file.
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
src_dir   = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/WSODD USV_dataset'
dest_dir      = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/label_and_img/' 




In [6]:
# Images and annotations live in sibling sub-folders of the dataset root.
img_src_dir, label_src_dir = (
    os.path.join(src_dir, 'image'),
    os.path.join(src_dir, 'annotation'),
)

In [7]:
# Enumerate every JPEG in the image folder; the count is shown as the cell output.
jpg_pattern = os.path.join(img_src_dir, '*.jpg')
imgs_paths = glob.glob(jpg_pattern)
len(imgs_paths)

7467

In [8]:
# Create the destination once, up front. exist_ok=True replaces the
# check-then-create that used to run on every iteration.
os.makedirs(dest_dir, exist_ok=True)

for img in imgs_paths:
    filename_no_ext = os.path.splitext(os.path.basename(img))[0]

    # Each image is copied together with the annotation of the same base name;
    # shutil.copy raises if that annotation file does not exist.
    shutil.copy(img, os.path.join(dest_dir, filename_no_ext + '.jpg'))
    shutil.copy(os.path.join(label_src_dir, filename_no_ext + '.xml'),
                os.path.join(dest_dir, filename_no_ext + '.xml'))

# Splitting into train and validation partitions.

In [11]:
# From here on `src_dir` is rebound to the flat folder that holds the copied
# images and annotations.
src_dir = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/label_and_img/'

# Destination of the training partition.
train_dest_dir  = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/label_and_img/train'

# Despite the `test_` prefix, this partition is written to the 'val' folder.
test_dest_dir   = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/label_and_img/val'

In [12]:
# Enumerate the JPEGs sitting directly in the flat folder; the count is shown
# as the cell output.
jpg_pattern = os.path.join(src_dir, '*.jpg')
imgs_paths = glob.glob(jpg_pattern)
len(imgs_paths)

7467

In [13]:
# One-column frame of image paths; this is the input to train_test_split.
df = pd.DataFrame({'imgs': imgs_paths})

In [14]:
# 80/20 split with a fixed seed. The 20% partition is named `test` here but is
# the one that gets moved into the 'val' folder.
train, test = train_test_split(df, test_size = 0.2, random_state = 101)

In [15]:
# (rows, columns) of the training partition.
train.shape

(5973, 1)

In [16]:
# (rows, columns) of the held-out partition.
test.shape

(1494, 1)

In [17]:
# Create the destination once instead of re-checking it for every image.
os.makedirs(train_dest_dir, exist_ok=True)

for img in train.imgs:
    filename_no_ext = os.path.splitext(os.path.basename(img))[0]
    dirname = os.path.dirname(img)

    # Move the image, then the annotation with the same base name that sits
    # next to it in the source folder.
    shutil.move(img, os.path.join(train_dest_dir, filename_no_ext + '.jpg'))
    shutil.move(os.path.join(dirname, filename_no_ext + '.xml'),
                os.path.join(train_dest_dir, filename_no_ext + '.xml'))

In [18]:
# Create the destination once instead of re-checking it for every image.
os.makedirs(test_dest_dir, exist_ok=True)

for img in test.imgs:
    filename_no_ext = os.path.splitext(os.path.basename(img))[0]
    dirname = os.path.dirname(img)

    # Move the image, then the annotation with the same base name that sits
    # next to it in the source folder.
    shutil.move(img, os.path.join(test_dest_dir, filename_no_ext + '.jpg'))
    shutil.move(os.path.join(dirname, filename_no_ext + '.xml'),
                os.path.join(test_dest_dir, filename_no_ext + '.xml'))

# Moving images to the `images` folder

In [19]:
# Partition folders that currently hold both the images and the annotations.
train_src_dir   = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/label_and_img/train'
test_src_dir    = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/label_and_img/val'



# The destination names are rebound to an 'images' sub-folder inside each
# partition; only the .jpg files are moved there by the following cells.
train_dest_dir  = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/label_and_img/train/images'
test_dest_dir   = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/label_and_img/val/images'

In [20]:
# JPEGs sitting directly in the train partition; the count is the cell output.
train_jpg_pattern = os.path.join(train_src_dir, '*.jpg')
train_imgs_paths = glob.glob(train_jpg_pattern)
len(train_imgs_paths)

5973

In [21]:
# JPEGs sitting directly in the val partition; the count is the cell output.
test_jpg_pattern = os.path.join(test_src_dir, '*.jpg')
test_imgs_paths = glob.glob(test_jpg_pattern)
len(test_imgs_paths)

1494

In [22]:
# Rebuild the frame from the files found on disk. This rebinds `train`,
# replacing the frame returned earlier by train_test_split.
train = pd.DataFrame({'imgs': train_imgs_paths})
train.shape

(5973, 1)

In [23]:
# Rebuild the frame from the files found on disk. This rebinds `test`,
# replacing the frame returned earlier by train_test_split.
test = pd.DataFrame({'imgs': test_imgs_paths})
test.shape

(1494, 1)

In [24]:
# Create the images folder once instead of re-checking it for every file.
os.makedirs(train_dest_dir, exist_ok=True)

# Only the .jpg files are moved; the annotations stay in the partition folder.
for img in train.imgs:
    filename_no_ext = os.path.splitext(os.path.basename(img))[0]
    shutil.move(img, os.path.join(train_dest_dir, filename_no_ext + '.jpg'))


In [25]:
# Create the images folder once instead of re-checking it for every file.
os.makedirs(test_dest_dir, exist_ok=True)

# Only the .jpg files are moved; the annotations stay in the partition folder.
for img in test.imgs:
    filename_no_ext = os.path.splitext(os.path.basename(img))[0]
    shutil.move(img, os.path.join(test_dest_dir, filename_no_ext + '.jpg'))


# Convert PASCAL VOC to YOLO

In [8]:
def get_files_in_dir(dir_path):
    """Return the paths of the ``.xml`` files located directly in ``dir_path``.

    The search is not recursive. Paths are returned in the order ``glob``
    yields them, each one prefixed with ``dir_path``. An empty list is
    returned when nothing matches.
    """
    xml_pattern = dir_path + '/*.xml'
    return [xml_path for xml_path in glob.glob(xml_pattern)]

In [9]:
def convert(size, box):
    """Turn a corner-style box into a normalised centre/size box.

    Parameters
    ----------
    size : sequence
        ``(image_width, image_height)``.
    box : sequence
        ``(xmin, xmax, ymin, ymax)`` -- note the order: both x values first.

    Returns
    -------
    tuple
        ``(x_center, y_center, width, height)``, each multiplied by the
        reciprocal of the matching image dimension.
    """
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]

    # Reciprocals are computed once and multiplied in, rather than dividing.
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])

    # The centre is shifted by one pixel before normalising.
    # NOTE(review): presumably compensates for 1-based VOC pixel coordinates -- confirm.
    x_center = (x_min + x_max) / 2.0 - 1
    y_center = (y_min + y_max) / 2.0 - 1
    box_w = x_max - x_min
    box_h = y_max - y_min

    return (x_center * inv_w, y_center * inv_h, box_w * inv_w, box_h * inv_h)

In [10]:
def convert_annotation(dir_path, output_path, image_path):
    """Convert one PASCAL VOC XML annotation into a YOLO-style label file.

    Parameters
    ----------
    dir_path : str
        Folder that contains the ``.xml`` annotation.
    output_path : str
        Folder that receives the ``.txt`` label file (with or without a
        trailing separator).
    image_path : str
        Any path whose base name, minus extension, identifies the sample.
        Only that base name is used, so passing the XML path itself works.

    Each kept object becomes one line ``"<class_id> <x> <y> <w> <h>"``, where
    ``class_id`` is the position of the object's name in the module-level
    ``classes`` list and the box values come from ``convert``. Objects whose
    name is not in ``classes``, or that are flagged ``difficult == 1``, are
    skipped. An object without a ``<difficult>`` tag is treated as not
    difficult.

    Raises whatever ``ET.parse`` / ``open`` raise for a missing or malformed
    annotation; in that case no label file is created.
    """
    basename_no_ext = os.path.splitext(os.path.basename(image_path))[0]
    xml_path = os.path.join(dir_path, basename_no_ext + '.xml')
    txt_path = os.path.join(output_path, basename_no_ext + '.txt')

    # Passing the path lets ElementTree open and close the file itself and
    # honour the encoding declared in the XML.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the label file is flushed and closed,
    # even if a malformed object raises part-way through.
    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue

            difficult_node = obj.find('difficult')
            if difficult_node is not None and int(difficult_node.text) == 1:
                continue

            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

In [11]:
# Partition sub-folders to process.
dirs = ['train', 'val']
# Object classes to keep. The position of a name in this list is the class id
# written to the label files, so the order must not change between runs.
classes = ['bridge', 'ship', 'boat', 'ball', 'rubbish', 'rock', 'buoy', 'platform', 'harbor', 'mast',
           'tree', 'animal', 'grass', 'person']



In [12]:
# Show the directory the notebook kernel is running from.
cwd = os.getcwd()
print(cwd)

/home/rtiagos/pprojects/self-driving-boats/src/notebooks


In [13]:
# Root folder containing the 'train' and 'val' partitions to convert.
# NOTE(review): this points at 'label_and_img_tmp/', whereas the earlier cells
# write to 'label_and_img/' -- confirm which folder is intended.
input_dir = '/home/rtiagos/pprojects/self-driving-boats/src/dataset/label_and_img_tmp/'

In [14]:
# For each partition, write one YOLO label file per VOC annotation into a
# 'labels' sub-folder next to the annotations.
for dir_path in dirs:
    full_dir_path = os.path.join(input_dir, dir_path)
    # The trailing separator is kept so the value stays usable as a plain
    # string prefix by convert_annotation.
    output_path = full_dir_path + '/labels/'

    # exist_ok=True makes re-running the cell safe without a separate check.
    os.makedirs(output_path, exist_ok=True)

    pascal_voc_annotations = get_files_in_dir(full_dir_path)

    for ann in pascal_voc_annotations:
        convert_annotation(full_dir_path, output_path, ann)

    print("Finished processing: " + dir_path)

Finished processing: train
Finished processing: val


# Updating Missed Label