In [137]:
import glob
import os
import pickle
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET

In [171]:
# Dataset partitions to process; each one names a sub-folder of the dataset root.
dirs = ['train', 'val']

# Object categories. A label's position in this list is used as its numeric class id.
# NOTE(review): 'rubish' and 'habor' presumably mirror the spellings used in the
# annotation files -- confirm before "correcting" them.
classes = [
    'bridge', 'ship', 'boat', 'ball', 'rubish', 'rock', 'buoy',
    'platform', 'habor', 'mast', 'tree', 'animal', 'grass', 'person',
]



In [139]:
def get_images_in_dir(dir_path):
    """Return the paths of all ``.jpg`` files directly inside ``dir_path``.

    The search pattern is built by plain string concatenation, and the paths
    come back as a new list in whatever order ``glob`` yields them.
    """
    jpg_pattern = dir_path + '/*.jpg'
    return [jpg_path for jpg_path in glob.glob(jpg_pattern)]

In [140]:
def convert(size, box):
    """Turn an absolute pixel box into centre/extent values scaled by the image size.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` -- note that the x pair comes first.

    Returns:
        ``(x, y, w, h)``: the box centre and extent, with the x/w terms
        multiplied by ``1/width`` and the y/h terms by ``1/height``.
    """
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]

    # The centre is shifted by one pixel before scaling.
    # NOTE(review): presumably compensates for 1-based pixel coordinates -- confirm.
    x_center = ((xmin + xmax) / 2.0 - 1) * inv_w
    y_center = ((ymin + ymax) / 2.0 - 1) * inv_h
    box_w = (xmax - xmin) * inv_w
    box_h = (ymax - ymin) * inv_h
    return (x_center, y_center, box_w, box_h)

In [141]:
def convert_annotation(dir_path, output_path, image_path):
    """Write the label file for one image from its XML annotation.

    Reads ``<dir_path>/<image stem>.xml`` and writes ``<image stem>.txt`` into
    ``output_path``, one line per kept object::

        <class_id> <x> <y> <w> <h>

    where the four box values are produced by ``convert``.

    Objects whose name is not in the module-level ``classes`` list, or whose
    ``difficult`` flag is 1, are skipped. A missing or empty ``<difficult>``
    element is treated as "not difficult".

    Args:
        dir_path: Directory holding the XML annotation.
        output_path: Directory that receives the ``.txt`` label file (with or
            without a trailing separator).
        image_path: Path of the image; only its base name without extension
            is used, to locate the annotation and to name the output.

    Raises:
        FileNotFoundError: If the annotation file does not exist.
    """
    stem = os.path.splitext(os.path.basename(image_path))[0]
    xml_path = os.path.join(dir_path, stem + '.xml')
    txt_path = os.path.join(output_path, stem + '.txt')

    # Parse before creating the output so a missing or broken annotation does
    # not leave an empty label file behind. ET.parse opens and closes the
    # file itself when given a path.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # `with` guarantees the label file is flushed and closed, even on error.
    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            difficult = obj.findtext('difficult') or '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the x pair first: (xmin, xmax, ymin, ymax).
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

In [142]:
# Show the kernel's current working directory as a sanity check.
cwd = os.getcwd()
print(cwd)

/home/rtiagosa/pprojects/self-driving-boats


In [143]:
# Dataset root: the partition sub-folders named in `dirs` are resolved against it.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
input_dir = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img'

In [172]:
# For every partition: create its `labels` folder, write one label file per
# image, and write `<partition>.txt` listing the image paths.
for dir_path in dirs:
    full_dir_path = os.path.join(input_dir, dir_path)
    output_path = full_dir_path + '/labels/'

    # exist_ok avoids the check-then-create race and is a no-op on re-runs.
    os.makedirs(output_path, exist_ok=True)

    image_paths = get_images_in_dir(full_dir_path)

    # `with` closes (and flushes) the list file even if an annotation fails
    # to convert part-way through the partition.
    with open(full_dir_path + '.txt', 'w') as list_file:
        for image_path in image_paths:
            list_file.write(image_path + '\n')
            convert_annotation(full_dir_path, output_path, image_path)

    print("Finished processing: " + dir_path)

Finished processing: train
Finished processing: val


# Copying .xml and .jpg files to the same folder

In [159]:
# Folders the original .jpg images and .xml annotations are read from.
# NOTE(review): absolute, machine-specific paths -- adjust when running elsewhere.
src_img_dir   = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/image'
src_label_dir = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/annotation/'

# Single folder that receives a copy of every image and annotation.
dest_dir      = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/' 

In [160]:
# glob returns entries in arbitrary (filesystem) order; sort both lists so
# they are deterministic and line up by file name when consumed together.
img_list    = sorted(glob.glob(os.path.join(src_img_dir, '*.jpg')))
label_list  = sorted(glob.glob(os.path.join(src_label_dir, '*.xml')))

In [161]:
# Copy every annotation and its image into dest_dir under a shared base name.
#
# Pairs are matched by file stem, not by list position: glob returns files in
# arbitrary order, so zipping the two lists could copy an image under another
# annotation's name.
os.makedirs(dest_dir, exist_ok=True)  # loop-invariant: create once

img_by_stem = {
    os.path.splitext(os.path.basename(img_path))[0]: img_path
    for img_path in img_list
}

for src_label in label_list:
    basename = os.path.splitext(os.path.basename(src_label))[0]
    src_img = img_by_stem.get(basename)
    if src_img is None:
        # No image shares this annotation's stem; copying the label alone
        # would leave an orphan in dest_dir.
        print("Skipping " + src_label + ": no matching image")
        continue
    shutil.copy(src_label, os.path.join(dest_dir, basename) + '.xml')
    shutil.copy(src_img, os.path.join(dest_dir, basename) + '.jpg')

# Splitting into train and test partitions (80/20).

In [168]:
# Flat folder whose .jpg files (and the .xml files next to them) get split.
src_dir = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/'

# NOTE(review): these two are only referenced by the commented-out globs further
# down and hold the same values as the *_dest_dir variables -- confirm they are needed.
train_src_dir   = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/train'
test_src_dir    = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/test'



# Folders the image/annotation pairs of each partition are moved into.
train_dest_dir  = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/train'
test_dest_dir   = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/test'

In [162]:
# Sorted so downstream processing sees a stable order; glob's own order is
# arbitrary and can differ between machines and runs.
src_img_list       = sorted(glob.glob(os.path.join(src_dir, '*.jpg')))

In [147]:
# train_img_list        = glob.glob(os.path.join(train_src_dir, '*.jpg'))

In [154]:
# test_img_list        = glob.glob(os.path.join(test_src_dir, '*.jpg'))

In [164]:
# One-column frame ('imgs') holding the image paths, ready to be partitioned.
df = pd.DataFrame({'imgs': src_img_list})

In [165]:
# 80/20 split. random_state pins the shuffle so that, for the same input
# order, re-running the notebook reproduces the same partitions.
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [166]:
# (rows, columns) of the training partition, shown as the cell output.
train.shape

(5972, 1)

In [167]:
# (rows, columns) of the test partition, shown as the cell output.
test.shape

(1494, 1)

In [169]:
# Move every training image, together with the .xml annotation that sits next
# to it, into train_dest_dir.
os.makedirs(train_dest_dir, exist_ok=True)  # loop-invariant: create once, no-op if present

for img_path in train.imgs:
    basename = os.path.splitext(os.path.basename(img_path))[0]
    dirname = os.path.dirname(img_path)
    xml_path = os.path.join(dirname, basename + '.xml')
    # Check up front so a missing annotation cannot leave the image moved
    # without its label.
    if not os.path.exists(xml_path):
        raise FileNotFoundError(xml_path)
    shutil.move(img_path, os.path.join(train_dest_dir, basename) + '.jpg')
    shutil.move(xml_path, os.path.join(train_dest_dir, basename + '.xml'))

In [170]:
# Move every test image, together with the .xml annotation that sits next to
# it, into test_dest_dir.
os.makedirs(test_dest_dir, exist_ok=True)  # loop-invariant: create once, no-op if present

for img_path in test.imgs:
    basename = os.path.splitext(os.path.basename(img_path))[0]
    dirname = os.path.dirname(img_path)
    xml_path = os.path.join(dirname, basename + '.xml')
    # Check up front so a missing annotation cannot leave the image moved
    # without its label.
    if not os.path.exists(xml_path):
        raise FileNotFoundError(xml_path)
    shutil.move(img_path, os.path.join(test_dest_dir, basename) + '.jpg')
    shutil.move(xml_path, os.path.join(test_dest_dir, basename + '.xml'))

# Moving images into the `images` subfolders

In [183]:
# Partition folders whose .jpg files are still loose in the partition root.
train_src_dir   = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/train'
test_src_dir    = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/val'



# Destinations: an `images` sub-folder inside each partition.
# The held-out images are read from `val`, and the label files for that
# partition are written to `val/labels`, so they belong in `val/images`
# (the previous value pointed at `test/images`).
train_dest_dir  = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/train/images'
test_dest_dir   = '/home/rtiagosa/pprojects/self-driving-boats/WSODD_dataset/label_and_img/val/images'

In [177]:
# .jpg files sitting directly in the training partition folder.
train_img_list        = glob.glob(os.path.join(train_src_dir, '*.jpg'))

In [184]:
# .jpg files sitting directly in the folder named by test_src_dir.
test_img_list        = glob.glob(os.path.join(test_src_dir, '*.jpg'))

In [185]:
# Rebinds `train` (previously the split result) to a frame of the globbed image paths.
train = pd.DataFrame({'imgs': train_img_list})

In [186]:
# Rebinds `test` (previously the split result) to a frame of the globbed image paths.
test = pd.DataFrame({'imgs': test_img_list})

In [181]:
# Move the training images into train_dest_dir, keeping their file names.
os.makedirs(train_dest_dir, exist_ok=True)  # loop-invariant: create once, no-op if present

for img_path in train.imgs:
    basename = os.path.splitext(os.path.basename(img_path))[0]
    shutil.move(img_path, os.path.join(train_dest_dir, basename) + '.jpg')

In [187]:
# Move the images listed in `test` into test_dest_dir, keeping their file names.
os.makedirs(test_dest_dir, exist_ok=True)  # loop-invariant: create once, no-op if present

for img_path in test.imgs:
    basename = os.path.splitext(os.path.basename(img_path))[0]
    shutil.move(img_path, os.path.join(test_dest_dir, basename) + '.jpg')