In [1]:
import os
import scipy.io as scio
import shutil
import numpy as np
from PIL import Image

In [2]:
# HAT database: source images plus the MATLAB annotation file.
# The paths are assembled with os.path.join instead of hard-coded backslashes so
# the separators are valid on every OS (on Windows the resulting strings are the
# same as before).
DATABASE_FOLDER = os.path.join('..', 'data', 'hatdb')
IMAGES_FOLDER = os.path.join(DATABASE_FOLDER, 'images')
ANNOTATION_FILE = os.path.join(DATABASE_FOLDER, 'anno.mat')

In [3]:
# Output locations of the generated SSD selections: image copies, XML
# annotations and the txt index files for the train and test parts.
# Built with os.path.join (no hard-coded backslashes) so the separators are
# valid on every OS; on Windows the resulting strings are the same as before.
SSD_DATA = os.path.join('..', 'data', 'ssd_data')
TRAIN_IMAGES_FOLDER = os.path.join(SSD_DATA, 'images')
TRAIN_XML_FOLDER = os.path.join(SSD_DATA, 'xml')
TEST_IMAGES_FOLDER = os.path.join(SSD_DATA, 'test_images')
TEST_XML_FOLDER = os.path.join(SSD_DATA, 'test_xml')
TRAIN_TXT = os.path.join(SSD_DATA, 'train.txt')
TEST_TXT = os.path.join(SSD_DATA, 'test.txt')

In [4]:
# Create the output folders for the train/test images and their XML markup.
# exist_ok=True makes the cell safe to re-run without a separate existence
# check; if one of the paths exists but is not a directory, os.makedirs raises
# FileExistsError instead of silently carrying on.
for selection_folder in (SSD_DATA,
                         TRAIN_IMAGES_FOLDER,
                         TRAIN_XML_FOLDER,
                         TEST_IMAGES_FOLDER,
                         TEST_XML_FOLDER):
    os.makedirs(selection_folder, exist_ok=True)

In [5]:
# Load the MATLAB annotation file; the cells below read its 'anno' entry.
annotation_file = scio.loadmat(ANNOTATION_FILE)

In [6]:
# List of image file names from the db, one per annotated entry.
# Each element is itself a 1-element sequence, so the name is read as img_files[i][0].
img_files = annotation_file['anno']['files'][0][0][0].tolist()

In [7]:
# List of bounding boxes, index-aligned with img_files (bboxes[i] belongs to img_files[i]).
# Each element is a list; only its first 4 values are used as coordinates.
bboxes = annotation_file['anno']['objbbs'][0][0].tolist()

In [8]:
# List of gender labels, index-aligned with img_files.
# 1 - F, -1 - M, 0 - ? (unknown)
sexes = annotation_file['anno']['y'][0][0][0].tolist()

In [9]:
def get_gender(label):
    """Translate a numeric gender label into its one-letter class name.

    Returns 'f' for 1, 'm' for -1 and 'u' (unknown) for 0 or any other value.
    """
    for code, letter in ((1, 'f'), (-1, 'm')):
        if label == code:
            return letter
    # 0 and every unrecognised label are treated as unknown
    return 'u'

In [10]:
# image file name -> list of [bbox, gender] entries found on that image
ssd_dict = {}

In [11]:
# Fill ssd_dict: image file name -> list of [bbox, gender] entries.
# The dict is rebuilt from scratch here, so re-running this cell does not append
# duplicates to the entries left over from a previous run.
ssd_dict = {}
for i, img_entry in enumerate(img_files):
    gend = get_gender(sexes[i])
    if gend == 'u':
        # entries without a known gender are left out of the selection
        continue
    pict = img_entry[0]
    # Only the first 4 values are coordinates; they are later written out as
    # xmin, ymin, xmax, ymax. The box is shrunk by one pixel on every side and
    # stored as a new list, so `bboxes` itself is never modified.
    x_min, y_min, x_max, y_max = bboxes[i][:4]
    bbox = [x_min + 1, y_min + 1, x_max - 1, y_max - 1]
    # the same image can carry several annotated boxes
    ssd_dict.setdefault(pict, []).append([bbox, gend])

In [12]:
# Fixed values written into every generated XML annotation.
DB_NAME = 'UAD'  # goes into <source><database>
IMG_TYPE = 'gender'  # goes into <source><image>
CHANNELS = 3  # goes into <size><depth>; NOTE(review): assumes every image has 3 channels - not checked against the files

In [13]:
# build the XML fragment holding one <object> element per bbox/class pair
def xml_objects(objs):
    """Return the concatenated ``<object>`` elements for ``objs``.

    Each item is read as ``item[0]`` (bbox, whose first four values are written
    in order as xmin, ymin, xmax, ymax) and ``item[1]`` (class name).
    An empty ``objs`` gives an empty string.
    """
    object_template = (
        '  <object>\n'
        '    <name>{name}</name>\n'
        '    <bndbox>\n'
        '      <xmin>{box[0]}</xmin>\n'
        '      <ymin>{box[1]}</ymin>\n'
        '      <xmax>{box[2]}</xmax>\n'
        '      <ymax>{box[3]}</ymax>\n'
        '    </bndbox>\n'
        '  </object>\n'
    )
    return ''.join(object_template.format(name=item[1], box=item[0]) for item in objs)

In [14]:
# build the complete XML annotation document for one image
def xml_content(folder_name, file_name, db_name, selection_type, img_type, width, height, channels, voc_el):
    """Return the ``<annotation>`` document for one image as a string.

    The header (folder, filename, source and size sections) is filled from the
    arguments; the ``<object>`` elements come from ``xml_objects(voc_el)``.
    Returns None when ``voc_el`` produces no objects.
    """
    xml_objs = xml_objects(voc_el)
    if xml_objs == '':
        # nothing to annotate: no document is produced
        return None
    header = (
        '<annotation>\n'
        '  <folder>{folder}</folder>\n'
        '  <filename>{filename}</filename>\n'
        '  <source>\n'
        '    <database>{database}</database>\n'
        '    <annotation>{annotation}</annotation>\n'
        '    <image>{image}</image>\n'
        '  </source>\n'
        '  <size>\n'
        '    <width>{width}</width>\n'
        '    <height>{height}</height>\n'
        '    <depth>{depth}</depth>\n'
        '  </size>\n'
    ).format(
        folder=folder_name,
        filename=file_name,
        database=db_name,
        annotation=selection_type,
        image=img_type,
        width=width,
        height=height,
        depth=channels,
    )
    return header + xml_objs + '</annotation>'

In [15]:
def make_xml(xml_folder_name, xml_file, img_folder_name, img_file, db_name, annotation_type, img_type, width, height, channels, voc_el):
    """Write the XML annotation of one image to ``xml_folder_name/xml_file``.

    All arguments after ``xml_file`` are passed on to ``xml_content`` to render
    the document.

    Raises:
        ValueError: if ``xml_content`` returns None, which it does when
            ``voc_el`` contains no objects. No file is created in that case.
    """
    # Render before opening the file: previously the file was created first and
    # a None result then failed inside write(), leaving an empty file behind.
    str_data = xml_content(img_folder_name, img_file, db_name, annotation_type, img_type, width, height, channels, voc_el)
    if str_data is None:
        raise ValueError('no objects to annotate for image {!r}'.format(img_file))
    # The document has no XML declaration, so parsers assume UTF-8; write it as
    # UTF-8 explicitly instead of relying on the platform default encoding.
    with open(os.path.join(xml_folder_name, xml_file), 'w', encoding='utf-8') as output_file:
        output_file.write(str_data)

In [16]:
# shuffle dict of ssd data
def shuffle_dict(ssd_data, seed=42):
    """Return a new dict holding the items of ``ssd_data`` in shuffled key order.

    Args:
        ssd_data: dict to shuffle; it is not modified.
        seed: seed of the shuffle. The default 42 reproduces the order that
            ``np.random.seed(42)`` followed by ``np.random.shuffle`` gives.

    Returns:
        A new dict with the same key/value pairs, inserted in shuffled order.
    """
    keys_list = list(ssd_data.keys())
    # A private RandomState yields the same permutation as seeding the global
    # generator, but does not reset the global NumPy random state as a side
    # effect of calling this helper.
    np.random.RandomState(seed).shuffle(keys_list)
    return {key: ssd_data[key] for key in keys_list}

In [17]:
# Replace ssd_dict with its shuffled copy.
# NOTE: the name is rebound to the result, so re-running only this cell shuffles
# the already shuffled dict and gives a different order than a fresh run.
ssd_dict = shuffle_dict(ssd_dict)

In [18]:
# Display the shuffled dict: image file name -> list of [bbox, gender] entries.
ssd_dict

{'q102_r4.jpg': [[[354, 35, 392, 143], 'm']],
 'q102_r15.jpg': [[[17, 4, 215, 305], 'f'], [[140, 7, 359, 275], 'f']],
 'q206_r30.jpg': [[[148, 9, 226, 95], 'm'],
  [[105, 7, 202, 108], 'm'],
  [[150, 13, 241, 152], 'm']],
 'q289_r32.jpg': [[[13, 15, 322, 464], 'f']],
 'q203_r7.jpg': [[[147, 91, 336, 331], 'f'], [[120, 84, 398, 332], 'f']],
 'q119_r16.jpg': [[[139, 69, 220, 272], 'm']],
 'q85_r27.jpg': [[[77, 58, 499, 464], 'm']],
 'q243_r36.jpg': [[[235, 134, 437, 374], 'f'], [[178, 129, 494, 374], 'f']],
 'q282_r17.jpg': [[[121, 89, 196, 184], 'm'],
  [[239, 87, 389, 286], 'm'],
  [[269, 78, 461, 332], 'm']],
 'q219_r19.jpg': [[[251, 231, 307, 376], 'f']],
 'q17_r27.jpg': [[[120, 9, 361, 281], 'f']],
 'q26_r45.jpg': [[[168, 139, 308, 499], 'm'], [[8, 127, 200, 392], 'f']],
 'q38_r59.jpg': [[[138, 93, 309, 318], 'f']],
 'q273_r25.jpg': [[[131, 71, 351, 499], 'm'], [[11, 148, 171, 386], 'm']],
 'q164_r37.jpg': [[[2, 105, 254, 374], 'm'], [[259, 77, 499, 371], 'm']],
 'q85_r46.jpg': [[[4

In [19]:
# Share of ssd_dict entries that goes to the train part; the rest becomes the test part.
# Read as a global by train_test_split.
ratio = 0.75

In [20]:
def train_test_split(ssd_data, train_ratio=None):
    """Split ``ssd_data`` into a leading train part and a trailing test part.

    The items keep their current order: the first ``int(train_ratio * len)``
    items form the train dict, the remaining ones the test dict.

    Args:
        ssd_data: dict to split; it is not modified.
        train_ratio: share of items for the train part. When None, the
            notebook-level ``ratio`` is used.

    Returns:
        Tuple ``(train_data, test_data)`` of two new dicts.
    """
    if train_ratio is None:
        train_ratio = ratio
    # materialise the items and compute the cut position only once
    items = list(ssd_data.items())
    split_at = int(train_ratio * len(items))
    return dict(items[:split_at]), dict(items[split_at:])

In [21]:
# Split the shuffled dict: the leading `ratio` share of entries is the train part, the rest the test part.
train_data, test_data = train_test_split(ssd_dict)

In [22]:
# create selection
# param 'selection_type' must be 'train' or 'test'
def make_selection(data, selection_type):
    """Write one selection (train or test) to disk.

    For every image in ``data`` the XML annotation is written, the image is
    copied into the selection's image folder, and one line is added to the txt
    index that is later used by Caffe for creating lmdb.

    Args:
        data: dict mapping image file name -> list of [bbox, class] entries.
        selection_type: 'test' selects the TEST_* destinations; any other
            value falls back to the TRAIN_* destinations.
    """
    # destinations depend on the type of selection
    if selection_type == 'test':
        images_folder, xml_folder, txt_file = TEST_IMAGES_FOLDER, TEST_XML_FOLDER, TEST_TXT
    else:
        images_folder, xml_folder, txt_file = TRAIN_IMAGES_FOLDER, TRAIN_XML_FOLDER, TRAIN_TXT
    # Last path component only; basename handles the separator of the current
    # OS, whereas splitting on a backslash worked on Windows paths alone.
    images_folder_name = os.path.basename(images_folder)
    xml_folder_name = os.path.basename(xml_folder)

    index_lines = []  # lines of the txt index: '<image path> <xml path>'
    for img_file, voc_el in data.items():
        src_image = os.path.join(IMAGES_FOLDER, img_file)
        # the context manager closes the image file once its size has been read
        with Image.open(src_image) as img:
            width, height = img.size
        # xml marking; splitext drops only the extension, so names containing
        # extra dots keep their full stem and cannot collide
        xml_file = os.path.splitext(img_file)[0] + '.xml'
        make_xml(xml_folder, xml_file, images_folder_name, img_file, DB_NAME,
                 selection_type, IMG_TYPE, width, height, CHANNELS, voc_el)
        # image
        shutil.copyfile(src_image, os.path.join(images_folder, img_file))
        index_lines.append(images_folder_name + '/' + img_file + ' ' +
                           xml_folder_name + '/' + xml_file + '\n')
    # txt for lmdb
    with open(txt_file, 'w') as output_file:
        output_file.writelines(index_lines)

In [23]:
# Write the train selection: XML annotations, image copies and the train txt index.
make_selection(train_data, 'train')

In [24]:
# Write the test selection: XML annotations, image copies and the test txt index.
make_selection(test_data, 'test')