In [1]:
import pandas as pd
import numpy as np
import ast
import os
import cv2
from os import getcwd

## Setup

In [2]:
# Object annotations: keep only the image name and its region description,
# dropping exact repeats. reset_index() (without drop=True) renumbers the rows
# and keeps the previous row labels in an 'index' column.
annotation_columns = ['filename', 'region_shape_attributes']
da = (
    pd.read_csv('DaycaresAnnotations_original.csv')[annotation_columns]
    .drop_duplicates()
    .reset_index()
)

# Folders (relative to the working directory) used by the cells below
filepath_to_daycares = 'Daycares/'
filepath_to_zca_daycares = 'zca/'
filepath_to_nondaycares = 'NonDaycares/'
filepath_to_labels = 'labels/'

# Output files: collected annotation lines and the list of image paths
save_to_file = 'annotate.txt'
image_path_file = 'all.txt'

# Working directory, used to turn relative image paths into absolute ones
cwd = getcwd()

In [3]:
# Pair every ZCA-whitened image with the annotation of the image it came from.
# ZCA files carry a '_ZCA' marker in their name; removing it yields the
# original filename used in the annotation table.
zca_files = [name for name in os.listdir(filepath_to_zca_daycares) if name.endswith('jpg')]
print('Total of ZCA files - ', len(zca_files))
zca_to_org_map = [(file, file.replace('_ZCA', '')) for file in zca_files]
# Build the frame straight from the tuples: going through np.array turns an
# empty listing into a 1-D array that cannot be given two column names.
zca_org_df = pd.DataFrame(zca_to_org_map, columns=['zca', 'filename'])
zca_annot_map = (
    pd.merge(zca_org_df, da, on='filename', how='inner')
    .drop_duplicates()
    # keep row labels contiguous (0..n-1) so label-based loops cannot hit a gap
    .reset_index(drop=True)
)
# zca_annot_map[zca_annot_map.filename.isin(zca_annot_map[zca_annot_map.duplicated(['filename'])]['filename'].values)]
# Every ZCA image must match exactly one annotation row
assert len(zca_annot_map) == len(zca_files), (
    'expected one annotation per ZCA file: %d annotation rows for %d files'
    % (len(zca_annot_map), len(zca_files))
)
zca_annot_map.head(5)

Total of ZCA files -  250


Unnamed: 0,zca,filename,index,region_shape_attributes
0,17610_20140903_ZCA.jpg,17610_20140903.jpg,67,"{""name"":""rect"",""x"":609,""y"":634,""width"":79,""hei..."
1,17610_20180706_ZCA.jpg,17610_20180706.jpg,79,"{""name"":""rect"",""x"":609,""y"":634,""width"":79,""hei..."
2,1713_20131114_ZCA.jpg,1713_20131114.jpg,1247,"{""name"":""rect"",""x"":818,""y"":470,""width"":61,""hei..."
3,19472_20171002_ZCA.jpg,19472_20171002.jpg,2525,"{""name"":""rect"",""x"":873,""y"":558,""width"":76,""hei..."
4,1765_20160308_ZCA.jpg,1765_20160308.jpg,1389,"{""name"":""rect"",""x"":778,""y"":190,""width"":81,""hei..."


## Parse

In [5]:
# Annotated output
output_annot = []
image_paths = []
# Label files already created during this run. An image with several annotated
# regions has several rows in `da`; later boxes are appended to its label file
# instead of overwriting the earlier ones.
written_labels = set()
###########################################################################
# DAYCARES
# Writes one label file per image, next to the image itself. Each line is
# "<class> <x_center> <y_center> <width> <height>" with the box centre and
# size expressed as fractions of the image width / height.
for r in range(len(da)):
    region = da.loc[r, 'region_shape_attributes']
    # Rows with an empty region have no object to label
    if region == "{}":
        continue

    # filename
    filepath = filepath_to_daycares + da.loc[r, 'filename']
    imagepath = cwd + '/' + filepath
    # both jpg and text need to be in same dir
    filepath_label = filepath.replace('.jpg', '.txt')

    img = cv2.imread(filepath)
    if img is None:
        # cv2.imread returns None when the image cannot be read
        print(filepath, ' not found')
        continue

    # Get rect attributes (keys used: x, y, width, height; assumed to be
    # pixel coordinates of the top-left corner plus box size -- TODO confirm)
    rect = ast.literal_eval(region)
    height, width = img.shape[:2]

    # Centre = top-left corner + half the box size along the SAME axis
    rel_x = (rect['x'] + (rect['width'] / 2)) / width
    rel_y = (rect['y'] + (rect['height'] / 2)) / height
    rel_width = rect['width'] / width
    rel_height = rect['height'] / height

    # class_name ("0" = Daycare) as we have only 1 class
    current_annot = '{} {} {} {} {}'.format(0, rel_x, rel_y, rel_width, rel_height)
    image_paths.append(imagepath)
    output_annot.append(current_annot)

    # Truncate on the first box of an image (so re-running the cell starts
    # clean), append for every further box of the same image.
    mode = 'a' if filepath_label in written_labels else 'w'
    with open(filepath_label, mode) as f:
        f.write("%s\n" % current_annot)
    written_labels.add(filepath_label)



In [8]:
# Annotated output
# NOTE(review): these two lists are reset here, so anything collected for the
# original Daycares images by an earlier cell is discarded -- confirm intended.
output_annot = []
image_paths = []
# Label files already created during this run; further boxes for the same
# image are appended instead of overwriting the earlier ones.
written_labels = set()
###########################################################################
# ZCA _ DAYCARES
# Writes one label file per ZCA image, next to the image itself. Each line is
# "<class> <x_center> <y_center> <width> <height>" with the box centre and
# size expressed as fractions of the image width / height.
# Iterate over the rows themselves: the frame's labels are not guaranteed to
# be 0..n-1 after merge + drop_duplicates, so .loc[r] could raise KeyError.
for _, row in zca_annot_map.iterrows():
    region = row['region_shape_attributes']
    # Rows with an empty region have no object to label
    if region == "{}":
        continue

    # filename
    filepath = filepath_to_zca_daycares + row['zca']
    imagepath = cwd + '/' + filepath
    # both jpg and text need to be in same dir
    filepath_label = filepath.replace('.jpg', '.txt')

    img = cv2.imread(filepath)
    if img is None:
        # cv2.imread returns None when the image cannot be read
        print(filepath, ' not found')
        continue

    # Get rect attributes (keys used: x, y, width, height; assumed to be
    # pixel coordinates of the top-left corner plus box size -- TODO confirm)
    rect = ast.literal_eval(region)
    height, width = img.shape[:2]

    # Centre = top-left corner + half the box size along the SAME axis
    rel_x = (rect['x'] + (rect['width'] / 2)) / width
    rel_y = (rect['y'] + (rect['height'] / 2)) / height
    rel_width = rect['width'] / width
    rel_height = rect['height'] / height

    # class_name ("0" = Daycare) as we have only 1 class
    current_annot = '{} {} {} {} {}'.format(0, rel_x, rel_y, rel_width, rel_height)
    image_paths.append(imagepath)
    output_annot.append(current_annot)

    # Truncate on the first box of an image (so re-running the cell starts
    # clean), append for every further box of the same image.
    mode = 'a' if filepath_label in written_labels else 'w'
    with open(filepath_label, mode) as f:
        f.write("%s\n" % current_annot)
    written_labels.add(filepath_label)



In [None]:
###########################################################################
# Non DAYCARES
# Adds one comma-separated row per .jpg in the non-daycare folder:
#   <path>,<x-min>,<x-max>,<y-min>,<y-max>,<class>
# The four box values are placeholders (0); they do not matter for NonDaycare.
# NOTE(review): this row layout differs from the space-separated relative-box
# lines appended to output_annot by the label cells, and class "0" means
# NonDaycare here ("1" = Daycare) while those cells use "0" for Daycare.
# Confirm which consumer reads these rows.
nod = os.listdir(filepath_to_nondaycares)
for nd in nod:
    if not nd.endswith('.jpg'):
        continue
    fields = [
        filepath_to_nondaycares + nd,  # filename
        str(0),                        # x-min
        str(0),                        # x-max
        str(0),                        # y-min
        str(0),                        # y-max
        "0",                           # class_name
    ]
    current_annot = ",".join(fields)
    output_annot.append(current_annot)


In [None]:
# Remove duplicates
# dict.fromkeys keeps the first occurrence of every entry in its original
# order, so the files written from these lists are identical from run to run
# (the iteration order of a set of strings is not stable across runs).
output_annot = list(dict.fromkeys(output_annot))
image_paths = list(dict.fromkeys(image_paths))

## Parse smaller dataset filenames

## Check output

In [None]:
# Number of annotation entries currently held in output_annot
len(output_annot)

In [None]:
# Display the collected annotation entries for a visual sanity check
# (shows the whole list; can be long)
output_annot

In [None]:
# Number of collected image paths, followed by the paths themselves
print(len(image_paths))
image_paths

## Save to file

In [None]:
# Write every collected annotation entry to save_to_file, one entry per line
with open(save_to_file, 'w') as f:
    f.writelines("%s\n" % item for item in output_annot)

In [None]:
cwd = os.getcwd()
# image_paths = []
def add_full_path(filename):
    """Return the absolute path of a daycare image if it is ready for use.

    The path is built as ``cwd + '/' + filepath_to_daycares + filename``.
    It is returned only when both the image file and its label file (same
    path with '.jpg' replaced by '.txt') exist on disk; otherwise the
    function returns None.
    """
    image_path = cwd + '/' + filepath_to_daycares + filename
    if not os.path.isfile(image_path):
        return None
    label_path = image_path.replace('.jpg', '.txt')
    if not os.path.isfile(label_path):
        return None
    return image_path

# First column of each split file holds the image filenames of that split
train = pd.read_csv('train.csv', header=None)[0].values
test = pd.read_csv('test.csv', header=None)[0].values
# print(test)
# Resolve each filename once and keep only the images that exist together
# with their label file (add_full_path returns None otherwise). Calling it a
# single time per entry halves the filesystem checks and guarantees that the
# value that was tested is the value that is stored.
train = [path for path in map(add_full_path, train) if path is not None]
test = [path for path in map(add_full_path, test) if path is not None]

In [None]:
# Save image paths to FILE, one path per line.
# The combined list goes to image_path_file (set in the setup cell) instead of
# repeating the file name as a literal here.
path_lists = (
    ('train.txt', train),
    ('test.txt', test),
    (image_path_file, image_paths),
)
for list_file, paths in path_lists:
    with open(list_file, 'w') as f:
        f.writelines("%s\n" % item for item in paths)