### Data Preparation and Splits for the Multimodal Object Detection Experiment

In [1]:
import os
import shutil

import numpy as np
import pandas as pd
from tqdm import tqdm
# Root of the original (undisturbed) KAIST dataset.
PATH = '/DATA1/chaitanya/KAIST/rgbt-ped-detection/data/kaist-rgbt'
# train-all-02 is the train split provided by KAIST; "02" refers to every second image.
# The split file is read inside a context manager so its handle is closed right
# after loading, and the trailing "\n" is stripped from every image id.
with open(f'{PATH}/imageSets/train-all-02.txt') as split_file:
    train_list = [each.strip() for each in split_file]

In [2]:
# Number of image ids read from the train split file.
len(train_list)

25086

In [3]:
# Peek at one raw annotation file to see the on-disk format: a '% bbGt version=3'
# header line followed by one line per object whose first token is the category.
# The context manager closes the file handle once the lines have been read.
with open('/DATA1/chaitanya/KAIST/rgbt-ped-detection/data/kaist-rgbt/annotations/set00/V004/I00751.txt') as ann_file:
    ann = ann_file.readlines()
ann

['% bbGt version=3\n',
 'person 402 214 20 43 0 0 0 0 0 0 0\n',
 'person 391 215 20 44 0 0 0 0 0 0 0\n']

In [4]:
import pdb
def get_person_annotations(annotations_path, images_list):
    """
    Select the images whose annotation file contains at least one 'person' entry.

    annotations_path: Directory containing the annotations in the layout set00/V000/I00001.txt
    images_list: List of image ids, each entry in the format set00/V000/I00001

    returns: the sub-list of images_list (input order preserved) whose annotation file
             has, after its header line, at least one line whose first token is 'person'
    """
    # collects the entries of images_list that have at least one person annotation
    images_with_person = []
    for each in images_list:
        ann_file_path = f'{annotations_path}/{each}.txt'
        # the context manager closes each annotation file as soon as it is parsed,
        # instead of leaving one open handle per image
        with open(ann_file_path) as ann_file:
            ann_list = [each_x.split() for each_x in ann_file]
        # The first line of the txt file is the '% bbGt version=3' header, so it is
        # skipped. The first token of every remaining line is the category. Tokens are
        # compared directly (no 2-D numpy array) so that blank lines or lines with a
        # different number of fields cannot break the check.
        if any(tokens and tokens[0] == 'person' for tokens in ann_list[1:]):
            images_with_person.append(each)

    return images_with_person

In [5]:
# Reuse the PATH constant instead of repeating the absolute dataset location.
subset_person = get_person_annotations(f'{PATH}/annotations', train_list)

In [6]:
# Number of train images that contain at least one 'person' annotation.
len(subset_person)

9288

In [7]:
# Persist the person-only image ids, one id per line.
with open('person-only-kaist-train_02.txt', 'w') as out_file:
    out_file.writelines(f'{image_id}\n' for image_id in subset_person)

In [8]:
def get_person_only(annotations_path, subset_list, min_area=50):
    '''
    Extracts the 'person' (only) annotations for every image in subset_list.

    Input:  annotations_path: Directory containing the annotations in the layout set00/V000/I00001.txt
            subset_list (output of get_person_annotations): image ids, each in the format set00/V000/I00001
            min_area: a 'person' line is kept only when the product of its tokens 3 and 4
                      (presumably box width and height in the bbGt layout - confirm) is
                      strictly greater than this value; default 50, the previously
                      hard-coded threshold

    returns: dict mapping every image id of subset_list to the list of its kept boxes; each
             box is the list of the four string tokens that follow the category on the
             annotation line. An image whose boxes are all filtered out maps to an empty list.
    '''
    # maps each image id to the person annotations kept for it
    id_to_ann = {}
    for each in subset_list:
        ann_file_path = f'{annotations_path}/{each}.txt'
        # the context manager closes each annotation file as soon as it is parsed
        with open(ann_file_path) as ann_file:
            ann_list = [each_x.split() for each_x in ann_file]
        id_to_ann[each] = [
            every_ann[1:5]
            for every_ann in ann_list
            # a blank line splits into an empty list, so guard before indexing; the
            # header line is rejected by the category comparison
            if every_ann and every_ann[0] == 'person'
            and int(every_ann[3]) * int(every_ann[4]) > min_area
        ]
    return id_to_ann

In [9]:
# Reuse the PATH constant instead of repeating the absolute dataset location.
dict_person_ann = get_person_only(f'{PATH}/annotations', subset_person)

In [10]:
# Total number of kept person boxes across all images.
# NOTE: assigning to `sum` rebinds the built-in of the same name in this namespace.
sum = len([box for boxes in dict_person_ann.values() for box in boxes])

In [11]:
# `sum` here is the counter assigned in the cell above, not the built-in function.
sum

20733

In [17]:
# Sanity check: count the kept boxes whose last two tokens multiply to less than 50.
# The result is stored in `sum`, which keeps the built-in of that name shadowed.
sum = len([
    box
    for boxes in dict_person_ann.values()
    for box in boxes
    if int(box[-1]) * int(box[-2]) < 50
])

In [18]:
# `sum` here is the counter assigned in the cell above, not the built-in function.
sum

0

In [9]:
# Scratch check: prefix every token with a single space and concatenate them.
string = ''.join(f' {token}' for token in ['192', '214', '20', '43'])

In [23]:
def get_images_to_folder(src_path, images_list, dest_path='./images'):
    """
    Copy the lwir and visible image of every listed frame into two flat folders.

    src_path: dataset root; images are read from
              {src_path}/images/<set>/<video>/lwir/<frame>.jpg and the matching
              .../visible/<frame>.jpg
    images_list: image ids, each in the format set00/V000/I00001
    dest_path: output root (default './images', the previously hard-coded location);
               copies are written to {dest_path}/lwir and {dest_path}/visible

    Each copy is named after its image id with '/' replaced by '_'
    (e.g. set00_V000_I00001.jpg), so both output folders are flat.
    """
    dest_lwir_dir = f'{dest_path}/lwir'
    dest_visible_dir = f'{dest_path}/visible'
    # shutil.copyfile does not create missing parent folders, so make sure both exist
    os.makedirs(dest_lwir_dir, exist_ok=True)
    os.makedirs(dest_visible_dir, exist_ok=True)
    for each in tqdm(images_list):
        each_split = each.split('/')
        src_lwir = f'{src_path}/images/{each_split[0]}/{each_split[1]}/lwir/{each_split[2]}.jpg'
        src_visible = f'{src_path}/images/{each_split[0]}/{each_split[1]}/visible/{each_split[2]}.jpg'
        # flatten set/video/frame into a single file name
        fl_name = '_'.join(each_split)
        shutil.copyfile(src_lwir, f'{dest_lwir_dir}/{fl_name}.jpg')
        shutil.copyfile(src_visible, f'{dest_visible_dir}/{fl_name}.jpg')

In [24]:
# Reuse the PATH constant instead of repeating the absolute dataset location;
# iterating a dict yields its keys, i.e. the image ids.
get_images_to_folder(PATH, list(dict_person_ann))

100%|██████████| 9288/9288 [00:05<00:00, 1603.64it/s]


In [32]:
def get_annotations_to_folder(dict_ann, dest_path='./annotations'):
    """
    Write one text file per image containing its annotations, one per line.

    dict_ann: dict mapping an image id (format set00/V000/I00001) to a list of
              annotations, each annotation being a list of string tokens
    dest_path: output folder (default './annotations', the previously hard-coded
               location); it is created when missing

    The file for an image is named after its id with '/' replaced by '_'
    (e.g. set00_V000_I00001.txt); every annotation becomes one line with its
    tokens joined by single spaces. An image with no annotations gets an empty file.
    """
    # open(..., 'w') does not create missing parent folders, so make sure it exists
    os.makedirs(dest_path, exist_ok=True)
    for each in tqdm(dict_ann.keys()):
        fl_name = each.replace('/', '_')
        with open(f'{dest_path}/{fl_name}.txt', 'w') as f:
            for each_ann in dict_ann[each]:
                f.write("%s\n" % ' '.join(each_ann))

In [33]:
# Write the filtered person annotations to one text file per image.
get_annotations_to_folder(dict_person_ann)

100%|██████████| 9288/9288 [00:00<00:00, 12941.13it/s]
