In [24]:
import json
import pandas as pd  
import numpy as np
import glob

In [25]:
# Dataframe containing the ID of the images in train01
images_in_train01 = glob.glob("Datasets\\CrowdHuman\\CrowdHuman_train01\\Images\\*")
images_in_train01 = [i.replace('Datasets\\CrowdHuman\\CrowdHuman_train01\\Images\\', '') for i in images_in_train01]
images_in_train01 = pd.DataFrame({'ID2': images_in_train01})

In [26]:
# Open odgt files
with open('Datasets/CrowdHuman/annotation_train.odgt') as f:
    lines_train = f.readlines()
with open('Datasets/CrowdHuman/annotation_val.odgt') as f:
    lines_val = f.readlines()

In [27]:
def odgt_to_dataframe(line, images_path):
    '''
    This function extracts for every line of the odgt file the ID of the image, the the tags ('person' or 'mask'), and the full bounding box related to each of the tags.
    
    Input: a line from the odgt file
    Output: a dataframe  
    '''
    df = pd.json_normalize(json.loads(line)['gtboxes'])
    df['ID'] = '$'+images_path+json.loads(line)['ID']+'.jpg'
    df['ID2'] = json.loads(line)['ID']+'.jpg'
    # df1 = pd.DataFrame(df['fbox'].to_list(), columns = ['x1', 'y1', 'x2', 'y2'])
    df1 = pd.DataFrame(df['fbox'].to_list(), columns = ['x1', 'y1', 'w', 'h'])
    df1['x2'] = df1['x1'] + df1['w'] 
    df1['y2'] = df1['y1'] + df1['h']
    df1 = df1.drop(columns=['w','h']) 
    df1['tag'] = df['tag']
    df1['ID'] = df['ID']
    df1['ID2'] = df['ID2']
    return df1


In [28]:
# Apply odgt_to_dataframe to each line and save all results in a dataset 

# Training set
train_path = 'Datasets/CrowdHuman/CrowdHuman_train01/Images'
train0 = pd.concat([odgt_to_dataframe(line = l, images_path = train_path) for l in lines_train])
train = pd.merge(train0,images_in_train01,on='ID2') #keep only the information from images in train01
train = train.drop(columns=['ID2'])
first_column = train.pop('ID')
train.insert(0, 'ID', first_column)
print(train.shape)

# Validation set
val_path = 'Datasets/CrowdHuman/CrowdHuman_val/Images'
val = pd.concat([odgt_to_dataframe(line = l, images_path = val_path) for l in lines_val])
val = val.drop(columns=['ID2'])
first_column = val.pop('ID')
val.insert(0, 'ID', first_column)
print(val.shape)

(0, 6)
(127716, 6)


In [29]:
# Dataframe with the classes (person and mask)
classes = np.unique(train['tag'])
classes = pd.DataFrame({'a':classes, 'b': range(len(classes))})

In [30]:
# save in txt
train.to_csv('Datasets/CrowdHuman/train.txt', header=None, index=None, sep=' ')
val.to_csv('Datasets/CrowdHuman/val.txt', header=None, index=None, sep=' ')
classes.to_csv('Datasets/CrowdHuman/classes.txt', header=None, index=None, sep=' ')