In [1]:
import pandas as pd 
import os 

In [3]:
# Load the processed training annotations, taking the CSV's first column as
# the row index, then preview the first rows.
train_data = pd.read_csv(
    filepath_or_buffer="./data/dataset/train_data_processed.csv",
    index_col=0,
)
train_data.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,z2175422435650_c0e6ea5ea65fe03388d0de2c0000a3f...,416,416,dress,102,44,293,414
1,orange_pants--30-_jpg.rf.0d5fc77400a8306a3dfca...,195,259,bottom,53,130,141,234
2,73ef771d5ae7abb9f2f6_jpg.rf.d04a99711fd320d0bb...,416,416,dress,22,88,256,323
3,Pantalones-1740-_jpg.rf.58b49ab60b2df716ab9e0a...,224,224,bottom,6,0,217,222
4,47ccaaad0fe7feb9a7f6_jpg.rf.5bf20dc1bb14ca1e39...,416,416,dress,91,49,415,415


In [4]:
# Load the processed validation annotations (first CSV column is the row
# index) and report (rows, columns).
val_data = pd.read_csv(
    filepath_or_buffer="./data/dataset/val_data_processed.csv",
    index_col=0,
)
val_data.shape

(900, 8)

In [11]:
# Fold the validation rows into the training frame and renumber rows 0..n-1.
# NOTE(review): this reassigns train_data from itself, so re-running the cell
# appends val_data again; restart the kernel before re-executing.
train_data = pd.concat([train_data, val_data], ignore_index=True)

In [12]:
# Load the raw Pinterest test-split annotations (no index column is consumed here).
# NOTE(review): test_data is only inspected via .shape in the visible cells;
# the later "test" export is built from val_data instead - confirm intent.
test_data = pd.read_csv(
    filepath_or_buffer="./data/dataset/raw_data_pinterest/test/_annotations.csv",
)

In [34]:
# Display (rows, columns) of the raw test annotations as a quick sanity check.
test_data.shape 

(7, 8)

In [7]:
# Map each class label to an integer id. value_counts() orders labels from
# most to least frequent in train_data, so the most common label gets id 0.
# The ids therefore depend on the contents of train_data when this cell runs.
classes = {
    label: class_id
    for class_id, label in enumerate(train_data['class'].value_counts().index)
}
classes

{'bottom': 0, 'top': 1, 'dress': 2}

In [8]:
def parse_csv_to_pascal(df: pd.DataFrame, class_map=None):
    """Group bounding-box rows by image file name.

    Parameters
    ----------
    df : pd.DataFrame
        One row per bounding box. Must provide the columns ``filename``,
        ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
    class_map : dict, optional
        Mapping from class label to integer id. When omitted, the
        notebook-level ``classes`` dictionary is used, which keeps existing
        single-argument calls working unchanged.

    Returns
    -------
    dict
        ``{filename: [[xmin, ymin, xmax, ymax, class_id], ...]}``; boxes of
        the same image are kept in row order and coordinates are cast to
        ``int``.

    Raises
    ------
    KeyError
        If a required column is missing or a label is absent from the
        class mapping.
    """
    if class_map is None:
        # Resolved at call time so the function still follows the notebook global.
        class_map = classes

    data = {}
    if len(df.index) == 0:
        # Nothing to group; return early without touching any column.
        return data

    # Walk the needed columns in lockstep rather than building a row Series
    # with df.iloc[i] for every single field access.
    columns = [df[name] for name in ('filename', 'class', 'xmin', 'ymin', 'xmax', 'ymax')]
    for filename, label, xmin, ymin, xmax, ymax in zip(*columns):
        box = [int(xmin), int(ymin), int(xmax), int(ymax), class_map[label]]
        data.setdefault(filename, []).append(box)

    return data

In [9]:
# Group the training rows into {filename: [[xmin, ymin, xmax, ymax, class_id], ...]}.
train_pascal_dic = parse_csv_to_pascal(df=train_data)

In [11]:
# Group the held-out rows into {filename: [[xmin, ymin, xmax, ymax, class_id], ...]}.
# NOTE(review): this "test" dictionary is built from val_data, which an
# earlier cell also concatenates into train_data; if that merge ran before
# train_pascal_dic was built, the two exported sets overlap. Confirm whether
# test_data was meant to be used here instead.
test_pascal_dic = parse_csv_to_pascal(df=val_data)

In [12]:
import shutil 

def modify_data(data, path_to_images, path_to_output,
                annotations_filename='annotations_train.txt', overwrite=False):
    """Copy annotated images to an output folder and write their annotation file.

    For every image in ``data`` the file is copied from ``path_to_images`` to
    ``path_to_output`` (spaces stripped from the file name) and one line is
    written to the annotation file in the form
    ``<output image path> xmin,ymin,xmax,ymax,class xmin,ymin,xmax,ymax,class ``.

    Parameters
    ----------
    data : dict
        ``{image file name: [[xmin, ymin, xmax, ymax, class_id], ...]}``.
    path_to_images : str
        Directory holding the source images.
    path_to_output : str
        Directory receiving the copied images and the annotation file. It is
        created if it does not exist yet.
    annotations_filename : str, optional
        Name of the annotation file inside ``path_to_output``. Defaults to
        ``'annotations_train.txt'``, the name this function has always used.
    overwrite : bool, optional
        ``False`` (default) appends to an existing annotation file, so calling
        the function twice with the same output duplicates its lines. Pass
        ``True`` to truncate the file first and make the call repeatable.

    Raises
    ------
    FileNotFoundError
        If a source image listed in ``data`` is missing from ``path_to_images``.
    """
    # Create the destination up front so neither open() nor shutil.copy fails
    # on a fresh checkout. An empty path means "current directory".
    if path_to_output:
        os.makedirs(path_to_output, exist_ok=True)

    path_to_save_annotations = os.path.join(path_to_output, annotations_filename)
    file_mode = 'w' if overwrite else 'a+'

    with open(path_to_save_annotations, file_mode) as f:

        for img_name, detections in data.items():

            path_to_input_img = os.path.join(path_to_images, img_name)

            # The annotation line is space-delimited, so the copied file name
            # must not contain spaces.
            name_without_spaces = img_name.replace(' ', '')
            path_to_output_img = os.path.join(path_to_output, name_without_spaces)

            shutil.copy(path_to_input_img, path_to_output_img)

            # One line per image: the path followed by every box, each box
            # terminated by a single space (the trailing space is kept).
            boxes = ''.join(
                f'{xmin},{ymin},{xmax},{ymax},{c} '
                for xmin, ymin, xmax, ymax, c in detections
            )
            f.write(f'{path_to_output_img} {boxes}\n')

    print('Done saving annotations')

In [13]:
# Export the training images and their annotation file.
# modify_data opens the annotation file in append mode, so re-running this
# cell adds the same lines again.
modify_data(
    data=train_pascal_dic,
    path_to_images='./data/dataset/data_merge',
    path_to_output='./data/dataset/processed_yolo/train/',
)

Done saving annotations


In [14]:
# Export the held-out images and their annotation file.
# modify_data always names the file 'annotations_train.txt', so this split's
# annotations end up in processed_yolo/test/annotations_train.txt.
modify_data(
    data=test_pascal_dic,
    path_to_images='./data/dataset/data_merge',
    path_to_output='./data/dataset/processed_yolo/test/',
)

Done saving annotations
