# DeepFashion data pipeline

data link  : http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html


Data manipulation that makes the DeepFashion dataset suitable for use by the YOLOv5 model.


Note: do not forget to unzip the images archive before running the pipeline.


input : 
- img directory : contains the images of the dataset
- Category and Attribute Prediction Benchmark directory : contains the text files that associate each image with its garment label, its garment bounding box and its train/test/validation split


output : 
- 3 directories (train/test/validation), each storing the images and their labels. The folders are directly usable by the YOLOv5 model. Every image is placed on a fixed-size 300x300 canvas (padded when smaller, cropped when larger) to fit the model

In [1]:
import os
import numpy as np
import cProfile
from PIL import Image

In [2]:
# Optional one-off step: extract the image archive into the datasets folder.
# Skip this cell when the images have already been unzipped.

import zipfile

with zipfile.ZipFile(
    "C:/Users/Montassier Paul/Documents/Bazar/iaAPI/datasets/images.zip",
    mode="r",
) as zip_ref:
    zip_ref.extractall(
        path="C:/Users/Montassier Paul/Documents/Bazar/iaAPI/datasets/"
    )

In [3]:
def image_resizig(image,bbox, target_size = 300, fill_color=(0, 0, 0)):
    """
    Place an image on a square canvas and express its bbox in YOLO format.

    The image is never rescaled. The part of it that fits in a
    target_size x target_size square (taken from the top-left corner) is
    pasted, centred, on a canvas filled with fill_color. Smaller images are
    therefore padded and larger ones are cropped.

    input :

        image : PIL image (anything exposing size, crop and usable by paste)
        bbox : sequence whose first four items are x_min, y_min, x_max, y_max
               in pixels of the original image (ints or numeric strings)
        target_size : side length, in pixels, of the square output canvas
        fill_color : RGB colour used for the padding


    output :

        new_image : RGB image of size (target_size, target_size)
        b_center_x, b_center_y, b_width, b_height : bbox centre and FULL
            width/height, each divided by target_size, i.e. the
            "x_center y_center width height" layout used by Yolo labels

    """

    image_w, image_h = image.size

    # Size of the region of the source image that survives on the canvas.
    kept_w = min(target_size, image_w)
    kept_h = min(target_size, image_h)

    # Offset that centres the kept region on the canvas.
    offset_x = int((target_size - kept_w) / 2)
    offset_y = int((target_size - kept_h) / 2)

    # Create the background, then paste the (possibly cropped) image on it.
    new_image = Image.new('RGB', (target_size, target_size), fill_color)
    new_image.paste(image.crop((0, 0, kept_w, kept_h)), (offset_x, offset_y))

    # Clip the box to the kept region: whatever lies outside it was cropped
    # away, and an unclipped box would yield normalized values above 1.
    x_min = min(max(int(bbox[0]), 0), kept_w)
    y_min = min(max(int(bbox[1]), 0), kept_h)
    x_max = min(max(int(bbox[2]), 0), kept_w)
    y_max = min(max(int(bbox[3]), 0), kept_h)

    # Yolo expects the box centre plus its full width and height (not the
    # half extents), all normalized by the canvas size.
    b_center_x = ((x_min + x_max) / 2 + offset_x) / target_size
    b_center_y = ((y_min + y_max) / 2 + offset_y) / target_size
    b_width    = (x_max - x_min) / target_size
    b_height   = (y_max - y_min) / target_size

    return new_image, b_center_x, b_center_y, b_width, b_height

In [6]:
def dataset_creation(bbox_file_path, location_file_path,categories_file_path, parameter = "train", limite = np.inf): 
    """
    Create a train/validation/test folder usable by the Yolo model.

    The three input files are read in lockstep: line i of each file describes
    the same sample. For every sample the image is loaded from
    "../datasets/<image path>", fitted on a fixed-size canvas by
    image_resizig, and written together with a one-line Yolo label under
    "../datasets/FashionIA/<parameter>/images" and ".../labels".

    input :
        bbox_file_path : text file with one "x_min y_min x_max y_max" line
                         per image
        location_file_path : text file with one image path per line,
                             relative to "../datasets/"
        categories_file_path : text file with one category label per line
        parameter : name of the split, used as the output sub-folder name
        limite : maximum number of samples to convert (default: all)


    output:
        None. Files are written to disk. If the three input files do not
        have the same number of lines, an error is printed and nothing is
        written.

    NOTE(review): the category label is written as found in the file. Yolo
    class ids are expected to be zero-based - confirm the *_cate.txt files
    already follow that convention.

    """

    # Context managers guarantee all three files are closed, including on
    # the early-return and error paths.
    with open(bbox_file_path, 'r') as bbox_file, \
            open(location_file_path, 'r') as location_file, \
            open(categories_file_path, 'r') as categories_file:
        bbox_lines = bbox_file.readlines()
        location_lines = location_file.readlines()
        categories_lines = categories_file.readlines()

    #check consistency between files
    if not (len(bbox_lines) == len(location_lines) == len(categories_lines)):
        print("error. File size does not match!")
        return

    #create if needed, target directories
    split_dir = os.path.join("../datasets/FashionIA", parameter)
    labels_dir = os.path.join(split_dir, "labels")
    images_dir = os.path.join(split_dir, "images")
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)

    #populate directory
    samples = zip(bbox_lines, location_lines, categories_lines)
    for index, (bbox_line, location_line, category_line) in enumerate(samples):

        # Stop once `limite` samples have been converted.
        if index >= limite:
            break

        #clothing label: keep the whole token so multi-digit labels survive
        clothe_cate = category_line.strip()

        #image location, relative to the datasets folder
        image_file_path = location_line.strip()

        #target name: "<parent folder>_<image name without extension>"
        path_parts = image_file_path.split("/")
        image_stem = os.path.splitext(path_parts[-1])[0]
        target_file_path = path_parts[-2] + "_" + image_stem

        #bbox: first four whitespace-separated fields of the line
        bbox = bbox_line.split()[:4]

        # The source image is closed even if the conversion fails.
        with Image.open("../datasets/" + image_file_path) as image:
            image_resized, b_center_x, b_center_y, b_width, b_height = image_resizig(image,bbox)

        #Yolo labelling: "class x_center y_center width height"
        label = "{} {:.3f} {:.3f} {:.3f} {:.3f}".format(clothe_cate, b_center_x, b_center_y, b_width, b_height)

        with open(os.path.join(labels_dir, target_file_path + ".txt"), 'w') as f:
            f.write(label)

        image_resized.save(os.path.join(images_dir, target_file_path + ".jpg"))

    print(parameter + "  : finished")



In [10]:
# Folder that holds the benchmark annotation files (bbox, image list, category).
benchmark_dir = "../datasets/Category and Attribute Prediction Benchmark/"

# Training split
bbox_train_path = benchmark_dir + "train_bbox.txt"
train_path = benchmark_dir + "train.txt"
categories_train_path = benchmark_dir + "train_cate.txt"

# Validation split
bbox_val_path = benchmark_dir + "val_bbox.txt"
val_path = benchmark_dir + "val.txt"
categories_val_path = benchmark_dir + "val_cate.txt"

# Test split
bbox_test_path = benchmark_dir + "test_bbox.txt"
test_path = benchmark_dir + "test.txt"
categories_test_path = benchmark_dir + "test_cate.txt"


In [11]:
# Build the three Yolo-ready folders, in order: train, then val, then test.
for split_bbox, split_images, split_cate, split_name in (
    (bbox_train_path, train_path, categories_train_path, 'train'),
    (bbox_val_path, val_path, categories_val_path, 'val'),
    (bbox_test_path, test_path, categories_test_path, 'test'),
):
    dataset_creation(split_bbox, split_images, split_cate, parameter = split_name)

val  : finished
test  : finished
