In [1]:
import pandas as pd
import json
import shutil
import os
import json

In [2]:
# Load the category -> label-names mapping that drives the per-category loops below.
# The context manager closes the file even if json.load raises.
with open('category_labels.json') as f:
    category_all_labels = json.load(f)

### Download Masks

#### Validation

Download the segmentation masks for the validation dataset from the [Open Images download page](https://storage.googleapis.com/openimages/web/download_v7.html).  
Once downloaded, unzip them and move all the masks into a single directory called `validation_masks`.  
Run the cell below to format the mask names as required by the inpainting model.  
The cell renames each mask to the format `[images_name]_maskXXX[image_suffix]` and copies it into a common `masks` folder. 

In [None]:
# For each category, copy the validation masks of every image that carries a
# human-verified label of that category into ./masks/validation/<category>/,
# renamed to <ImageID>_mask<NNN>.png (NNN is a per-image counter starting at 001).
val_human_labels = pd.read_csv('./validation_data/val-human-imagelabels.csv')
val_seg = pd.read_csv('./validation_data/val-segmentation.csv')

# Only keep labels with 100% confidence
val_human_labels = val_human_labels[val_human_labels["Confidence"] == 1.0]

for category, labels in category_all_labels.items():
    in_category = val_human_labels['LabelName'].isin(labels)
    human_ver_category_imageIDs = list(val_human_labels.loc[in_category, "ImageID"])
    # NOTE(review): this selects every mask of a matching image, not only the masks
    # whose own label belongs to the category — confirm that is intended.
    has_category_image = val_seg['ImageID'].isin(human_ver_category_imageIDs)
    human_ver_category_masks = list(val_seg.loc[has_category_image, "MaskPath"])

    target = f"./masks/validation/{category.lower()}"
    os.makedirs(target, exist_ok=True)

    # Number of masks already copied per image; used to number the output files.
    count = {}
    for path in human_ver_category_masks:
        # The image ID is everything before the first underscore of the mask file name.
        imageID = path.split("_")[0]
        count[imageID] = count.get(imageID, 0) + 1

        new_path = f"{imageID}_mask{count[imageID]:03}.png"
        shutil.copyfile(f"./validation_masks/{path}", f"{target}/{new_path}")

#### Test

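Download the segmentation masks for the test dataset from the [Open Images download page](https://storage.googleapis.com/openimages/web/download_v7.html).  
Once downloaded, unzip them and move all the masks into a single directory called `test_masks`.  
Note that the cell below does not rename or copy any masks: for each category it writes `./test_data/[category].txt`, listing the human-verified test images that have at least one segmentation mask (one `test/[ImageID]` entry per line).  
The `./masks/test` folders used later in this notebook therefore have to be prepared separately. 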

In [None]:
# For each category, write ./test_data/<category>.txt listing the test images that
# carry a human-verified label of that category and have at least one mask.
test_human_labels = pd.read_csv('./test_data/test-human-imagelabels.csv')
test_seg = pd.read_csv('./test_data/test-segmentation.csv')

# Only get images with 100% label confidence
test_human_labels = test_human_labels[test_human_labels["Confidence"] == 1.0]

for category, labels in category_all_labels.items():
    in_category = test_human_labels['LabelName'].isin(labels)
    labelled_imageIDs = test_human_labels.loc[in_category, "ImageID"]

    # Restrict to images that appear in the segmentation table, i.e. have a mask.
    masked_imageIDs = test_seg.loc[test_seg['ImageID'].isin(labelled_imageIDs), "ImageID"]

    # De-duplicate and sort so the generated file is identical from run to run.
    human_ver_category_imageIDs = sorted(set(masked_imageIDs))

    # The context manager closes the file even if a write fails.
    with open(f"./test_data/{category}.txt", "w") as file:
        file.writelines(f"test/{imageID} \n" for imageID in human_ver_category_imageIDs)

#### Training

Download the segmentation masks for the training dataset from the [Open Images download page](https://storage.googleapis.com/openimages/web/download_v7.html).  
Once downloaded, unzip them and move all the masks into a single directory called `train_masks`.  
Run the cell below to format the mask names as required by the inpainting model.  
The cell renames each mask and copies it into a common `masks` folder. 

In [None]:
# For each category, copy the training masks of every image that carries a
# human-verified label of that category into ./masks/train/<category>/,
# renamed to <ImageID>_mask<NNN>.png (NNN is a per-image counter starting at 001).

# These two frames are not loaded anywhere else in the notebook, so without the
# reads below this cell fails with NameError on a fresh kernel.
# NOTE(review): the file names are assumed by analogy with the validation/test
# cells — TODO confirm they match your local files.
train_human_labels = pd.read_csv('./train_data/train-human-imagelabels.csv')
train_seg = pd.read_csv('./train_data/train-segmentation.csv')

# Only keep labels with 100% confidence, mirroring the validation and test cells.
# NOTE(review): confirm the same filter is wanted for the training split.
train_human_labels = train_human_labels[train_human_labels["Confidence"] == 1.0]

for category, labels in category_all_labels.items():
    in_category = train_human_labels['LabelName'].isin(labels)
    human_ver_category_imageIDs = list(train_human_labels.loc[in_category, "ImageID"])
    has_category_image = train_seg['ImageID'].isin(human_ver_category_imageIDs)
    human_ver_category_masks = list(train_seg.loc[has_category_image, "MaskPath"])

    target = f"./masks/train/{category.lower()}"
    os.makedirs(target, exist_ok=True)

    # Number of masks already copied per image; used to number the output files.
    count = {}
    for path in human_ver_category_masks:
        # The image ID is everything before the first underscore of the mask file name.
        imageID = path.split("_")[0]
        count[imageID] = count.get(imageID, 0) + 1

        new_path = f"{imageID}_mask{count[imageID]:03}.png"
        shutil.copyfile(f"./train_masks/{path}", f"{target}/{new_path}")

### Images and Masks

Gather the images and their associated masks for each category into a unified folder structure.

#### Validation

In [None]:
# Merge the validation images and masks of each category into
# ./images_and_masks/validation/<category>/.
# Written in Python (not a `!` shell line) so it does not depend on the user's
# shell supporting arrays or on IPython's {expr} interpolation of `!` commands.
categories = ["food", "vehicle", "clothing", "sports_equipment", "person", "tool", "kitchenware", "furniture"]

for category in categories:
    destination = f"./images_and_masks/validation/{category}"
    os.makedirs(destination, exist_ok=True)

    for source in (f"./images/validation/{category}", f"./masks/validation/{category}"):
        if not os.path.isdir(source):
            # Best effort: report the missing folder and carry on with the rest.
            print(f"skipping missing folder: {source}")
            continue

        for name in sorted(os.listdir(source)):
            path = os.path.join(source, name)
            # Like `cp <dir>/*`: regular, non-hidden files only.
            if os.path.isfile(path) and not name.startswith("."):
                shutil.copy(path, destination)

#### Test

In [None]:
# Merge the test images and masks of each category into
# ./images_and_masks/test/<category>/.
# Written in Python (not a `!` shell line) so it does not depend on the user's
# shell supporting arrays or on IPython's {expr} interpolation of `!` commands.
categories = ["food", "vehicle", "clothing", "sports_equipment", "person", "tool", "kitchenware", "furniture"]

for category in categories:
    destination = f"./images_and_masks/test/{category}"
    os.makedirs(destination, exist_ok=True)

    for source in (f"./images/test/{category}", f"./masks/test/{category}"):
        if not os.path.isdir(source):
            # Best effort: report the missing folder and carry on with the rest.
            print(f"skipping missing folder: {source}")
            continue

        for name in sorted(os.listdir(source)):
            path = os.path.join(source, name)
            # Like `cp <dir>/*`: regular, non-hidden files only.
            if os.path.isfile(path) and not name.startswith("."):
                shutil.copy(path, destination)

#### Training

In [None]:
# Merge the training images and masks of each category into
# ./images_and_masks/train/<category>/.
# Written in Python (not a `!` shell line) so it does not depend on the user's
# shell supporting arrays or on IPython's {expr} interpolation of `!` commands.
categories = ["food", "vehicle", "clothing", "sports_equipment", "person", "tool", "kitchenware", "furniture"]

for category in categories:
    destination = f"./images_and_masks/train/{category}"
    os.makedirs(destination, exist_ok=True)

    for source in (f"./images/train/{category}", f"./masks/train/{category}"):
        if not os.path.isdir(source):
            # Best effort: report the missing folder and carry on with the rest.
            print(f"skipping missing folder: {source}")
            continue

        for name in sorted(os.listdir(source)):
            path = os.path.join(source, name)
            # Like `cp <dir>/*`: regular, non-hidden files only.
            if os.path.isfile(path) and not name.startswith("."):
                shutil.copy(path, destination)

### Resize images to 512x512

In [None]:
!pip install Pillow

In [None]:
# Category folder names iterated over by the resize cells below.
categories = [
    "food",
    "vehicle",
    "clothing",
    "sports_equipment",
    "person",
    "tool",
    "kitchenware",
    "furniture",
]

#### Validation

In [None]:
import glob
import os
from PIL import Image

# Resize every validation image and its masks to 512x512, overwriting the files in place.
# NOTE(review): the folder below is a hard-coded absolute path to an
# `images_and_masks_reduced` tree that no visible cell creates — point it at your own data.
# NOTE(review): masks are resized with Pillow's default resampling filter; if they
# must stay strictly binary, nearest-neighbour resampling may be needed — confirm.
w = 512
h = 512

for category in categories:
    folder = f'/home/ramaraja/lama/open-images/images_and_masks_reduced/validation/{category}'
    images = glob.glob(f'{folder}/*.jpg')

    for image in images:
        file_name = os.path.splitext(os.path.basename(image))[0]
        # The masks of an image are the PNGs whose name starts with the image's base name.
        masks = glob.glob(f'{folder}/{file_name}*.png')

        # resize() returns an independent, fully loaded image, so the source handle
        # is closed before the file is overwritten.
        with Image.open(image) as im1:
            new_image = im1.resize((w, h))
        new_image.save(image)

        for mask in masks:
            with Image.open(mask) as im2:
                new_image = im2.resize((w, h))
            new_image.save(mask)

#### Test

In [None]:
import glob
import os
from PIL import Image

# Resize every test image and its masks to 512x512, overwriting the files in place.
# NOTE(review): the folder below is a hard-coded absolute path to an
# `images_and_masks_reduced` tree that no visible cell creates — point it at your own data.
# NOTE(review): masks are resized with Pillow's default resampling filter; if they
# must stay strictly binary, nearest-neighbour resampling may be needed — confirm.
w = 512
h = 512

for category in categories:
    folder = f'/home/ramaraja/lama/open-images/images_and_masks_reduced/test/{category}'
    images = glob.glob(f'{folder}/*.jpg')

    for image in images:
        file_name = os.path.splitext(os.path.basename(image))[0]
        # The masks of an image are the PNGs whose name starts with the image's base name.
        masks = glob.glob(f'{folder}/{file_name}*.png')

        # resize() returns an independent, fully loaded image, so the source handle
        # is closed before the file is overwritten.
        with Image.open(image) as im1:
            new_image = im1.resize((w, h))
        new_image.save(image)

        for mask in masks:
            with Image.open(mask) as im2:
                new_image = im2.resize((w, h))
            new_image.save(mask)

#### Training

In [None]:
import glob
import os
from PIL import Image

# Resize every training image and its masks to 512x512, overwriting the files in place.
# NOTE(review): the folder below is a hard-coded absolute path to an
# `images_and_masks_reduced` tree that no visible cell creates — point it at your own data.
# NOTE(review): masks are resized with Pillow's default resampling filter; if they
# must stay strictly binary, nearest-neighbour resampling may be needed — confirm.
w = 512
h = 512

for category in categories:
    folder = f'/home/ramaraja/lama/open-images/images_and_masks_reduced/train/{category}'
    images = glob.glob(f'{folder}/*.jpg')

    for image in images:
        file_name = os.path.splitext(os.path.basename(image))[0]
        # The masks of an image are the PNGs whose name starts with the image's base name.
        masks = glob.glob(f'{folder}/{file_name}*.png')

        # resize() returns an independent, fully loaded image, so the source handle
        # is closed before the file is overwritten.
        with Image.open(image) as im1:
            new_image = im1.resize((w, h))
        new_image.save(image)

        for mask in masks:
            with Image.open(mask) as im2:
                new_image = im2.resize((w, h))
            new_image.save(mask)