In [1]:
import json
from tqdm.notebook import tqdm

# Create target class folders and copy images

In [11]:
import shutil
import os

def copy_images(root_folder,target_root,data_type,target_folders):
    """
    Copies the images of the requested classes from the original dataset
    folder into a subset folder for one data split.

    The label file `<root_folder>/meta/<data_type>.json` is loaded and, for
    every class in `target_folders`, each listed entry is copied from
    `<root_folder>/images/<entry>.jpg` to `<target_root>/<data_type>/<entry>.jpg`.

    Eg. copy steak images listed in food-101/meta/train.json into data/train/

    NOTE: relies on `get_labels`, which must already be defined in the kernel
    when this function is called.

    Parameters
    ----------
    root_folder (str) - original folder path with all data
                        (expects `meta/` and `images/` inside it)
    target_root (str) - name of parent folder to copy to
    data_type (str) - train or test; selects the label file and the
                      destination sub-folder
    target_folders (list) - list of target labels to copy. eg. ['steak','pizza']

    Raises
    ------
    KeyError - if a requested class is missing from the label file
               (assuming the label file decodes to a dict)
    FileNotFoundError - if the label file or a listed image does not exist
    """
    print(f"\nUsing {data_type} labels...")
    labels = get_labels(os.path.join(root_folder, "meta", data_type + ".json"))

    images_dir = os.path.join(root_folder, "images")
    split_dir = os.path.join(target_root, data_type)

    for class_name in target_folders:
        # make target dir
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

        # go through the labels and copy every image listed for this class
        copied_count = 0
        for entry in labels[class_name]:
            # presumably entries look like "<class>/<image_id>", so the file
            # lands inside the class folder created above - TODO confirm
            file_name = entry + ".jpg"
            orig_path = os.path.join(images_dir, file_name)
            new_path = os.path.join(split_dir, file_name)

            # copy2 also preserves file metadata where possible
            shutil.copy2(orig_path, new_path)
            copied_count += 1
        print(f"Copied {copied_count} images from {data_type} dataset {class_name} class...")
    

In [14]:
# Build the training subset, one class per call.
for food_class in ('pizza', 'steak'):
    copy_images(root_folder='food-101',target_root='data',data_type='train',target_folders=[food_class])


Using train labels...
Copied 750 images from train dataset pizza class...

Using train labels...
Copied 750 images from train dataset steak class...


In [15]:
# Build the test subset, one class per call.
for food_class in ('pizza', 'steak'):
    copy_images(root_folder='food-101',target_root='data',data_type='test',target_folders=[food_class])


Using test labels...
Copied 250 images from test dataset pizza class...

Using test labels...
Copied 250 images from test dataset steak class...


In [5]:
# Get labels
def get_labels(label_path):
    """
    Accepts a label path (in the form of json) and returns the file
    as python object

    Parameters
    ----------
    label_path (str) - path to the json label file

    Returns
    -------
    The decoded json content (whatever `json.load` produces for the file).
    """
    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(label_path, encoding="utf-8") as f:
        return json.load(f)

# Load the label files for both splits (train first, then test).
train_labels, test_labels = (
    get_labels('food-101/meta/train.json'),
    get_labels("food-101/meta/test.json"),
)

In [8]:
# Sanity check: count the entries listed under the 'pizza' label in the training labels.
len(train_labels['pizza'])

750