In [1]:
import pandas as pd

import os
import shutil
from tqdm import tqdm 

# Summary

In [2]:
# The purpose of this notebook is to arrange the tiny-imagenet data so that
# every class has its own folder of images:

# data_dir/training/n03444034/xxx.JPEG
# data_dir/training/n03444034/xxy.JPEG
# data_dir/training/n03444034/xxz.JPEG
# ...

# data_dir/training/n04067472/123.JPEG
# data_dir/training/n04067472/nsdf3.JPEG
# data_dir/training/n04067472/asd932_.JPEG
# ...

# The validation data is arranged in the same way under data_dir/validation/.

# Load metadata

In [3]:
# The folder extracted from the tiny-imagenet-200.zip file.
# Set the TINY_IMAGENET_DIR environment variable to point somewhere else;
# otherwise the original default location is used.
# os.path.join(..., '') guarantees a trailing separator, which the cells below
# rely on when they build paths by string concatenation.
data_dir = os.path.join(
    os.environ.get('TINY_IMAGENET_DIR', '/home/ubuntu/data/tiny-imagenet-200/'),
    ''
)

In [4]:
# Read the validation annotations (tab-separated, no header row) and
# discard columns 2-5, which hold the bounding boxes info.
val_data = (
    pd.read_csv(data_dir + 'val/val_annotations.txt', sep='\t', header=None)
    .drop([2, 3, 4, 5], axis=1)
)

# the two remaining columns: image file name and the class it is annotated with
val_data.columns = ['img_name', 'img_class']

In [5]:
# every distinct class id that occurs in the validation annotations
unique_classes = val_data['img_class'].unique()

# show how many classes there are
len(unique_classes)

200

# Copy validation data

In [6]:
# Create one output folder per class to copy the validation images into.
# exist_ok=True keeps this cell re-runnable: os.mkdir would raise
# FileExistsError if the folders were already created by an earlier run.
os.makedirs(data_dir + 'validation', exist_ok=True)
for name in unique_classes:
    os.makedirs(data_dir + 'validation/' + name, exist_ok=True)

In [7]:
# copy every validation image into the folder of its class
for name in tqdm(unique_classes):

    # file names of the images annotated with the current class
    in_class = val_data.img_class == name
    class_images = val_data.loc[in_class, 'img_name'].values

    # all images of this class go into the same destination folder
    target_dir = data_dir + 'validation/' + name + '/'
    for img in class_images:
        shutil.copyfile(data_dir + 'val/images/' + img, target_dir + img)

100%|██████████| 200/200 [00:00<00:00, 384.19it/s]


# Copy training data

In [8]:
# Create one output folder per class to copy the training images into.
# exist_ok=True keeps this cell re-runnable: os.mkdir would raise
# FileExistsError if the folders were already created by an earlier run.
os.makedirs(data_dir + 'training', exist_ok=True)
for name in unique_classes:
    os.makedirs(data_dir + 'training/' + name, exist_ok=True)

In [9]:
# copy every training image into the folder of its class
for name in tqdm(unique_classes):

    # the source layout keeps the images of a class in train/<class>/images
    source_dir = data_dir + 'train/' + name + '/images'
    target_dir = data_dir + 'training/' + name + '/'

    # everything listed in the source folder is copied under the same file name
    class_images = os.listdir(source_dir)
    for img in class_images:
        shutil.copyfile(source_dir + '/' + img, target_dir + img)

100%|██████████| 200/200 [00:03<00:00, 54.65it/s]
