# Download ImageNet 1k dataset

In [None]:
import json
import os
import shutil

A Kaggle API config file (`kaggle.json`) is required to download the dataset. You can create one by going to your Kaggle account settings and clicking "Create New API Token". Then upload the file to the notebook environment.

In [None]:
# Shell escape: download the `ifigotin/imagenetmini-1000` dataset with the Kaggle CLI.
# The CLI needs a Kaggle API token to be configured beforehand.
! kaggle datasets download -d ifigotin/imagenetmini-1000

After downloading the dataset, we extract the files and place them in the Dataset folder. Next, we rename the class folders so that their names match what the other notebooks expect.

In [None]:
# Read the class-index file. Each entry is keyed by a class index (stored as a
# string) and holds a two-element (label, name) pair.
with open("in_cls_idx.json", "r") as f:
    class_index_entries = json.load(f)

# NOTE: despite its name, this dictionary maps each label to its *integer
# class index*; the human-readable name is not kept.
imagenet_id_to_name = {}
for cls_id, (label, _name) in class_index_entries.items():
    imagenet_id_to_name[label] = int(cls_id)

# Same label -> index pairs, with the keys in sorted order.
mapping = {label: imagenet_id_to_name[label] for label in sorted(imagenet_id_to_name)}

In [None]:
# Replace it with your train path
directory_path = '../Dataset/imagenet-mini/train'

# Rename every entry of the train directory to its class index from `mapping`,
# zero-padded to three digits (e.g. 7 -> "007", 42 -> "042", 123 -> "123").
for old_folder_name in os.listdir(directory_path):
    class_index = mapping.get(old_folder_name)

    # The lookup result must be checked *before* it is formatted: entries that
    # have no mapping (stray files, or folders already renamed by an earlier
    # run of this cell) yield None, and comparing/formatting None as a number
    # raises TypeError.
    if class_index is None:
        print(f"No mapping found for folder '{old_folder_name}'. Skipping renaming.")
        continue

    new_file_name = f"{class_index:03d}"
    old_file_path = os.path.join(directory_path, old_folder_name)
    new_file_path = os.path.join(directory_path, new_file_name)
    print(f'Replacing {old_file_path} with {new_file_path}')

    # Never clobber an entry that already carries the target name.
    if not os.path.exists(new_file_path):
        os.rename(old_file_path, new_file_path)
    else:
        print(f"File '{new_file_name}' already exists. Skipping renaming.")

print("Folder names replacement in the 'train' directory is complete.")

In [None]:
source_dir = '../Dataset/imagenet-mini/train'
destination_dir = '../Dataset/combined_images'

# Create the flat output folder if it is not there yet.
if not os.path.exists(destination_dir):
    os.makedirs(destination_dir)

# Walk the whole source tree and copy every file into the single destination
# folder. Each copy is renamed to "<containing folder name>_<original file name>",
# so the folder the file came from stays visible in its name.
for current_dir, _subdirs, file_names in os.walk(source_dir):
    folder_label = os.path.basename(current_dir)
    for file_name in file_names:
        shutil.copy(
            os.path.join(current_dir, file_name),
            os.path.join(destination_dir, f"{folder_label}_{file_name}"),
        )

print("Images have been combined and renamed in the destination directory.")