In [1]:
import os               # for file and directory management
import numpy as np      # for numerical operations
import pickle
from PIL import Image
from tqdm import tqdm

In [None]:
# Helper to read a pickled object back from disk.
# Note that this will work with Python3
def unpickle(file):
    """Return the object stored in the pickle file at path ``file``.

    The file is opened in binary mode and closed again before returning.

    NOTE: ``pickle.load`` can execute arbitrary code while deserialising,
    so only use this on files that come from a trusted source.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle)

def load_databatch(data_folder, idx = '', img_size=8):
    """Load one pickled image batch, normalise it and append mirrored copies.

    The split is inferred from ``data_folder``: a path containing ``'train'``
    reads the file ``train_data_batch_<idx>``; otherwise a path containing
    ``'val'`` reads ``val_data<idx>``.

    Args:
        data_folder: Directory that holds the pickled batch file.
        idx: Suffix appended to the file name (the batch number for train
            batches; leave empty for the validation file).
        img_size: Side length of one image. Every row of ``data`` is split
            into three consecutive planes of ``img_size * img_size`` values.

    Returns:
        A dict with
        ``X_train``: float32 array of shape ``(2 * N, 3, img_size, img_size)``
        holding the N loaded images followed by the same images reversed
        along the last axis (the mirrored copies), and
        ``Y_train``: the ``2 * N`` zero-based labels in the same order.

    Raises:
        ValueError: If ``data_folder`` contains neither ``'train'`` nor
            ``'val'`` (previously an error string was returned, which only
            failed later when the caller indexed the result).
    """
    if 'train' in data_folder:
        data_file = os.path.join(data_folder, 'train_data_batch_')
        is_train = True
    elif 'val' in data_folder:
        data_file = os.path.join(data_folder, 'val_data')
        is_train = False
    else:
        raise ValueError("ERROR: no train or val in data_folder path! ")

    d = unpickle(data_file + str(idx))

    # Scale by 1/255 (assumes pixel values are stored as 0-255 -- TODO confirm).
    x = d['data'] / np.float32(255)

    # Labels are indexed from 1, shift it so that indexes start at 0
    y = [label - 1 for label in d['labels']]

    if is_train:  # val don't have mean
        # The stored mean is scaled the same way as the data before subtracting.
        x -= d['mean'] / np.float32(255)

    plane = img_size * img_size

    # Each row stores three planes back to back; stack them on a trailing
    # axis, restore the 2-D layout, then move the channel axis to position 1.
    x = np.dstack((x[:, :plane], x[:, plane:2 * plane], x[:, 2 * plane:]))
    x = x.reshape((x.shape[0], img_size, img_size, 3)).transpose(0, 3, 1, 2)

    # create mirrored images: originals first, flipped copies appended after
    X_train = np.concatenate((x, x[:, :, :, ::-1]), axis=0)
    Y_train = np.concatenate((y, y), axis=0)

    return dict(
        X_train=X_train.astype('float32'),
        Y_train=Y_train
    )

def load_classnames(filename):
    """Parse a whitespace-separated class map file.

    Each non-blank line is expected to carry at least three fields:
    ``<class_id> <1-based index> <class_name>``. Fields after the third are
    ignored. Blank or whitespace-only lines (e.g. a trailing newline at the
    end of the file) are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(labels_map, names_map)`` where ``labels_map`` maps the
        zero-based index to the class id and ``names_map`` maps the class id
        to the class name.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the second field is not an integer.
    """
    labels_map = {}
    names_map = {}

    with open(filename, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Nothing on this line; indexing it would raise IndexError.
                continue
            class_id = fields[0]
            idx = int(fields[1]) - 1  # file indices start at 1
            class_name = fields[2]
            names_map[class_id] = class_name
            labels_map[idx] = class_id
    return labels_map, names_map

def save_images(X_data, Y_data, labels_map, save_dir):
    """Write every image to ``save_dir/<labels_map[label]>/<index>.png``.

    Args:
        X_data: Iterable of channel-first image arrays; each one is
            transposed with ``(1, 2, 0)`` before being handed to PIL.
            Values are multiplied by 255, so they are presumably expected
            in ``[0, 1]`` -- anything outside is clipped to that range.
        Y_data: Iterable of labels, paired element-wise with ``X_data``.
        labels_map: Mapping from label to the name of the sub-directory the
            image is stored in.
        save_dir: Root output directory; created if it does not exist.

    The file name is the position of the image in ``X_data``, so calling
    this twice with the same ``save_dir`` overwrites files with equal
    positions.
    """
    # Ensure the save_dir exists
    os.makedirs(save_dir, exist_ok=True)

    # Create subdirectories for each class
    for class_dir in labels_map.values():
        os.makedirs(os.path.join(save_dir, str(class_dir)), exist_ok=True)

    # enumerate(zip(...)) has no length, so tell tqdm the total when the
    # inputs are sized; otherwise fall back to an open-ended counter.
    try:
        total = min(len(X_data), len(Y_data))
    except TypeError:
        total = None

    for idx, (img_array, label_id) in tqdm(enumerate(zip(X_data, Y_data)), total=total):
        label_path = os.path.join(save_dir, str(labels_map[label_id]))

        # Round and clip before casting: a bare astype(uint8) truncates
        # (float round-off can land one level low) and out-of-range values
        # do not cast to a meaningful pixel value.
        scaled = img_array.transpose(1, 2, 0) * 255
        pixels = np.clip(np.rint(scaled), 0, 255).astype(np.uint8)

        img = Image.fromarray(pixels)  # H x W x 3 uint8 -> RGB image
        img.save(os.path.join(label_path, f"{idx}.png"))  # Save as .png or preferred format

In [None]:
# Location of the downsampled ImageNet validation split on the local machine.
# Kept in one place so the input folder and the output folder cannot drift apart.
VAL_DIR = "D:\\CU\\Capstone\\DATA\\imagenet\\images\\val\\Imagenet32_val"

# imagenet8_train_batch_1 = load_databatch("D:\\CU\\Capstone\\DATA\\imagenet\\images\\train\\Imagenet8_train", 1)
# imagenet8_train_batch_1['X_train'].shape
imagenet32_val = load_databatch(VAL_DIR, img_size=32)
# A bare expression is only displayed when it is the last line of a cell,
# so print the shape explicitly.
print(imagenet32_val['X_train'].shape)

# load class names, start to create the target directory as CoOp requires
labels_map, names_map = load_classnames('map_clsloc.txt')

# save_images(imagenet8_train_batch_1['X_train'], imagenet8_train_batch_1['Y_train'], labels_map, "D:\\CU\\Capstone\\DATA\\imagenet\\images\\train\\Imagenet8_train\\processed")
save_images(imagenet32_val['X_train'], imagenet32_val['Y_train'], labels_map, os.path.join(VAL_DIR, "processed"))