# In [17]:
import cv2
import os
import numpy as np

# Input dataset folder and the folder that receives the preprocessed arrays
dataset_dir, output_dir = "tiny-imagenet-200", "preprocessed_dataset"

# Target size handed to cv2.resize for every image: 64x64 pixels
image_size = (64, 64)

# Make sure the output folder is present; an existing folder is left as-is
os.makedirs(output_dir, exist_ok=True)

# Function to preprocess an image
def preprocess_image(image_path):
    """Load an image, resize it to ``image_size`` and rescale its pixel values.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A ``float32`` array of the resized image with every value divided
        by 255.

    Raises:
        ValueError: If OpenCV cannot read or decode the file.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; stop
    # here with the offending path rather than a cryptic error inside resize.
    if image is None:
        raise ValueError("Could not read image: {}".format(image_path))
    image = cv2.resize(image, image_size)
    return image.astype(np.float32) / 255.0  # Normalize pixel values

# Read the WNID list from wnids.txt, one whitespace-stripped entry per line
wnid_file_path = os.path.join(dataset_dir, "wnids.txt")
with open(wnid_file_path, "r") as f:
    wnids = list(map(str.strip, f))

# Each WNID's numerical label is its zero-based position in the file
wnid_to_label = dict(zip(wnids, range(len(wnids))))

# Preprocess images and labels in the training set based on directory structure.
# Each entry under train/ is treated as a class directory named by its WNID and
# holding an "images" subfolder.
X_train = []
y_train = []
train_data_dir = os.path.join(dataset_dir, "train")

# np.save does not create missing folders, so make the target folder up front;
# doing it before the loop also surfaces a bad output location early.
output_train_dir = os.path.join(output_dir, "train")
os.makedirs(output_train_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort so that the sample order
# in the saved arrays is reproducible across runs and machines.
class_dirs = sorted(os.listdir(train_data_dir))
for class_dir in class_dirs:
    # The label comes from the WNID mapping, not from the directory's position.
    class_label = wnid_to_label[class_dir]
    class_images_dir = os.path.join(train_data_dir, class_dir, "images")
    image_files = sorted(os.listdir(class_images_dir))
    for image_file in image_files:
        image_path = os.path.join(class_images_dir, image_file)
        image = preprocess_image(image_path)
        X_train.append(image)
        y_train.append(class_label)
X_train = np.array(X_train)
y_train = np.array(y_train)

np.save(os.path.join(output_train_dir, "X_train.npy"), X_train)
np.save(os.path.join(output_train_dir, "y_train.npy"), y_train)

# Preprocess images and labels in the validation set based on val_annotations.txt
X_val = []
y_val = []
val_data_dir = os.path.join(dataset_dir, "val")

# Create the output folder before the expensive loop so that a bad output
# location fails early rather than after every image has been processed.
output_val_dir = os.path.join(output_dir, "validation")
os.makedirs(output_val_dir, exist_ok=True)

with open(os.path.join(val_data_dir, "val_annotations.txt"), "r") as f:
    for line in f:
        # Skip blank lines (e.g. a stray trailing newline) instead of failing
        # with an IndexError on the missing WNID column.
        if not line.strip():
            continue
        # Tab-separated columns; only the first two (file name, WNID) are used.
        split_line = line.strip().split("\t")
        image_file = split_line[0]
        wnid = split_line[1]
        image_path = os.path.join(val_data_dir, "images", image_file)
        image = preprocess_image(image_path)
        X_val.append(image)
        y_val.append(wnid_to_label[wnid])
X_val = np.array(X_val)
y_val = np.array(y_val)

# Build the save paths from output_val_dir so they follow output_dir if it changes
np.save(os.path.join(output_val_dir, "X_val.npy"), X_val)
np.save(os.path.join(output_val_dir, "y_val.npy"), y_val)

# Cell output: (10000, 64, 64, 3)
