In [1]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from tqdm import tqdm, trange
import os
import pandas as pd

In [2]:
# Dataset locations. `base_dir` is machine-specific; edit it before running
# the notebook anywhere else.
base_dir = "/home/aritra/project/quatLT23/data/tiny_imagenet/tiny-imagenet-200"
train_dir = f"{base_dir}/train"
val_dir = f"{base_dir}/val"

# Entries of train/ are used as class names; sorting makes the integer
# label assigned to each name deterministic across runs.
classes = sorted(os.listdir(train_dir))

# Integer label (0..199) -> class name.
classinfo = {label: classes[label] for label in range(200)}

# Training

In [3]:
# Rewrite single-channel (64, 64) training images in place as 3-channel
# images, so that every file decodes to the same array shape later on.
badcount = 0
for class_idx in trange(200):
    class_name = classinfo[class_idx]
    for img_idx in range(500):
        img_path = f"{train_dir}/{class_name}/images/{class_name}_{img_idx}.JPEG"
        img = plt.imread(img_path)
        if img.shape != (64, 64):
            # Already multi-channel: leave the file untouched.
            continue
        badcount += 1
        # Copy the single channel three times along a new last axis.
        plt.imsave(img_path, np.stack((img, img, img), axis=-1))

print(f"grayscale:{badcount}")
# Recorded from an earlier run (before the files were rewritten):
# grayscale_train: 1821

100%|██████████| 200/200 [00:13<00:00, 14.33it/s]

grayscale:0





In [4]:
# Load every training image into memory.
#
# Images and labels are collected in Python lists and stacked once at the
# end. Growing an array with np.append inside the loop re-copies the whole
# array on every iteration, which is quadratic in the number of images.
train_images = []
train_labels = []
for i in trange(200):
    class_name = classinfo[i]
    for j in range(500):
        train_images.append(
            plt.imread(f"{train_dir}/{class_name}/images/{class_name}_{j}.JPEG")
        )
        train_labels.append(i)

# np.stack raises ValueError if any image has a different shape (e.g. a
# grayscale file that was not converted), same as the np.append version did.
x_train = np.stack(train_images)
y_train = np.array(train_labels)
del train_images, train_labels  # release the per-image copies

# Shuffling is intentionally disabled; samples stay grouped by class.
# print("Shuffling...")
# np.random.seed(21)
# np.random.shuffle(x_train)
# np.random.seed(21)
# np.random.shuffle(y_train)

print("type cast...")
# Scale pixel values by 1/255 (assumes imread returned uint8 data - TODO confirm).
x_train = x_train.astype(np.float32)/255
# Labels are 0..199, which fits in uint8.
y_train = y_train.astype(np.uint8)

print("transposing...")
# Channels-last (N, H, W, C) -> channels-first (N, C, H, W).
x_train = x_train.transpose(0, 3, 1, 2)

100%|██████████| 200/200 [00:38<00:00,  5.21it/s]


type cast...
transposing...


In [5]:
# Persist the channels-first RGB training tensor and its labels under base_dir.
np.save(file=f"{base_dir}/x_train_3.npy", arr=x_train)
np.save(file=f"{base_dir}/y_train.npy", arr=y_train)

In [6]:
# Linear map from 3 input channels (R, G, B) to 4 output channels
# (R, G, B, L). The first three columns pass R, G and B through unchanged;
# the fourth column holds the ITU-R BT.601 luma weights, which sum to 1:
#     L = 0.299 * R + 0.587 * G + 0.114 * B
# (The blue weight was previously mistyped as 0.144, making the weights sum
# to 1.03 so that L could exceed the input range.)
#
# float32 keeps the product in the same dtype as x_train instead of
# materialising a float64 intermediate twice the size.
mat = np.array(
    [
        [1, 0, 0, 0.299],
        [0, 1, 0, 0.587],
        [0, 0, 1, 0.114],
    ],
    dtype=np.float32,
)

# Move channels last so the channel axis is contracted with `mat`, then move
# the resulting 4 channels back to axis 1.
x_train_4 = np.dot(x_train.transpose(0, 2, 3, 1), mat).transpose(0, 3, 1, 2)
# Guarantees float32 even if x_train arrives in another dtype; no copy is
# made when the product is already float32.
x_train_4 = x_train_4.astype(np.float32, copy=False)

In [7]:
# Persist the 4-channel training tensor under base_dir.
np.save(file=f"{base_dir}/x_train_4.npy", arr=x_train_4)

# Validation

In [8]:
# Rewrite single-channel (64, 64) validation images in place as 3-channel
# images, so that every file decodes to the same array shape later on.
badcount = 0
for image_name in tqdm(os.listdir(val_dir+"/images")):
    img_path = f"{val_dir}/images/{image_name}"
    img = plt.imread(img_path)
    if img.shape != (64, 64):
        # Already multi-channel: leave the file untouched.
        continue
    badcount += 1
    # Copy the single channel three times along a new last axis.
    plt.imsave(img_path, np.stack((img, img, img), axis=-1))

print(f"grayscale: {badcount}")
# Recorded from an earlier run (before the files were rewritten):
# grayscale_val: 168

100%|██████████| 10000/10000 [00:01<00:00, 7224.77it/s]

grayscale: 0





In [9]:
# Class name -> integer label (inverse of `classinfo`).
reverse_classinfo = {classinfo[label]: label for label in range(200)}

# val_annotations.txt is read as tab-separated with no header row; only the
# first two columns are kept. Column 0 is later used as the image file name
# and column 1 is mapped from class name to integer label.
df = pd.read_csv(
    f"{val_dir}/val_annotations.txt",
    sep="\t",
    header=None,
).iloc[:, :2]
df[1] = df[1].map(reverse_classinfo)

# One (file name, label) row per validation image.
files = df.to_numpy()

In [10]:
# Load every validation image listed in `files` into memory.
#
# Images and labels are collected in Python lists and stacked once at the
# end. Growing an array with np.append inside the loop re-copies the whole
# array on every iteration, which is quadratic in the number of images.
val_images = []
val_labels = []
for file_name, label in tqdm(files):
    val_images.append(plt.imread(f"{val_dir}/images/{file_name}"))
    val_labels.append(label)

# np.stack raises ValueError if any image has a different shape (e.g. a
# grayscale file that was not converted), same as the np.append version did.
x_val = np.stack(val_images)
y_val = np.array(val_labels)
del val_images, val_labels  # release the per-image copies

# Scale pixel values by 1/255 (assumes imread returned uint8 data - TODO confirm).
x_val = x_val.astype(np.float32)/255
# Labels are 0..199, which fits in uint8.
y_val = y_val.astype(np.uint8)

# Channels-last (N, H, W, C) -> channels-first (N, C, H, W).
x_val = x_val.transpose(0, 3, 1, 2)

100%|██████████| 10000/10000 [01:11<00:00, 139.49it/s]


In [11]:
# Persist the channels-first RGB validation tensor and its labels under base_dir.
np.save(file=f"{base_dir}/x_val_3.npy", arr=x_val)
np.save(file=f"{base_dir}/y_val.npy", arr=y_val)

In [None]:
# Apply the 3 -> 4 channel matrix `mat` (defined in the training section) to
# the validation set: move channels last, contract the channel axis with
# `mat`, move the 4 output channels back to axis 1, and store as float32.
x_val_4 = (
    x_val.transpose(0, 2, 3, 1)
    .dot(mat)
    .transpose(0, 3, 1, 2)
    .astype(np.float32)
)

In [None]:
# Persist the 4-channel validation tensor under base_dir.
np.save(file=f"{base_dir}/x_val_4.npy", arr=x_val_4)