<h1> General Info</h1>

In [None]:
#TO DO

"""
1- Implement a naive CNN on the imagenet data
2- Implement Capsules

Other info:
Stanford prepared the Tiny ImageNet dataset for their CS231n course. 
The dataset spans 200 image classes with 500 training examples per class. 
The dataset also has 50 validation and 50 test examples per class.

The Tiny ImageNet Challenge is similar to the ImageNet challenge, but uses a smaller
dataset with fewer image classes. It contains 200 image classes, a training
dataset of 100,000 images, a validation dataset of 10,000
images, and a test dataset of 10,000 images. All images are
of size 64×64.
"""

In [None]:
# To Clear Memory
# Uses numba's CUDA bindings to select GPU 0 and then close the CUDA context,
# which is intended to free GPU memory left allocated by earlier work in this
# process.
# NOTE(review): presumably this should only run before TensorFlow has touched
# the GPU in this kernel; closing the context afterwards may break later
# TensorFlow GPU calls -- confirm.

from numba import cuda
cuda.select_device(0)  # device index 0 = first GPU
cuda.close()           # close the CUDA context for the selected device

<h1> Import Data - Train</h1>

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
import datetime
import os
from tensorflow.keras.callbacks import TensorBoard

%load_ext tensorboard

In [None]:
# With validation_split=0.9 Keras reserves 90% of the images for the
# "validation" subset; asking for subset="training" below therefore streams
# only the remaining 10% of the training directory.
# No `rescale` is configured, so batches keep raw 0-255 pixel values.
train_datagen = ImageDataGenerator(validation_split=0.9)

# One sub-directory per class under train/; labels are one-hot encoded
# (class_mode='categorical') and the shuffle order is fixed by the seed.
train_generator = train_datagen.flow_from_directory(
    directory= 'tiny-imagenet-200/train/',
    target_size=(64, 64),
    batch_size=256,
    class_mode='categorical',
    shuffle=True,
    seed=42,
    subset="training",
)

<h2> Examine Train Images </h2>

In [None]:
import numpy as np

# Pull a single batch from the training generator to sanity-check it.
x_batch, y_batch = next(train_generator)

print(x_batch.shape)
print(y_batch.shape)

# Show the first eight images of the batch on a 2 x 4 grid.
fig, axes = plt.subplots(2, 4, figsize=(14, 5))

for i, sub in enumerate(axes.flat):
    # The generator is created without `rescale`, so pixels are in 0-255;
    # dividing by 255 brings them into the [0, 1] range imshow expects for floats.
    sub.imshow(x_batch[i] / 255, interpolation='bilinear')

<h2> Import Data - Val</h2>

In [None]:
# The validation labels ship as a headerless, tab-separated file:
# file name, class id, then four bounding-box columns. Only the first two
# columns are needed for classification, so the box columns are dropped.
val_data = (
    pd.read_csv(
        './tiny-imagenet-200/val/val_annotations.txt',
        sep='\t',
        header=None,
        names=['File', 'Class', 'X', 'Y', 'H', 'W'],
    )
    .drop(columns=['X', 'Y', 'H', 'W'])
)

# Same split setting as the training generator: with validation_split=0.9,
# subset="training" keeps only the 10% of rows not reserved for validation.
valid_datagen  = ImageDataGenerator(validation_split=0.9)

# Validation images live in one flat folder, so labels come from the
# dataframe (x_col = file name, y_col = class id) instead of sub-directories.
validation_generator = valid_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory='./tiny-imagenet-200/val/images/',
    x_col='File',
    y_col='Class',
    target_size=(64, 64),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=256,
    shuffle=True,
    seed=42,
    subset ="training",
)

<h2> Building the Model </h2>

In [None]:
# TensorBoard callback writing to a fresh, timestamped folder under ./logs
# so that successive runs do not overwrite each other.
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = TensorBoard(logdir)

# Naive CNN: three stacked 3x3 convolutions (no pooling in between), then a
# small dense head with dropout and a 200-way softmax (one unit per class).
# NOTE(review): the generators feeding this model do not rescale pixels, so
# the first layer sees raw 0-255 inputs -- confirm this is intended.
model = Sequential([
    Conv2D(32,
           kernel_size=(3, 3),
           activation='relu',
           input_shape=(64,64,3),
           kernel_initializer='he_normal'),
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(128, (3, 3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(200, activation='softmax'),
])

# One-hot labels from the generators pair with categorical cross-entropy.
model.compile(
    loss="categorical_crossentropy",
    optimizer="Adam",
    metrics=['accuracy'],
)

<h2> Training the Model </h2>

In [None]:
# `Model.fit` consumes Keras data generators directly. `Model.fit_generator`
# is deprecated and has been removed from recent TensorFlow/Keras releases,
# so calling it breaks the notebook on an up-to-date install.
history = model.fit(
    train_generator,
    epochs=2,
    validation_data=validation_generator,  # evaluated at the end of every epoch
    # callbacks=[tensorboard_callback],    # uncomment to log the run to TensorBoard
)

<h2> Model Evaluation </h2>

In [None]:
# `Model.evaluate` accepts the generator directly; the deprecated
# `Model.evaluate_generator` has been removed from recent TensorFlow/Keras.
# verbose=0 keeps the cell output limited to the two lines printed below,
# matching the silent default of the old call.
# score = [loss, accuracy], in the order given to model.compile(metrics=...).
score = model.evaluate(validation_generator, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

<h1> Archive </h1>

In [None]:
import glob
import re
import os
import numpy as np
from PIL import Image


class TinyImageNetLoader(object):
    """Load Tiny ImageNet images into NumPy arrays, grouped by class id (wnid).

    Parameters
    ----------
    mode : str
        'train' or 'val'. Selects which split is indexed. Any other value
        leaves the file index empty.
    data_path : str
        Path to the dataset root (the folder containing ``wnids.txt``,
        ``train/`` and ``val/``), e.g. ``'../tiny-imagenet-200'``.
    data_type : str or numpy dtype, optional
        dtype of the returned image and label arrays (default ``'float32'``).

    Attributes
    ----------
    wnids : list of str
        Class ids in the order they appear in ``wnids.txt``; the position of
        a wnid in this list is its integer label.
    imagefiles_wnid_dict : dict
        Maps wnid -> list of image file paths for the selected split.
    """

    def __init__(self, mode, data_path, data_type='float32'):
        self.data_path = data_path
        self.data_type = data_type
        self.mode = mode

        # wnids.txt lists one class id per line; blank lines are ignored.
        with open(self.path('wnids.txt')) as f:
            wnids = [line.strip() for line in f if line.strip()]
        assert len(wnids) == 200
        self.wnids = wnids

        images = {}
        if mode == 'val':
            # Validation images sit in one flat folder; the class of each file
            # is given by the first two tab-separated columns of the
            # annotations file (file name, wnid).
            with open(self.path('val', 'val_annotations.txt')) as f:
                for line in f:
                    fields = line.rstrip('\r\n').split('\t')
                    if len(fields) < 2:
                        # Blank or malformed line: nothing to index.
                        continue
                    image, wnid = fields[:2]
                    images.setdefault(wnid, []).append(
                        self.path('val', 'images', image))
        if mode == 'train':
            # Training layout: train/<wnid>/images/<file>.JPEG
            pattern = self.path('train', '*', 'images', '*.JPEG')
            # glob gives no ordering guarantee; sorting makes the subset
            # picked by load_n_images_all_classes reproducible.
            for filename in sorted(glob.glob(pattern)):
                # Take the wnid from the directory two levels above the file
                # rather than regex-searching the whole path, which would match
                # any earlier "n<digits>" fragment (e.g. a parent folder name).
                wnid = os.path.basename(
                    os.path.dirname(os.path.dirname(filename)))
                images.setdefault(wnid, []).append(filename)
        self.imagefiles_wnid_dict = images

    def path(self, *path):
        """Join ``path`` components onto the dataset root."""
        return os.path.join(self.data_path, *path)

    def load_image(self, filename):
        """Read one image as a ``(64, 64, 3)`` array of ``self.data_type``.

        Grayscale (or otherwise non-RGB) files are converted to RGB so that
        they can be used like any other sample. Returns ``None`` when the
        decoded image is not 64x64.
        """
        # The context manager closes the underlying file handle promptly.
        with Image.open(filename) as image:
            pixels = np.asarray(image.convert('RGB'))
        if pixels.shape != (64, 64, 3):
            return None
        return pixels.astype(self.data_type)

    def load_n_images_all_classes(self, nb):
        """Load up to ``nb`` images of every class, in shuffled order.

        Parameters
        ----------
        nb : int
            Number of samples per class (the dataset provides at most 50 per
            class for validation and 500 for training).

        Returns
        -------
        X_data : numpy.ndarray, shape ``[n, 64, 64, 3]``
        Y_data : numpy.ndarray, shape ``[n, num_classes]``, one-hot encoded

        ``n`` equals ``nb * num_classes`` when every requested image could be
        loaded. Images that are missing or rejected by ``load_image`` are
        left out, so no row ever contains uninitialised or NaN data.
        """
        num_classes = len(self.wnids)
        total = nb * num_classes
        X_data = np.zeros([total, 64, 64, 3], dtype=self.data_type)
        Y_data = np.zeros([total, num_classes], dtype=self.data_type)  # one hot encoded

        # Random destination row for every (class, sample) pair.
        slots = np.arange(total)
        np.random.shuffle(slots)
        # Tracks which rows actually received an image.
        filled = np.zeros(total, dtype=bool)

        for label, wnid in enumerate(self.wnids):
            files = self.imagefiles_wnid_dict.get(wnid, [])
            for i, filename in enumerate(files[:nb]):
                raw_image = self.load_image(filename)
                if raw_image is None:
                    # Assigning None into a float array would silently
                    # produce NaNs; skip the sample instead.
                    continue
                row = slots[label * nb + i]
                X_data[row] = raw_image
                Y_data[row, label] = 1
                filled[row] = True
        return X_data[filled], Y_data[filled]

In [None]:
# Dataset root, relative to the notebook's working directory -- the same
# 'tiny-imagenet-200' folder the Keras generator cells read from. A relative
# path keeps the notebook runnable on other machines and avoids embedding a
# user-specific absolute path.
path = "tiny-imagenet-200"

# Index the validation split and load 50 images per class (the maximum the
# validation set provides) as shuffled NumPy arrays with one-hot labels.
load_data = TinyImageNetLoader("val", data_path=path)
x_data, y_data = load_data.load_n_images_all_classes(nb=50)

In [None]:
# Number of images returned by the loader (length of x_data's first axis).
len(x_data)