In [55]:
import numpy as np
import keras
from keras.preprocessing import image
import os
from keras.utils import Sequence
from PIL import Image


In [56]:
base_path = '../tiny-imagenet-200/train'
#Process Validation Data
base_path_valid = '../tiny-imagenet-200/val'
st = '../tiny-imagenet-200/val/images/'

BATCH_SIZE = 128
IMG_DIM = (224,224)

In [57]:

def preprocess_input(x,mode='tf'):
        """
            - tf: will scale pixels between -1 and 1,
                sample-wise.
            - torch: will scale pixels between 0 and 1 and then
                will normalize each channel with respect to the
                ImageNet dataset.
                
        """
        if mode=='tf':
            x = x/127.5
            x -= 1
            return x
        
        if mode=='torch':
            mean = [0.485,0.456,0.406]
            std = [0.229,0.224,0.225]
            x /= 255.0
            x[...,0] -= mean[0]
            x[...,1] -= mean[1]
            x[...,2] -= mean[2]
            
            x[...,0] /= std[0]
            x[...,1] /= std[1]
            x[...,2] /= std[2]
            
        return x
        

In [66]:
class DataGenerator(Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, labels, batch_size=32, dim=None, n_channels=1,
                 n_classes=10, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)

        # Generate data
        for i, f in enumerate(list_IDs_temp):
            # Store sample
            img = Image.open(f)
            img = img.resize(self.dim)
            img = img.convert('RGB')
            X[i,] = preprocess_input(np.array(img,dtype=np.float32))
            img.close()
            
            # Store class
            y[i] = self.labels[f]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

In [67]:
partition = {
    'train': [],
    'validation': []
}
labels = {}
class_ids = {}
cnt = 0

In [68]:
#Process Training Data

In [69]:
for label in os.listdir(base_path):
    temp = os.path.join(base_path, label)
    
    if class_ids.get(label) is None:
        class_ids[label] = cnt
        cnt += 1
    
    img_fldr_path = os.path.join(temp, 'images')
    for imgs in os.listdir(img_fldr_path):
        ID = os.path.join(img_fldr_path, imgs)
        partition['train'].append( ID )
        labels[ID] = class_ids[label]

In [70]:
NO_OF_CLASSES = len((os.listdir(base_path)))

In [71]:


with open(os.path.join(base_path_valid,"val_annotations.txt")) as f:
    
    lines = f.readlines()
    for line in lines:
        tokens = line.split()
        img_name = tokens[0]
        img_label = tokens[1]
        ID = os.path.join(st,img_name)
        partition['validation'].append(ID)
        labels[ID] = class_ids[img_label]

In [72]:
def load_validation_data(dim,channels,no_of_classes):
    with open(os.path.join(base_path_valid,"val_annotations.txt")) as f:
        lines = f.readlines()
        
        m = len(lines)
        X = np.empty((m, *dim,channels))
        y = np.empty(m,dtype=int)
        
        for i,line in enumerate(lines):
            tokens = line.split()
            img_name = tokens[0]
            img_label = tokens[1]
            ID = os.path.join(st,img_name)
            img = Image.open(ID)
            img = img.resize(dim)
            img = img.convert('RGB')
            X[i,] = preprocess_input(np.array(img,dtype=np.float32))
            img.close()
            y[i] = class_ids[img_label]
               
        return X, keras.utils.to_categorical(y, num_classes=no_of_classes)


In [83]:
# Testing Data Generator

In [81]:
X_val,y_val = load_validation_data(dim=(224,224),channels=3,no_of_classes=200)
print(X_val.shape)
print(y_val.shape)

(10000, 224, 224, 3)
(10000, 200)


In [78]:

train_generator = DataGenerator(batch_size=128,dim=(224,224),n_channels=3,list_IDs=partition['train'],
                                labels=labels,n_classes=NO_OF_CLASSES)

#val_generator = DataGenerator(batch_size=128,dim=(224,224),n_channels=3,list_IDs=partition['validation'],labels=labels,n_classes=NO_OF_CLASSES)


In [82]:
print(np.argmax(y_val,axis=-1))

[ 28  19 168 ...   5  84 123]


In [79]:
for i,(x,y) in enumerate(train_generator):
    #print(np.argmax(y,axis=1))
    print(np.argmax(y,axis=-1))
    print(x.shape,y.shape)
    break    

[ 94 176  37  39 116 194   5  79  14 182 106 129  23 179 188   2  23 190
 174 179  58 142  56  63  65 121 145 110 147   8  43  93 128  17  63  86
   5  55  35  24  18  53  79 158 163 115  59 167  39  85  31  26 199  37
  69  14 112  83  95 124 198 171  16 154 112  22  34 109  94  10 147  89
 134  66  63 146   7 155  29 187  69  72 103  60  71 122  90 165 115   9
  15 109 192 196 117 188 192 154 169 164 196  76  32 151 153  19  12  24
  51  74  47  38  89 152  62 173 103 122 135 149 140  24  35  80 193 177
  74 169]
(128, 224, 224, 3) (128, 200)
