In [1]:
import numpy as np
import keras
from keras.preprocessing import image
import os
from keras.utils import Sequence
from PIL import Image


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Dataset locations, relative to the notebook's working directory.
DATASET_ROOT = '../tiny-imagenet-200'
base_path = DATASET_ROOT + '/train'
# Validation split: the annotation file is read from base_path_valid,
# and image paths are built from st.
base_path_valid = DATASET_ROOT + '/val'
st = base_path_valid + '/images/'

# Batch generator settings.
BATCH_SIZE, IMG_DIM = 128, (224, 224)

In [3]:

def preprocess_input(x, mode='tf'):
    """Scale an image array for network input without modifying the caller's array.

    Parameters
    ----------
    x : numpy.ndarray
        Pixel values with channels on the last axis. The scaling constants
        assume values in the 0-255 range.
    mode : str
        - 'tf': scale pixels to between -1 and 1 (``x / 127.5 - 1``),
          sample-wise.
        - 'torch': scale pixels to between 0 and 1, then normalize the first
          three channels with the ImageNet per-channel mean and std.
        Any other value returns ``x`` unchanged.

    Returns
    -------
    numpy.ndarray
        A new array holding the scaled values (or ``x`` itself for an
        unrecognized ``mode``).
    """
    if mode == 'tf':
        x = x / 127.5
        x -= 1
        return x

    if mode == 'torch':
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        # Divide out of place: an in-place `x /= 255.0` would overwrite the
        # caller's array and raises on integer dtypes such as uint8.
        x = x / 255.0
        for channel in range(3):
            x[..., channel] -= mean[channel]
            x[..., channel] /= std[channel]

    return x
        

In [4]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads images from disk and yields (X, y) batches.

    Parameters
    ----------
    list_IDs : list
        Image file paths; each one is opened with PIL when its batch is built.
    labels : mapping
        Maps every entry of ``list_IDs`` to its integer class id.
    batch_size : int
        Number of samples per batch.
    dim : tuple
        Target image size as (rows, cols). Required: the default ``None``
        cannot be unpacked when a batch is built.
    n_channels : int
        Channels per image: 1 (grayscale), 3 (RGB) or 4 (RGBA).
    n_classes : int
        Width of the one-hot label vectors.
    shuffle : bool
        Whether to reshuffle the sample order at the end of every epoch.
    """

    # PIL conversion mode for each supported channel count.
    _PIL_MODES = {1: 'L', 3: 'RGB', 4: 'RGBA'}

    def __init__(self, list_IDs, labels, batch_size=32, dim=None, n_channels=1,
                 n_classes=10, shuffle=True):
        """Store the configuration and build the initial sample order."""
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Build and return batch number ``index`` as ``(X, y)``."""
        # Positions in the (possibly shuffled) order that belong to this batch.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into image paths.
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load, resize and preprocess the given images.

        Returns ``X`` with shape ``(n_samples, *dim, n_channels)`` and the
        one-hot encoded labels with shape ``(n_samples, n_classes)``.

        Raises ``ValueError`` if ``n_channels`` has no matching PIL mode.
        """
        pil_mode = self._PIL_MODES.get(self.n_channels)
        if pil_mode is None:
            raise ValueError(
                'n_channels must be one of %s, got %r'
                % (sorted(self._PIL_MODES), self.n_channels))

        # Size the buffers by the samples actually supplied so no
        # uninitialised rows are ever returned.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty((n_samples), dtype=int)

        # PIL sizes are (width, height) whereas dim is (rows, cols).
        pil_size = (self.dim[1], self.dim[0])

        for i, f in enumerate(list_IDs_temp):
            # The context manager closes the file-backed source image; the
            # resized/converted copy lives in memory only.
            with Image.open(f) as source:
                img = source.resize(pil_size).convert(pil_mode)
            pixels = np.array(img, dtype=np.float32)
            img.close()

            # The reshape adds the channel axis that single-channel images lack.
            X[i,] = preprocess_input(pixels).reshape(*self.dim, self.n_channels)

            # Store class
            y[i] = self.labels[f]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

In [5]:
# Empty containers that the indexing cells fill in.
partition = dict(train=[], validation=[])  # split name -> list of image paths
labels = dict()                            # image path -> integer class id
class_ids = dict()                         # class name -> integer class id
cnt = 0                                    # next class id to hand out

In [6]:
# Process training data: index every training image and give each class folder an integer id.

In [7]:
# Build the training index: one integer id per class folder, one label per image.
# Sorting keeps the folder -> id mapping reproducible, because os.listdir
# returns entries in arbitrary order.
train_ids = []
for label in sorted(os.listdir(base_path)):
    img_fldr_path = os.path.join(base_path, label, 'images')
    if not os.path.isdir(img_fldr_path):
        # Stray entries without an images folder are not classes.
        continue

    if label not in class_ids:
        class_ids[label] = cnt
        cnt += 1

    for img_name in sorted(os.listdir(img_fldr_path)):
        ID = os.path.join(img_fldr_path, img_name)
        train_ids.append(ID)
        labels[ID] = class_ids[label]

# Assign instead of appending so re-running the cell does not duplicate entries.
partition['train'] = train_ids

In [9]:
# Sanity check: number of labelled images, then the sorted class ids.
print(len(labels), sorted(class_ids.values()), sep='\n')

100000
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199]


In [10]:
# Take the class count from the mapping that was actually built, rather than
# re-scanning the directory, so it always matches the ids stored in labels.
NO_OF_CLASSES = len(class_ids)

In [11]:


# Build the validation index from val_annotations.txt: the first token of each
# line is the image file name, the second is its class name.
val_ids = []
with open(os.path.join(base_path_valid, "val_annotations.txt")) as f:
    # Stream the file line by line instead of loading it all with readlines().
    for line in f:
        tokens = line.split()
        if len(tokens) < 2:
            # Blank or truncated line: nothing to index.
            continue
        img_name, img_label = tokens[0], tokens[1]
        ID = os.path.join(st, img_name)
        val_ids.append(ID)
        labels[ID] = class_ids[img_label]

# Assign instead of appending so re-running the cell does not duplicate entries.
partition['validation'] = val_ids

In [16]:

# Training batches: reshuffled every epoch (the DataGenerator default).
train_generator = DataGenerator(list_IDs=partition['train'], labels=labels,
                                batch_size=BATCH_SIZE, dim=IMG_DIM, n_channels=3,
                                n_classes=NO_OF_CLASSES)

# Validation batches must come from the validation split, not the training one.
# A fixed order keeps the evaluated subset the same from epoch to epoch.
val_generator = DataGenerator(list_IDs=partition['validation'], labels=labels,
                              batch_size=BATCH_SIZE, dim=IMG_DIM, n_channels=3,
                              n_classes=NO_OF_CLASSES, shuffle=False)


In [17]:
# Smoke test: fetch the first training batch and show its class ids and shapes.
if len(train_generator) > 0:
    x, y = train_generator[0]
    print(np.argmax(y, axis=-1))
    print(x.shape, y.shape)

[ 82 105   0  73  93 104 180 107 133 124 196 144  21 133 141  91 102  61
  40  22 101  10  48 136 164  17  51 103  13  31 160 126  31  99  33  25
  19 144  79 199 122  37  55  39   1  42  37  66 185 120  86 171 170  22
  46  20   7  92  84  14  35   1 171 178 114  62 148 133  87  84  23 107
  20 140 192 105  33   5  94  77 143 157  64 101  99  89  74  95 199 102
  75  68 169  33   8  43 136 110 123  68  76 110 151 198  24 119 137 126
  11  64 169  29  33 198  48  25 189 197  85 114 122  86  32 124 124 125
  27 185]
(128, 224, 224, 3) (128, 200)
