# **Mega Byte** 
This is a compilaton of code with the addition of transfer learning

# **Config**

In [1]:
!pip install datetime

Collecting datetime
[?25l  Downloading https://files.pythonhosted.org/packages/73/22/a5297f3a1f92468cc737f8ce7ba6e5f245fcfafeae810ba37bd1039ea01c/DateTime-4.3-py2.py3-none-any.whl (60kB)
[K     |█████▌                          | 10kB 12.1MB/s eta 0:00:01[K     |███████████                     | 20kB 18.1MB/s eta 0:00:01[K     |████████████████▍               | 30kB 10.4MB/s eta 0:00:01[K     |█████████████████████▉          | 40kB 8.8MB/s eta 0:00:01[K     |███████████████████████████▎    | 51kB 7.6MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 4.2MB/s 
Collecting zope.interface
[?25l  Downloading https://files.pythonhosted.org/packages/bb/a7/94e1a92c71436f934cdd2102826fa041c83dcb7d21dd0f1fb1a57f6e0620/zope.interface-5.4.0-cp37-cp37m-manylinux2010_x86_64.whl (251kB)
[K     |████████████████████████████████| 256kB 8.8MB/s 
Installing collected packages: zope.interface, datetime
Successfully installed datetime-4.3 zope.interface-5.4.0


In [2]:
# This is a config file for the project, it will contain all configurable variables in one place
# Once development of the code is complete, this should be the only file that needs to be edited.
import datetime
# Set to True if you want to print some information about the model and
# view sample images from the dataset
VISUALIZATION = False

# Set to True if you want to see the histogram for the dataset
HISTOGRAM = False

# Set to True to apply data Augmentation to the dataset before training
# Options:
DATA_AUGMENTATION = False

# Select the Model you want to train
# Options:
MODEL = 5

# Specify the number of epochs for training
EPOCHS = 1

# Specify the batch size for pre-processing and training
# Ex: 32, 64, 128
BATCH = 32

# Below are the Variables dependant on the Model selected
if MODEL == 1 and DATA_AUGMENTATION is False:
    # Using Model 1 without data augmentation
    MODEL_ID = 1
    LOG_DIR = "./tmp/logs/model1"
    MODEL_NAME = "cnn_model_1"

elif MODEL == 1 and DATA_AUGMENTATION is not False:
    # Using Model 1 with data augmentation
    MODEL_ID = 2
    LOG_DIR = "./tmp/logs/model1_data_aug"
    MODEL_NAME = "cnn_model_1_data_aug"

elif MODEL == 2 and DATA_AUGMENTATION is False:
    # Using Model 2 without data augmentation
    MODEL_ID = 3
    LOG_DIR = "./tmp/logs/model2"
    MODEL_NAME = "cnn_model_2"

elif MODEL == 2 and DATA_AUGMENTATION is not False:
    # Using Model 2 with data augmentation
    MODEL_ID = 4
    LOG_DIR = "./tmp/logs/model2_data_aug"
    MODEL_NAME = "cnn_model_2_data_aug"

elif MODEL == 5 and DATA_AUGMENTATION is False:
    # Using Model 2 (Transfer Learning VGG16) without data augmentation
    MODEL_ID = 5
    LOG_DIR = "./tmp/logs/model_TL"
    MODEL_NAME = "TL_model_VGG16"

else:
    # Error Did not Select a Model
    print("ERROR: Value entered for MODEL is incorrect. Received: ", MODEL, "\n EXPECTING: 1 or 2")
    exit(500)

LOG_FILE = LOG_DIR + "/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# **Utils**

In [3]:
# This file contains common functions for data processing independent of the dataset and model used.
# Not all of these functions are required to be used in the project
import tensorflow as tf
#import config

# Loading Tensorboard Logging dir and file
log_dir = LOG_DIR
log_file = LOG_FILE
model_name = MODEL_NAME

# ------- THE FUNCTIONS BELOW ARE USED IN THIS PROJECT ------- #
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

data_augmentation_flip_rotate = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.2)])

data_scaling_resizing = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.Resizing(512, 512),
    tf.keras.layers.experimental.preprocessing.Rescaling(1. / 255)])

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

monitor_func = tf.keras.callbacks.ModelCheckpoint(model_name, monitor='val_loss',
                                                  verbose=0, save_best_only=True,
                                                  save_weights_only=True, mode='min')
# Learning rate schedule
def scheduler(epoch, lr):
    if epoch % 10 == 0:
        lr = lr / 2
    return lr

lr_schedule = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=0)

### The functions below will not be used but are here kept anyway

In [4]:
# ------- THE FUNCTIONS BELOW WILL NoT be USED IN THIS PROJECT ------- #
# ------- They Did not work with our dataset


# Shuffle indexes of given dataset and labels
def shuffle_indexes(X, Y):
    import numpy as np
    indexes = np.arange(X.shape[0], dtype=int)
    np.random.shuffle(indexes)
    X_new = X[indexes]
    Y_new = Y[indexes]
    return X_new, Y_new


# Splitting the given dataset (dataset_X, dataset_Y) into two portions
# dataset_X is the data values and dataset_Y are the corresponding labels
# (X_LG, Y_LG) will have the first {percent*100}% of the dataset and
# (X_SM, Y_SM) will have the last {1 - percent}*100% of the dataset
def split_dataset(dataset_X, dataset_Y, percent):
    # Calculate splitting index
    nsplit = int(percent * dataset_X.shape[0])

    # split dataset into
    X_LG = dataset_X[:nsplit]
    Y_LG = dataset_Y[:nsplit]
    X_SM = dataset_X[nsplit:]
    Y_SM = dataset_Y[nsplit:]
    return X_LG, Y_LG, X_SM, Y_SM


# Returns One Hot Encoding for given train, validate and test dataset labels or None
def one_hot_encoding(train=None, validate=None, test=None):
    train_oh = None
    validate_oh = None
    test_oh = None
    if train is not None:
        train_oh = tf.keras.utils.to_categorical(train)
    if validate is not None:
        validate_oh = tf.keras.utils.to_categorical(validate)
    if test is not None:
        test_oh = tf.keras.utils.to_categorical(test)
    return train_oh, validate_oh, test_oh


# returns normalized dataset values
# norm_type = 0 -> min-max; norm_type = 1 -> standardization
def normalise_data(train, val, test, norm_type=0):
    if norm_type == 0:
        X_train = train / 255
        X_val = val / 255
        X_test = test / 255
    else:
        train_mean, train_std = train.mean(), train.std()
        X_train = (train - train_mean) / train_std
        X_val = (val - train_mean) / train_std
        X_test = (test - train_mean) / train_std
    return X_train, X_val, X_test


# Resize Images from TFDS Dataset
def resize_dataset(img, label):
    img = tf.image.resize(img, (512, 512))
    return img, label


# Extract Images and Labels in Dataset
def feature_extraction(ds):
    img = []
    lbl = []
    for i in ds:
        img.append(i[0])
        lbl.append(i[1])
    return img, lbl


# Normalizes Images in Dataset using min-max method
def dataset_normalization_min_max(img, lbl):
    img = img / 255
    return img, lbl


# Normalizes Images in Dataset using Standardization method
def dataset_normalization_std(img, lbl):
    mean, std = img.mean(), img.std()
    img = (img - mean) / std
    return img, lbl


# Augment Images in Dataset using random flip method
def dataset_augmentation_flip(img, lbl):
    return tf.image.random_flip_left_right(img), lbl


# Augment Images in Dataset using random contrast method
def dataset_augmentation_contrast(img, lbl):
    return tf.image.random_contrast(img, lower=0.0, upper=1.0), lbl

# **CNN Model**

Note that since transfer learning has a coarse fit and fine tune it is split up into seperate cells. These are defined below under training

In [5]:
import datetime

import tensorflow as tf
#import config

model_name = MODEL_NAME


# Different CNN are defined below
# After determining which architecture is better (model 1 or model 2)
# TODO apply different dropouts to the best one

def model_1(num_class, k=128):

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(16, (3, 3), input_shape=(512, 512, 3), data_format="channels_last", padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D((2, 2)),
        tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(k, activation='relu'),
        tf.keras.layers.Dense(num_class, activation='softmax')
    ])

    return model


# CNN From Assignment 2
def model_2(k=101):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(48, (3, 3), input_shape=(512, 512, 3), padding='same', activation='relu'),
        tf.keras.layers.Conv2D(48, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D((2, 2)),
        tf.keras.layers.Conv2D(96, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.Conv2D(96, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(k, activation='softmax')
    ])

    return model

def compile_model(model,  lr=1e-4, optim=0):
    if optim == 1:
        optimizer_cnn = tf.keras.optimizers.SGD(learning_rate=lr)
    else:
        optimizer_cnn = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=optimizer_cnn, loss='categorical_crossentropy',
                  metrics=["accuracy"])
    return model



# **Dataset Prep**

load dataset and get things ready

In [6]:
import matplotlib.pylab as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
#import utils
#import CNN
#import config

# Load Variables from Configuration File
VISUALIZE_IMG = VISUALIZATION
HISTOGRAM = HISTOGRAM
norm_type = "min-max"
data_augment = DATA_AUGMENTATION
MODEL = MODEL_ID
EPOCHS = EPOCHS
LOG_DIR = LOG_DIR
LOG_FILE = LOG_FILE
MODEL_NAME = MODEL_NAME
batch = BATCH
num_classes = 101

In [None]:
# Get GPU Working with CuDNN
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

## **Loading the Dataset**

In [7]:
# ---------- LOADING Dataset ---------- #
# Loading Food 101 train validation and test datasets with shuffled indexes  'validation',
(food101_ds_train, food101_ds_val, food101_ds_test), metadata = tfds.load('food101',
                                                                          split=['train[:86%]', 'validation', 'train[-14%:]'],
                                                                          shuffle_files=False, as_supervised=True, with_info=True) #swi
print("Split Keys: ", list(metadata.splits.keys()))
print("info.features: ", metadata.features)
print("train type before: ", type(food101_ds_train))
print("Num of Classes: ", metadata.features["label"].num_classes)
print("Lengths: ", len(food101_ds_train), len(food101_ds_val), len(food101_ds_test))
exit(0)
assert isinstance(food101_ds_train, tf.data.Dataset), "Training dataset is not a TF Dataset"
assert isinstance(food101_ds_val, tf.data.Dataset), "Validation dataset is not a TF Dataset"
assert isinstance(food101_ds_test, tf.data.Dataset), "Test dataset is not a TF Dataset"

get_label_name = metadata.features['label'].int2str
num_training = tf.data.experimental.cardinality(food101_ds_train).numpy()
print("Num training images: ", str(num_training))
print("Num Val images: ", str(tf.data.experimental.cardinality(food101_ds_val).numpy()))
print("Num test images: ", str(tf.data.experimental.cardinality(food101_ds_test).numpy()))

[1mDownloading and preparing dataset food101/2.0.0 (download: 4.65 GiB, generated: Unknown size, total: 4.65 GiB) to /root/tensorflow_datasets/food101/2.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Extraction completed...', max=1.0, styl…









HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/food101/2.0.0.incomplete0D9LEA/food101-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=75750.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/food101/2.0.0.incomplete0D9LEA/food101-validation.tfrecord


HBox(children=(FloatProgress(value=0.0, max=25250.0), HTML(value='')))

[1mDataset food101 downloaded and prepared to /root/tensorflow_datasets/food101/2.0.0. Subsequent calls will reuse this data.[0m
Split Keys:  ['train', 'validation']
info.features:  FeaturesDict({
    'image': Image(shape=(None, None, 3), dtype=tf.uint8),
    'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=101),
})
train type before:  <class 'tensorflow.python.data.ops.dataset_ops.PrefetchDataset'>
Num of Classes:  101
Lengths:  65145 25250 10605
Num training images:  65145
Num Val images:  25250
Num test images:  10605


In [1]:
# ---------- Pre-Processing Dataset ---------- #
# Resizing training and validation datasets to be (512, 512, 3)
# Applying One-Hot encoding to Labels
# DO NOT APPLY THIS TO THE TESTING SET
print("train type before: ", type(food101_ds_train))
food101_ds_train = food101_ds_train.map(lambda im_t, l_t: (data_scaling_resizing(im_t, training=True), l_t))
food101_ds_train = food101_ds_train.map(lambda im_t, l_t: (im_t, tf.one_hot(l_t,depth=101)))
food101_ds_val = food101_ds_val.map(lambda im_v, l_v: (data_scaling_resizing(im_v, training=True), l_v))
food101_ds_val = food101_ds_val.map(lambda im_v, l_v: (im_v, tf.one_hot(l_v,depth=101)))
food101_ds_train = food101_ds_train.batch(batch)
food101_ds_val = food101_ds_val.batch(batch)
print("train type after: ", type(food101_ds_train))

i = 0
for img, lbl in food101_ds_train:
    i += 1
    print("Train take one: ", img.shape, " lbl shape: ", lbl.shape, " lbl class: ", type(lbl) )
    if i > 4:
        break

NameError: ignored

# **Training**

In [2]:
# ---------- Training Models ---------- #
print("Training Model ", MODEL, "  Norm Type: ", norm_type, "  Data Aug: ", data_augment)
k = 128
optimizer = 0
learning_rate = 1e-4
if MODEL == 1:
    model1 = model_1(num_classes, k)  
    model1 = compile_model(model1, learning_rate, optimizer)
    history1 = model1.fit(food101_ds_train, validation_data=food101_ds_val,
                          epochs=EPOCHS, verbose=1, batch_size=batch,
                          callbacks=[early_stop, monitor_func,
                                     lr_schedule, tensorboard_callback])
elif MODEL == 2:
    food101_ds_train = food101_ds_train.map(lambda im_t, l_t: (data_augmentation_flip_rotate(im_t, training=True), l_t))
    food101_ds_val = food101_ds_val.map(lambda im_v, l_v: (data_augmentation_flip_rotate(im_v, training=True), l_v))

    model2 = model_1(num_classes, k)
    model2 = compile_model(model2, learning_rate, optimizer)
    history2 = model2.fit(food101_ds_train, validation_data=food101_ds_val,
                          epochs=EPOCHS, verbose=1,
                          callbacks=[early_stop, monitor_func,
                                     lr_schedule, tensorboard_callback])
elif MODEL == 3:
    model3 = model_2(num_classes)
    model3 = compile_model(model3, learning_rate, optimizer)
    history3 = model3.fit(food101_ds_train, validation_data=food101_ds_val,
                          epochs=EPOCHS, verbose=1,
                          callbacks=[early_stop, monitor_func,
                                     lr_schedule, tensorboard_callback])
elif MODEL == 4:
    food101_ds_train = food101_ds_train.map(lambda im_t, l_t: (data_augmentation_flip_rotate(im_t, training=True), l_t))
    food101_ds_val = food101_ds_val.map(lambda im_v, l_v: (data_augmentation_flip_rotate(im_v, training=True), l_v))

    model4 = model_2(num_classes)
    model4 = compile_model(model4, learning_rate, optimizer)
    history4 = model4.fit(food101_ds_train, validation_data=food101_ds_val,
                          epochs=EPOCHS, verbose=1,
                          callbacks=[early_stop, monitor_func,
                                     lr_schedule, tensorboard_callback])
# Transfer Learning
elif MODEL ==5:
        print("Tranfer learning selected. Proceed to next cell") # seperate from other models due to coarse and mine model training needed

else:
  print("error! no valid model selected")

# TensorBoard Logging ------- Optional
writer = tf.summary.create_file_writer(LOG_DIR)

NameError: ignored

## **Transfer Learning**

In [None]:
if MODEL == 5: #Transfer Learning model setup (VGG16)
  img_height = 512
  img_width = 512

  base_VGG16 = tf.keras.applications.VGG16( # Base model must have unique name as multiple models are in play
    weights='imagenet',  # Load weights pre-trained on ImageNet.
    input_shape=(img_height, img_width, 3), # Recieves input shape as specified
    include_top=False) # removes prediction layer so we can add our own

  base_VGG16.trainable = False # freezes model weights so we can train a new prediction layer
  input_image = tf.keras.Input(shape=(img_height, img_width, 3)) # define input size for model
  x1 = base_VGG16(input_image, training=False) # this layer is the VGG16 model we brought in without predicting layer
  x2 = tf.keras.layers.Flatten()(x1) #flattens VGG16 output for dense prediction layer
  out = tf.keras.layers.Dense(num_classes,activation = 'softmax')(x2)
  model_VGG16 = tf.keras.Model(inputs = input_image, outputs =out) # Model must have unique name as multiple models are in play (VGG16, CNN)

  print(model_VGG16.summary())
else:
  print("Transfer learning not selected")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 512, 512, 3)]     0         
_________________________________________________________________
vgg16 (Functional)           (None, 16, 16, 512)       14714688  
_________________________________________________________________
flatten (Flatten)            (None, 131072)            0         
_________________________________________________________________
dense (Dense)                (None, 101)               13238373  
Total params: 27,953,061
Trainable params: 13,238,373
Non-trainable params: 14,714,688
_________________________________________________________________
None


In [None]:
if MODEL == 5: #load in weights if training is broken into sessions due to long training times
  print("Transfer learning selected")
  #from numpy import loadtxt
  #from tensorflow.keras.models import load_model

  #model_VGG16 = load_model("VGG16_Team_18_Final_Project__pft_apr12.h5") #change date, pft = pre fine tuning
  ##check to make sure it worked
  #model_VGG16.summary()

else:
  print("Transfer learning not selected")


Transfer learning selected


In [None]:
if MODEL == 5: #compile and fit model (Coarse fit)
  model_VGG16.compile(optimizer=tf.keras.optimizers.Adam(lr = 1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


  model_VGG16.fit(food101_ds_train,epochs = EPOCHS, \
        verbose = 1, callbacks= [early_stop, monitor_func, lr_schedule],validation_data= food101_ds_val) # check that data names are correct

else:
  print("Transfer learning not selected")

NameError: ignored

In [None]:
if MODEL == 5: #compile and fit model
  print("Transfer learning selected")
  ##save VGG16 model weights - to be used if training is broken into sessions
  #model_VGG16.save("VGG16_Team_18_Final_Project__pft_apr12.h5.h5") #ensure date correct

  #print("saved model weights")

else:
  print("Transfer learning not selected")

Transfer learning selected


### Transfer Learning Fine Tuning

In [None]:
if MODEL == 5: #Fine tuning model
  model_VGG16.trainable = True
  #model_VGG16 = load_model('VGG16_Team_18_Final_Project_pft_apr12.h5') #change date -to be used if training is broken into sessions or just training fine tunin, pft = pre fine tuning
  model_VGG16.compile(optimizer=tf.keras.optimizers.Adam(lr = 1e-9), # Very low learning rate to fine tune
               loss='categorical_crossentropy',
               metrics=['accuracy'])
  print(model_VGG16.summary())
  model_VGG16.fit(food101_ds_train, epochs = EPOCHS, \
            verbose = 1, callbacks= [early_stop, monitor_func, lr_schedule],validation_data=food101_ds_val)

else:
  print("Transfer learning not selected")

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 512, 512, 3)]     0         
_________________________________________________________________
vgg16 (Functional)           (None, 16, 16, 512)       14714688  
_________________________________________________________________
flatten (Flatten)            (None, 131072)            0         
_________________________________________________________________
dense (Dense)                (None, 101)               13238373  
Total params: 27,953,061
Trainable params: 27,953,061
Non-trainable params: 0
_________________________________________________________________
None


NameError: ignored

In [None]:
if MODEL == 5: #Save Model weights after fine tuning
  print("Transfer learning selected")
  #model_VGG16.save('VGG16_Team_18_Final_Project_aft_apr12.h5') #saves weigths, aft = after fine tuning

  #print("saved model weights")

else:
  print("Transfer learning not selected")

Transfer learning selected


# **Visualization**

In [None]:
# ---------- Visualizing Dataset ---------- #
if VISUALIZE_IMG is True:
    # Method 1 of splitting Dataset to Images and Labels
    # train_X, train_Y = tuple(zip(*food101_ds_train))
    # val_X, val_Y = tuple(zip(*food101_ds_val))
    # test_X, test_Y = tuple(zip(*food101_ds_test))

    # Method 2 of Splitting ataset to Images and Labels
    train_X, train_Y = feature_extraction(food101_ds_train)
    val_X, val_Y = feature_extraction(food101_ds_val)
    test_X, test_Y = feature_extraction(food101_ds_test)

    # Printing some information about the Dataset
    print("info.features: ", metadata.features)
    print("Split Keys: ", list(metadata.splits.keys()))
    # print("Num of Training examples 1%: ", info.splits['validation[1%:]'].num_examples)
    print("Num of Classes: ", metadata.features["label"].num_classes)
    # print("Classes: ", info.features["label"].names)

    # Displaying Figure of samples from Dataset
    fig2 = tfds.show_examples(food101_ds_train, metadata, rows=5)

    # Displaying Figure of samples from Dataset using PyPlot
    i = 0
    plt.figure(1)
    for img, lbl in food101_ds_train:
        i = i + 1
        print("Train take one: ", img.shape, int(lbl))
        ax = plt.subplot(2, 2, i)
        plt.imshow(img.numpy().astype("uint8"))
        ax.set_title(get_label_name(lbl))
        plt.axis("off")
        if i >= 4:
            break

    # Displaying Figure of samples from Dataset using PyPlot
    # and using the Images and Labels dataset instead of the combined one (for comparison)
    i = 0
    plt.figure(2)
    for item in train_X:
        i = i + 1
        print("Train img: ", item.shape)
        ax = plt.subplot(2, 2, i)
        plt.imshow(item.numpy().astype("uint8"))
        plt.axis("off")
        if i >= 4:
            break

    i = 0
    for item in train_Y:
        i = i + 1
        print("Train lbl: ", int(item), "  name: ", get_label_name(item))
        if i >= 4:
            break
    plt.show()

    # Displaying the Histogram
    if HISTOGRAM is True:
        sample_indexes = np.random.choice(np.arange(len(train_X), dtype=int), size=10, replace=False)
        plt.figure()
        for (ii, jj) in enumerate(sample_indexes):
            plt.subplot(5, 6, ii + 1)
            plt.imshow(train_X[jj], cmap="gray")
            plt.title("Label: %d" % train_Y[jj])
        plt.show()

In [None]:
import csv

with open ('food101.csv') as f:
  d = dict (filter(None, csv.reader(f)))