In [111]:
import gzip
import numpy as np
import pandas as pd
from time import time

from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
import keras.layers as layers
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.callbacks import TensorBoard

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from requests import get

2020-04-21 19:45:43,299 - matplotlib.pyplot - DEBUG - Loaded backend module://ipykernel.pylab.backend_inline version unknown.


In [112]:
def download_file(url, file_name):
    with open(file_name, "wb") as file:
        response = get(url)
        file.write(response.content)

In [113]:
nb_classes = 10

In [114]:
def read_mnist(images_path: str, labels_path: str):
    with gzip.open(labels_path, 'rb') as labelsFile:
        labels = np.frombuffer(labelsFile.read(), dtype=np.uint8, offset=8)

    with gzip.open(images_path,'rb') as imagesFile:
        length = len(labels)
        # Load flat 28x28 px images (784 px), and convert them to 28x28 px
        features = np.frombuffer(imagesFile.read(), dtype=np.uint8, offset=16) \
                        .reshape(length, 784) \
                        .reshape(length, 28, 28, 1)
        
    return features, labels

In [115]:
download_file('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', 'train-images-idx3-ubyte.gz')
download_file('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 'train-labels-idx1-ubyte.gz')
download_file('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', 't10k-images-idx3-ubyte.gz')
download_file('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 't10k-labels-idx1-ubyte.gz')

2020-04-21 19:45:44,130 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): yann.lecun.com:80
2020-04-21 19:45:44,787 - urllib3.connectionpool - DEBUG - http://yann.lecun.com:80 "GET /exdb/mnist/train-images-idx3-ubyte.gz HTTP/1.1" 200 9912422
2020-04-21 19:45:46,477 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): yann.lecun.com:80
2020-04-21 19:45:46,937 - urllib3.connectionpool - DEBUG - http://yann.lecun.com:80 "GET /exdb/mnist/train-labels-idx1-ubyte.gz HTTP/1.1" 200 28881
2020-04-21 19:45:46,942 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): yann.lecun.com:80
2020-04-21 19:45:47,318 - urllib3.connectionpool - DEBUG - http://yann.lecun.com:80 "GET /exdb/mnist/t10k-images-idx3-ubyte.gz HTTP/1.1" 200 1648877
2020-04-21 19:45:48,144 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): yann.lecun.com:80
2020-04-21 19:45:48,502 - urllib3.connectionpool - DEBUG - http://yann.lecun.com:80 "GET /exdb/mnist/t10k-lab

In [116]:
train = {}
test = {}

train['features'], train['labels'] = read_mnist('train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz')
test['features'], test['labels'] = read_mnist('t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz')

In [117]:
print('# of training images:', train['features'].shape[0])
print('# of test images:', test['features'].shape[0])

# of training images: 60000
# of test images: 10000


In [118]:
# Pad images with 0s
train['features']      = np.pad(train['features'], ((0,0),(2,2),(2,2),(0,0)), 'constant')
test['features']       = np.pad(test['features'], ((0,0),(2,2),(2,2),(0,0)), 'constant')
    
print("Updated Image Shape: {}".format(train['features'][0].shape))

Updated Image Shape: (32, 32, 1)


In [119]:
#LeNet-5 as teacher
model = keras.Sequential()

model.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(32,32,1)))
model.add(layers.AveragePooling2D())

model.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
model.add(layers.AveragePooling2D())

model.add(layers.Flatten())

model.add(layers.Dense(units=120, activation='relu'))

model.add(layers.Dense(units=84, activation='relu'))

model.add(layers.Dense(units=10, activation = 'softmax'))

model.summary()

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_7 (Average (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_8 (Average (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_17 (Flatten)         (None, 576)               0         
_________________________________________________________________
dense_38 (Dense)             (None, 120)               69240     
_________________________________________________________________
dense_39 (Dense)             (None, 84)              

In [120]:
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

In [121]:
EPOCHS = 10
BATCH_SIZE = 128

In [122]:
X_train, y_train = train['features'], to_categorical(train['labels'])
X_test, y_test = test['features'], to_categorical(test['labels'])

train_generator = ImageDataGenerator().flow(X_train, y_train, batch_size=BATCH_SIZE)
test_generator = ImageDataGenerator().flow(X_test, y_test, batch_size=BATCH_SIZE)

In [123]:
print('# of training images:', train['features'].shape[0])
print('# of validation images:', test['features'].shape[0])

steps_per_epoch = X_train.shape[0]//BATCH_SIZE
test_steps = X_test.shape[0]//BATCH_SIZE

tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, 
                    validation_data=test_generator, validation_steps=test_steps, 
                    shuffle=True, callbacks=[tensorboard])

# of training images: 60000
# of validation images: 10000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x1a661a8650>

In [150]:
student = Sequential()
student.add(Flatten(input_shape=(32,32,1)))
student.add(Dense(32, activation='relu'))
student.add(Dropout(0.2))
student.add(Dense(nb_classes))
student.add(Activation('softmax'))

student.summary()

Model: "sequential_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_20 (Flatten)         (None, 1024)              0         
_________________________________________________________________
dense_46 (Dense)             (None, 32)                32800     
_________________________________________________________________
dropout_18 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_47 (Dense)             (None, 10)                330       
_________________________________________________________________
activation_28 (Activation)   (None, 10)                0         
Total params: 33,130
Trainable params: 33,130
Non-trainable params: 0
_________________________________________________________________


In [151]:
temp = 1

teacher_WO_Softmax = Model(model.input, model.get_layer('dense_40').output)

In [152]:
def softmax(x):
    return np.exp(x)/(np.exp(x).sum())

In [153]:
teacher_train_logits = teacher_WO_Softmax.predict(X_train)
teacher_test_logits = teacher_WO_Softmax.predict(X_test) 
train_logits_T = teacher_train_logits/temp
test_logits_T = teacher_test_logits / temp 

Y_train_soft = softmax(train_logits_T)
Y_test_soft = softmax(test_logits_T)

Y_train_new = np.concatenate([Y_train, Y_train_soft], axis=1)
Y_test_new =  np.concatenate([Y_test, Y_test_soft], axis =1)

In [154]:
Y_train_new.shape

(60000, 20)

In [155]:
Y_test_new.shape

(10000, 20)

In [156]:
X_train.shape


(60000, 32, 32, 1)

In [157]:
Y_train_new[0]

array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 1.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 1.4228697e-06, 1.4228697e-06,
       1.4228697e-06, 1.4776838e-06, 1.4228697e-06, 3.7242855e-06,
       1.4228697e-06, 1.4228697e-06, 1.4228707e-06, 1.4228697e-06],
      dtype=float32)

In [158]:
student.layers.pop()


logits = student.layers[-1].output 
probs = Activation('softmax')(logits)

logits_T = Lambda(lambda x: x / temp)(logits)
probs_T = Activation('softmax')(logits_T)

output = concatenate([probs, probs_T])

student = Model(student.input, output)

student.summary()

Model: "model_15"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
flatten_20_input (InputLayer)   (None, 32, 32, 1)    0                                            
__________________________________________________________________________________________________
flatten_20 (Flatten)            (None, 1024)         0           flatten_20_input[0][0]           
__________________________________________________________________________________________________
dense_46 (Dense)                (None, 32)           32800       flatten_20[0][0]                 
__________________________________________________________________________________________________
dropout_18 (Dropout)            (None, 32)           0           dense_46[0][0]                   
___________________________________________________________________________________________

In [159]:
def knowledge_distillation_loss(y_true, y_pred, alpha):
    y_true, y_true_softs = y_true[: , :nb_classes], y_true[: , nb_classes:]
    
    y_pred, y_pred_softs = y_pred[: , :nb_classes], y_pred[: , nb_classes:]
    
    loss = alpha*logloss(y_true,y_pred) + logloss(y_true_softs, y_pred_softs)
    
    return loss

def acc(y_true, y_pred):
    y_true = y_true[:, :nb_classes]
    y_pred = y_pred[:, :nb_classes]
    return categorical_accuracy(y_true, y_pred)

In [160]:
student.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

In [162]:
student.fit(X_train, Y_train_new,
          batch_size=BATCH_SIZE,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, Y_test_new))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x1a8802c690>