<a href="https://colab.research.google.com/github/sp7412/colab/blob/master/distilling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np

from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.models import Model
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import RMSprop

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def load_mnist():
  """Load MNIST and return it in the form the Keras models in this notebook expect.

  Images are reshaped to (N, 28, 28, 1), cast to float32 and divided by 255.
  Labels are cast to float32 and passed through `to_categorical`.

  Returns:
    ((x_train, y_train), (x_test, y_test)) with the transformations above applied.
  """
  def _prepare_images(raw_images):
    # Add the trailing channel axis and rescale the pixel values.
    return raw_images.reshape(-1, 28, 28, 1).astype('float32') / 255.

  def _prepare_labels(raw_labels):
    # Expand the class ids into a binary class matrix.
    return to_categorical(raw_labels.astype('float32'))

  train_split, test_split = mnist.load_data()
  train_images, train_labels = train_split
  test_images, test_labels = test_split

  prepared_train = (_prepare_images(train_images), _prepare_labels(train_labels))
  prepared_test = (_prepare_images(test_images), _prepare_labels(test_labels))
  return prepared_train, prepared_test

In [None]:
def build_simple_model():
  """Build the convolutional MNIST classifier and print its summary.

  Layer stack: two 3x3 convolutions (32 then 64 filters), 2x2 max pooling,
  dropout, a 128-unit dense layer, dropout, a 10-unit 'logits' dense layer
  and a final softmax activation.

  Returns:
    The (uncompiled) Keras `Model` mapping a (28, 28, 1) input to the softmax output.
  """
  # (layer class, constructor kwargs) in the order the layers are stacked.
  # Each layer is constructed right before it is applied, one after another.
  layer_specs = [
    (layers.Conv2D, dict(filters=32, kernel_size=(3,3), activation='relu', name='Conv1')),
    (layers.Conv2D, dict(filters=64, kernel_size=(3,3), activation='relu', name='Conv2')),
    (layers.MaxPooling2D, dict(pool_size=(2,2), name='MaxPool')),
    (layers.Dropout, dict(rate=0.25, name='Dropout1')),
    (layers.Flatten, dict(name='Flat')),
    (layers.Dense, dict(units=128, activation='relu', name='FC1')),
    (layers.Dropout, dict(rate=0.5, name='Dropout2')),
    # Named 'logits' so that other cells can look this layer up via get_layer().
    (layers.Dense, dict(units=10, name='logits')),
    (layers.Activation, dict(activation='softmax', name='Softmax')),
  ]

  inputs = layers.Input(shape=(28, 28, 1))
  tensor = inputs
  for layer_cls, layer_kwargs in layer_specs:
    tensor = layer_cls(**layer_kwargs)(tensor)

  network = Model(inputs=inputs, outputs=tensor)
  network.summary()
  return network

In [None]:
# Instantiate the convolutional network; build_simple_model() also prints its layer summary.
# Later cells read this model's 'logits' layer to produce training targets for the smaller network.
model = build_simple_model()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
Conv1 (Conv2D)               (None, 26, 26, 32)        320       
_________________________________________________________________
Conv2 (Conv2D)               (None, 24, 24, 64)        18496     
_________________________________________________________________
MaxPool (MaxPooling2D)       (None, 12, 12, 64)        0         
_________________________________________________________________
Dropout1 (Dropout)           (None, 12, 12, 64)        0         
_________________________________________________________________
Flat (Flatten)               (None, 9216)              0         
_________________________________________________________________
FC1 (Dense)                  (None, 128)              

In [None]:
# Load the preprocessed MNIST splits; these names are reused by every later cell.
(x_train, y_train), (x_test, y_test) = load_mnist()

# Train the convolutional model on the labels returned by load_mnist():
# RMSprop with its default settings, categorical cross-entropy, 15 epochs, batches of 512.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=15, batch_size=512)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7fb620138d30>

In [None]:
# Score the trained convolutional model on the test split.
# evaluate() returns the loss followed by the metrics given to compile(), here accuracy.
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

Test Loss: 0.029050899669528008
Test Accuracy: 0.9919999837875366


In [None]:
def softmax_with_temperature(logits, temperature=1):
  """Convert logits into probabilities with a temperature-scaled softmax.

  The softmax is taken independently along the last axis, so a batch of shape
  (num_examples, num_classes) yields one probability distribution per example.

  Args:
    logits: Array-like of raw scores; the last axis indexes the classes.
    temperature: Positive scalar the logits are divided by before the softmax.
      1 gives the ordinary softmax; larger values give a flatter distribution.

  Returns:
    NumPy array with the same shape as `logits` whose entries sum to 1 along
    the last axis.

  Raises:
    ValueError: If `temperature` is not strictly positive.
  """
  if temperature <= 0:
    raise ValueError("temperature must be positive, got {}".format(temperature))

  scaled = np.asarray(logits) / temperature
  if scaled.size == 0:
    # Nothing to normalise; also avoids np.max failing on a zero-size array.
    return scaled

  # Subtracting the per-row maximum leaves the softmax unchanged but keeps
  # np.exp from overflowing on large logits.
  shifted = scaled - np.max(scaled, axis=-1, keepdims=True)
  exp_shifted = np.exp(shifted)

  # Normalise per row (last axis), not over the whole array.
  return exp_shifted / np.sum(exp_shifted, axis=-1, keepdims=True)

In [None]:
# View of the trained model that stops at the 'logits' Dense layer, i.e. before the softmax activation.
model_sans_softmax = Model(inputs=model.input, outputs=model.get_layer("logits").output)
# Raw (pre-softmax) outputs of that layer for every training image.
model_logits = model_sans_softmax.predict(x_train)

In [None]:
# Pass the logits through softmax_with_temperature with temperature 1 (logits left unscaled).
# NOTE(review): this variable is not referenced again in the visible cells.
unsoftened_train_prob = softmax_with_temperature(model_logits, 1)

In [None]:
def build_small_model():
  """Build the small fully-connected MNIST classifier and print its summary.

  Layer stack: flatten, two 128-unit ReLU dense layers ('FC1', 'FC2'),
  a 10-unit 'logits' dense layer and a final softmax activation.

  Returns:
    The (uncompiled) Keras `Model` mapping a (28, 28, 1) input to the softmax output.
  """
  image_input = layers.Input(shape=(28, 28, 1))

  hidden = layers.Flatten()(image_input)
  for dense_name in ('FC1', 'FC2'):
    hidden = layers.Dense(128, activation='relu', name=dense_name)(hidden)

  # Named 'logits' so that other cells can look this layer up via get_layer().
  class_logits = layers.Dense(10, name='logits')(hidden)
  class_probs = layers.Activation('softmax', name='Softmax')(class_logits)

  network = Model(inputs=image_input, outputs=class_probs)
  network.summary()
  return network

In [None]:
# Small dense network that a later cell trains directly on y_train (no distillation).
small_model = build_small_model()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
FC1 (Dense)                  (None, 128)               100480    
_________________________________________________________________
FC2 (Dense)                  (None, 128)               16512     
_________________________________________________________________
logits (Dense)               (None, 10)                1290      
_________________________________________________________________
Softmax (Activation)         (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
________________________________________________

In [None]:
# Baseline: train the small network directly on the labels in y_train
# (RMSprop, categorical cross-entropy, 50 epochs, batches of 512).
# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and is rejected by current Keras releases.
small_model.compile(optimizer=RMSprop(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
small_model.fit(x_train, y_train, epochs=50, batch_size=512)

# Score the baseline on the test split; evaluate() returns loss, then accuracy.
test_loss, test_acc = small_model.evaluate(x_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 0.13884003460407257
Test Accuracy: 0.9803000092506409


In [None]:
# These two statements repeat an earlier cell: rebuild the view of `model` that ends at the
# 'logits' layer and recompute the pre-softmax outputs for the training images.
model_sans_softmax = Model(inputs=model.input, outputs=model.get_layer("logits").output)
model_logits = model_sans_softmax.predict(x_train)
# Temperature used for the targets below; a later cell's Lambda layer also reads this global.
temperature = 4.0
# Temperature-scaled softmax of the logits; used as the training targets for new_small_model.
softened_train_prob = softmax_with_temperature(model_logits, temperature)

In [None]:
# Fresh copy of the small architecture; the next cell replaces its output head with a temperature-scaled softmax.
new_small_model = build_small_model()

Model: "functional_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 784)               0         
_________________________________________________________________
FC1 (Dense)                  (None, 128)               100480    
_________________________________________________________________
FC2 (Dense)                  (None, 128)               16512     
_________________________________________________________________
logits (Dense)               (None, 10)                1290      
_________________________________________________________________
Softmax (Activation)         (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_______________________________________________

In [None]:
# Take the output of the 'logits' layer, bypassing the Softmax layer created by build_small_model().
logits = new_small_model.get_layer('logits').output
# Divide the logits by the temperature before the softmax.
# NOTE(review): the lambda reads the notebook-global `temperature` rather than a value passed in,
# so `temperature` must be defined before this cell runs.
logits = layers.Lambda(lambda x: x / temperature, name='Temperature')(logits)
preds = layers.Activation('softmax', name='Softmax')(logits)
  
# Rebind new_small_model to the model whose output is softmax(logits / temperature).
new_small_model = Model(inputs=new_small_model.input, outputs=preds)
new_small_model.summary()

Model: "functional_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 784)               0         
_________________________________________________________________
FC1 (Dense)                  (None, 128)               100480    
_________________________________________________________________
FC2 (Dense)                  (None, 128)               16512     
_________________________________________________________________
logits (Dense)               (None, 10)                1290      
_________________________________________________________________
Temperature (Lambda)         (None, 10)                0         
_________________________________________________________________
Softmax (Activation)         (None, 10)              

In [None]:
# Distillation: train the small network on softened_train_prob instead of y_train
# (RMSprop, categorical cross-entropy, 100 epochs, batches of 512).
# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and is rejected by current Keras releases.
new_small_model.compile(optimizer=RMSprop(learning_rate=0.03), loss='categorical_crossentropy', metrics=['accuracy'])
new_small_model.fit(x_train, softened_train_prob, epochs=100, batch_size=512)

# Score against the test labels in y_test. new_small_model outputs
# softmax(logits / temperature), so test_loss is measured on that temperature-scaled
# output; accuracy depends only on which class scores highest.
test_loss, test_acc = new_small_model.evaluate(x_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78