In [1]:
from __future__ import print_function, division

from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.datasets import cifar10
from keras.callbacks import LearningRateScheduler
from keras.models import model_from_json
from keras.models import load_model
import numpy as np
import keras
from keract import get_activations
import math
import time
import matplotlib.pyplot as plt
import sys
import tensorflow as tf
'''
Function that returns the trainand test data of the CIFAR10 already preprocessed
'''
def getCIFAR10():
    # input image dimensions
    img_rows, img_cols = 32, 32
    num_classes = 10

    # the data, split between train and test sets
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    
    # format of the tensor
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 3, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 3, img_rows, img_cols)
        input_shape = (3, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3)
        input_shape = (img_rows, img_cols, 3)

    # convert in to float the images
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    
    # new normalization with z-score
    mean = np.mean(x_train,axis=(0,1,2,3))
    std = np.std(x_train,axis=(0,1,2,3))
    x_train = (x_train-mean)/(std+1e-7)
    x_test = (x_test-mean)/(std+1e-7)
    
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    print('CIFAR10 loaded')
    return x_train,y_train,x_test,y_test

'''
Small function that returns the shape of the CIFAR10 images
'''
def getCIFAR10InputShape():
    img_rows, img_cols = 32, 32
    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 3)
        
    return input_shape

'''
Function that loads from a file the teacher
'''
def getTeacher(file_name):
    # Model reconstruction from JSON file
    with open(file_name + '.json', 'r') as f:
        model = model_from_json(f.read())

    # Load weights into the new model
    model.load_weights(file_name + '.h5')
    
    print('Teacher loaded from' + file_name + '.h5')
    return model
    
'''
Function that loads from a file the teacher and test it on the CIRAF10 dataset
'''
def testTeacher(file_name):
    
    x_train,y_train,x_test,y_test = getCIFAR10()
    
    model = getTeacher(file_name)
    
    # define optimizer
    opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=opt_rms,
                  metrics=['accuracy'])

    # final evaluation on test
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Teacher test loss:', score[0])
    print('Teacher test accuracy:', score[1])

    
'''
Function that returns a simple student done by 2 convolutions, a maxpool and a final two fully connected layers
'''
def getSimpleStudent(input_shape):
    num_classes = 10
    #model definition
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    
    print('Simple student loaded')
    return model
    
'''
Function to try to train the simple sutdent in order to unerstand its capabilites
'''
def trainSimpleStudent(epochs):
    
    x_train,y_train,x_test,y_test = getCIFAR10()
    
    input_shape = getCIFAR10InputShape()
    
    model = getSimpleStudent(input_shape)
    
    model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
    
    batch_size = 128
    n_batches = math.floor( x_train.shape[0] / batch_size)

    for e in range(epochs):
    
        for i in range(0,n_batches):
            imgs = x_train[i*batch_size:(i+1)*batch_size]
            labels = y_train[i*batch_size:(i+1)*batch_size]
            loss = model.train_on_batch(imgs,labels)
            print("Epoch: " + str(e+1) + " batch " + str(i) + " loss: " + str(loss[0]) + " acc: " + str( 100*loss[1]))
            
        score = model.evaluate(x_test, y_test, verbose=0)
        print('After epoch ' + str(e+1) + ' test loss ' + str(score[0]) + ' test accuracy ' + str(score[1]))

'''
Function that returns a simple generator
'''
def getGenerator():

        noise_shape = (100,)

        model = Sequential()
        
        img_shape = getCIFAR10InputShape()

        model.add(Dense(128, input_shape=noise_shape))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(np.prod(img_shape), activation='tanh'))
        model.add(Reshape(img_shape))

        noise = Input(shape=noise_shape)
        img = model(noise)
        
        print('Generator loaded')
        return Model(noise, img)

def minus_kld():
    
    keras_kld = tf.keras.losses.KLDivergence() 
        
    def loss(y_true, y_pred):
        
        original_loss = keras_kld(y_true,y_pred)
        loss_to_return = - original_loss
        
        return original_loss
    
    
    return loss
        
def main():
    #testTeacher('model-16-2')
    #trainSimpleStudent(4)
    
    x_train,y_train,x_test,y_test = getCIFAR10()
    
    teacher = getTeacher('model-16-2')
    teacher.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer='adam',
                  metrics=['accuracy'])
    
    input_shape = getCIFAR10InputShape()
    student = getSimpleStudent(input_shape)
    
    student.compile(loss='kld',
                  optimizer='sgd',
                  metrics = ['accuracy'])
    
    generator = getGenerator()
    
    # The generator takes noise as input and generated imgs
    z = Input(shape=(100,))
    img = generator(z)

    # For the combined model we will only train the generator
    student.trainable = False

    # The valid takes generated images as input and determines validity
    valid = student(img)

    # The combined model  (stacked generator and discriminator) takes
    # noise as input => generates images => determines validity
    combined = Model(z, valid)
    combined.compile(loss=minus_kld(), optimizer='adam')
    
    
    n_batches = 1000
    batch_size = 128
    log_freq = 10
    ns = 10
    
    for i in range(n_batches):
        noise = np.random.normal(0, 1, (batch_size, 100))
        gen_imgs = generator.predict(noise)
        
        t_predictions = teacher.predict(gen_imgs)
        
        g_loss = combined.train_on_batch(noise,t_predictions)
        
        s_loss = 0
        for j in range(ns):
            s_loss += student.train_on_batch(gen_imgs,t_predictions)[0]
        
        print('batch ' + str(i) + '/' + str(n_batches) + ' G loss: ' + str(g_loss) + ' S loss: ' + str(s_loss))
        
        if (i % log_freq) == 0:
        # final evaluation on test
            score = student.evaluate(x_test, y_test, verbose=0)
            print('Student test loss: '  + str(score))
        
        
    score = student.evaluate(x_test, y_test, verbose=0)
    print('Student test loss: '  + str(score))
    
    
    '''
    kl_div = tf.keras.losses.KLDivergence()    
    
    for i in range(0,n_batches):
        print('batch ' + str(i))
        imgs = x_train[i*batch_size:(i+1)*batch_size]
        labels = y_train[i*batch_size:(i+1)*batch_size]
        t_predictions = teacher.predict(imgs)
        s_predictions = student.predict(imgs)
        print('teacher predictions: ')
        print(t_predictions)
        print('student predictions: ')
        print(s_predictions)
        
        
        
        # to print the KL divergence
        for j in range(batch_size):
            loss = kl_div(t_predictions[j],s_predictions[j])
            with tf.Session() as sess:
                init = tf.global_variables_initializer()
                sess.run(init)
                print(loss.eval())
        '''





main()


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


CIFAR10 loaded

Teacher loaded frommodel-16-2.h5

Simple student loaded
Generator loaded

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


  'Discrepancy between trainable weights and collected trainable'
  'Discrepancy between trainable weights and collected trainable'


batch 0/1000 G loss: 1.5640485 S loss: 12.02066445350647
Student test loss: [4.892328601074219, 0.10000000149011612]
batch 1/1000 G loss: 0.20010361 S loss: 2.76778544485569
batch 2/1000 G loss: 0.86611784 S loss: 1.874023124575615
batch 3/1000 G loss: 0.889666 S loss: 1.7952301353216171
batch 4/1000 G loss: 0.67435026 S loss: 1.4522293657064438
batch 5/1000 G loss: 0.44669244 S loss: 1.2702387273311615
batch 6/1000 G loss: 0.30275822 S loss: 1.1187561228871346
batch 7/1000 G loss: 0.19647768 S loss: 1.0229307860136032
batch 8/1000 G loss: 0.1623143 S loss: 0.9493248090147972
batch 9/1000 G loss: 0.124591246 S loss: 0.8769833222031593
batch 10/1000 G loss: 0.110888615 S loss: 0.8893177434802055
Student test loss: [3.8602321464538574, 0.10000000149011612]
batch 11/1000 G loss: 0.11092573 S loss: 0.8281595781445503
batch 12/1000 G loss: 0.09544526 S loss: 0.8888693749904633
batch 13/1000 G loss: 0.09714234 S loss: 0.8194840103387833
batch 14/1000 G loss: 0.08662433 S loss: 0.823203854262

In [2]:
from __future__ import print_function, division

from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.datasets import cifar10
from keras.callbacks import LearningRateScheduler
from keras.models import model_from_json
from keras.models import load_model
import numpy as np
import keras
from keract import get_activations
import math
import time
import matplotlib.pyplot as plt
import sys
import tensorflow as tf
'''
Function that returns the trainand test data of the CIFAR10 already preprocessed
'''
def getCIFAR10():
    # input image dimensions
    img_rows, img_cols = 32, 32
    num_classes = 10

    # the data, split between train and test sets
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    
    # format of the tensor
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 3, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 3, img_rows, img_cols)
        input_shape = (3, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 3)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 3)
        input_shape = (img_rows, img_cols, 3)

    # convert in to float the images
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    
    # new normalization with z-score
    mean = np.mean(x_train,axis=(0,1,2,3))
    std = np.std(x_train,axis=(0,1,2,3))
    x_train = (x_train-mean)/(std+1e-7)
    x_test = (x_test-mean)/(std+1e-7)
    
    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    print('CIFAR10 loaded')
    return x_train,y_train,x_test,y_test

'''
Small function that returns the shape of the CIFAR10 images
'''
def getCIFAR10InputShape():
    img_rows, img_cols = 32, 32
    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_rows, img_cols)
    else:
        input_shape = (img_rows, img_cols, 3)
        
    return input_shape
    
'''
Function that returns a simple student done by 2 convolutions, a maxpool and a final two fully connected layers
'''
def getSimpleStudent(input_shape):
    num_classes = 10
    #model definition
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    
    print('Simple student loaded')
    return model
    
'''
Function to try to train the simple sutdent in order to unerstand its capabilites
'''
def trainSimpleStudent(epochs):
    
    x_train,y_train,x_test,y_test = getCIFAR10()
    
    input_shape = getCIFAR10InputShape()
    
    model = getSimpleStudent(input_shape)
    
    model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
    
    batch_size = 128
    n_batches = math.floor( x_train.shape[0] / batch_size)

    for e in range(epochs):
    
        for i in range(0,n_batches):
            imgs = x_train[i*batch_size:(i+1)*batch_size]
            labels = y_train[i*batch_size:(i+1)*batch_size]
            loss = model.train_on_batch(imgs,labels)
            print("Epoch: " + str(e+1) + " batch " + str(i) + " loss: " + str(loss[0]) + " acc: " + str( 100*loss[1]))
            
        score = model.evaluate(x_test, y_test, verbose=0)
        print('After epoch ' + str(e+1) + ' test loss ' + str(score[0]) + ' test accuracy ' + str(score[1]))

trainSimpleStudent(10)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


CIFAR10 loaded

Simple student loaded

Epoch: 1 batch 0 loss: 2.4208488 acc: 10.15625
Epoch: 1 batch 1 loss: 2.8245845 acc: 12.5
Epoch: 1 batch 2 loss: 2.6858475 acc: 14.0625
Epoch: 1 batch 3 loss: 2.195191 acc: 13.28125
Epoch: 1 batch 4 loss: 2.2178483 acc: 21.09375
Epoch: 1 batch 5 loss: 2.1723037 acc: 21.875
Epoch: 1 batch 6 loss: 2.1966252 acc: 14.84375
Epoch: 1 batch 7 loss: 2.2557395 acc: 16.40625
Epoch: 1 batch 8 loss: 2.1758876 acc: 14.0625
Epoch: 1 batch 9 loss: 2.1140943 acc: 18.75
Epoch: 1 batch 10 loss: 2.1333125 acc: 19.53125
Epoch: 1 batch 11 loss: 2.1481454 acc: 23.4375
Epoch: 1 batch 12 loss: 2.1112804 acc: 24.21875
Epoch: 1 batch 13 loss: 2.1517854 acc: 25.0
Epoch: 1 batch 14 loss: 2.2812161 acc: 19.53125
Epoch: 1 batch 15 loss: 2.240492 acc: 14.0625
Epoch: 1 batch 16 loss: 2.1011388 acc: 21.09375
Epoch: 1 batch 17 loss: 2.1198192 acc: 18.75
Epoch: 1 batch 18 loss: 2.1487713 acc: 20.3125
Epoch: 1 batch 19 loss: 2.0973768 acc: 24.21875
Epoch: 1 batch 20 loss: 2.087379 a

In [None]:


# pseudocodice per il paper

# for i in range(batches):
    # z = noise(100)
    # generated_images = Generator(z)
    # (output, teacher_activations) = teacher(generated_images)
    ''' Teacher is the pre-trained network that outputs its activations and the result 
    or it can outputs only the result and we can get the activations with K.function '''
    # combined.train_on_batch(generated_images, (outputs,teacher_activations)) 
    '''Combined is a network that has at the start the generator and then the freezed student.
    The labels it gets are the results and the activations of the teacher
    The loss is to increase the distance between the labels and the output of itself
    therefore this network has to output its intermediate activations'''
    
    # for j in range(ns):
        # student.train_on_batch(generated_images, (outputs,teacher_activations))
        ''' Student network that outputs its results and its intermediate activations, 
        and its loss is to match the output and activations of the teacher'''

    ''' if i don't find a way to output intermediate activations from a model we could always
        get the activations with K.fuction and the data, and then input them to the loss as the label.
        This method does not seems to increase too much the training time...
    '''


