In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras
import tensorflow.keras.backend as K
import os
from tensorflow.keras.datasets import fashion_mnist,mnist,cifar10
# import keras.backend as K
from tensorflow.keras.layers import Conv2D,Activation,BatchNormalization,UpSampling2D,Embedding,ZeroPadding2D, Input, Flatten, Dense, Reshape, LeakyReLU, Dropout,MaxPooling2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras import regularizers

from tensorflow.keras.utils import Progbar
from keras.initializers import RandomNormal
import random
from sklearn.model_selection import train_test_split
# from keras.utils import np_utils
from tensorflow.keras import utils as np_utils



In [2]:
nb_classes = 10
batch_size = 128
maxepoches = 250
learning_rate = 0.1
lr_decay = 1e-6
lr_drop = 20
def lr_scheduler(epoch):
    return learning_rate * (0.5 ** (epoch // lr_drop))
reduce_lr = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)

In [3]:
#Loading and splitting the dataset into train, validation and test


(X_Train, y_Train), (X_test, y_test) = cifar10.load_data()
X_train, X_val, y_train, y_val = train_test_split(X_Train, y_Train, test_size=0.20)
# convert y_train and y_test to categorical binary values 
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_val = np_utils.to_categorical(y_val, nb_classes)
y_test = np_utils.to_categorical(y_test, nb_classes)

In [4]:
X_Train.shape

(50000, 32, 32, 3)

In [5]:
# Reshape them to batch_size, width,height,#channels
X_train = X_train.reshape(40000, 32, 32, 3)
X_val = X_val.reshape(10000, 32, 32, 3)
X_test = X_test.reshape(10000, 32, 32, 3)

X_train = X_train.astype('float32')
X_val = X_val.astype('float32')
X_test = X_test.astype('float32')

# Normalize the values
X_train /= 255
X_val /= 255
X_test /= 255

In [6]:
init=RandomNormal(mean=0,stddev=0.02)
input_shape = (32, 32, 3) # Input shape of each image
weight_decay = 0.0005

def build_model():
    # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper.

    model = Sequential()

    model.add(Conv2D(64, (3, 3), padding='same',
                     input_shape=input_shape,kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

    model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

    model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

    model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

    model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))

    model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))
    model.add(Dropout(0.5))

    model.add(Flatten())
    model.add(Dense(256,kernel_regularizer=regularizers.l2(weight_decay), name='dense_1'))
    model.add(Activation('relu'))
    model.add(BatchNormalization())

    model.add(Dropout(0.5))
    model.add(Dense(10, name='dense_2'))
    model.add(Activation('softmax'))
    return model
teacher = build_model()

sgd = SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)

teacher.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy'])


In [7]:
# teacher.fit(X_train,Y_train,batch_size=128,epochs=150,verbose=1,callbacks=[reduce_lr],validation_data=(X_val,Y_val))

In [8]:
teacher.load_weights('Cifar10_Teacher.h5')

In [9]:
# Y_test = np_utils.to_categorical(y_test, nb_classes)
loss, acc =teacher.evaluate(X_test, y_test, verbose=1)
loss, acc



(0.834343671798706, 0.8992000222206116)

In [10]:
#Collect the dense vector from the previous layer output and store it in a different model
teacher_WO_Softmax = Model(teacher.input, teacher.get_layer('dense_1').output)

In [11]:
#Extracting dense representation from the teacher network
train_dense = teacher_WO_Softmax.predict(X_train)
# val_dense = teacher_WO_Softmax.predict(X_val)

In [10]:
#Splitting the training dense vector among N students(in this case 2)
s1Train=train_dense[:,:32]
s2Train=train_dense[:,32:64]
s3Train=train_dense[:,64:96]
s4Train=train_dense[:,96:128]

s5Train=train_dense[:,128:160]
s6Train=train_dense[:,160:192]
s7Train=train_dense[:,192:224]
s8Train=train_dense[:,224:256]


NameError: name 'train_dense' is not defined

In [12]:
def define_model(name):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(32, 32, 3), name=name))
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.3))
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(16, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='relu', kernel_initializer='he_uniform',name='req'+name))

    model.compile(optimizer='nadam', loss='mse', metrics=['accuracy'])
    return model

student1 = define_model('s1')
student1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
s1 (Conv2D)                  (None, 32, 32, 32)        896       
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 32, 32, 32)        9248      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 16, 16, 64)        18496     
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 16, 16, 64)        36928     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 8, 8, 64)         

In [22]:
#Edited version of my friend's implementation
# import np.random import random
BATCH_SIZE=32
def smooth_real_labels(y):
    return y - 0.3+(np.random.random(y.shape)*0.5)
def smooth_fake_labels(y):
    return y + (0.3 * np.random.random(y.shape))
def build_gan(gen,disc): 
    disc.trainable = False
    input= Input(shape=input_shape)
    output = gen(input)
    output2= disc(output)
    gan=Model(input,output2)

    gan.compile(Adam(lr=0.0002),loss=['binary_crossentropy','mse'],metrics=['accuracy'])

    return gan

In [23]:
def build_sdiscriminator():
    
    input2 = Input(shape=(32,),name='input')
    inp=Dense(128)(input2)

    leaky_relu = LeakyReLU(alpha=0.2)(inp)
    
    conv3 = Dense(128,activation='relu')(leaky_relu)
    b_n = BatchNormalization()(conv3)
    # leaky_relu = LeakyReLU(alpha=0.2)(b_n)
    conv3 = Dense(128,activation='relu')(leaky_relu)
    b_n = BatchNormalization()(conv3)
    # leaky_relu = LeakyReLU(alpha=0.2)(b_n)
    conv3 = Dense(128,activation='relu')(b_n)
    b_n = BatchNormalization()(conv3)
    # leaky_relu = LeakyReLU(alpha=0.2)(b_n)
    conv4 = Dense(256,activation='relu')(b_n)
    b_n = BatchNormalization()(conv4)
    # leaky_relu = LeakyReLU(alpha=0.2)(b_n)
    conv4 = Dense(256,activation='relu')(b_n)
    b_n = BatchNormalization()(conv4)
    # leaky_relu = LeakyReLU(alpha=0.2)(b_n)
    # conv4 = Dense(512)(leaky_relu)
    # b_n = BatchNormalization()(conv4)
    # leaky_relu = LeakyReLU(alpha=0.2)(b_n)

    dense = Dense(1,activation='sigmoid')(b_n)

    output2=Dense(32)(b_n)

    
    disc = Model(input2,[dense,output2])          
    disc.compile(optd,loss=['binary_crossentropy','mse'],metrics=['accuracy'])


    return disc

In [24]:
def training(generator,discriminator,gan,features,epo=20):
    # Setup Models here
    BATCH_SIZE = 128
    discriminator.trainable = True
    total_size = X_train.shape[0]
    indices = np.arange(0,total_size ,BATCH_SIZE)
    all_disc_loss = []
    all_gen_loss = []
    all_class_loss=[]
    if total_size % BATCH_SIZE:
        indices = indices[:-1]
    for e in range(epo):
        
        progress_bar = Progbar(target=len(indices))
        np.random.shuffle(indices)
        epoch_gen_loss = []
        epoch_disc_loss = []
        epoch_class_loss= []
        for i,index in enumerate(indices):
        
            # Write your code here
            inputs=X_train[index:index+BATCH_SIZE]
            real_image = features[index:index+BATCH_SIZE]
            y_train = features[index:index+BATCH_SIZE]

            y_real = np.ones((BATCH_SIZE,1))
            y_fake = np.zeros((BATCH_SIZE,1))

            #Generator Training
            fake_images = generator.predict_on_batch(inputs)

            #Disrciminator Training
            disc_real_loss1,_,disc_real_loss2,_,_= discriminator.train_on_batch(real_image,[y_real,y_train])
            disc_fake_loss1,_,disc_fake_loss2,_,_= discriminator.train_on_batch(fake_images,[y_fake,y_train])

            #Gans Training
            discriminator.trainable = False
            gan_loss,_,gan_loss2,_,_ = gan.train_on_batch(inputs, [y_real,y_train])
            gan_loss,_,gan_loss2,_,_ = gan.train_on_batch(inputs, [y_real,y_train])
            gan_loss,_,gan_loss2,_,_ = gan.train_on_batch(inputs, [y_real,y_train])
            gan_loss,_,gan_loss2,_,_ = gan.train_on_batch(inputs, [y_real,y_train])

            discriminator.trainable = True

            disc_loss = (disc_fake_loss1 + disc_real_loss1)/2
            epoch_disc_loss.append(disc_loss)
            progress_bar.update(i+1)

            epoch_gen_loss.append((gan_loss))

        avg_epoch_disc_loss = np.array(epoch_disc_loss).mean()
        avg_epoch_gen_loss = np.array(epoch_gen_loss).mean()
        all_disc_loss.append(avg_epoch_disc_loss)
        all_gen_loss.append(avg_epoch_gen_loss)
        print("Epoch: %d | Discriminator Loss: %f | Generator Loss: %f | " % (e+1,avg_epoch_disc_loss,avg_epoch_gen_loss))

    return generator

In [11]:
optd = Adam(lr=0.0002)
opt = Adam(lr=0.0002)

In [26]:
discriminator1 = build_sdiscriminator()
s1=define_model("s1")
gan1 = build_gan(s1,discriminator1)
s1 = training(s1,discriminator1,gan1,s1Train,epo=90)

Epoch: 1 | Discriminator Loss: 1.734173 | Generator Loss: 2.096475 | 
Epoch: 2 | Discriminator Loss: 1.353976 | Generator Loss: 1.965397 | 
Epoch: 3 | Discriminator Loss: 1.284604 | Generator Loss: 1.887134 | 
Epoch: 4 | Discriminator Loss: 1.224197 | Generator Loss: 1.817051 | 
Epoch: 5 | Discriminator Loss: 1.156467 | Generator Loss: 1.721271 | 
Epoch: 6 | Discriminator Loss: 1.098294 | Generator Loss: 1.600327 | 
Epoch: 7 | Discriminator Loss: 1.049023 | Generator Loss: 1.505542 | 
Epoch: 8 | Discriminator Loss: 1.009800 | Generator Loss: 1.419982 | 
Epoch: 9 | Discriminator Loss: 0.979339 | Generator Loss: 1.376354 | 
Epoch: 10 | Discriminator Loss: 0.955097 | Generator Loss: 1.334823 | 
Epoch: 11 | Discriminator Loss: 0.934851 | Generator Loss: 1.308693 | 
Epoch: 12 | Discriminator Loss: 0.918248 | Generator Loss: 1.277639 | 
Epoch: 13 | Discriminator Loss: 0.901831 | Generator Loss: 1.249857 | 
Epoch: 14 | Discriminator Loss: 0.889242 | Generator Loss: 1.228266 | 
Epoch: 15 | Dis

In [27]:
discriminator2 = build_sdiscriminator()
s2=define_model("s2")
gan2 = build_gan(s2,discriminator2)
s2 = training(s2,discriminator2,gan2,s2Train,epo=90)

Epoch: 1 | Discriminator Loss: 1.425561 | Generator Loss: 2.012998 | 
Epoch: 2 | Discriminator Loss: 1.250220 | Generator Loss: 1.891332 | 
Epoch: 3 | Discriminator Loss: 1.178727 | Generator Loss: 1.755405 | 
Epoch: 4 | Discriminator Loss: 1.106283 | Generator Loss: 1.657059 | 
Epoch: 5 | Discriminator Loss: 1.055358 | Generator Loss: 1.564335 | 
Epoch: 6 | Discriminator Loss: 1.015856 | Generator Loss: 1.489488 | 
Epoch: 7 | Discriminator Loss: 0.983004 | Generator Loss: 1.418492 | 
Epoch: 8 | Discriminator Loss: 0.956289 | Generator Loss: 1.363011 | 
Epoch: 9 | Discriminator Loss: 0.934748 | Generator Loss: 1.324713 | 
Epoch: 10 | Discriminator Loss: 0.916285 | Generator Loss: 1.286677 | 
Epoch: 11 | Discriminator Loss: 0.899871 | Generator Loss: 1.256415 | 
Epoch: 12 | Discriminator Loss: 0.886575 | Generator Loss: 1.236454 | 
Epoch: 13 | Discriminator Loss: 0.874915 | Generator Loss: 1.212591 | 
Epoch: 14 | Discriminator Loss: 0.865355 | Generator Loss: 1.202134 | 
Epoch: 15 | Dis

In [28]:
discriminator3 = build_sdiscriminator()
s3=define_model("s3")
gan3 = build_gan(s3,discriminator3)
s3 = training(s3,discriminator3,gan3,s3Train,epo=94)

Epoch: 1 | Discriminator Loss: 1.458599 | Generator Loss: 2.041242 | 
Epoch: 2 | Discriminator Loss: 1.270196 | Generator Loss: 1.889364 | 
Epoch: 3 | Discriminator Loss: 1.174336 | Generator Loss: 1.737784 | 
Epoch: 4 | Discriminator Loss: 1.091523 | Generator Loss: 1.606382 | 
Epoch: 5 | Discriminator Loss: 1.041292 | Generator Loss: 1.518211 | 
Epoch: 6 | Discriminator Loss: 1.001387 | Generator Loss: 1.450784 | 
Epoch: 7 | Discriminator Loss: 0.968270 | Generator Loss: 1.399477 | 
Epoch: 8 | Discriminator Loss: 0.944475 | Generator Loss: 1.352251 | 
Epoch: 9 | Discriminator Loss: 0.923772 | Generator Loss: 1.318598 | 
Epoch: 10 | Discriminator Loss: 0.904783 | Generator Loss: 1.286411 | 
Epoch: 11 | Discriminator Loss: 0.891448 | Generator Loss: 1.266053 | 
Epoch: 12 | Discriminator Loss: 0.880073 | Generator Loss: 1.239391 | 
Epoch: 13 | Discriminator Loss: 0.867796 | Generator Loss: 1.220627 | 
Epoch: 14 | Discriminator Loss: 0.858663 | Generator Loss: 1.195488 | 
Epoch: 15 | Dis

In [29]:
discriminator4 = build_sdiscriminator()
s4=define_model("s4")
gan4 = build_gan(s4,discriminator4)
s4 = training(s4,discriminator4,gan4,s4Train,epo=95)


Epoch: 1 | Discriminator Loss: 1.378951 | Generator Loss: 1.913779 | 
Epoch: 2 | Discriminator Loss: 1.201454 | Generator Loss: 1.825489 | 
Epoch: 3 | Discriminator Loss: 1.134674 | Generator Loss: 1.698849 | 
Epoch: 4 | Discriminator Loss: 1.064294 | Generator Loss: 1.546674 | 
Epoch: 5 | Discriminator Loss: 1.013233 | Generator Loss: 1.471074 | 
Epoch: 6 | Discriminator Loss: 0.978064 | Generator Loss: 1.409808 | 
Epoch: 7 | Discriminator Loss: 0.953697 | Generator Loss: 1.363780 | 
Epoch: 8 | Discriminator Loss: 0.932713 | Generator Loss: 1.322521 | 
Epoch: 9 | Discriminator Loss: 0.914705 | Generator Loss: 1.301353 | 
Epoch: 10 | Discriminator Loss: 0.900606 | Generator Loss: 1.269164 | 
Epoch: 11 | Discriminator Loss: 0.887559 | Generator Loss: 1.245187 | 
Epoch: 12 | Discriminator Loss: 0.875588 | Generator Loss: 1.220971 | 
Epoch: 13 | Discriminator Loss: 0.866083 | Generator Loss: 1.199993 | 
Epoch: 14 | Discriminator Loss: 0.856759 | Generator Loss: 1.183352 | 
Epoch: 15 | Dis

In [30]:
discriminator5 = build_sdiscriminator()
s5=define_model("s5")
gan5 = build_gan(s5,discriminator5)
s5 = training(s5,discriminator5,gan5,s5Train,epo=95)


Epoch: 1 | Discriminator Loss: 1.424087 | Generator Loss: 1.959537 | 
Epoch: 2 | Discriminator Loss: 1.192094 | Generator Loss: 1.760965 | 
Epoch: 3 | Discriminator Loss: 1.113773 | Generator Loss: 1.613933 | 
Epoch: 4 | Discriminator Loss: 1.064670 | Generator Loss: 1.536221 | 
Epoch: 5 | Discriminator Loss: 1.030896 | Generator Loss: 1.479514 | 
Epoch: 6 | Discriminator Loss: 1.003329 | Generator Loss: 1.426575 | 
Epoch: 7 | Discriminator Loss: 0.983128 | Generator Loss: 1.392968 | 
Epoch: 8 | Discriminator Loss: 0.962820 | Generator Loss: 1.355667 | 
Epoch: 9 | Discriminator Loss: 0.940608 | Generator Loss: 1.321592 | 
Epoch: 10 | Discriminator Loss: 0.922792 | Generator Loss: 1.291237 | 
Epoch: 11 | Discriminator Loss: 0.908532 | Generator Loss: 1.274118 | 
Epoch: 12 | Discriminator Loss: 0.894947 | Generator Loss: 1.247723 | 
Epoch: 13 | Discriminator Loss: 0.882956 | Generator Loss: 1.230298 | 
Epoch: 14 | Discriminator Loss: 0.871057 | Generator Loss: 1.211109 | 
Epoch: 15 | Dis

In [31]:
discriminator6 = build_sdiscriminator()
s6=define_model("s6")
gan6 = build_gan(s6,discriminator6)
s6 = training(s6,discriminator6,gan6,s6Train,epo=95)


Epoch: 1 | Discriminator Loss: 1.364790 | Generator Loss: 1.811572 | 
Epoch: 2 | Discriminator Loss: 1.170114 | Generator Loss: 1.716230 | 
Epoch: 3 | Discriminator Loss: 1.111115 | Generator Loss: 1.615334 | 
Epoch: 4 | Discriminator Loss: 1.058287 | Generator Loss: 1.519940 | 
Epoch: 5 | Discriminator Loss: 1.015388 | Generator Loss: 1.439656 | 
Epoch: 6 | Discriminator Loss: 0.982052 | Generator Loss: 1.380489 | 
Epoch: 7 | Discriminator Loss: 0.955049 | Generator Loss: 1.343048 | 
Epoch: 8 | Discriminator Loss: 0.932583 | Generator Loss: 1.300576 | 
Epoch: 9 | Discriminator Loss: 0.912532 | Generator Loss: 1.259023 | 
Epoch: 10 | Discriminator Loss: 0.895944 | Generator Loss: 1.235743 | 
Epoch: 11 | Discriminator Loss: 0.879805 | Generator Loss: 1.204228 | 
Epoch: 12 | Discriminator Loss: 0.867442 | Generator Loss: 1.179725 | 
Epoch: 13 | Discriminator Loss: 0.857819 | Generator Loss: 1.159266 | 
Epoch: 14 | Discriminator Loss: 0.847032 | Generator Loss: 1.139662 | 
Epoch: 15 | Dis

In [32]:
discriminator7 = build_sdiscriminator()
s7=define_model("s7")
gan7 = build_gan(s7,discriminator7)
s7 = training(s7,discriminator7,gan7,s7Train,epo=95)


Epoch: 1 | Discriminator Loss: 1.365458 | Generator Loss: 1.808065 | 
Epoch: 2 | Discriminator Loss: 1.169194 | Generator Loss: 1.687783 | 
Epoch: 3 | Discriminator Loss: 1.095441 | Generator Loss: 1.583961 | 
Epoch: 4 | Discriminator Loss: 1.042124 | Generator Loss: 1.492673 | 
Epoch: 5 | Discriminator Loss: 1.002813 | Generator Loss: 1.418261 | 
Epoch: 6 | Discriminator Loss: 0.971326 | Generator Loss: 1.355573 | 
Epoch: 7 | Discriminator Loss: 0.942502 | Generator Loss: 1.280913 | 
Epoch: 8 | Discriminator Loss: 0.921466 | Generator Loss: 1.247024 | 
Epoch: 9 | Discriminator Loss: 0.903227 | Generator Loss: 1.218179 | 
Epoch: 10 | Discriminator Loss: 0.889358 | Generator Loss: 1.198250 | 
Epoch: 11 | Discriminator Loss: 0.877257 | Generator Loss: 1.174888 | 
Epoch: 12 | Discriminator Loss: 0.866067 | Generator Loss: 1.156022 | 
Epoch: 13 | Discriminator Loss: 0.855349 | Generator Loss: 1.130796 | 
Epoch: 14 | Discriminator Loss: 0.847418 | Generator Loss: 1.117347 | 
Epoch: 15 | Dis

In [33]:
discriminator8 = build_sdiscriminator()
s8=define_model("s8")
gan8 = build_gan(s8,discriminator8)
s8 = training(s8,discriminator8,gan8,s8Train,epo=95)


Epoch: 1 | Discriminator Loss: 1.349530 | Generator Loss: 1.839125 | 
Epoch: 2 | Discriminator Loss: 1.146157 | Generator Loss: 1.639352 | 
Epoch: 3 | Discriminator Loss: 1.064911 | Generator Loss: 1.507785 | 
Epoch: 4 | Discriminator Loss: 1.014463 | Generator Loss: 1.410052 | 
Epoch: 5 | Discriminator Loss: 0.979392 | Generator Loss: 1.358326 | 
Epoch: 6 | Discriminator Loss: 0.951197 | Generator Loss: 1.317157 | 
Epoch: 7 | Discriminator Loss: 0.927999 | Generator Loss: 1.289440 | 
Epoch: 8 | Discriminator Loss: 0.910292 | Generator Loss: 1.253738 | 
Epoch: 9 | Discriminator Loss: 0.893163 | Generator Loss: 1.216171 | 
Epoch: 10 | Discriminator Loss: 0.878414 | Generator Loss: 1.195231 | 
Epoch: 11 | Discriminator Loss: 0.866254 | Generator Loss: 1.175093 | 
Epoch: 12 | Discriminator Loss: 0.855354 | Generator Loss: 1.152920 | 
Epoch: 13 | Discriminator Loss: 0.845325 | Generator Loss: 1.128817 | 
Epoch: 14 | Discriminator Loss: 0.836472 | Generator Loss: 1.110329 | 
Epoch: 15 | Dis

**8 Students**

In [14]:
o1=s1.get_layer("reqs1").output
o2=s2.get_layer("reqs2").output
o3=s3.get_layer("reqs3").output
o4=s4.get_layer("reqs4").output

o5=s5.get_layer("reqs5").output
o6=s6.get_layer("reqs6").output
o7=s7.get_layer("reqs7").output
o8=s8.get_layer("reqs8").output

output=tensorflow.keras.layers.concatenate([o1,o2,o3,o4,o5,o6,o7,o8])

output=Activation('relu')(output)
output2=Dropout(0.5)(output) # For reguralization
output3=Dense(10,activation="softmax", name="d1")(output2)

mm8=Model([s1.get_layer("s1").input,s2.get_layer("s2").input,
           s3.get_layer("s3").input,s4.get_layer("s4").input,
           s5.get_layer("s5").input,s6.get_layer("s6").input,
           s7.get_layer("s7").input,s8.get_layer("s8").input], output3)
my_weights=teacher.get_layer('dense_2').get_weights()
mm8.get_layer('d1').set_weights(my_weights)

In [15]:
i=0
for l in mm8.layers[:len(mm8.layers)-2]:
    l.trainable=False

In [17]:
# Tweak with learning rate to enhance the accuracy
mm8.compile(loss='categorical_crossentropy',
              optimizer=Adam(learning_rate=0.0002),
              metrics=['accuracy'])

In [37]:
# Without finetune
batch_size = 256
mm8_history=mm8.fit([X_train,X_train,X_train,X_train,X_train,X_train,X_train,X_train], Y_train,
          batch_size=batch_size,
          epochs=5,
          verbose=1,
          validation_data=([X_val,X_val,X_val,X_val,X_val,X_val,X_val,X_val], Y_val))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [38]:
l,a = mm8.evaluate([X_test,X_test,X_test,X_test,X_test,X_test,X_test,X_test], y_test)
l, a



(0.7439749240875244, 0.8741999864578247)