In [None]:
import torch
import numpy as np 
import pandas as pd
import os
import xgboost as xgb
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import Input
from sklearn.utils import class_weight
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D
import tensorflow.keras.preprocessing.image 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Conv2D,MaxPool2D,Dropout,Flatten,Dense,BatchNormalization
from sklearn.utils import shuffle 
from sklearn.model_selection import train_test_split

In [None]:
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

In [None]:
import warnings

# Install a blanket 'ignore' filter so Python warnings are not shown from here on.
warnings.filterwarnings(action='ignore')

In [None]:
# Ask TensorFlow which GPU devices it can see and print one line per device
# (nothing is printed when the list is empty).
gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    print("Name:", device.name, "  Type:", device.device_type)
from tensorflow.python.client import device_lib

In [None]:
# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices is
# the supported replacement. The cell evaluates to True when at least one GPU
# is visible to TensorFlow and False otherwise.
bool(tf.config.list_physical_devices('GPU'))

In [None]:
# Side length (in pixels) that every image is resized to; the images are square.
width = height = 128

In [None]:
# Shared image generator: multiplies pixel values by 1/255 and reserves a 0.2
# fraction of the files for the 'validation' subset.
datagen = ImageDataGenerator(
    rescale=1 / 255.0,
    validation_split=0.2,
)

In [None]:
# Iterator over the 'training' subset: images resized to (height, width),
# binary labels, 32 images per batch.
trainDatagen = datagen.flow_from_directory(
    directory='/kaggle/input/cell-images-for-detecting-malaria/cell_images/cell_images/',
    target_size=(height, width),
    class_mode='binary',
    batch_size=32,
    subset='training',
)

In [None]:
# Iterator over the 'validation' subset of the same directory, configured
# exactly like the training iterator (same size, label mode and batch size).
valDatagen = datagen.flow_from_directory(
    directory='/kaggle/input/cell-images-for-detecting-malaria/cell_images/cell_images/',
    target_size=(height, width),
    class_mode='binary',
    batch_size=32,
    subset='validation',
)

In [None]:
#Making DataGenerator dataframe compatible for our Distillation arch--Tonmoy
def test_train_split(traingen, testgen):
    X_train, Y_train = next(iter(traingen))
    X_test,Y_test = next(iter(testgen))
    
    return X_train, X_test, Y_train, Y_test

In [None]:
# Show len() of the training iterator -- presumably the number of batches it
# yields per epoch (TODO confirm against the Keras iterator documentation).
print(len(trainDatagen))

In [None]:
class Distiller(keras.Model):
    """Keras model that trains a student network to imitate a teacher network.

    The custom ``train_step`` combines the student's own loss against the
    labels with a distillation loss between temperature-softened teacher and
    student predictions. ``test_step`` evaluates the student alone.

    ``train_step`` and ``test_step`` must live inside the class body: when they
    are defined in separate notebook cells they become plain functions and
    Keras silently uses its default training/evaluation steps instead.

    NOTE: the softened distributions are computed with a softmax over axis 1,
    so both networks are expected to emit one value per class. With a
    single-unit output the softmax is constantly 1 and the distillation loss
    carries no signal.
    """

    def __init__(self, student, teacher):
        """Store the two networks; only the student is updated during training."""
        super(Distiller, self).__init__()
        self.teacher = teacher
        self.student = student

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.1,
        temperature=3,
    ):
        """Configure the distiller.

        Args:
            optimizer: optimizer applied to the student's trainable variables.
            metrics: metrics updated with (labels, student predictions).
            student_loss_fn: loss between the labels and student predictions.
            distillation_loss_fn: loss between the softened teacher and
                softened student predictions.
            alpha: weight of ``student_loss_fn``; the distillation loss is
                weighted by ``1 - alpha``.
            temperature: divisor applied to both networks' predictions before
                the softmax.
        """
        super(Distiller, self).compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def train_step(self, data):
        """Run one optimisation step on the student.

        Args:
            data: an ``(x, y)`` pair of inputs and labels.

        Returns:
            Dict with the compiled metrics plus ``student_loss`` and
            ``distillation_loss``.
        """
        # Unpack data
        x, y = data

        # The teacher runs in inference mode and outside the gradient tape,
        # so it only provides targets and is never updated.
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            # Forward pass of student
            student_predictions = self.student(x, training=True)

            # Compute losses
            student_loss = self.student_loss_fn(y, student_predictions)
            distillation_loss = self.distillation_loss_fn(
                tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
                tf.nn.softmax(student_predictions / self.temperature, axis=1),
            )
            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        # Gradients are taken with respect to the student's variables only.
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`.
        self.compiled_metrics.update_state(y, student_predictions)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update(
            {"student_loss": student_loss, "distillation_loss": distillation_loss}
        )
        return results

    def test_step(self, data):
        """Evaluate the student on one batch.

        Args:
            data: an ``(x, y)`` pair of inputs and labels.

        Returns:
            Dict with the compiled metrics plus ``student_loss``.
        """
        # Unpack the data
        x, y = data

        # Compute predictions
        y_prediction = self.student(x, training=False)

        # Calculate the loss
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update the metrics.
        self.compiled_metrics.update_state(y, y_prediction)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss})
        return results

In [None]:
from tensorflow.keras.applications import EfficientNetB5

# EfficientNetB5 feature extractor without its classification head, built for
# the notebook's image size.
# NOTE(review): weights=None leaves the backbone randomly initialised, and the
# loop below freezes every layer, so it cannot learn either -- load pretrained
# weights (see the commented line) before relying on it.
effnetb5 = EfficientNetB5(
    weights=None,
    include_top=False,
    input_shape=(height, width, 3),
)
#effnet.load_weights("../input/efficientnet-keras-weights-b0b5/efficientnet-b5_imagenet_1000_notop.h5")
for backbone_layer in effnetb5.layers:
    backbone_layer.trainable = False

In [None]:
# Create the teacher: three Conv2D -> MaxPool2D -> Dropout stages followed by a
# small dense head ending in a single sigmoid unit.
# The input shape is taken from the shared height/width constants (instead of
# a hard-coded 128x128) so the model always matches the target_size used by
# the data generators.
teacher = Sequential(
    [
        Conv2D(16, (3, 3), activation='relu', input_shape=(height, width, 3)),
        MaxPool2D(2, 2),
        Dropout(0.2),

        Conv2D(32, (3, 3), activation='relu'),
        MaxPool2D(2, 2),
        Dropout(0.3),

        Conv2D(64, (3, 3), activation='relu'),
        MaxPool2D(2, 2),
        Dropout(0.3),

        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ],
    name="teacher",
)

teacher.summary()

In [None]:
"""
# Create the student
params = {"objective": "binary:logistic",
          "eta": 0.2,
          "max_depth": 3,
          "min_child_weight": 1,
          "silent": 1,
          "seed": 1} 

num_trees=300

student = xgb.train(params, xgb.XGBClassifier(results, train["signal"]), num_trees)
"""

In [None]:
# Create the student: a smaller network than the teacher, with two
# Conv2D -> MaxPool2D -> Dropout stages and the same dense head ending in a
# single sigmoid unit.
# The input shape is taken from the shared height/width constants (instead of
# a hard-coded 128x128) so the model always matches the target_size used by
# the data generators.
student = Sequential(
    [
        Conv2D(16, (3, 3), activation='relu', input_shape=(height, width, 3)),
        MaxPool2D(2, 2),
        Dropout(0.2),

        Conv2D(32, (3, 3), activation='relu'),
        MaxPool2D(2, 2),
        Dropout(0.3),

        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ],
    name="student",
)

student.summary()

In [None]:
# Materialise one batch from each iterator as in-memory arrays. The helper
# returns inputs first and labels second: (X_train, X_test, Y_train, Y_test).
x_train, x_test, y_train, y_test = test_train_split(trainDatagen, valDatagen)

In [None]:
teacher.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_loss', patience=2)

# Train and evaluate teacher on data.
# validation_data is required: without it Keras never computes `val_loss`,
# so the EarlyStopping callback has nothing to monitor and never stops the run.
teacher.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=31,
    callbacks=[early_stop],
)


#teacher.fit_generator(generator = trainDatagen, steps_per_epoch = len(trainDatagen),
                                #validation_data = valDatagen,
                                #validation_steps=len(valDatagen),
                                #callbacks=[early_stop])

In [None]:
# Clone student for later comparison
student_scratch = keras.models.clone_model(student)

# Draw the next batch from each iterator. test_train_split returns
# (X_train, X_test, Y_train, Y_test), so the names must be unpacked in that
# order -- swapping them would feed validation images in as training labels.
x_train, x_test, y_train, y_test = test_train_split(trainDatagen, valDatagen)

# Train and evaluate teacher on data (the teacher was compiled in the previous cell).
teacher.fit(x_train, y_train, epochs=5)
teacher.evaluate(x_test, y_test)


# Initialize and compile distiller.
# Both networks end in a single sigmoid unit, so the student loss and metric
# are the binary variants operating on probabilities; the sparse-categorical
# versions with from_logits=True expect one logit per class.
# NOTE(review): Distiller.train_step softens predictions with a softmax over
# axis 1. With single-unit outputs that softmax is constantly 1, so the
# KL-divergence term is zero and the student effectively learns from
# alpha * student_loss only. Give both networks a two-unit logit output if the
# distillation term is meant to contribute.
distiller = Distiller(student=student, teacher=teacher)
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.BinaryAccuracy()],
    student_loss_fn=keras.losses.BinaryCrossentropy(),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=10,
)

# Distill teacher to student
distiller.fit(x_train, y_train, epochs=3)

# Evaluate student on test dataset
distiller.evaluate(x_test, y_test)
