In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/klimb_llm_optimization_challenge.zip

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers

# Path of train and test data
train_dir = "/content/klimb_llm_optimization_challenge/seg_train"
test_dir = "/content/klimb_llm_optimization_challenge/seg_test/"

# Data configs
batch_size = 32
img_height = 150
img_width = 150

# Load train data
train_ds = tf.keras.utils.image_dataset_from_directory(
	train_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

# Load test data
test_ds = tf.keras.utils.image_dataset_from_directory(
  test_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)


# Training the MASTER Model - using Transfer Learning
# Here we are using ImageNet pre-trained model weights
base_model = keras.applications.ResNet152(
		weights='imagenet',  # Load weights pre-trained on ImageNet.
		input_shape=(img_height, img_width, 3),
		include_top=False)  # Do not include the ImageNet classifier at the top.
base_model.trainable = False
inputs = keras.Input(shape=(img_height, img_width, 3))
# We make sure that the base_model is running in inference mode here,
# by passing `training=False`. This is important for fine-tuning.
x = base_model(inputs, training=False)
# Convert features of shape `base_model.output_shape[1:]` to vectors
x = keras.layers.GlobalAveragePooling2D()(x)
# A Dense classifier with a single unit (binary classification)
outputs = keras.layers.Dense(6)(x)
model = keras.Model(inputs, outputs)
model.summary()
model.compile(
		optimizer=keras.optimizers.Adam(),
		loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
		metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

epochs = 20
model.fit(train_ds, epochs=epochs)

# Generate results on test data
results = model.evaluate(test_ds)
print(f"Test accuracy with trained teacher model:{results[1]*100 :.2f} %")

Found 14034 files belonging to 6 classes.
Using 11228 files for training.
Found 3000 files belonging to 6 classes.
Using 600 files for validation.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 150, 150, 3)]     0         
                                                                 
 resnet152 (Functional)      (None, 5, 5, 2048)        58370944  
                                                                 
 global_average_pooling2d (  (None, 2048)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 6)                 12294     
                                  

In [22]:
student = keras.Sequential(
    [
        keras.Input(shape=(150, 150, 3,1)),
        layers.Conv3D(16, 3, strides=(2, 2,2), padding="same"),
        layers.LeakyReLU(),
        layers.MaxPooling3D(pool_size=(2, 2,2), strides=(1, 1
        ,1), padding="same"),
        layers.Conv3D(16, (3, 3,3), strides=(2, 2,2), padding="same"),
        layers.Flatten(),
        layers.Dense(6),
    ],
    name="student",
)

In [23]:
student.summary()

Model: "student"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_4 (Conv3D)           (None, 75, 75, 2, 16)     448       
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 75, 75, 2, 16)     0         
                                                                 
 max_pooling3d_3 (MaxPoolin  (None, 75, 75, 2, 16)     0         
 g3D)                                                            
                                                                 
 conv3d_5 (Conv3D)           (None, 38, 38, 1, 16)     6928      
                                                                 
 flatten_3 (Flatten)         (None, 23104)             0         
                                                                 
 dense_4 (Dense)             (None, 6)                 138630    
                                                           

In [12]:
class Distiller(keras.Model):
    def __init__(self, student, teacher):
        super().__init__()
        self.teacher = teacher
        self.student = student

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.1,
        temperature=3,
    ):
        """Configure the distiller.

        Args:
            optimizer: Keras optimizer for the student weights
            metrics: Keras metrics for evaluation
            student_loss_fn: Loss function of difference between student
                predictions and ground-truth
            distillation_loss_fn: Loss function of difference between soft
                student predictions and soft teacher predictions
            alpha: weight to student_loss_fn and 1-alpha to distillation_loss_fn
            temperature: Temperature for softening probability distributions.
                Larger temperature gives softer distributions.
        """
        super().compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def compute_loss(
        self, x=None, y=None, y_pred=None, sample_weight=None, allow_empty=False
    ):
        teacher_pred = self.teacher(x, training=False)
        student_loss = self.student_loss_fn(y, y_pred)
        distillation_loss = self.distillation_loss_fn(
                tf.nn.softmax(teacher_pred / self.temperature, axis=1),
                tf.nn.softmax(teacher_pred / self.temperature, axis=1),
            )

        loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss
        return loss

    def call(self, x):
        return self.student(x)


In [26]:
# Initialize and compile distiller
distiller = Distiller(student=student, teacher=model)
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    student_loss_fn=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    distillation_loss_fn=keras.losses.KLDivergence(),
    alpha=0.1,
    temperature=10,
)

# Distill teacher to student
distiller.fit(train_ds, epochs=3)

# Evaluate student on test dataset
distiller.evaluate(test_ds)

Epoch 1/3
Epoch 2/3
Epoch 3/3


0.5833333134651184

In [27]:
import sys
model_size_ratio = sys.getsizeof(model)/sys.getsizeof(student)
print("model_size_ratio : ",model_size_ratio)

model_size_ratio :  1.0


In [30]:
model_parameter_ratio = 12294/146006
model_parameter_ratio

0.08420201909510568

In [31]:
x = np.random.randint(0,255,(150,150,3))
x = np.expand_dims(x, axis=0)


In [32]:
import time
start = time.time()
result = distiller.predict(x)
print("time taken by student model:",time.time()-start)

time taken by student model: 0.17504167556762695


In [18]:
import time
start = time.time()
result = model.predict(x)
print("time taken by master model:",time.time()-start)

time taken by master model: 4.19510555267334
