In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import KLDivergence, SparseCategoricalCrossentropy
import numpy as np
import matplotlib.pyplot as plt

# Load and preprocess the CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize pixel values to be between 0 and 1
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Define class names
class_names = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck"
]

print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)

2025-07-28 10:17:26.664374: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-28 10:17:27.386252: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-28 10:17:27.389362: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


x_train shape: (50000, 32, 32, 3)
y_train shape: (50000, 1)


In [2]:
import numpy as np

# Create a new dataset with 2000 samples per class
num_classes = 10
samples_per_class = 2000

# Arrays to collect selected samples
x_train_reduced = []
y_train_reduced = []

# Track how many samples we've added per class
class_counts = {i: 0 for i in range(num_classes)}

# Iterate through the full training set
for img, label in zip(x_train, y_train):
    cls = label[0]
    if class_counts[cls] < samples_per_class:
        x_train_reduced.append(img)
        y_train_reduced.append(label)
        class_counts[cls] += 1
    if sum(class_counts.values()) == samples_per_class * num_classes:
        break

# Convert to numpy arrays
x_train_reduced = np.array(x_train_reduced)
y_train_reduced = np.array(y_train_reduced)

# Final shape confirmation
print("Reduced x_train shape:", x_train_reduced.shape)
print("Reduced y_train shape:", y_train_reduced.shape)

# Optional: check class distribution
(unique, counts) = np.unique(y_train_reduced, return_counts=True)
print("Class distribution:", dict(zip(unique, counts)))


Reduced x_train shape: (20000, 32, 32, 3)
Reduced y_train shape: (20000, 1)
Class distribution: {0: 2000, 1: 2000, 2: 2000, 3: 2000, 4: 2000, 5: 2000, 6: 2000, 7: 2000, 8: 2000, 9: 2000}


In [3]:
x_train = x_train_reduced
y_train = y_train_reduced

In [4]:
def build_small_cnn(input_shape, num_classes):
    model = keras.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

small_cnn = build_small_cnn(x_train.shape[1:], len(class_names))
small_cnn.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

print("Small CNN Summary:")
small_cnn.summary()

# Train the small CNN
history_small_cnn = small_cnn.fit(x_train, y_train, epochs=15,
                                  validation_data=(x_test, y_test),
                                  batch_size=64)

Small CNN Summary:
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 15, 15, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 13, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 6, 6, 64)          0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 4, 4, 128)         73856     
                                                                 
 flatten (Flatten)           (None, 2

2025-07-28 10:17:33.880129: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: tesla
2025-07-28 10:17:33.880164: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: tesla
2025-07-28 10:17:33.880315: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program
2025-07-28 10:17:33.880369: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 575.64.3


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [5]:
small_cnn_loss, small_cnn_accuracy = small_cnn.evaluate(x_test, y_test)
print(f"Small CNN Test Accuracy: {small_cnn_accuracy * 100:.2f}%")

Small CNN Test Accuracy: 64.95%


In [6]:
# Preprocess data for VGG16 (upsample images)
x_train_vgg = tf.image.resize(x_train, (48, 48))
x_test_vgg = tf.image.resize(x_test, (48, 48))

# Load VGG16 with pre-trained ImageNet weights, excluding the top classification layer
base_model_vgg = VGG16(weights='imagenet', include_top=False, input_shape=(48, 48, 3))

# Freeze the convolutional base
base_model_vgg.trainable = False

# Add a new classifier head
x = layers.Flatten()(base_model_vgg.output)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(len(class_names), activation='softmax')(x)

vgg16_finetuned = Model(inputs=base_model_vgg.input, outputs=outputs)

vgg16_finetuned.compile(optimizer=Adam(learning_rate=1e-4),
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'])

print("Fine-tuned VGG16 Summary:")
vgg16_finetuned.summary()

# Train the fine-tuned VGG16
history_vgg16 = vgg16_finetuned.fit(x_train_vgg, y_train, epochs=10,
                                    validation_data=(x_test_vgg, y_test),
                                    batch_size=64)

Fine-tuned VGG16 Summary:
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 48, 48, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 48, 48, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 48, 48, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 24, 24, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 24, 24, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 24, 24, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None,

In [7]:
vgg16_loss, vgg16_accuracy = vgg16_finetuned.evaluate(x_test_vgg, y_test)
print(f"Fine-tuned VGG16 Test Accuracy: {vgg16_accuracy * 100:.2f}%")

Fine-tuned VGG16 Test Accuracy: 56.91%


In [None]:
# Preprocess data for ResNet50
x_train_resnet = tf.image.resize(x_train, (224, 224)) # ResNet50 was trained on 224x224
x_test_resnet = tf.image.resize(x_test, (224, 224))

# Load ResNet50 with pre-trained ImageNet weights
base_model_resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model_resnet.trainable = False

# Add a new classifier head
x = layers.GlobalAveragePooling2D()(base_model_resnet.output)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(len(class_names), activation='softmax')(x)

resnet50_finetuned = Model(inputs=base_model_resnet.input, outputs=outputs)

resnet50_finetuned.compile(optimizer=Adam(learning_rate=1e-4),
                           loss='sparse_categorical_crossentropy',
                           metrics=['accuracy'])

print("Fine-tuned ResNet50 Summary:")
resnet50_finetuned.summary()

# Train the fine-tuned ResNet50
history_resnet50 = resnet50_finetuned.fit(x_train_resnet, y_train, epochs=10,
                                          validation_data=(x_test_resnet, y_test),
                                          batch_size=32) # Smaller batch size for larger images

2025-07-28 10:43:17.845505: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 12042240000 exceeds 10% of free system memory.


In [None]:
resnet50_loss, resnet50_accuracy = resnet50_finetuned.evaluate(x_test_resnet, y_test)
print(f"Fine-tuned ResNet50 Test Accuracy: {resnet50_accuracy * 100:.2f}%")

In [None]:
class Distiller(keras.Model):
    def __init__(self, student, teacher):
        super(Distiller, self).__init__()
        self.teacher = teacher
        self.student = student

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.1,
        temperature=3,
    ):
        super(Distiller, self).compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def train_step(self, data):
        x, y = data

        # The teacher model needs upscaled images
        teacher_predictions = self.teacher(tf.image.resize(x, (224, 224)), training=False)

        with tf.GradientTape() as tape:
            student_predictions = self.student(x, training=True)

            # Compute losses
            student_loss = self.student_loss_fn(y, student_predictions)
            distillation_loss = self.distillation_loss_fn(
                tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
                tf.nn.softmax(student_predictions / self.temperature, axis=1),
            )
            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        # Compute gradients
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, student_predictions)
        return {m.name: m.result() for m in self.metrics}

# Create a new instance of the small CNN as the student
student_model = build_small_cnn(x_train.shape[1:], len(class_names))

# Initialize and compile the distiller
distiller = Distiller(student=student_model, teacher=resnet50_finetuned)
distiller.compile(
    optimizer=Adam(),
    metrics=['accuracy'],
    student_loss_fn=SparseCategoricalCrossentropy(from_logits=False),
    distillation_loss_fn=KLDivergence(),
    alpha=0.1,
    temperature=10,
)

# Distill knowledge from teacher to student
history_distiller = distiller.fit(x_train, y_train, epochs=15,
                                  validation_data=(x_test, y_test),
                                  batch_size=64)

In [None]:
student_loss, student_accuracy = distiller.student.evaluate(x_test, y_test)
print(f"Distilled Student Model Test Accuracy: {student_accuracy * 100:.2f}%")

In [None]:
student_loss, student_accuracy = distiller.student.evaluate(x_test, y_test)
print(f"Distilled Student Model Test Accuracy: {student_accuracy * 100:.2f}%")

In [None]:
print(f"1. Small CNN (Baseline) Accuracy: {small_cnn_accuracy * 100:.2f}%")
print(f"2. Fine-tuned VGG16 Accuracy: {vgg16_accuracy * 100:.2f}%")
print(f"3. Fine-tuned ResNet50 Accuracy: {resnet50_accuracy * 100:.2f}%")
print(f"4. Distilled Small CNN (Student) Accuracy: {student_accuracy * 100:.2f}%")

# Plotting the results
models = ['Small CNN', 'Fine-tuned VGG16', 'Fine-tuned ResNet50', 'Distilled CNN']
accuracies = [small_cnn_accuracy, vgg16_accuracy, resnet50_accuracy, student_accuracy]

plt.figure(figsize=(10, 6))
plt.bar(models, accuracies, color=['blue', 'green', 'red', 'purple'])
plt.ylabel('Accuracy')
plt.title('Comparison of Classifier Performances')
plt.ylim([0, 1])
for i, acc in enumerate(accuracies):
    plt.text(i, acc + 0.01, f"{acc*100:.2f}%", ha='center')
plt.show()