In [1]:
import tensorflow as tf

print("TF version:", tf.__version__)
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("Available GPUs:", tf.config.list_physical_devices('GPU'))

2025-07-27 20:29:14.994340: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TF version: 2.13.1
Built with CUDA: True
Available GPUs: []


2025-07-27 20:29:19.034598: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_SYSTEM_DRIVER_MISMATCH: system has unsupported display driver / cuda driver combination
2025-07-27 20:29:19.034630: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: tesla
2025-07-27 20:29:19.034642: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: tesla
2025-07-27 20:29:19.034734: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: 545.23.8
2025-07-27 20:29:19.034759: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 575.64.3
2025-07-27 20:29:19.034768: E tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:312] kernel version 575.64.3 does not match DSO version 545.23.8 -- cannot find working devices in this configuration


In [2]:
# CNN Classifier Comparison and Knowledge Distillation - CIFAR-100 (10 classes)

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models, applications, optimizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import cifar100
from sklearn.metrics import classification_report

# Step 1: Load CIFAR-100 and prepare 10-class subset
(x_train_all, y_train_all), (x_test_all, y_test_all) = cifar100.load_data(label_mode='fine')
x_train_all, x_test_all = x_train_all / 255.0, x_test_all / 255.0
selected_classes = list(range(10))

# Filter and remap classes
def filter_10_classes(x, y):
    mask = np.isin(y, selected_classes).flatten()
    x, y = x[mask], y[mask]
    y = np.array([selected_classes.index(label[0]) for label in y])
    return x, y

x_train, y_train = filter_10_classes(x_train_all, y_train_all)
x_test, y_test = filter_10_classes(x_test_all, y_test_all)
y_train_cat, y_test_cat = to_categorical(y_train), to_categorical(y_test)

# Step 2: Build small CNN from scratch
def build_small_cnn():
    model = models.Sequential([
        layers.Conv2D(32, (3,3), activation='relu', input_shape=(32,32,3)),
        layers.MaxPooling2D(2,2),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D(2,2),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

small_cnn = build_small_cnn()
history_small = small_cnn.fit(x_train, y_train_cat, epochs=10, batch_size=64,
                               validation_data=(x_test, y_test_cat))

# Step 3: Fine-tune two big pretrained models

def build_finetuned_model(base_model_fn, input_shape=(32, 32, 3)):
    base_model = base_model_fn(weights='imagenet', include_top=False, input_shape=(224,224,3))
    for layer in base_model.layers[:-2]:  # freeze all but last 2
        layer.trainable = False
    model = models.Sequential([
        layers.Resizing(224,224),
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

vgg_model = build_finetuned_model(applications.VGG16)
eff_model = build_finetuned_model(applications.EfficientNetB0)

history_vgg = vgg_model.fit(x_train, y_train_cat, epochs=5, batch_size=64, validation_data=(x_test, y_test_cat))
history_eff = eff_model.fit(x_train, y_train_cat, epochs=5, batch_size=64, validation_data=(x_test, y_test_cat))

# Step 4: Knowledge Distillation
class Distiller(tf.keras.Model):
    def __init__(self, student, teacher):
        super().__init__()
        self.teacher = teacher
        self.student = student

    def compile(self, optimizer, metrics, student_loss_fn, distill_loss_fn, alpha=0.1, temperature=3):
        super().compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distill_loss_fn = distill_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def train_step(self, data):
        x, y = data
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            student_predictions = self.student(x, training=True)
            student_loss = self.student_loss_fn(y, student_predictions)
            distillation_loss = self.distill_loss_fn(
                tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
                tf.nn.softmax(student_predictions / self.temperature, axis=1),
            )
            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, student_predictions)
        return {m.name: m.result() for m in self.metrics}

student_scratch = build_small_cnn()
distiller = Distiller(student=student_scratch, teacher=vgg_model)
distiller.compile(
    optimizer=optimizers.Adam(),
    metrics=['accuracy'],
    student_loss_fn=losses.CategoricalCrossentropy(),
    distill_loss_fn=losses.KLDivergence(),
    alpha=0.2, temperature=5
)
history_distill = distiller.fit(x_train, y_train_cat, epochs=10, batch_size=64, validation_data=(x_test, y_test_cat))

# Step 5: Evaluate all classifiers
models_eval = {
    "Small CNN": small_cnn,
    "VGG16 Fine-tuned": vgg_model,
    "EfficientNetB0 Fine-tuned": eff_model,
    "Distilled CNN": student_scratch
}

results = {}
for name, model in models_eval.items():
    loss, acc = model.evaluate(x_test, y_test_cat, verbose=0)
    results[name] = acc * 100

# Plot results
plt.figure(figsize=(10,6))
plt.bar(results.keys(), results.values(), color='skyblue')
plt.ylabel("Accuracy (%)")
plt.title("Comparison of 10-class Classifiers")
plt.ylim(0, 100)
plt.grid(axis='y')
plt.show()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/5


2025-07-27 20:29:42.817817: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 822083584 exceeds 10% of free system memory.
2025-07-27 20:29:42.972016: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 822083584 exceeds 10% of free system memory.
2025-07-27 20:29:43.804539: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 205520896 exceeds 10% of free system memory.
2025-07-27 20:29:43.929584: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 411041792 exceeds 10% of free system memory.
2025-07-27 20:29:44.305655: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 411041792 exceeds 10% of free system memory.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


NameError: name 'losses' is not defined