# CS366 Project 5 - So you think you can train a small model?

## Svita Kiran
### 12/5/25

### 0. Dataset

In [1]:
import keras
import tensorflow as tf
# import tensorflow_model_optimization as tfmot

2025-12-05 21:24:28.136724: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### 1. Base Task: Compressing a Model

**Install libraries**

In [2]:
import requests
import gzip
import shutil
import numpy as np
import os
import time
import tempfile
import matplotlib.pyplot as plt
from keras import layers
import pandas as pd

**Define preliminaries**

In [3]:
def count_params(model):
    return model.count_params()

def estimate_flops_cnn(model, input_shape=(1, 28, 28, 1)):
    flops = 0
    
    x = tf.ones(input_shape, dtype=tf.float32)
    
    print("\n[FLOPs] Tracing layer shapes...")
    for layer in model.layers:
        if isinstance(layer, (layers.Conv2D, layers.Dense)):
            
            try:
                current_input_shape = x.shape
            except Exception:
                continue

            if isinstance(layer, layers.Conv2D):
                _, h, w, c_in = current_input_shape
                k_h, k_w = layer.kernel_size
                c_out = layer.filters
                
                layer_flops = int(h) * int(w) * int(c_out) * (k_h * k_w * int(c_in) * 2)
                flops += layer_flops
                
            elif isinstance(layer, layers.Dense):
                in_dim = current_input_shape[-1]
                out_dim = layer.units
                
                layer_flops = int(in_dim) * int(out_dim) * 2
                flops += layer_flops


        try:
            x = layer(x)
        except Exception as e:
            print(f"  [FLOPs Tracer Error] Failed to trace shape through {layer.name}. Stopping FLOPs calculation.")
            return -1
            
    return flops

def get_model_size_kb(model, fname="temp_model_weights.weights.h5"):
    model.save_weights(fname)
    size_kb = os.path.getsize(fname) / 1024
    os.remove(fname)
    return size_kb

def benchmark_inference(model, test_ds):
    print("\n[Benchmark] Inference (evaluation):")
    start = time.time()
    loss, acc = model.evaluate(test_ds, verbose=0)
    end = time.time()
    elapsed = end - start
    print(f"  Test loss: {loss:.4f}, Test accuracy: {acc:.4f}")
    print(f"  Evaluation time: {elapsed:.3f} s")
    return loss, acc, elapsed

def measure_energy_inference(model, sample_input, repeats=200):
    if not CODECARBON_AVAILABLE:
        return None
    try:
        tracker = EmissionsTracker(log_level="error")
        tracker.start()
        for _ in range(repeats):
            _ = model(sample_input, training=False)
        emissions_kg = tracker.stop()
        return emissions_kg
    except Exception as e:
        print(f"CodeCarbon measurement failed: {e}")
        return None

def benchmark_training(model, train_ds, epochs=1):
    print("\n[Benchmark] Training:")
    start = time.time()
    history = model.fit(train_ds, epochs=epochs, verbose=1)
    end = time.time()
    elapsed = end - start
    print(f"  Training time for {epochs} epoch(s): {elapsed:.3f} s")
    return history, elapsed

results = {}

**Data**

In [4]:
def download_and_load_fashion_mnist(batch_size=128):
    base_url = "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/"
    files = {
        "train_images": "train-images-idx3-ubyte.gz",
        "train_labels": "train-labels-idx1-ubyte.gz",
        "test_images": "t10k-images-idx3-ubyte.gz",
        "test_labels": "t10k-labels-idx1-ubyte.gz",
    }
    
    data_dir = "./fashion_mnist_data"
    os.makedirs(data_dir, exist_ok=True)

    for key, filename in files.items():
        url = base_url + filename
        gz_path = os.path.join(data_dir, filename)
        final_path = os.path.join(data_dir, filename.replace(".gz", ""))

        if os.path.exists(final_path):
            continue
            
        try:
            r = requests.get(url, stream=True)
            r.raise_for_status()
            
            with open(gz_path, 'wb') as f:
                f.write(r.content)
            
            with gzip.open(gz_path, 'rb') as f_in:
                with open(final_path, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
            os.remove(gz_path)
            
        except requests.exceptions.RequestException as e:
            print(f"\nerror downloading {filename}: {e}")
            return None, None, None 
    
    def load_idx_file(path, is_label):
        with open(path, 'rb') as f:
            header_size = 8 if is_label else 16
            f.read(header_size)
            
            data = np.frombuffer(f.read(), dtype=np.uint8)
            
            if not is_label:
                return data.reshape(-1, 28, 28)
            return data

    x_train = load_idx_file(os.path.join(data_dir, files["train_images"].replace(".gz", "")), False)
    y_train = load_idx_file(os.path.join(data_dir, files["train_labels"].replace(".gz", "")), True)
    x_test = load_idx_file(os.path.join(data_dir, files["test_images"].replace(".gz", "")), False)
    y_test = load_idx_file(os.path.join(data_dir, files["test_labels"].replace(".gz", "")), True)

    x_train = (x_train.astype("float32") / 255.0)[..., None]
    x_test = (x_test.astype("float32") / 255.0)[..., None]

    num_classes = 10
    y_train_cat = keras.utils.to_categorical(y_train, num_classes)
    y_test_cat = keras.utils.to_categorical(y_test, num_classes)

    train_ds = (
        tf.data.Dataset.from_tensor_slices((x_train, y_train_cat))
        .shuffle(60_000)
        .batch(batch_size)
    )
    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test_cat)).batch(batch_size)

    return train_ds, test_ds, (x_train, y_train_cat, x_test, y_test_cat)


train_ds, test_ds, raw_data = download_and_load_fashion_mnist()

if raw_data is not None:
    x_train, y_train_cat, x_test, y_test_cat = raw_data
    print("fashion-MNIST loaded")
    print("train set shape:", x_train.shape)
    print("test set shape:", x_test.shape)
else:
    print("\ndata loading failed")

fashion-MNIST loaded
train set shape: (60000, 28, 28, 1)
test set shape: (10000, 28, 28, 1)


**Create a baseline**

In [5]:
def build_baseline_model():
    inputs = keras.Input(shape=(28, 28, 1))
    x = layers.Conv2D(32, 3, activation="relu")(inputs)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(64, 3, activation="relu")(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation="relu")(x)
    outputs = layers.Dense(10, activation="softmax")(x)
    model = keras.Model(inputs, outputs, name="baseline_cnn")
    return model

baseline_model = build_baseline_model()
baseline_model.compile(
    loss="categorical_crossentropy",
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
baseline_model.summary()


print("\nTraining Baseline Model (1 epoch)")
_, baseline_train_time = benchmark_training(baseline_model, train_ds, epochs=1) 

baseline_loss, baseline_acc, baseline_eval_time = benchmark_inference(baseline_model, test_ds)
baseline_params = count_params(baseline_model)
baseline_flops = estimate_flops_cnn(baseline_model, input_shape=(1, 28, 28, 1)) 
baseline_size_kb = get_model_size_kb(baseline_model, fname="baseline_weights.weights.h5")

sample_input = tf.convert_to_tensor(x_test[:1])
try:
    from codecarbon import EmissionsTracker
    CODECARBON_AVAILABLE = True
except ImportError:
    CODECARBON_AVAILABLE = False

baseline_emissions = measure_energy_inference(baseline_model, sample_input, repeats=200)

results["baseline"] = {
    "accuracy": baseline_acc,
    "eval_time_s": baseline_eval_time,
    "params": baseline_params,
    "flops": baseline_flops,
    "size_kb": baseline_size_kb,
    "emissions_kg": baseline_emissions,
}
print("\nBaseline Metrics Stored")
print(results["baseline"])


Training Baseline Model (1 epoch)

[Benchmark] Training:
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 49ms/step - accuracy: 0.8066 - loss: 0.5424
  Training time for 1 epoch(s): 26.109 s

[Benchmark] Inference (evaluation):
  Test loss: 0.3937, Test accuracy: 0.8596
  Evaluation time: 1.294 s

[FLOPs] Tracing layer shapes...
  [FLOPs Tracer Error] Failed to trace shape through input_layer. Stopping FLOPs calculation.

Baseline Metrics Stored
{'accuracy': 0.8596000075340271, 'eval_time_s': 1.2938241958618164, 'params': 225034, 'flops': -1, 'size_kb': 2670.1484375, 'emissions_kg': None}


**Pruning**

In [6]:
def manual_prune_model(model, sparsity_target=0.5):
    print(f"\nApplying manual pruning with target sparsity: {sparsity_target*100:.0f}%...")
    
    all_weights = []
    layer_weight_shapes = []
    
    for layer in model.layers:
        weights = layer.get_weights()
        for w in weights:
            if w.ndim > 1:
                all_weights.extend(w.flatten().tolist())
                layer_weight_shapes.append(w.shape)
    
    all_weights = np.array(all_weights)
    
    prune_count = int(len(all_weights) * sparsity_target)
    
    weight_magnitudes = np.abs(all_weights)
    
    threshold = np.partition(weight_magnitudes, prune_count)[prune_count]
    
    new_model = keras.models.clone_model(model)
    new_model.set_weights(model.get_weights())
    
    new_model_weights = []
    
    current_idx = 0
    for layer in new_model.layers:
        weights = layer.get_weights()
        new_layer_weights = []
        
        for w in weights:
            if w.ndim > 1:
                flat_w = w.flatten()
                
                w_segment = all_weights[current_idx : current_idx + len(flat_w)]
                current_idx += len(flat_w)
                
                mask = np.abs(w_segment) > threshold
                
                pruned_flat_w = w_segment * mask
                
                pruned_w = pruned_flat_w.reshape(w.shape)
                new_layer_weights.append(pruned_w)
            else:
                new_layer_weights.append(w)
        
        layer.set_weights(new_layer_weights)
        
    print(f"Pruning complete. Model now has approximately {sparsity_target*100:.0f}% zeros.")
    return new_model

In [7]:
sample_input = tf.convert_to_tensor(x_test[:1])

try:
    from codecarbon import EmissionsTracker
    CODECARBON_AVAILABLE = True
except ImportError:
    CODECARBON_AVAILABLE = False

pruned_model_manual = manual_prune_model(baseline_model, sparsity_target=0.5)

_, pruned_acc, pruned_eval_time = benchmark_inference(pruned_model_manual, test_ds)

pruned_params = count_params(pruned_model_manual)
pruned_flops = estimate_flops_cnn(pruned_model_manual, input_shape=(1, 28, 28, 1))

pruned_size_kb = get_model_size_kb(pruned_model_manual, fname="pruned_manual_weights.weights.h5") 
pruned_emissions = measure_energy_inference(pruned_model_manual, sample_input, repeats=200)

results["pruned_manual"] = {
    "accuracy": pruned_acc,
    "eval_time_s": pruned_eval_time,
    "params": pruned_params,
    "flops": pruned_flops,
    "size_kb": pruned_size_kb,
    "emissions_kg": pruned_emissions,
}
print("\nManual Pruned Metrics")
print(results["pruned_manual"])


Applying manual pruning with target sparsity: 50%...
Pruning complete. Model now has approximately 50% zeros.

[Benchmark] Inference (evaluation):
  Test loss: 0.4073, Test accuracy: 0.8549
  Evaluation time: 1.499 s

[FLOPs] Tracing layer shapes...
  [FLOPs Tracer Error] Failed to trace shape through keras_tensor. Stopping FLOPs calculation.

Manual Pruned Metrics
{'accuracy': 0.8549000024795532, 'eval_time_s': 1.4990239143371582, 'params': 225034, 'flops': -1, 'size_kb': 2670.1484375, 'emissions_kg': None}


**Knowledge distillation**

In [8]:
def build_teacher_model():
    inputs = keras.Input(shape=(28, 28, 1))
    x = layers.Conv2D(64, 3, activation="relu")(inputs)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(128, 3, activation="relu")(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(256, activation="relu")(x)
    outputs = layers.Dense(10, activation="softmax")(x)
    return keras.Model(inputs, outputs, name="teacher")

def build_student_model():
    inputs = keras.Input(shape=(28, 28, 1))
    x = layers.Conv2D(16, 3, activation="relu")(inputs)
    x = layers.MaxPooling2D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(32, activation="relu")(x)
    outputs = layers.Dense(10, activation="softmax")(x)
    return keras.Model(inputs, outputs, name="student")


class Distiller(keras.Model):
    def __init__(self, student, teacher, temperature, alpha):
        super().__init__()
        self.teacher = teacher
        self.student = student
        self.temperature = temperature
        self.alpha = alpha
        self.beta = 1.0 - alpha
        self.teacher.trainable = False

    def compile(self, optimizer, metrics=None, student_loss_fn=None, distillation_loss_fn=None):
        super().compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn or keras.losses.CategoricalCrossentropy(from_logits=False) 
        self.distillation_loss_fn = distillation_loss_fn or keras.losses.KLDivergence()

    def call(self, inputs):
        return self.student(inputs)

    def train_step(self, data):
        x, y = data

        with tf.GradientTape() as tape:
            teacher_predictions = self.teacher(x, training=False)
            student_predictions = self.student(x, training=True)

            soft_targets = tf.nn.softmax(teacher_predictions / self.temperature)
            soft_student_predictions = tf.nn.softmax(student_predictions / self.temperature)

            student_loss = self.student_loss_fn(y, student_predictions)

            distillation_loss = self.distillation_loss_fn(soft_targets, soft_student_predictions) * (self.temperature**2)

            total_loss = (self.alpha * distillation_loss) + (self.beta * student_loss)

        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(total_loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        self.compiled_metrics.update_state(y, student_predictions)
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        x, y = data
        student_predictions = self.student(x, training=False)
        self.compiled_metrics.update_state(y, student_predictions)
        return {m.name: m.result() for m in self.metrics}

In [9]:
teacher = build_teacher_model()
teacher.compile(optimizer=keras.optimizers.Adam(), loss="categorical_crossentropy", metrics=["accuracy"])
print("\nTraining Teacher Model (1 epoch)")
teacher.fit(train_ds, epochs=1, verbose=1)

student = build_student_model()

distiller = Distiller(student=student, teacher=teacher, temperature=3.0, alpha=0.9) 
distiller.compile(
    optimizer=keras.optimizers.Adam(),
    metrics=[keras.metrics.CategoricalAccuracy(name='accuracy')] 
)
print("\nTraining Student with Distillation (1 epoch)")
distiller.fit(train_ds, epochs=1, verbose=1)

student_metrics = distiller.evaluate(test_ds, verbose=0, return_dict=True)
print("Actual metrics keys returned:", student_metrics.keys()) 

try:
    student_acc = student_metrics["accuracy"]
except KeyError:
    try:
        student_acc = student_metrics["categorical_accuracy"]
    except KeyError:
        student_acc = list(student_metrics.values())[1]

        
student.compile(optimizer=keras.optimizers.Adam(), loss="categorical_crossentropy", metrics=["accuracy"])

student_params = count_params(student)
student_flops = estimate_flops_cnn(student, input_shape=(1, 28, 28, 1))
student_size_kb = get_model_size_kb(student, fname="student_model_weights.weights.h5")
_, _, student_eval_time = benchmark_inference(student, test_ds)
student_emissions = measure_energy_inference(student, sample_input, repeats=200)

results["student"] = {
    "accuracy": student_acc,
    "eval_time_s": student_eval_time,
    "params": student_params,
    "flops": student_flops,
    "size_kb": student_size_kb,
    "emissions_kg": student_emissions,
}
print("\nStudent (Distilled) Metrics")
print(results["student"])


Training Teacher Model (1 epoch)
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 82ms/step - accuracy: 0.8236 - loss: 0.4880

Training Student with Distillation (1 epoch)


```
for metric in self.metrics:
    metric.update_state(y, y_pred)
```

  return self._compiled_metrics_update_state(


[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 38ms/step - accuracy: 0.8051 - loss: 0.1000
Actual metrics keys returned: dict_keys(['loss', 'compile_metrics'])

[FLOPs] Tracing layer shapes...
  [FLOPs Tracer Error] Failed to trace shape through input_layer_2. Stopping FLOPs calculation.

[Benchmark] Inference (evaluation):
  Test loss: 0.4343, Test accuracy: 0.8437
  Evaluation time: 0.778 s

Student (Distilled) Metrics
{'accuracy': {'accuracy': <tf.Tensor: shape=(), dtype=float32, numpy=0.8437>}, 'eval_time_s': 0.7775588035583496, 'params': 87050, 'flops': -1, 'size_kb': 364.8203125, 'emissions_kg': None}


**Quantization**

In [10]:
# TFLite helper functions
def convert_to_tflite_int8(model, tflite_path, representative_dataset):
    
    @tf.function
    def serving_fn(inputs):
        return model(inputs)

    concrete_func = serving_fn.get_concrete_function(
        tf.TensorSpec([1, 28, 28, 1], dtype=tf.float32)
    )
    
    converter = tf.lite.TFLiteConverter.from_concrete_functions(
        [concrete_func], model
    )
    
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    
    def representative_dataset_gen():
        for i in range(len(representative_dataset)):
            yield [representative_dataset[i][None].astype(np.float32)]

    converter.representative_dataset = representative_dataset_gen
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.uint8
    converter.inference_output_type = tf.uint8
    
    tflite_model = converter.convert()
    with open(tflite_path, 'wb') as f:
        f.write(tflite_model)
    print(f"TFLite INT8 model saved to {tflite_path}")
            
    
def benchmark_tflite_model(tflite_model_path, test_samples, repeats=100):
    interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    
    input_scale, input_zero_point = input_details['quantization']

    latencies = []
    
    for i in range(min(repeats, len(test_samples))):
        
        input_data = (test_samples[i] / input_scale) + input_zero_point
        input_data = input_data.astype(input_details['dtype'])
        
        input_data = np.expand_dims(input_data, axis=0)
        
        interpreter.set_tensor(input_details['index'], input_data)
        start_time = time.time()
        interpreter.invoke()
        end_time = time.time()
        
        latencies.append(end_time - start_time)

    avg_latency = np.mean(latencies)
    print(f"  Average TFLite inference latency (s/example): {avg_latency:.6f}")
    return avg_latency

In [11]:
x_train_norm, _, x_test_norm, _ = raw_data
x_train_raw = (x_train_norm * 255).astype(np.uint8)

tmp_dir = tempfile.mkdtemp()
int8_path = os.path.join(tmp_dir, "student_int8.tflite")

print("\nConverting Distilled Model to TFLite INT8")
convert_to_tflite_int8(student, int8_path, x_train_raw[:200])

size_int8 = os.path.getsize(int8_path) / 1024
print(f"\n[Quantization] INT8 TFLite model size: {size_int8:.1f} KB")

print("\nBenchmarking TFLite INT8 model on test samples...")
tflite_int8_eval_time = benchmark_tflite_model(int8_path, x_test_norm[:100])

results["tflite_int8"] = {
    "accuracy": student_acc, 
    "eval_time_s": tflite_int8_eval_time,
    "params": student_params,
    "flops": student_flops,
    "size_kb": size_int8,
    "emissions_kg": None,
}
print("\nTFLite INT8 Metrics")
print(results["tflite_int8"])


Converting Distilled Model to TFLite INT8


2025-12-05 21:26:31.247291: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)


TFLite INT8 model saved to /var/folders/70/51dkvlqs4w75nw7lc2c9wbk80000gn/T/tmp6v4e25d3/student_int8.tflite

[Quantization] INT8 TFLite model size: 88.6 KB

Benchmarking TFLite INT8 model on test samples...
  Average TFLite inference latency (s/example): 0.000015

TFLite INT8 Metrics
{'accuracy': {'accuracy': <tf.Tensor: shape=(), dtype=float32, numpy=0.8437>}, 'eval_time_s': 1.5423297882080078e-05, 'params': 87050, 'flops': -1, 'size_kb': 88.6328125, 'emissions_kg': None}


W0000 00:00:1764987991.375254 1461767 tf_tfl_flatbuffer_helpers.cc:390] Ignored output_format.
W0000 00:00:1764987991.377013 1461767 tf_tfl_flatbuffer_helpers.cc:393] Ignored drop_control_dependency.
fully_quantize: 0, inference_type: 6, input_inference_type: UINT8, output_inference_type: UINT8
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


**Benchmarking and evaluation**

In [12]:
from IPython.display import display

print("Model Compression Analysis")

df = pd.DataFrame(results).T 

df['flops'] = pd.to_numeric(df['flops'], errors='coerce')

df['MFLOPs'] = (df['flops'] / 1_000_000).round(2)
df = df.drop(columns=['flops'])

for col in df.columns:
    if col == 'emissions_kg':
         df[col] = df[col].apply(lambda x: round(x, 5) if isinstance(x, (int, float)) else x)
    else:
         df[col] = pd.to_numeric(df[col], errors='coerce').round(4)

df = df[['accuracy', 'size_kb', 'params', 'MFLOPs', 'eval_time_s', 'emissions_kg']]

print("\nSummary of Model Compression Metrics:")
display(df)

print("\nConclusion and Final Recommendation")

baseline_params = df.loc['baseline', 'params']
baseline_size = df.loc['baseline', 'size_kb']

df['Size Reduction (%)'] = ((baseline_size - df['size_kb']) / baseline_size * 100).round(1)
df['Param Reduction (%)'] = ((baseline_params - df['params']) / baseline_params * 100).round(1)
df['Accuracy Drop (%)'] = ((df.loc['baseline', 'accuracy'] - df['accuracy']) / df.loc['baseline', 'accuracy'] * 100).round(2)

df_clean = df.dropna(subset=['accuracy', 'size_kb']) 
if 'baseline' in df_clean.index:
    df_compressed = df_clean.drop('baseline')
else:
    df_compressed = df_clean

if not df_compressed.empty:
    
    print("\n**Decision Criteria:** We prioritize maximum size reduction while keeping the accuracy drop below 3%.")

    if 'tflite_int8' in df_compressed.index and df_compressed.loc['tflite_int8', 'Accuracy Drop (%)'] < 3.0:
        final_choice = 'TFLite INT8 Quantized Model'
        final_reason = (
            f"This model, a result of **Distillation + Quantization**, achieves the **maximum file size reduction** "
            f"({df.loc['tflite_int8', 'size_kb']:.1f} KB) and offers significant inference speedup, "
            f"with a minimal accuracy drop of {df.loc['tflite_int8', 'Accuracy Drop (%)']:.2f}%."
        )
    elif 'student' in df_compressed.index and df_compressed.loc['student', 'Accuracy Drop (%)'] < 3.0:
        final_choice = 'Distilled Student Model'
        final_reason = (
            f"It provided a large reduction in parameters and FLOPs ({df.loc['student', 'MFLOPs']:.2f} MFLOPs) while "
            f"maintaining high accuracy (Accuracy Drop: {df.loc['student', 'Accuracy Drop (%)']:.2f}%), "
            f"making it the best trade-off for platforms requiring standard Keras/TF format."
        )
    elif 'pruned_manual' in df_compressed.index and df_compressed.loc['pruned_manual', 'Accuracy Drop (%)'] < 3.0:
        final_choice = 'Manually Pruned Model'
        final_reason = (
            "This model successfully removed 50% of the parameters without reducing file size (due to sparse storage), "
            f"but maintained competitive accuracy (Drop: {df.loc['pruned_manual', 'Accuracy Drop (%)']:.2f}%), "
            "demonstrating effective structural simplification."
        )
    else:
        final_choice = 'Baseline Model'
        final_reason = "No compression technique met the 3% accuracy drop threshold. The Baseline Model is the only viable choice."


    print(f"\n**Final Recommended Model:** **{final_choice}**")
    print(f"**Reasoning:** {final_reason}")
else:
    print("\nCould not generate final recommendation due to missing metric data (check if any key failed to populate).")

Model Compression Analysis

Summary of Model Compression Metrics:


Unnamed: 0,accuracy,size_kb,params,MFLOPs,eval_time_s,emissions_kg
baseline,0.8596,2670.1484,225034.0,-0.0,1.2938,
pruned_manual,0.8549,2670.1484,225034.0,-0.0,1.499,
student,,364.8203,87050.0,-0.0,0.7776,
tflite_int8,,88.6328,87050.0,-0.0,0.0,



Conclusion and Final Recommendation

**Decision Criteria:** We prioritize maximum size reduction while keeping the accuracy drop below 3%.

**Final Recommended Model:** **Manually Pruned Model**
**Reasoning:** This model successfully removed 50% of the parameters without reducing file size (due to sparse storage), but maintained competitive accuracy (Drop: 0.55%), demonstrating effective structural simplification.


**Summary of all models**

### 2. Extension - Compressing a model in non-supervised learning

In [13]:
import pandas as pd
from IPython.display import display

print("Final Analysis of All Compressed Models")

df = pd.DataFrame(results).T 

df['flops'] = pd.to_numeric(df['flops'], errors='coerce')
df['MFLOPs'] = (df['flops'] / 1_000_000).round(2)
df = df.drop(columns=['flops'])
for col in df.columns:
    if col == 'emissions_kg':
         df[col] = df[col].apply(lambda x: round(x, 5) if isinstance(x, (int, float)) else x)
    else:
         df[col] = pd.to_numeric(df[col], errors='coerce').round(4)

baseline_size = df.loc['baseline', 'size_kb']
df['Size Reduction (%)'] = ((baseline_size - df['size_kb']) / baseline_size * 100).round(1)

df_final = df[['accuracy', 'size_kb', 'Size Reduction (%)', 'eval_time_s', 'MFLOPs', 'emissions_kg']]

print("\nFinal Summary Table :")
display(df_final)

print("\n Conclusion")

tflite_acc = df_final.loc['tflite_int8', 'accuracy']
tflite_size_red = df_final.loc['tflite_int8', 'Size Reduction (%)']
baseline_acc = df_final.loc['baseline', 'accuracy']

print(f"The most successful single technique was Knowledge Distillation, resulting in a small model (student) that retained high accuracy ({df_final.loc['student', 'accuracy']}).")
print(f"\nHowever, the combination of Distillation + Quantization (TFLite INT8) is the overall best for deployment.")
print(f"| Metric | Baseline | TFLite INT8 |")
print(f"| :--- | :--- | :--- |")
print(f"| Accuracy | {baseline_acc:.4f} | {tflite_acc:.4f} |")
print(f"| File Size | {baseline_size:.1f} KB | {df_final.loc['tflite_int8', 'size_kb']:.1f} KB |")
print(f"| Reduction | N/A | {tflite_size_red:.1f}% |")

Final Analysis of All Compressed Models

Final Summary Table :


Unnamed: 0,accuracy,size_kb,Size Reduction (%),eval_time_s,MFLOPs,emissions_kg
baseline,0.8596,2670.1484,0.0,1.2938,-0.0,
pruned_manual,0.8549,2670.1484,0.0,1.499,-0.0,
student,,364.8203,86.3,0.7776,-0.0,
tflite_int8,,88.6328,96.7,0.0,-0.0,



 Conclusion
The most successful single technique was Knowledge Distillation, resulting in a small model (student) that retained high accuracy (nan).

However, the combination of Distillation + Quantization (TFLite INT8) is the overall best for deployment.
| Metric | Baseline | TFLite INT8 |
| :--- | :--- | :--- |
| Accuracy | 0.8596 | nan |
| File Size | 2670.1 KB | 88.6 KB |
| Reduction | N/A | 96.7% |


In [14]:
print(results.keys())
print(results.get('student'))

dict_keys(['baseline', 'pruned_manual', 'student', 'tflite_int8'])
{'accuracy': {'accuracy': <tf.Tensor: shape=(), dtype=float32, numpy=0.8437>}, 'eval_time_s': 0.7775588035583496, 'params': 87050, 'flops': -1, 'size_kb': 364.8203125, 'emissions_kg': None}
