In [27]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os
import shutil

# Step 1: Load the MNIST dataset (handwritten digits 0-9)
mnist = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize the data (scale pixel values between 0 and 1)
x_train, x_test = x_train / 255.0, x_test / 255.0

# Step 2: Define a simple neural network model
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),  # Flatten 28x28 images into a 1D array
    keras.layers.Dense(128, activation='relu'),  # Dense hidden layer with ReLU activation
    keras.layers.Dropout(0.2),  # Dropout to reduce overfitting (randomly drops 20% of neurons)
    keras.layers.Dense(10)  # Output layer (10 classes for digits 0-9)
])

# Step 3: Compile the model
model.compile(optimizer='adam',  # Adam optimizer (efficient for deep learning)
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # Cross-entropy loss for classification
              metrics=['accuracy'])  # Track accuracy during training

# Step 4: Train the model for 1 epoch
model.fit(x_train, y_train, epochs=1, validation_data=(x_test, y_test))

# Step 5: Save the trained model in TensorFlow's "SavedModel" format
model.save('original_model')

# Step 6: Zip the saved model to measure its actual size
shutil.make_archive('original_model', 'zip', 'original_model')

# Get the size of the zipped model
original_size = os.path.getsize('original_model.zip')

# Step 7: Define a function to generate representative data for quantization
# This helps the model adapt to typical input patterns
def representative_data_gen():
    for i in range(100):  # Use the first 100 samples from the training set
        yield [x_train[i:i+1].astype(np.float32)]  # Return batches of 1 sample each

# Step 8: Convert the original model to TensorFlow Lite format (for mobile/embedded devices)
converter = tf.lite.TFLiteConverter.from_saved_model('original_model')

# Apply quantization (optimize model to reduce size and increase speed)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Use the representative dataset to guide the quantization process
converter.representative_dataset = representative_data_gen

# Force the model to use 8-bit integer operations (smallest size)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Convert the model and store it in memory
quantized_model = converter.convert()

# Step 9: Save the quantized model as a .tflite file
with open('quantized_model.tflite', 'wb') as f:
    f.write(quantized_model)

# Step 10: Measure the size of the quantized model
quantized_size = os.path.getsize('quantized_model.tflite')

# Step 11: Print and compare the sizes of the original and quantized models
print(f"Original model size (zipped): {original_size / 1024:.2f} KB")
print(f"Quantized model size: {quantized_size / 1024:.2f} KB")
print(f"Size reduction: {(1 - quantized_size / original_size) * 100:.2f}%")


Original model size (zipped): 1054.60 KB
Quantized model size: 102.17 KB
Size reduction: 90.31%
