## CNN model for MNIST dataset

### Prepare Models

In [22]:
import numpy as np
import torch
import tensorflow as tf

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [23]:
import numpy as np
import torch

# Load TensorFlow MNIST data
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images_tf = train_images / 255.0
test_images_tf = test_images / 255.0
train_images_tf = train_images_tf.reshape(train_images.shape[0], 28, 28, 1)
test_images_tf = test_images_tf.reshape(test_images.shape[0], 28, 28, 1)

train_images_tf_14 = tf.image.resize(train_images_tf, [14, 14]).numpy()
test_images_tf_14 = tf.image.resize(test_images_tf, [14, 14]).numpy()


In [24]:
# Convert to PyTorch format [batch_size, channels, height, width]
train_images_pt = torch.tensor(train_images_tf).permute(0, 3, 1, 2).float()
test_images_pt = torch.tensor(test_images_tf).permute(0, 3, 1, 2).float()


train_images_pt_14 =  torch.tensor(test_images_tf_14).permute(0, 3, 1, 2).float()
test_images_pt_14 =  torch.tensor(test_images_tf_14).permute(0, 3, 1, 2).float()

## 5_11_80_10 CNN Model for 14x14 input and 3x3 kernel

In [25]:
layers = [14, 5, 11, 80, 10, 3]

# Define the LeNet model in TensorFlow
model_tf = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(5, 3, activation='relu', input_shape=(layers[0], layers[0], 1)),
    tf.keras.layers.AvgPool2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(11, 3, activation='relu'),
    tf.keras.layers.AvgPool2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(80, activation = 'relu'),
    tf.keras.layers.Dense(10)  # Assuming 10 classes
])

# Compile the model
model_tf.compile(optimizer='adam',
                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                 metrics=['accuracy'])

model_tf.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 12, 12, 5)         50        
                                                                 
 average_pooling2d_4 (Avera  (None, 6, 6, 5)           0         
 gePooling2D)                                                    
                                                                 
 conv2d_5 (Conv2D)           (None, 4, 4, 11)          506       
                                                                 
 average_pooling2d_5 (Avera  (None, 2, 2, 11)          0         
 gePooling2D)                                                    
                                                                 
 flatten_2 (Flatten)         (None, 44)                0         
                                                                 
 dense_6 (Dense)             (None, 80)               

In [27]:
# Train the model
history = model_tf.fit(train_images_tf_14, train_labels, epochs=1, batch_size=32, validation_split=0.1)



In [28]:
# Evaluate the model
test_loss, test_acc = model_tf.evaluate(test_images_tf_14, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)

313/313 - 0s - loss: 0.1611 - accuracy: 0.9487 - 317ms/epoch - 1ms/step

Test accuracy: 0.9487000107765198


### Convert it to Pytorch

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F

layers = [14, 5, 11, 80, 10, 3]
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Convolutional encoder
        self.conv1 = nn.Conv2d(1, layers[1], layers[-1]) 
        self.conv2 = nn.Conv2d(layers[1], layers[2], layers[-1]) 

        # Fully connected layers / Dense block
        self.fc1 = nn.Linear(11 * 2 * 2, layers[3]) # 256 * 120
        self.fc2 = nn.Linear(layers[3], layers[4])

    def forward(self, x):
        # Convolutional block
        x = F.avg_pool2d(F.relu(self.conv1(x)), (2, 2)) # Convolution -> Sigmoid -> Avg Pool
        x = F.avg_pool2d(F.relu(self.conv2(x)), (2, 2)) # Convolution -> Sigmoid -> Avg Pool

        # TODO: figure out the resize, currently work on batch_size = 1
        batch_size = x.size(0)
        x = x.reshape(x.size(0),layers[2],-1)  # 16 output channels
        x = np.transpose(x, (0,2,1)).reshape(batch_size,-1)
        #x = x.reshape(batch_size,-1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # No activation function here, will use CrossEntropyLoss later
        return x

model_pt = Net()

In [30]:
# Transfer weights for the first Conv2D layer from model_tf to model_pt
weights, biases = model_tf.layers[0].get_weights()
model_pt.conv1.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (3, 2, 0, 1))))
model_pt.conv1.bias = nn.Parameter(torch.from_numpy(biases))

# Transfer weights for the second Conv2D layer from model_tf to model_pt
weights, biases = model_tf.layers[2].get_weights()
model_pt.conv2.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (3, 2, 0, 1))))
model_pt.conv2.bias = nn.Parameter(torch.from_numpy(biases))

# Transfer weights for the first dense layer (fc1) from model_tf to model_pt
weights, biases = model_tf.layers[5].get_weights()
model_pt.fc1.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (1, 0))))
model_pt.fc1.bias = nn.Parameter(torch.from_numpy(biases))

# Transfer weights for the second dense layer (fc2) from model_tf to model_pt
weights, biases = model_tf.layers[6].get_weights()
model_pt.fc2.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (1, 0))))
model_pt.fc2.bias = nn.Parameter(torch.from_numpy(biases))

In [31]:
# Select the image for TensorFlow and PyTorch
controlled_input_tf = test_images_tf_14[189:190]  # Reshape to (1, 784) for DNN
controlled_input_pt = test_images_pt_14[189:190]

# Test TensorFlow Model
output_tf = model_tf.predict(controlled_input_tf) 
print("TensorFlow Basic Model Output:", output_tf)

# Test PyTorch Model
model_pt.eval()  # Set PyTorch model to evaluation mode
with torch.no_grad():
    output_pt = model_pt(controlled_input_pt)
print("PyTorch Basic Model Output:", output_pt)

TensorFlow Basic Model Output: [[-4.233818    3.8901105  -1.0666226   0.47903582 -0.4173532   0.30218112
  -2.0122645  -1.617805    0.09114771 -1.5917833 ]]
PyTorch Basic Model Output: tensor([[-4.2338,  3.8901, -1.0666,  0.4790, -0.4174,  0.3022, -2.0123, -1.6178,
          0.0911, -1.5918]])


In [32]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Assuming test_labels are already loaded
test_dataset = TensorDataset(test_images_pt_14, torch.tensor(test_labels))
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

def evaluate_model(model, data_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in data_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

# Evaluate the model on the test set
accuracy = evaluate_model(model_pt, test_loader)
print(f'Accuracy of the model on the test images: {accuracy:.2f}%')

Accuracy of the model on the test images: 94.87%


In [33]:
def get_predictions_tf(model, test_images, batch_size=256):
    predictions = []
    for i in range(0, len(test_images), batch_size):
        batch = test_images[i:i+batch_size]
        pred = model.predict(batch)
        predictions.extend(np.argmax(pred, axis=1))
    return predictions

def get_predictions_pt(model, test_images, batch_size=256):
    model.eval()
    predictions = []
    with torch.no_grad():
        for i in range(0, len(test_images), batch_size):
            batch = test_images[i:i+batch_size]
            pred = model(batch)
            predictions.extend(torch.argmax(pred, axis=1).tolist())
    return predictions

In [34]:
# Generate predictions
predictions_tf = get_predictions_tf(model_tf, test_images_tf_14)
predictions_pt = get_predictions_pt(model_pt, test_images_pt_14)

# Compare predictions
mismatches = sum(p1 != p2 for p1, p2 in zip(predictions_tf, predictions_pt))
print(f"Number of mismatches: {mismatches} out of {len(test_images)} samples")

Number of mismatches: 0 out of 10000 samples


### Save Model

In [50]:
layers

[14, 5, 11, 80, 10, 3]

In [35]:
import os
# Tensorflow
arch_folder = "./input-conv2d-conv2d-dense-dense/"
os.makedirs(arch_folder, exist_ok=True)

model_name = "_".join([str(x) for x in layers])
model_tf.save(arch_folder + model_name + '.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(model_tf)
tflite_model = converter.convert()

with open(arch_folder + model_name + '.tflite', 'wb') as f:
    f.write(tflite_model)


  saving_api.save_model(


INFO:tensorflow:Assets written to: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpg6tl7vgj/assets


INFO:tensorflow:Assets written to: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpg6tl7vgj/assets
2024-07-31 19:49:35.184374: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-07-31 19:49:35.184549: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-07-31 19:49:35.188108: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpg6tl7vgj
2024-07-31 19:49:35.189756: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-07-31 19:49:35.189770: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpg6tl7vgj
2024-07-31 19:49:35.195667: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-07-31 19:49:35.197090: I tensorflow/cc/saved_model/load

In [36]:
# Save entire model
torch.save(model_pt, arch_folder + model_name + ".pt")
# Save only the state_dict
torch.save(model_pt.state_dict(), arch_folder + model_name + ".pth")
with torch.no_grad():
    torch.onnx.export(model_pt, controlled_input_pt, arch_folder + model_name + ".onnx")

## 784_6_16_10 Conv2d-Conv2D-Dense

### Tensorflow Model

In [37]:
# Define the LeNet model in TensorFlow
model_tf = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(6, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.AvgPool2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(16, kernel_size=(5, 5), activation='relu'),
    tf.keras.layers.AvgPool2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10)  # Assuming 10 classes
])

# Compile the model
model_tf.compile(optimizer='adam',
                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                 metrics=['accuracy'])

# Train the model
history = model_tf.fit(train_images, train_labels, epochs=1, batch_size=32, validation_split=0.1)




In [38]:
model_tf.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 24, 24, 6)         156       
                                                                 
 average_pooling2d_6 (Avera  (None, 12, 12, 6)         0         
 gePooling2D)                                                    
                                                                 
 conv2d_7 (Conv2D)           (None, 8, 8, 16)          2416      
                                                                 
 average_pooling2d_7 (Avera  (None, 4, 4, 16)          0         
 gePooling2D)                                                    
                                                                 
 flatten_3 (Flatten)         (None, 256)               0         
                                                                 
 dense_8 (Dense)             (None, 10)               

In [39]:
# Evaluate the model
test_loss, test_acc = model_tf.evaluate(test_images, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)

313/313 - 0s - loss: 0.0906 - accuracy: 0.9728 - 391ms/epoch - 1ms/step

Test accuracy: 0.9728000164031982


### Convert to Pytorch

In [40]:
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Convolutional encoder
        self.conv1 = nn.Conv2d(1, 6, 5)  # 1 input channel, 6 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5) # 6 input channels, 16 output channels, 5x5 kernel

        # Fully connected layers / Dense block
        self.fc1 = nn.Linear(16 * 4 * 4, 10) # 256 * 120

    def forward(self, x):
        # Convolutional block
        x = F.avg_pool2d(F.relu(self.conv1(x)), (2, 2)) # Convolution -> Sigmoid -> Avg Pool
        x = F.avg_pool2d(F.relu(self.conv2(x)), (2, 2)) # Convolution -> Sigmoid -> Avg Pool

        # TODO: figure out the resize, currently work on batch_size = 1
        batch_size = x.size(0)
        x = x.reshape(x.size(0),16,-1)  # 16 output channels
        x = np.transpose(x, (0,2,1)).reshape(batch_size,-1)
        #x = x.reshape(batch_size,-1)

        # Fully connected layers
        x = self.fc1(x)  # No activation function here, will use CrossEntropyLoss later
        return x
    

model_pt = Net()

In [41]:
# Transfer weights for the first Conv2D layer from model_tf to model_pt
weights, biases = model_tf.layers[0].get_weights()
model_pt.conv1.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (3, 2, 0, 1))))
model_pt.conv1.bias = nn.Parameter(torch.from_numpy(biases))

# Transfer weights for the second Conv2D layer from model_tf to model_pt
weights, biases = model_tf.layers[2].get_weights()
model_pt.conv2.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (3, 2, 0, 1))))
model_pt.conv2.bias = nn.Parameter(torch.from_numpy(biases))

# Transfer weights for the first dense layer (fc1) from model_tf to model_pt
weights, biases = model_tf.layers[5].get_weights()
model_pt.fc1.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (1, 0))))
model_pt.fc1.bias = nn.Parameter(torch.from_numpy(biases))


In [48]:

# Select the image for TensorFlow
controlled_input_tf = test_images[36][np.newaxis, ]  # No reshape needed as it's already in (28, 28, 1) format
controlled_input_tf = np.expand_dims(controlled_input_tf, axis=-1)
# controlled_input_tf = np.expand_dims(test_images[36], axis=0)
print(controlled_input_tf.shape)
controlled_input_pt = torch.tensor(controlled_input_tf).float().permute(0, 3, 1, 2)

(1, 28, 28, 1)


In [49]:
# Test PyTorch Basic Model
model_pt.eval()  # Set PyTorch model to evaluation mode
with torch.no_grad():
    output_pt = model_pt(controlled_input_pt)

output_tf = model_tf.predict(controlled_input_tf) 
print("TF Basic Model Output:", output_tf)
print("PT Basic Model Output:", output_pt.cpu().numpy())

TF Basic Model Output: [[ -6.5935187   -4.563655     3.4562306    0.18523064 -15.529033
  -11.604303   -18.729162     8.666261    -5.6039987   -0.50239336]]
PT Basic Model Output: [[ -6.5935173   -4.5636544    3.4562306    0.18523028 -15.529034
  -11.604305   -18.729158     8.666257    -5.6039977   -0.502393  ]]


In [51]:
test_images.shape

(10000, 28, 28)

In [53]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Assuming the TensorFlow MNIST data has already been loaded
# Convert test_images to PyTorch tensor and permute
test_images_pt = torch.tensor(test_images).unsqueeze(-1).permute(0, 3, 1, 2).float()

# Assuming test_labels are already loaded
test_dataset = TensorDataset(test_images_pt, torch.tensor(test_labels))
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

def evaluate_model(model, data_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in data_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

# Evaluate the model on the test set
accuracy = evaluate_model(model_pt, test_loader)
print(f'Accuracy of the model on the test images: {accuracy:.2f}%')

Accuracy of the model on the test images: 97.28%


In [54]:
def get_predictions_tf(model, test_images, batch_size=256):
    predictions = []
    for i in range(0, len(test_images), batch_size):
        batch = test_images[i:i+batch_size]
        pred = model.predict(batch)
        predictions.extend(np.argmax(pred, axis=1))
    return predictions

def get_predictions_pt(model, test_images, batch_size=256):
    model.eval()
    predictions = []
    with torch.no_grad():
        for i in range(0, len(test_images), batch_size):
            batch = test_images[i:i+batch_size]
            pred = model(batch)
            predictions.extend(torch.argmax(pred, axis=1).tolist())
    return predictions

In [57]:
# Generate predictions
# add channel dimension to test_images
predictions_tf = get_predictions_tf(model_tf, test_images.reshape(-1, 28, 28, 1))
predictions_pt = get_predictions_pt(model_pt, test_images_pt)

# Compare predictions
mismatches = sum(p1 != p2 for p1, p2 in zip(predictions_tf, predictions_pt))
print(f"Number of mismatches: {mismatches} out of {len(test_images)} samples")


Number of mismatches: 0 out of 10000 samples


#### Save Models for 784_6_16_10

In [61]:
import os
# Tensorflow
arch_folder = "./input-conv2d-conv2d-dense/"
os.makedirs(arch_folder, exist_ok=True)

model_name = "784_6_16_10"
model_tf.save(arch_folder + model_name + '.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(model_tf)
tflite_model = converter.convert()

with open(arch_folder + model_name + '.tflite', 'wb') as f:
    f.write(tflite_model)


  saving_api.save_model(


INFO:tensorflow:Assets written to: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpbykbu5ca/assets


INFO:tensorflow:Assets written to: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpbykbu5ca/assets
2024-07-31 19:56:58.740321: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-07-31 19:56:58.740479: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-07-31 19:56:58.741760: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpbykbu5ca
2024-07-31 19:56:58.742828: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-07-31 19:56:58.742836: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpbykbu5ca
2024-07-31 19:56:58.747665: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-07-31 19:56:58.795887: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

In [62]:
# pytorch

# Save entire model
torch.save(model_pt, arch_folder + model_name + ".pt")
# Save only the state_dict
torch.save(model_pt.state_dict(), arch_folder + model_name + ".pth")
with torch.no_grad():
    torch.onnx.export(model_pt, controlled_input_pt, arch_folder + model_name + ".onnx")

## 784_6_16_120_84_10 Conv2d-Conv2D-Dense-Dense-Dense

### Prepare Model

In [63]:
# Define the LeNet model in TensorFlow
model_tf = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(6, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.AvgPool2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(16, kernel_size=(5, 5), activation='relu'),
    tf.keras.layers.AvgPool2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(120, activation='relu'),
    tf.keras.layers.Dense(84, activation='relu'),
    tf.keras.layers.Dense(10)  # Assuming 10 classes
])

# Compile the model
model_tf.compile(optimizer='adam',
                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                 metrics=['accuracy'])

# Train the model
history = model_tf.fit(train_images, train_labels, epochs=1, batch_size=256, validation_split=0.1)




In [64]:
# Evaluate the model
test_loss, test_acc = model_tf.evaluate(test_images, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)

313/313 - 1s - loss: 0.0956 - accuracy: 0.9707 - 504ms/epoch - 2ms/step

Test accuracy: 0.9707000255584717


In [65]:
model_tf.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 24, 24, 6)         156       
                                                                 
 average_pooling2d_8 (Avera  (None, 12, 12, 6)         0         
 gePooling2D)                                                    
                                                                 
 conv2d_9 (Conv2D)           (None, 8, 8, 16)          2416      
                                                                 
 average_pooling2d_9 (Avera  (None, 4, 4, 16)          0         
 gePooling2D)                                                    
                                                                 
 flatten_4 (Flatten)         (None, 256)               0         
                                                                 
 dense_9 (Dense)             (None, 120)              

### Convert to Pytorch Model

In [66]:
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Convolutional encoder
        self.conv1 = nn.Conv2d(1, 6, 5)  # 1 input channel, 6 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5) # 6 input channels, 16 output channels, 5x5 kernel

        # Fully connected layers / Dense block
        self.fc1 = nn.Linear(16 *4 * 4,120) # 256 * 120
        self.fc2 = nn.Linear(120, 84)         # 120 inputs, 84 outputs
        self.fc3 = nn.Linear(84, 10)          # 84 inputs, 10 outputs (number of classes)

    def forward(self, x):
        # Convolutional block
        x = F.avg_pool2d(F.relu(self.conv1(x)), (2, 2)) # Convolution -> Sigmoid -> Avg Pool
        x = F.avg_pool2d(F.relu(self.conv2(x)), (2, 2)) # Convolution -> Sigmoid -> Avg Pool

        # TODO: figure out the resize, currently work on batch_size = 1
        batch_size = x.size(0)
        x = x.reshape(x.size(0),16,-1)  # 16 output channels
        x = np.transpose(x, (0,2,1)).reshape(batch_size,-1)
        #x = x.reshape(batch_size,-1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # No activation function here, will use CrossEntropyLoss later
        return x
    

model_pt = Net()

In [67]:
# Transfer weights for the first Conv2D layer from model_tf to model_pt
weights, biases = model_tf.layers[0].get_weights()
model_pt.conv1.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (3, 2, 0, 1))))
model_pt.conv1.bias = nn.Parameter(torch.from_numpy(biases))

# Transfer weights for the second Conv2D layer from model_tf to model_pt
weights, biases = model_tf.layers[2].get_weights()
model_pt.conv2.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (3, 2, 0, 1))))
model_pt.conv2.bias = nn.Parameter(torch.from_numpy(biases))

# Transfer weights for the first dense layer (fc1) from model_tf to model_pt
weights, biases = model_tf.layers[5].get_weights()
model_pt.fc1.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (1, 0))))
model_pt.fc1.bias = nn.Parameter(torch.from_numpy(biases))

# Transfer weights for the second dense layer (fc2) from model_tf to model_pt
weights, biases = model_tf.layers[6].get_weights()
model_pt.fc2.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (1, 0))))
model_pt.fc2.bias = nn.Parameter(torch.from_numpy(biases))

# Transfer weights for the third dense layer (fc3) from model_tf to model_pt
weights, biases = model_tf.layers[7].get_weights()
model_pt.fc3.weight = nn.Parameter(torch.from_numpy(np.transpose(weights, (1, 0))))
model_pt.fc3.bias = nn.Parameter(torch.from_numpy(biases))

In [71]:
# Select the image for TensorFlow
controlled_input_tf = test_images[36][np.newaxis, ]  # No reshape needed as it's already in (28, 28, 1) format
controlled_input_tf = np.expand_dims(controlled_input_tf, axis=-1)
print(controlled_input_tf.shape)

# controlled_input_pt = torch.tensor(controlled_input_tf).float().permute(0, 3, 1, 2)
controlled_input_pt = torch.tensor(controlled_input_tf).float().permute(0, 3, 2, 1)
print(controlled_input_pt.shape)

(1, 28, 28, 1)
torch.Size([1, 1, 28, 28])


In [72]:
controlled_input_pt.shape

torch.Size([1, 1, 28, 28])

In [73]:
# Test PyTorch Basic Model
model_pt.eval()  # Set PyTorch model to evaluation mode
with torch.no_grad():
    output_pt = model_pt(controlled_input_pt)

output_tf = model_tf.predict(controlled_input_tf) 
print("TF Basic Model Output:", output_tf)
print("PT Basic Model Output:", output_pt.cpu().numpy())

TF Basic Model Output: [[-0.7721681  -3.6355472   8.680198    5.5364165  -4.2197127  -2.9623535
  -9.193442   12.8122635   0.41937524 -0.39005888]]
PT Basic Model Output: [[ 3.7063713  -0.5799919   1.5933546  -5.1392236   6.149688   -3.4218006
  10.209146   -0.81681114  2.215624    0.5799337 ]]


In [75]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Assuming the TensorFlow MNIST data has already been loaded
# Convert test_images to PyTorch tensor and permute
test_images_pt = torch.tensor(test_images).unsqueeze(-1).permute(0, 3, 1, 2).float()

# Assuming test_labels are already loaded
test_dataset = TensorDataset(test_images_pt, torch.tensor(test_labels))
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

def evaluate_model(model, data_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in data_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

# Evaluate the model on the test set
accuracy = evaluate_model(model_pt, test_loader)
print(f'Accuracy of the model on the test images: {accuracy:.2f}%')

Accuracy of the model on the test images: 97.07%


In [76]:
# Generate predictions
predictions_tf = get_predictions_tf(model_tf, test_images.reshape(-1, 28, 28, 1))
predictions_pt = get_predictions_pt(model_pt, test_images_pt)

# Compare predictions
mismatches = sum(p1 != p2 for p1, p2 in zip(predictions_tf, predictions_pt))
print(f"Number of mismatches: {mismatches} out of {len(test_images)} samples")


Number of mismatches: 0 out of 10000 samples


#### Save Model for 784_6_16_120_84_10

In [77]:
import os
# Tensorflow
arch_folder = "./input-conv2d-conv2d-dense-dense-dense/"
os.makedirs(arch_folder, exist_ok=True)

model_name = "784_6_16_120_84_10"
model_tf.save(arch_folder + model_name + '.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(model_tf)
tflite_model = converter.convert()

with open(arch_folder + model_name + '.tflite', 'wb') as f:
    f.write(tflite_model)


  saving_api.save_model(


INFO:tensorflow:Assets written to: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpl_lds1gm/assets


INFO:tensorflow:Assets written to: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpl_lds1gm/assets
2024-07-31 20:00:37.841647: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-07-31 20:00:37.841817: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-07-31 20:00:37.843963: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpl_lds1gm
2024-07-31 20:00:37.845394: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-07-31 20:00:37.845405: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmpl_lds1gm
2024-07-31 20:00:37.850567: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-07-31 20:00:37.914869: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

In [78]:
# pytorch

# Save entire model
torch.save(model_pt, arch_folder + model_name + ".pt")
# Save only the state_dict
torch.save(model_pt.state_dict(), arch_folder + model_name + ".pth")
with torch.no_grad():
    torch.onnx.export(model_pt, controlled_input_pt, arch_folder + model_name + ".onnx")

In [86]:
model_name

'784_6_16_120_84_10'

In [85]:
# make the 0.5 percent of the model weights zero and save
import copy
model_tf_pruned = copy.deepcopy(model_tf)

# Prune the model
pruning_percentage = 0.5
for layer in model_tf_pruned.layers:
    if isinstance(layer, tf.keras.layers.Conv2D) or isinstance(layer, tf.keras.layers.Dense):
        weights = layer.get_weights()
        weights[0] = weights[0] * (weights[0] > np.percentile(np.abs(weights[0]), pruning_percentage))
        layer.set_weights(weights)

# Save the pruned model
model_tf_pruned.save(arch_folder + model_name + '_pruned.h5')
# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model_tf_pruned)
tflite_model = converter.convert()
# with open(arch_folder + model_name + '_pruned.tflite', 'wb') as f:
# model_in_path = model_path+arch_folder+args.model + '_sparsity_'+str(args.sparsity) + '.tflite'
# with open(arch_folder + model_name + '_sparsity_' + str(pruning_percentage) + '.tflite', 'wb') as f:
#     f.write(tflite_model)
if pruning_percentage > 0:
    with open(arch_folder + '28_6_16_120_84_10_5_sparsity_0.5.tflite', 'wb') as f:
        f.write(tflite_model)
else:
    with open(arch_folder + '28_6_16_120_84_10_5.tflite', 'wb') as f:
        f.write(tflite_model)

  saving_api.save_model(


INFO:tensorflow:Assets written to: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmprnxiuodj/assets


INFO:tensorflow:Assets written to: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmprnxiuodj/assets
2024-07-31 20:15:11.917841: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-07-31 20:15:11.917855: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-07-31 20:15:11.918081: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmprnxiuodj
2024-07-31 20:15:11.919092: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-07-31 20:15:11.919100: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/mh/wt0s4pwn1w52cl7dn_5mj92m0000gp/T/tmprnxiuodj
2024-07-31 20:15:11.922356: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-07-31 20:15:11.951504: I tensorflow/cc/saved_model/loader.cc:217] Running initialization