In [None]:
# load keras with pytorch backend
import os
os.environ["KERAS_BACKEND"] = "torch"
import torch
import keras
from keras.models import load_model
from pyprojroot import here

model_keras = load_model(here('models/sign_language_mnist/CNN.h5'))

"""
keras.Sequential(
  [
    layers.Input(shape=(28, 28, 1)),
    layers.Rescaling(1.0 / 255),
    layers.Conv2D(2**5, (3, 3), activation="relu", padding="same"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(2**6, (3, 3), activation="relu", padding="same"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(2**7, (3, 3), activation="relu", padding="same"),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(2**7, activation="relu"),
    layers.Dense(2**6, activation="relu"),
    layers.Dense(26, activation="softmax"),
  ]
)
"""

model_keras.summary()

"""
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ rescaling_2 (Rescaling)         │ (None, 28, 28, 1)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_6 (Conv2D)               │ (None, 28, 28, 32)     │           320 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_6 (MaxPooling2D)  │ (None, 14, 14, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_7 (Conv2D)               │ (None, 14, 14, 64)     │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_7 (MaxPooling2D)  │ (None, 7, 7, 64)       │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_8 (Conv2D)               │ (None, 7, 7, 128)      │        73,856 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_8 (MaxPooling2D)  │ (None, 3, 3, 128)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten_2 (Flatten)             │ (None, 1152)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_6 (Dense)                 │ (None, 128)            │       147,584 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_7 (Dense)                 │ (None, 64)             │         8,256 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_8 (Dense)                 │ (None, 26)             │         1,690 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
"""



In [33]:
import torchvision
import torch.nn as nn
# reconstruct the model class in torch and transfer the weights
class CNN(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv = nn.Sequential(
      nn.Conv2d(1, 32, kernel_size=3, padding=1),  # (28, 28, 1) → (28, 28, 32)
      nn.ReLU(),
      nn.MaxPool2d(2),  # → (14, 14, 32)

      nn.Conv2d(32, 64, kernel_size=3, padding=1),  # → (14, 14, 64)
      nn.ReLU(),
      nn.MaxPool2d(2),  # → (7, 7, 64)

      nn.Conv2d(64, 128, kernel_size=3, padding=1),  # → (7, 7, 128)
      nn.ReLU(),
      nn.MaxPool2d(2),  # → (3, 3, 128)
    )
    self.flatten = nn.Flatten()
    self.dense = nn.Sequential(
      nn.Linear(128 * 3 * 3, 128),
      nn.ReLU(),
      nn.Linear(128, 64),
      nn.ReLU(),
      nn.Linear(64, 26),
    )
  
  def forward(self, x):
    x = self.conv(x)
    x = self.flatten(x)
    x = self.dense(x)
    return x

# create the model and transfer the weights
model_torch = CNN()
""" model.layers
[<Rescaling name=rescaling_2, built=True>,
 <Conv2D name=conv2d_6, built=True>,
 <MaxPooling2D name=max_pooling2d_6, built=True>,
 <Conv2D name=conv2d_7, built=True>,
 <MaxPooling2D name=max_pooling2d_7, built=True>,
 <Conv2D name=conv2d_8, built=True>,
 <MaxPooling2D name=max_pooling2d_8, built=True>,
 <Flatten name=flatten_2, built=True>,
 <Dense name=dense_6, built=True>,
 <Dense name=dense_7, built=True>,
 <Dense name=dense_8, built=True>]
"""
# transfer weights from keras model to torch model (without for loop)
model_torch.conv[0].weight.data = torch.from_numpy(model_keras.layers[1].get_weights()[0].transpose(3, 2, 0, 1))
model_torch.conv[0].bias.data = torch.from_numpy(model_keras.layers[1].get_weights()[1])
model_torch.conv[3].weight.data = torch.from_numpy(model_keras.layers[3].get_weights()[0].transpose(3, 2, 0, 1))
model_torch.conv[3].bias.data = torch.from_numpy(model_keras.layers[3].get_weights()[1])
model_torch.conv[6].weight.data = torch.from_numpy(model_keras.layers[5].get_weights()[0].transpose(3, 2, 0, 1))
model_torch.conv[6].bias.data = torch.from_numpy(model_keras.layers[5].get_weights()[1])
model_torch.dense[0].weight.data = torch.from_numpy(model_keras.layers[8].get_weights()[0].transpose(1, 0))
model_torch.dense[0].bias.data = torch.from_numpy(model_keras.layers[8].get_weights()[1])
model_torch.dense[2].weight.data = torch.from_numpy(model_keras.layers[9].get_weights()[0].transpose(1, 0))
model_torch.dense[2].bias.data = torch.from_numpy(model_keras.layers[9].get_weights()[1])
model_torch.dense[4].weight.data = torch.from_numpy(model_keras.layers[10].get_weights()[0].transpose(1, 0))
model_torch.dense[4].bias.data = torch.from_numpy(model_keras.layers[10].get_weights()[1])
# check the model
print(model_torch)

CNN(
  (conv): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dense): Sequential(
    (0): Linear(in_features=1152, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=26, bias=True)
  )
)


In [34]:
import pandas as pd
import numpy as np
import kagglehub

# Load test data
os.environ["KAGGLEHUB_CACHE"] = str(here("data"))
path = kagglehub.dataset_download("datamunge/sign-language-mnist")
test_pd = pd.read_csv(os.path.join(path, "sign_mnist_test/sign_mnist_test.csv"))
test_data = test_pd.drop(columns=["label"]).to_numpy().reshape(-1, 28, 28, 1)

# Take a small sample for testing
sample_data = test_data[:5]

# Get predictions from Keras model
keras_pred = model_keras.predict(sample_data, verbose=0)

# Get predictions from PyTorch model
model_torch.eval()
with torch.no_grad():
    # Fix: Convert properly to PyTorch format
    # Remove the preprocessing from forward pass and do it manually
    torch_input = sample_data.squeeze(-1).astype(np.float32) / 255.0  # Normalize manually
    torch_input = torch.from_numpy(torch_input).unsqueeze(1)  # Add channel dimension: (N, 1, H, W)
    
    # Get raw logits (before softmax) to match Keras output
    torch_logits = model_torch.conv(torch_input)
    torch_logits = model_torch.flatten(torch_logits)
    torch_logits = model_torch.dense(torch_logits)
    torch_pred = torch.softmax(torch_logits, dim=1).numpy()

# Compare predictions
print("Max difference in predictions:", np.max(np.abs(keras_pred - torch_pred)))
print("Are predictions close?", np.allclose(keras_pred, torch_pred, atol=1e-5))
print("\nFirst sample predictions:")
print("Keras:", keras_pred[0][:5])
print("PyTorch:", torch_pred[0][:5])

Max difference in predictions: 0.9999347
Are predictions close? False

First sample predictions:
Keras: [5.4543357e-27 1.5692821e-20 1.4949109e-09 6.3104268e-15 8.5250658e-16]
PyTorch: [6.7521780e-11 9.5426440e-03 1.8989425e-07 8.0588194e-05 1.4928431e-05]
