In [2]:
import torch
import torch.nn as nn

class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The block emits ``expansion * planes`` channels. ``stride`` is applied by the
    3x3 convolution and, when a projection shortcut is built, by that projection.
    """

    # Output channels of the block are ``expansion`` times ``planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        """Create the main-path layers and the shortcut.

        Args:
            in_planes: Number of channels of the block input.
            planes: Width of the two inner convolutions.
            stride: Stride of the 3x3 convolution (and of the projection shortcut).
        """
        super().__init__()
        out_planes = self.expansion * planes

        # Main path. The convolutions carry no bias; each one feeds a BatchNorm.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Shortcut: an empty Sequential (identity) unless the resolution or the
        # channel count changes, in which case a 1x1 conv + BatchNorm projects x.
        projection = []
        if stride != 1 or in_planes != out_planes:
            projection = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Run the main path, add the shortcut branch, and apply the final ReLU."""
        y = self.conv1(x)
        y = torch.relu(self.bn1(y))
        y = self.conv2(y)
        y = torch.relu(self.bn2(y))
        y = self.bn3(self.conv3(y))
        y += self.shortcut(x)
        return torch.relu(y)

class ResNet(nn.Module):
    """ResNet backbone: a 7x7 stem, four residual stages, global pooling and a linear head."""

    def __init__(self, block, num_blocks, num_classes=1000):
        """Assemble the network.

        Args:
            block: Residual block class. It must expose an ``expansion`` attribute
                and accept ``(in_planes, planes, stride)``.
            num_blocks: Sequence with the number of blocks for each of the four stages.
            num_classes: Size of the final linear layer's output.
        """
        super().__init__()
        # Running channel count; ``_make_layer`` updates it after every block.
        self.in_planes = 64

        # Stem: strided 7x7 convolution, BatchNorm, then a strided 3x3 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; every stage after the first starts with a stride-2 block.
        first, second, third, fourth = num_blocks[0], num_blocks[1], num_blocks[2], num_blocks[3]
        self.layer1 = self._make_layer(block, 64, first, stride=1)
        self.layer2 = self._make_layer(block, 128, second, stride=2)
        self.layer3 = self._make_layer(block, 256, third, stride=2)
        self.layer4 = self._make_layer(block, 512, fourth, stride=2)

        # Head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the others use stride 1.

        ``self.in_planes`` is advanced to ``planes * block.expansion`` after each
        block so the next block (and the next stage) receives the right input width.
        """
        per_block_strides = [stride] + [1] * (num_blocks - 1)
        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the output of the final linear layer for an image batch ``x``."""
        x = self.maxpool(torch.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        pooled = torch.flatten(self.avgpool(x), 1)
        return self.fc(pooled)

def ResNet50():
    """Build a ResNet from Bottleneck blocks with 3, 4, 6 and 3 blocks in its four stages."""
    blocks_per_stage = [3, 4, 6, 3]
    return ResNet(Bottleneck, blocks_per_stage)

# Instantiate the PyTorch ResNet-50 (freshly constructed; no checkpoint is loaded here)
model = ResNet50()

# Count the trainable parameters only
n_parameters = sum(
    param.numel()
    for param in model.parameters()
    if param.requires_grad
)
print(f"Number of Params: {n_parameters / 1000000:.1f}M")

Number of Params: 25.6M


In [3]:
# Persist the PyTorch weights so a later cell can reload them for conversion
torch.save(obj=model.state_dict(), f='resnet50.pth')

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models

def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Bottleneck residual block whose shortcut is the unmodified input.

    Args:
        input_tensor: Keras tensor entering the block. It is added to the main
            path's output, so its shape must match that output.
        kernel_size: Kernel size of the middle convolution.
        filters: Three filter counts, one per main-path convolution.
        stage: Stage label used in the generated layer names.
        block: Block label (e.g. ``'b'``) used in the generated layer names.

    Returns:
        The block's output tensor (after the final ReLU).
    """
    filters1, filters2, filters3 = filters
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    # Match torch.nn.BatchNorm2d's default eps. Keras defaults to 1e-3, which
    # would make the layer normalise differently from the PyTorch model whose
    # statistics are copied into it.
    bn_epsilon = 1e-5

    x = layers.Conv2D(filters1, (1, 1), name=conv_name_base + '2a', use_bias=False)(input_tensor)
    x = layers.BatchNormalization(epsilon=bn_epsilon, name=bn_name_base + '2a')(x)
    x = layers.ReLU()(x)

    x = layers.Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b', use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=bn_epsilon, name=bn_name_base + '2b')(x)
    x = layers.ReLU()(x)

    x = layers.Conv2D(filters3, (1, 1), name=conv_name_base + '2c', use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=bn_epsilon, name=bn_name_base + '2c')(x)

    # Residual connection: the input itself is the shortcut.
    x = layers.add([x, input_tensor])
    x = layers.ReLU()(x)
    return x

def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """Bottleneck residual block with a projection (1x1 conv + BatchNorm) shortcut.

    Args:
        input_tensor: Keras tensor entering the block.
        kernel_size: Kernel size of the middle convolution.
        filters: Three filter counts, one per main-path convolution; the last one
            is also the width of the projection shortcut.
        stage: Stage label used in the generated layer names.
        block: Block label (e.g. ``'a'``) used in the generated layer names.
        strides: Strides of the first 1x1 convolution and of the shortcut convolution.

    Returns:
        The block's output tensor (after the final ReLU).

    NOTE(review): ``strides`` is applied by the first 1x1 convolution here, while
    the PyTorch ``Bottleneck`` in this notebook applies its stride in the 3x3
    convolution. With stride 2 the two blocks therefore compute different
    functions even when they hold identical weights. Aligning them would also
    need explicit zero padding before the 3x3 convolution, which adds layers and
    shifts the hard-coded layer indices used for weight mapping; left unchanged.
    """
    filters1, filters2, filters3 = filters
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    # Match torch.nn.BatchNorm2d's default eps. Keras defaults to 1e-3, which
    # would make the layer normalise differently from the PyTorch model whose
    # statistics are copied into it.
    bn_epsilon = 1e-5

    x = layers.Conv2D(filters1, (1, 1), strides=strides, name=conv_name_base + '2a', use_bias=False)(input_tensor)
    x = layers.BatchNormalization(epsilon=bn_epsilon, name=bn_name_base + '2a')(x)
    x = layers.ReLU()(x)

    x = layers.Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b', use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=bn_epsilon, name=bn_name_base + '2b')(x)
    x = layers.ReLU()(x)

    x = layers.Conv2D(filters3, (1, 1), name=conv_name_base + '2c', use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=bn_epsilon, name=bn_name_base + '2c')(x)

    # Projection shortcut so the residual matches the main path's shape.
    shortcut = layers.Conv2D(filters3, (1, 1), strides=strides, name=conv_name_base + '1', use_bias=False)(input_tensor)
    shortcut = layers.BatchNormalization(epsilon=bn_epsilon, name=bn_name_base + '1')(shortcut)

    x = layers.add([x, shortcut])
    x = layers.ReLU()(x)
    return x

def ResNet50_tf(input_shape=(224, 224, 3), num_classes=1000):
    """Build the Keras functional ResNet-50 used as the weight-conversion target.

    Args:
        input_shape: Shape of one input image, channels last.
        num_classes: Number of units in the final dense layer.

    Returns:
        A ``models.Model`` named ``'resnet50'``.

    NOTE(review): the head applies a softmax, whereas the PyTorch ``ResNet`` in
    this notebook returns the raw output of its linear layer; compare outputs
    accordingly.
    """
    img_input = layers.Input(shape=input_shape)

    # Stem. Explicit zero padding + default ('valid') convolution/pooling mirrors
    # the padding=3 / padding=1 arguments of the PyTorch stem.
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    x = layers.Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(x)
    # epsilon matches torch.nn.BatchNorm2d's default (Keras would use 1e-3).
    x = layers.BatchNormalization(epsilon=1e-5, name='bn_conv1')(x)
    x = layers.ReLU()(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    # (stage label, filters, number of blocks, strides of the stage's first block).
    # Each stage is one conv_block ('a') followed by identity blocks 'b', 'c', ...
    stage_specs = [
        (2, [64, 64, 256], 3, (1, 1)),
        (3, [128, 128, 512], 4, (2, 2)),
        (4, [256, 256, 1024], 6, (2, 2)),
        (5, [512, 512, 2048], 3, (2, 2)),
    ]
    for stage, filters, n_blocks, strides in stage_specs:
        x = conv_block(x, 3, filters, stage=stage, block='a', strides=strides)
        for letter in 'bcdef'[:n_blocks - 1]:
            x = identity_block(x, 3, filters, stage=stage, block=letter)

    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(num_classes, activation='softmax', name='fc1000')(x)

    model = models.Model(img_input, x, name='resnet50')
    return model

# Build the TensorFlow/Keras ResNet-50 that will receive the PyTorch weights
tf_model = ResNet50_tf()

# Report its size in millions of parameters
n_parameters = tf_model.count_params()
print(
    f"Number of Params: {n_parameters / 1000000:.1f}M"
)

2024-06-05 14:07:38.413941: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-05 14:07:38.414001: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-05 14:07:38.415403: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Number of Params: 25.6M


In [5]:
# Debug aid: uncomment to print each TensorFlow layer's index, name and weight shapes (useful for checking layer order)
# for i, layer in enumerate(tf_model.layers):
#     print(i, layer.name, [w.shape for w in layer.get_weights()])

In [6]:
pytorch_model = ResNet50()

# Reload the checkpoint from the same relative path it was saved to earlier in
# this notebook, rather than a Kaggle-specific absolute path that only resolves
# when the working directory is /kaggle/working.
pytorch_model.load_state_dict(torch.load('resnet50.pth'))
# Inference mode: BatchNorm layers use their running statistics.
pytorch_model.eval()

# Helper function to convert PyTorch tensors to NumPy arrays
def pt_to_np(tensor):
    """Return ``tensor`` as a NumPy array, detached from autograd and moved to the CPU."""
    cpu_tensor = tensor.detach().cpu()
    return cpu_tensor.numpy()

# Alternative: a None-tolerant variant that returns None when given None instead of raising
# def pt_to_np(tensor):
#     if tensor is None:
#         return None
#     return tensor.detach().cpu().numpy()

In [7]:
# Map PyTorch weights to TensorFlow model
# Stem first: the initial convolution, then its batch normalization.
# The permute reorders the PyTorch kernel axes (out, in, kh, kw) into the
# (kh, kw, in, out) order that the Keras Conv2D kernel uses.
stem_kernel = pytorch_model.conv1.weight.permute(2, 3, 1, 0)
tf_model.layers[2].set_weights([pt_to_np(stem_kernel)])

stem_bn = pytorch_model.bn1
tf_model.layers[3].set_weights([
    pt_to_np(stat)
    for stat in (stem_bn.weight, stem_bn.bias, stem_bn.running_mean, stem_bn.running_var)
])

# Function to map the weights of a block
def map_block(layer_indices, block, block_name):
    """Copy one PyTorch bottleneck's weights into the matching ``tf_model`` layers.

    The TensorFlow layers are resolved by the names assigned in
    ``identity_block`` / ``conv_block`` (``res<tag>_branch<x>`` and
    ``bn<tag>_branch<x>``) instead of by position. This also covers the
    projection shortcut (``..._branch1``), which the six-entry index lists never
    reference, and it no longer silently skips an index that points at a layer
    without weights.

    Args:
        layer_indices: Indices into ``tf_model.layers`` for this block. Only the
            first entry is used; it must point at the block's ``..._branch2a``
            Conv2D layer, from which the block's name tag is derived.
        block: ``list(bottleneck.children())``, i.e. conv1, bn1, conv2, bn2,
            conv3, bn3 and, as an optional seventh item, the ``shortcut``
            Sequential (empty for identity blocks).
        block_name: Label used in error messages.

    Raises:
        ValueError: If the first index does not point at a ``res*_branch2a``
            Conv2D layer, or (raised by ``get_layer``) if an expected layer
            name does not exist in ``tf_model``.
    """
    first_suffix = '_branch2a'
    anchor = tf_model.layers[layer_indices[0]]
    is_first_conv = (
        isinstance(anchor, layers.Conv2D)
        and anchor.name.startswith('res')
        and anchor.name.endswith(first_suffix)
    )
    if not is_first_conv:
        raise ValueError(
            f"{block_name}: layer {layer_indices[0]} ({anchor.name!r}) is not the "
            f"block's first convolution (expected a Conv2D named 'res*{first_suffix}')"
        )

    # e.g. 'res2a_branch2a' -> tag '2a'
    tag = anchor.name[len('res'):-len(first_suffix)]
    conv_name_base = 'res' + tag + '_branch'
    bn_name_base = 'bn' + tag + '_branch'

    # (TF name suffix, PyTorch conv, PyTorch batch norm) for the main path.
    branches = [
        ('2a', block[0], block[1]),
        ('2b', block[2], block[3]),
        ('2c', block[4], block[5]),
    ]
    # A non-empty shortcut holds the projection conv and its batch norm.
    projection = list(block[6].children()) if len(block) > 6 else []
    if projection:
        branches.append(('1', projection[0], projection[1]))

    for suffix, conv, bn in branches:
        # Reorder the kernel axes for the Keras Conv2D layer.
        conv_weights = [pt_to_np(conv.weight.permute(2, 3, 1, 0))]
        if conv.bias is not None:
            conv_weights.append(pt_to_np(conv.bias))
        tf_model.get_layer(conv_name_base + suffix).set_weights(conv_weights)

        tf_model.get_layer(bn_name_base + suffix).set_weights([
            pt_to_np(bn.weight),
            pt_to_np(bn.bias),
            pt_to_np(bn.running_mean),
            pt_to_np(bn.running_var),
        ])

# Define the block indices
# Each list names, in order, the tf_model.layers positions of a block's
# conv 2a, bn 2a, conv 2b, bn 2b, conv 2c and bn 2c layers.
#   * Projection blocks (first block of each stage): with s the block's first
#     index, bn 2c is s + 8, as in the original lists. The shortcut conv and
#     batch norm of these blocks are not listed here.
#   * Identity blocks: the layers form a single chain, so bn 2c directly follows
#     conv 2c at s + 7 (s + 8 is the Add layer, which holds no weights).
block_indices = {
    "conv2_block1": [7, 8, 10, 11, 13, 15],
    "conv2_block2": [19, 20, 22, 23, 25, 26],
    "conv2_block3": [29, 30, 32, 33, 35, 36],
    "conv3_block1": [39, 40, 42, 43, 45, 47],
    "conv3_block2": [51, 52, 54, 55, 57, 58],
    "conv3_block3": [61, 62, 64, 65, 67, 68],
    "conv3_block4": [71, 72, 74, 75, 77, 78],
    "conv4_block1": [81, 82, 84, 85, 87, 89],
    "conv4_block2": [93, 94, 96, 97, 99, 100],
    "conv4_block3": [103, 104, 106, 107, 109, 110],
    "conv4_block4": [113, 114, 116, 117, 119, 120],
    "conv4_block5": [123, 124, 126, 127, 129, 130],
    "conv4_block6": [133, 134, 136, 137, 139, 140],
    "conv5_block1": [143, 144, 146, 147, 149, 151],
    "conv5_block2": [155, 156, 158, 159, 161, 162],
    "conv5_block3": [165, 166, 168, 169, 171, 172]
}

# Mapping each block
# PyTorch layer1..layer4 correspond to the "conv2".."conv5" keys of block_indices;
# blocks inside a stage are numbered from 1.
pt_stages = (
    pytorch_model.layer1,
    pytorch_model.layer2,
    pytorch_model.layer3,
    pytorch_model.layer4,
)
for stage_number, pt_stage in enumerate(pt_stages, start=2):
    for block_number, bottleneck in enumerate(pt_stage, start=1):
        key = f"conv{stage_number}_block{block_number}"
        map_block(block_indices[key], list(bottleneck.children()), key)

# Fully connected layer: the PyTorch weight matrix is transposed before being
# handed to the Keras Dense layer.
fc = pytorch_model.fc
tf_model.layers[176].set_weights([pt_to_np(fc.weight.T), pt_to_np(fc.bias)])

In [8]:
# Write the TensorFlow model (architecture + transferred weights) to an HDF5 file
tf_model.save(filepath='resnet50_tf.h5')

In [9]:
# Read the model back from disk to confirm the saved file is loadable
loaded_tf_model = models.load_model(filepath='resnet50_tf.h5')

In [10]:
# Parameter count of the reloaded model, in millions
n_parameters = loaded_tf_model.count_params()
print(
    f"Number of Params: {n_parameters / 1000000:.1f}M"
)

Number of Params: 25.6M
