# Compare sinusiodal embeddings results between tensorflow and torch

In [97]:
import tensorflow as tf
from tensorflow.keras import layers, models,activations

import torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch
from torchvision import datasets, transforms

import math

In [10]:
tf.math.log(tf.constant(1000.0))

<tf.Tensor: shape=(), dtype=float32, numpy=6.9077554>

In [11]:
IMAGE_SIZE = 64
BATCH_SIZE = 64
DATASET_REPETITIONS = 5
LOAD_MODEL = False

NOISE_EMBEDDING_SIZE = 32
PLOT_DIFFUSION_STEPS = 20

# optimization
EMA = 0.999
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-4
EPOCHS = 50


In [12]:
def sinusoidal_embedding(x):
    frequencies = tf.exp(
        tf.linspace(
            tf.math.log(1.0),
            tf.math.log(1000.0),
            NOISE_EMBEDDING_SIZE // 2,
        )
    )
    angular_speeds = 2.0 * math.pi * frequencies
    embeddings = tf.concat(
        [tf.sin(angular_speeds * x), tf.cos(angular_speeds * x)], axis=3
    )
    return embeddings


In [13]:
# let's define simple model that will accept an input with shape (1,1,1) and apply sinusoidal embedding function on it
noise_variances = layers.Input(shape=(1,1,1))
noise_embedding = layers.Lambda(sinusoidal_embedding)(noise_variances)
model = models.Model(noise_variances,noise_embedding, name="sin_emb")



In [14]:
x = tf.constant(0.34,shape=(1,1,1))
y = model(x)

In [15]:
print(f"tf emb shape : {y.shape}")
print(f"tf sin emb output :\n{y}")

tf emb shape : (1, 1, 1, 32)
tf sin emb output :
[[[[ 8.4432781e-01 -2.4176864e-01 -7.9383200e-01  7.9565132e-01
     7.9113644e-01  5.8778441e-01  6.4400035e-01 -2.5121161e-01
    -2.2208980e-01  2.9371348e-01  2.5971909e-05 -6.5481865e-01
     5.6658745e-01  7.8468496e-01 -1.5987124e-01  1.9868393e-04
    -5.3582692e-01 -9.7033393e-01  6.0813713e-01 -6.0575485e-01
     6.1163974e-01 -8.0901760e-01 -7.6502520e-01 -9.6793216e-01
    -9.7502619e-01 -9.5589352e-01  1.0000000e+00  7.5578606e-01
    -8.2400167e-01 -6.1989480e-01 -9.8713785e-01  1.0000000e+00]]]]


In [16]:
def sinusoidal_embedding_torch(x:torch.tensor):
    frequencies = torch.exp(torch.linspace(torch.log(torch.tensor(1.0)),torch.log(torch.tensor(1000.0)),NOISE_EMBEDDING_SIZE//2))
    angular_speeds = 2 * torch.pi* frequencies * x
    return torch.cat((torch.sin(angular_speeds), torch.cos(angular_speeds)), dim=1)

In [17]:
x_torch = torch.tensor([[0.34]])
y_torch = sinusoidal_embedding_torch(x_torch)
print(y_torch.shape)

torch.Size([1, 32])


In [18]:
y_torch

tensor([[ 8.4433e-01, -2.4177e-01, -7.9383e-01,  7.9565e-01,  7.9114e-01,
          5.8778e-01,  6.4400e-01, -2.5121e-01, -2.2209e-01,  2.9371e-01,
          2.5972e-05, -6.5482e-01,  5.6659e-01,  7.8442e-01, -1.5987e-01,
          1.9868e-04, -5.3583e-01, -9.7033e-01,  6.0814e-01, -6.0575e-01,
          6.1164e-01, -8.0902e-01, -7.6503e-01, -9.6793e-01, -9.7503e-01,
         -9.5589e-01,  1.0000e+00,  7.5579e-01, -8.2400e-01, -6.2023e-01,
         -9.8714e-01,  1.0000e+00]])

In [21]:
class SinusoidalEmbedding(nn.Module):
    def __init__(self, device, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.device = device
        frequencies = torch.linspace(torch.log(torch.tensor(1.0)),torch.log(torch.tensor(1000.0)),16)
        frequencies = frequencies.unsqueeze(0).unsqueeze(2).unsqueeze(3)
        frequencies = frequencies.to(device)
        self.frequencies = frequencies
    
    def forward(self,x):
        angular_speeds = 2 * torch.pi * torch.exp(self.frequencies) * x
        # return single scalar as 32 dimensional vector
        return torch.cat((torch.sin(angular_speeds),torch.cos(angular_speeds)),dim=1)


In [None]:
#device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device = torch.device('cpu')
sinemb_model = SinusoidalEmbedding(device=device)
out = sinemb_model(x_torch)
print(out.shape)
print(out)

In [30]:
x_torch = torch.tensor([[0.34]])
print(x_torch.requires_grad)

False


# Compare Conv2d between tf and torch

In [38]:
width = 5
theinput = layers.Input(shape=(5,5,3))
residual = layers.Conv2D(width, kernel_size=1)(theinput)
model = models.Model(theinput, residual,name="conv2d")


Generate tensor with normal distribution and save it to a file and load it from a file

In [31]:
import numpy as np

np.random.seed(42)
tf.random.set_seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x194011ef070>

In [32]:

tftensor = tf.random.normal(shape=(5,5,3))
nptensor = tftensor.numpy()


In [33]:
import pathlib
tempfolder = pathlib.Path.cwd()/"temp_files"
filename="nptensor00.npy"
np.save(str(tempfolder/filename),nptensor)

In [34]:
loadednptensor=np.load(str(tempfolder/filename))

In [35]:
# checking that loaded tensor and original tensor are equal
print((nptensor==loadednptensor).sum()==5*5*3)

True


In [36]:
loadedtftensor=tf.convert_to_tensor(loadednptensor)
print(f"loaded tf tensor shape : {loadedtftensor.shape}")

loaded tf tensor shape : (5, 5, 3)


In [39]:
# now pass it through a model
out = model(tf.expand_dims(loadedtftensor,axis=0))
print(f"out shape : {out.shape}")

out shape : (1, 5, 5, 5)


Now let's try the same with torch

In [40]:
import torch.nn as nn

class SimpleModel(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels,kernel_size=1)
    
    def forward(self,x):
        return self.conv(x)


In [41]:
torch_tensor = torch.from_numpy(loadednptensor)
torch_tensor = torch_tensor.permute(2,0,1).unsqueeze(0)
print(f"torch tensor shape : {torch_tensor.shape}")

torch tensor shape : torch.Size([1, 3, 5, 5])


In [42]:
torch_model = SimpleModel(3,width)
torch_out = torch_model(torch_tensor)

In [43]:
torch_out.shape

torch.Size([1, 5, 5, 5])

In [44]:
out[0,0,0,:]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([-0.74220693, -0.7008943 ,  0.59093624, -0.2729187 , -0.04410662],
      dtype=float32)>

In [45]:
torch_out[0,:,0,0]

tensor([-0.3840,  0.7626, -0.1298, -0.7966,  0.3099],
       grad_fn=<SelectBackward0>)

In [46]:
print("second layer weights list length : ",len(model.layers[1].get_weights()))

# convolution filters have the shape FHxFWxFCxFN, where FH is filter height, FW is filter width, FC is filter channels which usually matches input tensors channels
# FN is the number of filters which will translate to output channels
print("first param shape : ",model.layers[1].get_weights()[0].shape)
print("second param shape : ",model.layers[1].get_weights()[1].shape)

second layer weights list length :  2
first param shape :  (1, 1, 3, 5)
second param shape :  (5,)


In [50]:
tf_conv_filters=model.layers[1].get_weights()[0]
tf_conv_filters_bias = model.layers[1].get_weights()[1]
print(type(tf_conv_filters))
print(tf_conv_filters[:,:,:,0])

<class 'numpy.ndarray'>
[[[ 0.40754956  0.83004147 -0.5517646 ]]]


In [52]:
# I want to manually set the parameters of nn.Conv2D
tc_filters = torch.from_numpy(tf_conv_filters)
tc_filters = tc_filters.permute(3,2,0,1)
tc_filt_biases = torch.from_numpy(tf_conv_filters_bias)

with torch.no_grad():
    torch_model.conv.weight.copy_(tc_filters)
    torch_model.conv.bias.copy_(tc_filt_biases)

In [47]:
torch_params=[param for param in torch_model.parameters()]
print(len(torch_params))

2


In [48]:
print("first param shape : ",torch_params[0].shape)
print("second param shape : ",torch_params[1].shape)

first param shape :  torch.Size([5, 3, 1, 1])
second param shape :  torch.Size([5])


In [53]:
# now I want to pass x to conv2D with new weights and compare the results with that of tensorflow conv2d
tc_out = torch_model(torch_tensor)
print(f"tc_out shape : {tc_out.shape}, tf out shape : {out.shape}")

tc_out shape : torch.Size([1, 5, 5, 5]), tf out shape : (1, 5, 5, 5)


In [54]:
print("torch out first channel output", tc_out[:,0,:,:])
print(f"tf first channel output : {out[:,:,:,0]}")

torch out first channel output tensor([[[-0.7422, -1.9824, -0.7168, -0.9868, -1.4923],
         [ 1.6791,  0.7415, -1.3628, -0.6151,  0.5614],
         [ 0.2119,  0.5652,  0.9435, -1.8063,  0.5516],
         [ 0.2992, -0.1099, -0.5495, -0.5857,  0.9035],
         [ 0.6210,  0.6236, -1.5218,  1.4500,  1.4976]]],
       grad_fn=<SliceBackward0>)
tf first channel output : [[[-0.74220693 -1.9824156  -0.7168442  -0.98682624 -1.4923443 ]
  [ 1.6790966   0.74148107 -1.3627936  -0.61512125  0.5614329 ]
  [ 0.21191429  0.5651724   0.94349176 -1.806262    0.5516483 ]
  [ 0.29922476 -0.10988607 -0.54947203 -0.58572084  0.90354264]
  [ 0.6210349   0.62357324 -1.5217959   1.4499993   1.49762   ]]]


# Let's compare BatchNormalization between tensorflow and torch

In [55]:
# let us generate random N,W,H,C tensor, pass it through convolution and save the result
tf_x=tf.random.normal(shape=(10,5,5,3))
tf_out = model(tf_x)
print(tf_out.shape)

(10, 5, 5, 5)


In [56]:
filename="nptensor01.npy"
np.save(str(tempfolder/filename), tf_out.numpy())

In [57]:
tf_bn_input = layers.Input(shape=(5,5,5))
tf_batch_norm = layers.BatchNormalization(center=False, scale=False)(tf_bn_input)
bn_model = models.Model(tf_bn_input, tf_batch_norm, name="batch_norm")

In [91]:
tf_bn_out=bn_model(tf_out,training=True)
print("tf batch normalization output shape : ",tf_bn_out.shape)

tf batch normalization output shape :  (10, 5, 5, 5)


In [59]:
print(tf_out[0,0,0,:])
print(tf_bn_out[0,0,0,:])

tf.Tensor([-0.888895   -0.7787231   0.5411658  -0.06579003 -0.0377686 ], shape=(5,), dtype=float32)
tf.Tensor([-0.8884508  -0.77833396  0.5408954  -0.06575715 -0.03774973], shape=(5,), dtype=float32)


In [60]:
tf_means = tf.reduce_mean(tf_out,axis=0)
tf_vars = tf.math.reduce_variance(tf_out,axis=0)
print("tf mean shape : ",tf_means.shape)
print("tf variances shape : ", tf_vars.shape)

tf mean shape :  (5, 5, 5)
tf variances shape :  (5, 5, 5)


In [84]:
print("mean of first feature across batches : ",tf_means[0,0,0])
print("variance of first feature across batches : ",tf_vars[0,0,0])

mean of first feature across batches :  tf.Tensor(-0.43894997, shape=(), dtype=float32)
variance of first feature across batches :  tf.Tensor(1.7402786, shape=(), dtype=float32)


In [83]:
# with this I am confirming the behavior of tf.reduce_mean and tf.math.reduce_variance
print("mean of first feature calculated with numpy across batches : ",tf_out.numpy()[:,0,0,0].mean())
print("variance of first feature calculated with numpy across batches : ",tf_out.numpy()[:,0,0,0].std()**2)

mean of first feature calculated with numpy across batches :  -0.43895
variance of first feature calculated with numpy across batches :  1.740278363456028


In [85]:
(tf_out[:,0,0,0]-tf_means[0,0,0])/tf.math.sqrt(tf_vars[0,0,0])

<tf.Tensor: shape=(10,), dtype=float32, numpy=
array([-0.34107512,  0.34862122,  0.36523625, -0.4958102 ,  1.689387  ,
        1.3150597 ,  0.57471246, -1.3216718 , -0.5958357 , -1.5386238 ],
      dtype=float32)>

In [92]:
# I couldn't verify how tensorflow BatchNormalization calculates mean and variances
# Accoring to gpt it computes mean and variance across channels, so let's do that

# below will calculate means for each channel. given tensor (10,5,5,5) for each channel total of 10*5*5=250 datapoints and we take mean across them
mean=tf.reduce_mean(tf_out,axis=[0,1,2])
var=tf.math.reduce_variance(tf_out,axis=[0,1,2])

print(f"manual mean : {mean}, manual variance : {var}")
print(f"tf bn mean : {bn_model.layers[1].moving_mean}, tf bn variance : {bn_model.layers[1].moving_variance}")

manual mean : [-0.07195345 -0.05061551  0.11001837 -0.02935027  0.06918108], manual variance : [1.1207141  0.73636943 0.88828963 0.8977073  0.66772294]
tf bn mean : <tf.Variable 'batch_normalization/moving_mean:0' shape=(5,) dtype=float32, numpy=
array([-0.00071953, -0.00050616,  0.00110018, -0.0002935 ,  0.00069181],
      dtype=float32)>, tf bn variance : <tf.Variable 'batch_normalization/moving_variance:0' shape=(5,) dtype=float32, numpy=
array([1.0012522 , 0.99739325, 0.9989186 , 0.9990131 , 0.99670404],
      dtype=float32)>


Now with torch

In [76]:
x_torch = torch.tensor(tf_out.numpy()).permute(0,3,1,2)
torch_batch_norm = nn.BatchNorm2d(num_features=5, affine=False)
torch_batch_norm.train()
torch_bn_out = torch_batch_norm(x_torch)
print(torch_bn_out.shape)

torch.Size([10, 5, 5, 5])


In [77]:
print(bn_model.layers[1].moving_mean)
print(bn_model.layers[1].moving_variance)

<tf.Variable 'batch_normalization/moving_mean:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>
<tf.Variable 'batch_normalization/moving_variance:0' shape=(5,) dtype=float32, numpy=array([1., 1., 1., 1., 1.], dtype=float32)>


In [93]:
print("BatchNorm2d output first sample across channels",torch_bn_out[0,:,0,2])
print("BatchNorm2d output first sample across last dim",torch_bn_out[0,0,2,:])
print(f"tf bn output : {tf_bn_out[0,0,2,:]}")
print("x_torch first sample across channels : ",x_torch[0,:,0,2])
print("tf_out across channels : ",tf_out[0,0,2,:])

BatchNorm2d output first sample across channels tensor([0.1604, 0.2819, 0.0793, 0.0826, 0.4031])
BatchNorm2d output first sample across last dim tensor([-1.7095, -0.7310,  0.2003, -0.8787,  1.3037])
tf bn output : [0.16030471 0.28168422 0.07928884 0.08257569 0.40281639]
x_torch first sample across channels :  tensor([0.0978, 0.1913, 0.1848, 0.0489, 0.3986])
tf_out across channels :  tf.Tensor([0.09782689 0.19126739 0.18478946 0.04893163 0.39858615], shape=(5,), dtype=float32)


# Average Pooling behaviour

In [None]:
x = torch.randn(10,32,64,64)
out=nn.AvgPool2d(kernel_size=2)(x)
print("nn.AvgPool2d output when applied to x with shape (10,32,64,64): ",out.shape)

# Let's compare output of Residual Blocks

In [124]:
def copy_tf_conv_weights_to_torch_conv(tf_conv,torch_conv):
    tf_conv_filters=tf_conv.get_weights()[0]
    tf_conv_filters_bias = tf_conv.get_weights()[1]

    # I want to manually set the parameters of nn.Conv2D
    tc_filters = torch.from_numpy(tf_conv_filters)
    tc_filters = tc_filters.permute(3,2,0,1)
    tc_filt_biases = torch.from_numpy(tf_conv_filters_bias)

    with torch.no_grad():
        torch_conv.weight.copy_(tc_filters)
        torch_conv.bias.copy_(tc_filt_biases)
        

In [125]:
width=10
tf_conv_one = layers.Conv2D(width, kernel_size=1)
tf_conv_two = layers.Conv2D(
            width, kernel_size=3, padding="same", activation=activations.swish
        )
tf_conv_three = layers.Conv2D(width, kernel_size=3, padding="same")
tf_bn = layers.BatchNormalization(center=False, scale=False)

def TfResidualBlock():
    def apply(x):
        input_width = x.shape[3]
        if input_width == width:
            residual = x
        else:
            residual = tf_conv_one(x)
        x = tf_bn(x, training=True)
        x = tf_conv_two(x)
        x = tf_conv_three(x)
        x = layers.Add()([x, residual])
        return x

    return apply

In [126]:
tf_x = tf_out
tf_res_out = TfResidualBlock()(tf_x)
print(f"tf res output : {tf_res_out.shape}")

tf res output : (10, 5, 5, 10)


In [127]:
# let's define torch ResidualBlock
in_channels = 5
out_channels = 10

class Swish(nn.Module):
    def forward(self, x):
        return x * torch.sigmoid(x)

tc_conv_shortcut=nn.Conv2d(in_channels, out_channels, kernel_size=1)
copy_tf_conv_weights_to_torch_conv(tf_conv_one,tc_conv_shortcut)

tc_conv1 = nn.Conv2d(in_channels=in_channels,out_channels=out_channels, kernel_size=3, padding=1, stride=1)
copy_tf_conv_weights_to_torch_conv(tf_conv_two,tc_conv1)

tc_conv2 = nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1, stride=1)
copy_tf_conv_weights_to_torch_conv(tf_conv_three, tc_conv2)

class TcResidualBlock(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)        
        if in_channels!=out_channels:
            self.shortcut = tc_conv_shortcut
        else:
            self.shortcut = nn.Identity()        
        self.batch_norm = nn.BatchNorm2d(num_features=in_channels,affine=False)
        self.conv1 = tc_conv1
        self.swish = Swish()
        self.conv2 = tc_conv2

    
    def forward(self,x):        
        residual = self.shortcut(x)
        x = self.batch_norm(x)
        x = self.conv1(x)
        x = self.swish(x)
        x = self.conv2(x)
        return x + residual


In [128]:
torch_x = torch.tensor(tf_x.numpy())
torch_x = torch_x.permute(0,3,1,2)

tc_res_block=TcResidualBlock()
tc_res_block.train()
tc_out = tc_res_block(torch_x)
print(tc_out.shape)

torch.Size([10, 10, 5, 5])


In [129]:
print(f"tf out first channel : {tf_res_out[0,0,0,:]}")
print(f"tc out first channel : {tc_out[0,:,0,0]}")


tf out first channel : [-0.56412524 -0.37567738 -1.1697795   1.0911057  -0.01003164  0.401801
  0.58847225  0.50728196 -0.19187619 -0.267038  ]
tc out first channel : tensor([-0.5645, -0.3761, -1.1700,  1.0914, -0.0104,  0.4017,  0.5884,  0.5072,
        -0.1920, -0.2670], grad_fn=<SelectBackward0>)
