In [1]:
import tensorflow as tf
import torch




In [2]:
# import torch.nn as nn
# from mmcv.cnn import normal_init
# from ..builder import HEADS
# from .base import BaseHead


# @HEADS.register_module()
# class I3DHead(BaseHead):
#     """Classification head for I3D.
#     Args:
#         num_classes (int): Number of classes to be classified.
#         in_channels (int): Number of channels in input feature.
#         loss_cls (dict): Config for building loss.
#             Default: dict(type='CrossEntropyLoss')
#         spatial_type (str): Pooling type in spatial dimension. Default: 'avg'.
#         dropout_ratio (float): Probability of dropout layer. Default: 0.5.
#         init_std (float): Std value for Initiation. Default: 0.01.
#         kwargs (dict, optional): Any keyword argument to be used to initialize
#             the head.
#     """

#     def __init__(self,
#                  num_classes,
#                  in_channels,
#                  loss_cls=dict(type='CrossEntropyLoss'),
#                  spatial_type='avg',
#                  dropout_ratio=0.5,
#                  init_std=0.01,
#                  **kwargs):
#         super().__init__(num_classes, in_channels, loss_cls, **kwargs)

#         self.spatial_type = spatial_type
#         self.dropout_ratio = dropout_ratio
#         self.init_std = init_std
#         if self.dropout_ratio != 0:
#             self.dropout = nn.Dropout(p=self.dropout_ratio)
#         else:
#             self.dropout = None
#         self.fc_cls = nn.Linear(self.in_channels, self.num_classes)

#         if self.spatial_type == 'avg':
#             # use `nn.AdaptiveAvgPool3d` to adaptively match the in_channels.
#             self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
#         else:
#             self.avg_pool = None

#     def init_weights(self):
#         """Initiate the parameters from scratch."""
#         normal_init(self.fc_cls, std=self.init_std)

#     def forward(self, x):
#         """Defines the computation performed at every call.
#         Args:
#             x (torch.Tensor): The input data.
#         Returns:
#             torch.Tensor: The classification scores for input samples.
#         """
#         # [N, in_channels, 4, 7, 7]
#         if self.avg_pool is not None:
#             x = self.avg_pool(x)
#         # [N, in_channels, 1, 1, 1]
#         if self.dropout is not None:
#             x = self.dropout(x)
#         # [N, in_channels, 1, 1, 1]
#         x = x.view(x.shape[0], -1)
#         # [N, in_channels]
#         cls_score = self.fc_cls(x)
#         # [N, num_classes]
#         return cls_score

In [29]:
def get_x(shape=(1,3,8,224,224)):
    x_pt = torch.rand(shape)
    x_np = x_pt.numpy()
    x_tf = tf.convert_to_tensor(x_np)

    return x_tf, x_pt


shape = (2, 768, 2, 7, 7)
x_tf, x_pt = get_x(shape)

In [71]:
import torch.nn as nn
# from mmcv.cnn import normal_init

def normal_init(module: nn.Module,
                mean: float = 0,
                std: float = 1,
                bias: float = 0) -> None:
    if hasattr(module, 'weight') and module.weight is not None:
        nn.init.normal_(module.weight, mean, std)
    if hasattr(module, 'bias') and module.bias is not None:
        nn.init.constant_(module.bias, bias)
        print(module.bias)


class I3DHead(nn.Module):
    """Classification head for I3D.
    Args:
        num_classes (int): Number of classes to be classified.
        in_channels (int): Number of channels in input feature.
        loss_cls (dict): Config for building loss.
            Default: dict(type='CrossEntropyLoss')
        spatial_type (str): Pooling type in spatial dimension. Default: 'avg'.
        dropout_ratio (float): Probability of dropout layer. Default: 0.5.
        init_std (float): Std value for Initiation. Default: 0.01.
        kwargs (dict, optional): Any keyword argument to be used to initialize
            the head.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 loss_cls=dict(type='CrossEntropyLoss'),
                 spatial_type='avg',
                 dropout_ratio=0.5,
                 init_std=0.01,
                 **kwargs):

        print(num_classes, in_channels, loss_cls, spatial_type, dropout_ratio,init_std, kwargs)
        super().__init__( )
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.spatial_type = spatial_type
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std

        if self.dropout_ratio != 0:
            self.dropout = nn.Dropout(p=self.dropout_ratio)
        else:
            self.dropout = None
        self.fc_cls = nn.Linear(self.in_channels, self.num_classes)

        if self.spatial_type == 'avg':
            # use `nn.AdaptiveAvgPool3d` to adaptively match the in_channels.
            self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        else:
            self.avg_pool = None

    def init_weights(self):
        """Initiate the parameters from scratch."""
        normal_init(self.fc_cls, std=self.init_std)

    def forward(self, x):
        """Defines the computation performed at every call.
        Args:
            x (torch.Tensor): The input data.
        Returns:
            torch.Tensor: The classification scores for input samples.
        """
        # [N, in_channels, 4, 7, 7]


        if self.avg_pool is not None:
            x = self.avg_pool(x)
            print("pool",x.shape)

        
        # [N, in_channels, 1, 1, 1]
        if self.dropout is not None:
            x = self.dropout(x)
        # [N, in_channels, 1, 1, 1]
        x = x.view(x.shape[0], -1)
        # [N, in_channels]
        print(x[:1, :10])

        cls_score = self.fc_cls(x)
        # [N, num_classes]
        return cls_score

In [73]:
head_pt = I3DHead(num_classes=5,in_channels=768,
        spatial_type='avg',
        dropout_ratio=0.5)
head_pt.init_weights()

out_pt = head_pt(x_pt)
out_pt.shape

5 768 {'type': 'CrossEntropyLoss'} avg 0.5 0.01 {}
Parameter containing:
tensor([0., 0., 0., 0., 0.], requires_grad=True)
pool torch.Size([2, 768, 1, 1, 1])
tensor([[0.0000, 0.0000, 0.9596, 0.0000, 0.0000, 0.9384, 0.0000, 0.0000, 0.9948,
         0.0000]])


torch.Size([2, 5])

In [74]:
head_pt.eval()



np_state_dict = head_pt.state_dict()
np_state_dict = {k: np_state_dict[k].numpy() for k in np_state_dict}

np_state_dict.keys()

dict_keys(['fc_cls.weight', 'fc_cls.bias'])

In [75]:
np_state_dict["fc_cls.bias"]

array([0., 0., 0., 0., 0.], dtype=float32)

In [76]:
from tensorflow.keras import initializers
import tensorflow_addons as tfa



class I3DHead_tf(tf.keras.Model):
    """Classification head for I3D.
    Args:
        num_classes (int): Number of classes to be classified.
        in_channels (int): Number of channels in input feature.
        loss_cls (dict): Config for building loss.
            Default: dict(type='CrossEntropyLoss')
        spatial_type (str): Pooling type in spatial dimension. Default: 'avg'.
        dropout_ratio (float): Probability of dropout layer. Default: 0.5.
        init_std (float): Std value for Initiation. Default: 0.01.
        kwargs (dict, optional): Any keyword argument to be used to initialize
            the head.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 loss_cls=dict(type='CrossEntropyLoss'),
                 spatial_type='avg',
                 dropout_ratio=0.5,
                 init_std=0.01,
                 **kwargs):

        print(num_classes, in_channels, loss_cls, spatial_type, dropout_ratio,init_std, kwargs)
        super().__init__( )
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.spatial_type = spatial_type
        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        if self.dropout_ratio != 0:
            self.dropout = tf.keras.layers.Dropout(self.dropout_ratio)
        else:
            self.dropout = None
        self.fc_cls = tf.keras.layers.Dense( self.num_classes,  
                                            kernel_initializer=initializers.RandomNormal(stddev=self.init_std),
                                            bias_initializer=initializers.Zeros())

        if self.spatial_type == 'avg':
            # use `nn.AdaptiveAvgPool3d` to adaptively match the in_channels.
            self.avg_pool = tfa.layers.AdaptiveAveragePooling3D((1, 1, 1))
        else:
            self.avg_pool = None



    def call(self, x):
        """Defines the computation performed at every call.
        Args:
            x (torch.Tensor): The input data.
        Returns:
            torch.Tensor: The classification scores for input samples.
        """
        # [N, in_channels, 4, 7, 7]


        if self.avg_pool is not None:
            x = tf.transpose(x, perm=(0,2,3,4,1))

            x = self.avg_pool(x)
            # x = tf.transpose(x, perm=(0,4,1,2,3))

            print("pool",x.shape)

        # [N, in_channels, 1, 1, 1]


        if self.dropout is not None:
            x = self.dropout(x)
        # [N, in_channels, 1, 1, 1]

        x = tf.reshape(x, [x.shape[0],-1])
        # [N, in_channels]
        cls_score = self.fc_cls(x)
        
        # [N, num_classes]
        return cls_score

In [77]:
head_tf = I3DHead_tf(5, 768)
out_tf = head_tf(x_tf)
out_tf.shape

5 768 {'type': 'CrossEntropyLoss'} avg 0.5 0.01 {}
pool (2, 1, 1, 1, 768)
tf.Tensor(
[[0.5597479  0.450521   0.47980407 0.5017258  0.47834447 0.46921554
  0.48635027 0.51388454 0.49739128 0.49860203]], shape=(1, 10), dtype=float32)


TensorShape([2, 5])

In [78]:
def modify_dense(tf_component, np_state_dict):
    tf_component.kernel.assign(tf.Variable(np_state_dict["fc_cls.weight"].transpose(1, 0)))
    tf_component.bias.assign(tf.Variable(np_state_dict["fc_cls.bias"]))
    return tf_component


head_tf.layers[1] = modify_dense(head_tf.layers[1], np_state_dict)

In [79]:
out_tf = head_tf(x_tf)
out_pt = head_pt(x_pt)

out_tf, out_pt

pool (2, 1, 1, 1, 768)
tf.Tensor(
[[0.5597479  0.450521   0.47980407 0.5017258  0.47834447 0.46921554
  0.48635027 0.51388454 0.49739128 0.49860203]], shape=(1, 10), dtype=float32)
pool torch.Size([2, 768, 1, 1, 1])
tensor([[0.5597, 0.4505, 0.4798, 0.5017, 0.4783, 0.4692, 0.4864, 0.5139, 0.4974,
         0.4986]])


(<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
 array([[ 0.15969104, -0.10262091,  0.17698918, -0.06723979,  0.00814114],
        [ 0.14762259, -0.10894921,  0.16734135, -0.06109077, -0.00279752]],
       dtype=float32)>,
 tensor([[ 0.1597, -0.1026,  0.1770, -0.0672,  0.0081],
         [ 0.1476, -0.1089,  0.1673, -0.0611, -0.0028]],
        grad_fn=<AddmmBackward0>))