# Setup

In [9]:
import json
import os
import torch
import torch
from torch import nn

class Item:
    """One named, typed entry of a JSON test file.

    ``key`` is the entry name and ``type`` a type tag stored next to the value.
    When the tag starts with ``"Tensor"`` the value is expected to offer
    ``size()`` and ``flatten().tolist()`` and is stored as a dict holding its
    size and its flattened elements; any other value is stored unchanged.
    """

    def __init__(self, key, value, type:str):
        self.key = key
        self.type = type
        # Tensor-tagged values are reduced to their size and a flat element list.
        self.value = (
            {"size": value.size(), "values": value.flatten().tolist()}
            if type.startswith("Tensor")
            else value
        )

    def to_dict(self):
        """Return the entry as ``{key: {type: value}}``."""
        tagged = {self.type: self.value}
        return {self.key: tagged}


def to_file(name:str,items:list):
    """Write *items* to ``test-files/<name>.json``.

    Every item must provide ``to_dict()``; the resulting dicts are merged in
    order (a later item overrides an earlier one with the same key) and stored
    under the top-level ``"values"`` key. The target directory is created when
    it does not exist yet.
    """
    target = "test-files/"+name+".json"
    merged = {
        entry_key: entry_value
        for item in items
        for entry_key, entry_value in item.to_dict().items()
    }

    # Serialise before touching the file system, so a serialisation error
    # leaves no directory or file behind.
    serialized = json.dumps({"values": merged}, indent=4)
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "w") as handle:
        handle.write(serialized)

def random_tensor(shape:list,seed:int=0):
    """Return a reproducible pseudo-random tensor of the given *shape*.

    The elements come from the linear congruential generator
    ``x = (3 * x + 23) % 2**32`` started at *seed*; every state is divided by
    ``2**32`` before being stored. The same shape and seed therefore always
    produce the same tensor.
    """
    multiplier, increment, modulus = 3, 23, 2**32

    # Total number of elements is the product of all dimensions.
    count = 1
    for extent in shape:
        count *= extent

    def _stream(state):
        # Advance the generator once per element and yield the scaled state.
        for _ in range(count):
            state = (multiplier * state + increment) % modulus
            yield state / modulus

    return torch.tensor(list(_stream(seed))).view(shape)

def mock_linear(linear:nn.Linear )->nn.Linear:
    """Overwrite the parameters of *linear* with ``random_tensor`` values.

    The weight data is replaced by ``random_tensor(weight_size, 1)`` and the
    bias data by ``random_tensor([weight_size[0]], 2)``. A layer without a
    bias (``bias is None``) keeps it that way.

    Returns:
        The same module, modified in place, as the return annotation states.
    """
    size = linear.weight.size()
    linear.weight.data = random_tensor(size,1)
    # Nothing to overwrite when the layer has no bias.
    if linear.bias is not None:
        linear.bias.data = random_tensor([size[0]],2)
    return linear

def mock_layer_norm(layer_norm: nn.LayerNorm)->nn.LayerNorm:
    """Overwrite the parameters of *layer_norm* with ``random_tensor`` values.

    The weight data is replaced by ``random_tensor(weight_size, 1)`` and the
    bias data by ``random_tensor(weight_size, 2)``. If the module has no
    weight (``weight is None``) it is returned unchanged; a missing bias is
    skipped.

    Returns:
        The same module, modified in place, as the return annotation states.
    """
    if layer_norm.weight is None:
        # No weight means there is no size to derive the mock tensors from.
        return layer_norm
    size = layer_norm.weight.size()
    layer_norm.weight.data = random_tensor(size,1)
    if layer_norm.bias is not None:
        layer_norm.bias.data = random_tensor(size,2)
    return layer_norm

def mock_conv2d(conv2d:nn.Conv2d)->nn.Conv2d:
    """Overwrite the parameters of *conv2d* with ``random_tensor`` values.

    The weight data is replaced by ``random_tensor(weight_size, 1)`` and the
    bias data by ``random_tensor([weight_size[0]], 2)``. A layer without a
    bias (``bias is None``) keeps it that way.

    Returns:
        The same module, modified in place, as the return annotation states.
    """
    size = conv2d.weight.size()
    conv2d.weight.data = random_tensor(size,1)
    # Nothing to overwrite when the layer has no bias.
    if conv2d.bias is not None:
        conv2d.bias.data = random_tensor([size[0]],2)
    return conv2d

## Common
#### LayerNorm2d

In [10]:
from segment_anything.modeling.common import LayerNorm2d

# Save weight, bias and eps of a LayerNorm2d(256, 0.1) instance
layer_norm = LayerNorm2d(256, 0.1)
items = [
    Item("weight", layer_norm.weight, "TensorFloat"),
    Item("bias", layer_norm.bias, "TensorFloat"),
    Item("eps", layer_norm.eps, "Float"),
]
to_file("layer_norm_2d", items)

# Forward: save the input together with the output it produces
input = random_tensor([2, 256, 16, 16])
output = layer_norm(input)
items = [
    Item(label, tensor, "TensorFloat")
    for label, tensor in (("input", input), ("output", output))
]
to_file("layer_norm_2d_forward", items)
del layer_norm, input, output, items

#### MLPBlock

In [11]:
from segment_anything.modeling.common import MLPBlock

# Save the weight sizes of both linear layers of the block
mlp_block = MLPBlock(256, 256, nn.GELU)
items = [
    Item(label, layer.weight.size(), "List")
    for label, layer in (("lin1_size", mlp_block.lin1), ("lin2_size", mlp_block.lin2))
]
to_file("mlp_block", items)

# Mocking: replace the layer parameters with random_tensor values
mock_linear(mlp_block.lin1)
mock_linear(mlp_block.lin2)

# Forward: save the input together with the output it produces
input = random_tensor([256, 256], 5)
output = mlp_block(input)
items = [
    Item("input", input, "TensorFloat"),
    Item("output", output, "TensorFloat"),
]
to_file("mlp_block_forward", items)
del mlp_block, input, output, items

#### Activation

In [12]:
# GELU: save an input and the activation output
gelu = nn.GELU()
input = random_tensor([256, 256])
output = gelu(input)
items = [
    Item("input", input, "TensorFloat"),
    Item("output", output, "TensorFloat"),
]
to_file("activation_gelu", items)

# ReLU: same input shape and (default) seed as above
relu = nn.ReLU()
input = random_tensor([256, 256])
output = relu(input)
items = [
    Item("input", input, "TensorFloat"),
    Item("output", output, "TensorFloat"),
]
to_file("activation_relu", items)
del input, output

# Image encoder

#### PatchEmbed

In [13]:
from segment_anything.modeling.image_encoder import PatchEmbed

# Save the weight size of the projection layer
patch_embed = PatchEmbed((16, 16), (16, 16), (0, 0), 3, 320)
items = [Item("proj_size", patch_embed.proj.weight.size(), "List")]
to_file("patch_embed", items)

# Mocking: replace the projection parameters with random_tensor values
mock_conv2d(patch_embed.proj)

# Forward: save the input together with the output it produces
input = random_tensor([1, 3, 512, 512], 3)
output = patch_embed(input)
items = [
    Item(label, tensor, "TensorFloat")
    for label, tensor in (("input", input), ("output", output))
]
to_file("patch_embed_forward", items)
del patch_embed, input, output, items

#### Block

In [14]:
from segment_anything.modeling.image_encoder import  window_partition,window_unpartition

# Window partition: the call yields the partitioned tensor plus a size value
input = random_tensor([2, 256, 16, 16], 1)
output, size = window_partition(input, 16)
items = [
    Item("input", input, "TensorFloat"),
    Item("output", output, "TensorFloat"),
    Item("size", size, "Size"),
]
to_file("window_partition", items)

# Window unpartition
input = random_tensor([2, 256, 16, 16], 2)
output = window_unpartition(input, 16, (16, 16), (14, 14))
items = [
    Item(label, tensor, "TensorFloat")
    for label, tensor in (("input", input), ("output", output))
]
to_file("window_unpartition", items)
del input, output, items

In [15]:
from segment_anything.modeling.image_encoder import Block

# Block: save the window size of the constructed block
block = Block(
    320, 16, 4.0, True, nn.LayerNorm, nn.GELU, True, True, 14, (64, 64),
)
items = [Item("window_size", block.window_size, "Int")]
to_file("block", items)

# Mocking: replace norm, attention and MLP parameters with random_tensor values
mock_layer_norm(block.norm1)
mock_layer_norm(block.norm2)
mock_linear(block.attn.qkv)
mock_linear(block.attn.proj)
mock_linear(block.mlp.lin1)
mock_linear(block.mlp.lin2)

# Forward: save the input together with the output it produces
input = random_tensor([1, 64, 64, 320], 1)
output = block(input)
items = [
    Item("input", input, "TensorFloat"),
    Item("output", output, "TensorFloat"),
]
to_file("block_forward", items)
del block, input, output

#### Attention

In [16]:
from segment_anything.modeling.image_encoder import get_rel_pos,add_decomposed_rel_pos

# Get rel pos
q_size = k_size = 32
input = random_tensor([127, 40], 1)
output = get_rel_pos(q_size, k_size, input)
items = [
    Item(label, tensor, "TensorFloat")
    for label, tensor in (("input", input), ("output", output))
]
to_file("get_rel_pos", items)
del input, output


# Add decomposed rel pos
attn = random_tensor([200, 49, 49], 2)
q = random_tensor([200, 49, 20], 3)
relo_pos_h = random_tensor([20, 20], 4)
relo_pos_w = random_tensor([20, 20], 5)
q_size = k_size = (7, 7)
output = add_decomposed_rel_pos(attn, q, relo_pos_h, relo_pos_w, q_size, k_size)
# The two relative-position tensors are passed to the call but are not
# written to the file; only the entries listed here are saved.
items = [
    Item("attn", attn, "TensorFloat"),
    Item("q", q, "TensorFloat"),
    Item("q_size", q_size, "Size"),
    Item("k_size", k_size, "Size"),
    Item("output", output, "TensorFloat"),
]
to_file("add_decomposed_rel_pos", items)
del attn, q, relo_pos_h, relo_pos_w, q_size, k_size, output, items

In [17]:
from segment_anything.modeling.image_encoder import Attention

# Attention: save head count, scale and the relative-position flag
attention = Attention(320, 16, True, True, True, (14, 14))
items = [
    Item("num_heads", attention.num_heads, "Int"),
    Item("scale", attention.scale, "Float"),
    Item("use_rel_pos", attention.use_rel_pos, "Bool"),
]
to_file("attention", items)

# Mocking: replace the qkv and proj parameters with random_tensor values
mock_linear(attention.qkv)
mock_linear(attention.proj)

# Forward: save the input together with the output it produces
input = random_tensor([25, 14, 14, 320], 1)
output = attention(input)
items = [
    Item(label, tensor, "TensorFloat")
    for label, tensor in (("input", input), ("output", output))
]
to_file("attention_forward", items)
del input, output, attention

#### ImageEncoderViT

In [18]:
from segment_anything.modeling.image_encoder import ImageEncoderViT

# Build the encoder and save its image size
image_encoder = ImageEncoderViT(
    128, 4, 3, 320, 32, 16, 4.0, 256, True,
    nn.LayerNorm, nn.GELU, True, True, True, 14, [7, 15, 23, 31],
)
items = [Item("img_size", image_encoder.img_size, "Int")]
to_file("image_encoder", items)

# Forward: save the input together with the output it produces
# NOTE(review): unlike the other cells, no parameters are mocked here, so the
# saved output presumably depends on how the encoder initialises its own
# parameters - confirm that this file is meant to be reproducible.
input = random_tensor([1, 3, 128, 128], 1)
output = image_encoder(input)
items = [
    Item("input", input, "TensorFloat"),
    Item("output", output, "TensorFloat"),
]
to_file("image_encoder_forward", items)

del image_encoder, input, output

## Mask decoder
#### MLP block


In [19]:
from segment_anything.modeling.mask_decoder import MLP

# Save the layer count, the sigmoid flag and the weight size of every layer
mlp = MLP(256, 256, 256, 4, False)
items = [
    Item("num_layers", mlp.num_layers, "Int"),
    Item("sigmoid_output", mlp.sigmoid_output, "Bool"),
    Item("layers_len", len(mlp.layers), "Int"),
]
# Iterate the layers directly instead of indexing through range(len(...))
for i, layer in enumerate(mlp.layers):
    items.append(Item("layer"+str(i), layer.weight.size(), "List"))
to_file("mlp", items)

# Mocking: replace the parameters of every layer with random_tensor values
for layer in mlp.layers:
    mock_linear(layer)

# Forward: save the input together with the output it produces
input = random_tensor([1, 256], 1)
output = mlp(input)
items = [
    Item("input", input, "TensorFloat"),
    Item("output", output, "TensorFloat"),
]
to_file("mlp_forward", items)

#### Mask decoder

In [21]:
from segment_anything.modeling.mask_decoder import MaskDecoder

# mask_decoder = MaskDecoder()
# TODO: construct a MaskDecoder and write its test files


## Transformer
#### Attention

In [26]:
from segment_anything.modeling.transformer import Attention

# Save the attention attributes and the weight size of every projection
attention = Attention(256, 8, 1)
items = [
    Item("embedding_dim", attention.embedding_dim, "Int"),
    Item("internal_dim", attention.internal_dim, "Int"),
    Item("num_heads", attention.num_heads, "Int"),
    Item("q_proj_size", attention.q_proj.weight.size(), "List"),
    Item("k_proj_size", attention.k_proj.weight.size(), "List"),
    Item("v_proj_size", attention.v_proj.weight.size(), "List"),
    Item("out_proj_size", attention.out_proj.weight.size(), "List"),
]
to_file("transformer_attention", items)

# Mocking: replace the projection parameters with random_tensor values
mock_linear(attention.q_proj)
mock_linear(attention.k_proj)
mock_linear(attention.v_proj)
mock_linear(attention.out_proj)

# Forward
q = random_tensor([1, 256, 256], 1)
k = random_tensor([1, 256, 256], 2)
v = random_tensor([1, 256, 256], 3)
# Call the module itself rather than .forward(): this is how every other
# module in this file is invoked, and it does not bypass registered hooks.
output = attention(q, k, v)
items = [
    Item("q", q, "TensorFloat"),
    Item("k", k, "TensorFloat"),
    Item("v", v, "TensorFloat"),
    Item("output", output, "TensorFloat"),
]
to_file("transformer_attention_forward", items)


256 8 1


#### TwoWayAttention

#### TwoWayTransformer

In [24]:
from segment_anything.modeling.transformer import TwoWayTransformer

# Build a TwoWayTransformer; no test file is written for it yet.
transformer = TwoWayTransformer(2, 256, 8, 2048, nn.ReLU, 2)

# NOTE(review): the forward pass below is disabled. The recorded output of this
# cell ends with a TypeError stating that forward() also requires 'image_pe'
# and 'point_embedding', so presumably this single-argument call is what
# failed - confirm before re-enabling.
# input = random_tensor([1,2,256],1)
# output = transformer(input)

256 8 1
256 8 2
256 8 2
256 8 1
256 8 2
256 8 2
256 8 2


TypeError: forward() missing 2 required positional arguments: 'image_pe' and 'point_embedding'