In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from model_module.model.backbones.efficientnet import EfficientNetExtractor
from model_module.model.backbones.aggregator import Agg16Agg4
from model_module.model.segmentation.semantic_feature_extractor import SemanticFeatureExtractor
from model_module.model.segmentation.semantic_head import SemanticHead
from model_module.model.fullmodel import FullModel

### backbone

In [2]:
# Backbone configuration: the EfficientNet endpoints to use and a list of channel widths.
layers = ["reduction_2", "reduction_3", "reduction_4", "reduction_5"]
extra_layers = ["reduction_6", "GAP"]
# Six widths for the 4 + 2 names above; presumably one per entry, in order —
# TODO confirm against EfficientNetExtractor.
chs = [32, 56, 160, 448, 448, 448]

# Feature extractor configured for 900x1600 inputs. The recorded cell output
# shows pretrained efficientnet-b4 weights being loaded when this cell runs.
effnet = EfficientNetExtractor(
    layers=layers,
    extra_layers=extra_layers,
    chs=chs,
    reduce_dim=64,
    image_height=900,
    image_width=1600,
)

# Wrap the extractor in the aggregator; the forward-pass cell below unpacks
# its result as (agg4, agg16).
backbone = Agg16Agg4(effnet, 64, chs)

Loaded pretrained weights for efficientnet-b4


In [3]:
# Smoke-test the backbone on a random batch of two 3x900x1600 tensors.
inputs = torch.randn(2, 3, 900, 1600)
agg4, agg16 = backbone(inputs)
# Print both feature-map shapes, one per line.
print(agg4.shape, agg16.shape, sep="\n")

torch.Size([2, 64, 225, 400])
torch.Size([2, 64, 56, 100])


### matching

    TODO (resolved): implement matching - decide between a cvt-like design and TransformerDecoderLayer

    Decision: TransformerDecoderLayer is used

    TODO q positional embedding

    TODO k positional embedding

In [4]:
# SemanticFeatureExtractor is imported in the notebook's top import cell.
# Run the semantic feature extractor on the agg16 backbone features and
# display the shape of what it returns.
seg_extractor = SemanticFeatureExtractor()
out = seg_extractor(agg16)
out.shape

torch.Size([2, 64, 128, 128])

In [15]:
# Depth samples whose spacing grows linearly with the sample index:
#   coords_d[i] = depth_start + bin_size * i * (i + 1)
# bin_size is chosen so that i == depth_num would land exactly on depth_max,
# hence the depth_num sampled values lie in [depth_start, depth_max).
depth_start, depth_num = 1, 64
depth_max = 61.2  # upper end of the depth range (previously an inline literal)
index = torch.arange(start=0, end=depth_num, step=1).float()
index_1 = index + 1
bin_size = (depth_max - depth_start) / (depth_num * (1 + depth_num))
coords_d = depth_start + bin_size * index * index_1

print(index)
print(bin_size)
print(coords_d)

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
        14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27.,
        28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
        42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54., 55.,
        56., 57., 58., 59., 60., 61., 62., 63.])
0.014471153846153847
tensor([ 1.0000,  1.0289,  1.0868,  1.1737,  1.2894,  1.4341,  1.6078,  1.8104,
         2.0419,  2.3024,  2.5918,  2.9102,  3.2575,  3.6337,  4.0389,  4.4731,
         4.9362,  5.4282,  5.9491,  6.4990,  7.0779,  7.6857,  8.3224,  8.9881,
         9.6827, 10.4062, 11.1587, 11.9402, 12.7506, 13.5899, 14.4582, 15.3554,
        16.2815, 17.2366, 18.2207, 19.2337, 20.2756, 21.3464, 22.4463, 23.5750,
        24.7327, 25.9193, 27.1349, 28.3794, 29.6529, 30.9553, 32.2866, 33.6469,
        35.0362, 36.4543, 37.9014, 39.3775, 40.8825, 42.4164, 43.9793, 45.5712,
        47.1919, 48.8416, 50.5203, 52.2279, 53.9644, 55.72

In [10]:
# Uniform variant: depth_num equally spaced depths starting at depth_start,
# with step (61.2 - depth_start) / depth_num. Reuses depth_start and depth_num
# from the cell above and rebinds index, bin_size and coords_d.
index = torch.arange(depth_num).float()
bin_size = (61.2 - depth_start) / depth_num
coords_d = depth_start + index * bin_size

In [13]:
# Show the index vector, the bin size and the resulting depths, one per line.
print(index, bin_size, coords_d, sep="\n")

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
        14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27.,
        28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41.,
        42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54., 55.,
        56., 57., 58., 59., 60., 61., 62., 63.])
0.940625
tensor([ 1.0000,  1.9406,  2.8812,  3.8219,  4.7625,  5.7031,  6.6438,  7.5844,
         8.5250,  9.4656, 10.4062, 11.3469, 12.2875, 13.2281, 14.1687, 15.1094,
        16.0500, 16.9906, 17.9312, 18.8719, 19.8125, 20.7531, 21.6938, 22.6344,
        23.5750, 24.5156, 25.4563, 26.3969, 27.3375, 28.2781, 29.2188, 30.1594,
        31.1000, 32.0406, 32.9813, 33.9219, 34.8625, 35.8031, 36.7438, 37.6844,
        38.6250, 39.5656, 40.5063, 41.4469, 42.3875, 43.3281, 44.2687, 45.2094,
        46.1500, 47.0906, 48.0312, 48.9719, 49.9125, 50.8531, 51.7938, 52.7344,
        53.6750, 54.6156, 55.5563, 56.4969, 57.4375, 58.3781, 59.3188,

### segmentation head

In [5]:
# SemanticHead is imported in the notebook's top import cell.
# Store the head output under its own name instead of rebinding `out`: the
# cell then stays re-runnable, because `out` still holds the extractor
# features that seg_head takes as input.
seg_head = SemanticHead()
head_out = seg_head(out)
head_out.shape

torch.Size([2, 3, 256, 256])

### FullModel

In [9]:
# FullModel is imported in the notebook's top import cell.
# Assemble the end-to-end model from the three components built above and
# create a fresh random batch of two 3x900x1600 tensors for it.
model = FullModel(backbone, seg_extractor, seg_head)
inputs = torch.randn((2, 3, 900, 1600))


In [11]:
# List the name of every parameter registered on the full model.
# named_parameters() already yields (name, parameter) pairs, so no
# intermediate dict is needed and the parameter itself is ignored.
for name, _ in model.named_parameters():
    print(name)

backbone.efficientnet.layers.0.0.weight
backbone.efficientnet.layers.0.1.weight
backbone.efficientnet.layers.0.1.bias
backbone.efficientnet.layers.1.0._depthwise_conv.weight
backbone.efficientnet.layers.1.0._bn1.weight
backbone.efficientnet.layers.1.0._bn1.bias
backbone.efficientnet.layers.1.0._se_reduce.weight
backbone.efficientnet.layers.1.0._se_reduce.bias
backbone.efficientnet.layers.1.0._se_expand.weight
backbone.efficientnet.layers.1.0._se_expand.bias
backbone.efficientnet.layers.1.0._project_conv.weight
backbone.efficientnet.layers.1.0._bn2.weight
backbone.efficientnet.layers.1.0._bn2.bias
backbone.efficientnet.layers.1.1._depthwise_conv.weight
backbone.efficientnet.layers.1.1._bn1.weight
backbone.efficientnet.layers.1.1._bn1.bias
backbone.efficientnet.layers.1.1._se_reduce.weight
backbone.efficientnet.layers.1.1._se_reduce.bias
backbone.efficientnet.layers.1.1._se_expand.weight
backbone.efficientnet.layers.1.1._se_expand.bias
backbone.efficientnet.layers.1.1._project_conv.weigh

In [7]:
# Forward the random batch through the full model.
# NOTE(review): this cell's recorded execution count (7) is lower than that of
# the cell defining `model` (9) — re-run top to bottom to confirm the result.
seg_maps = model(inputs)

In [8]:
# Display the shape of the full model's output.
seg_maps.shape

torch.Size([2, 3, 256, 256])