## Imports

In [3]:
# `!export ...` only sets the variable inside a throwaway subshell, so the
# kernel process (and therefore torch) never sees it. `%env` writes to the
# kernel's own environment instead.
# Keep this cell above `import torch` so the variable is set before CUDA is used.
%env CUDA_VISIBLE_DEVICES=0,1

In [4]:
import torch

In [5]:
# Execute the shared helper script and load its top-level names into this notebook.
# Presumably the source of `num_trainable_parameters` used in the CNN section -- TODO confirm.
%run ../utils/common.py

## CNN

In [32]:
# Execute the CNN classifier scripts so their top-level names are available here.
# Presumably they provide TransfusionCBRCNN, Resnet50CNN and Densenet121CNN
# respectively (all three are instantiated in the cells below) -- TODO confirm.
%run classification/transfusion.py
%run classification/resnet.py
%run classification/densenet.py

In [12]:
# Placeholder class names 'disease0' .. 'disease13' (14 labels in total).
labels = list(map('disease{}'.format, range(14)))

In [13]:
# DenseNet-121 variant: build it for the placeholder labels and show the value
# reported by num_trainable_parameters.
# NOTE: `cnn` is rebound by the next two model cells, so this instance does not survive.
cnn = Densenet121CNN(
    labels,
    multilabel=True,
)
num_trainable_parameters(cnn)

7993206

In [15]:
# ResNet-50 variant: build it for the placeholder labels and show the value
# reported by num_trainable_parameters.
# NOTE: this rebinds `cnn`, and the next model cell rebinds it again.
cnn = Resnet50CNN(
    labels,
    multilabel=True,
)
num_trainable_parameters(cnn)

25585718

In [52]:
# Transfusion CBR variant ('tiny' configuration, 3 input channels): build it
# and show the value reported by num_trainable_parameters.
# In top-to-bottom order this is the last rebinding of `cnn`, i.e. the model
# the later cells operate on.
cnn = TransfusionCBRCNN(
    labels,
    multilabel=True,
    name='tiny',
    n_channels=3,
)
num_trainable_parameters(cnn)

4315662

In [53]:
# Display the repr (module tree) of the most recently built `cnn`.
cnn

TransfusionCBRCNN(
  (conv): Sequential(
    (0): Conv2d(3, 64, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(5, 5), stride=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(256, 512, kernel_size=(5, 5), stride=(1, 1))
    (13): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): ReLU()
  )
  (global_pool): Sequential(
    (0

In [54]:
# Dummy batch of random 3-channel images, used only to check tensor shapes.
batch_size = 4
h = w = 512

# Build the batch from `batch_size` instead of repeating the literal 4, so that
# changing the variable above actually changes the tensor.
images = torch.rand(batch_size, 3, h, w)
images.size()

torch.Size([4, 3, 512, 512])

In [56]:
# Push the dummy batch through the model's `conv` sub-module only, then
# inspect the shape of the resulting feature tensor.
features = cnn.conv(images)
features.shape

torch.Size([4, 512, 55, 55])

## Decoder

In [21]:
# Execute the decoder script so its top-level names are available here.
# Presumably this is where `LSTMDecoder` (instantiated below) is defined -- TODO confirm.
%run report_generation/decoder_lstm.py

In [22]:
# Small sizes for a quick shape check of the decoder.
vocab_size, embedding_size, hidden_size = 1000, 200, 100

decoder = LSTMDecoder(
    vocab_size,
    embedding_size,
    hidden_size,
)
decoder

LSTMDecoder(
  (embeddings_table): Embedding(1000, 200, padding_idx=0)
  (lstm_cell): LSTMCell(200, 100)
  (W_vocab): Linear(in_features=100, out_features=1000, bias=True)
)

In [23]:
# Random tensor of shape (batch_size, hidden_size); the next cell passes it to
# the decoder as its first argument.
batch_size, hidden_size = 4, 100

initial_state = torch.rand((batch_size, hidden_size))
initial_state.shape

torch.Size([4, 100])

In [24]:
# Call the decoder on the random initial state; only the first element of
# what it returns is kept as `words`.
# The second argument (10) is presumably the number of decoding steps -- the
# printed shape has 10 on axis 1 -- TODO confirm against LSTMDecoder.
outputs = decoder(initial_state, 10)
words = outputs[0]
words.shape

torch.Size([4, 10, 1000])

## Encoder-Decoder

In [44]:
# Execute the encoder-decoder script so its top-level names are available here.
# Presumably this is where `CNN2Seq` (instantiated below) is defined -- TODO confirm.
%run report_generation/cnn_to_seq.py

In [45]:
# Combine the current `cnn` and `decoder` objects into one CNN2Seq model.
# NOTE(review): `cnn` is whichever model was bound last in the kernel. The
# recorded execution counts show this cell (In [45]) ran before the
# TransfusionCBRCNN cell (In [52]) -- re-run top to bottom to confirm which CNN is used.
model = CNN2Seq(cnn, decoder)
# (Evaluate `model` on a line of its own to print the combined module tree.)

In [46]:
# Run the combined model on the dummy image batch. The unpacking requires the
# result to contain exactly one element, which is bound to `words`.
(words,) = model(images, 10)
words.shape

torch.Size([4, 10, 1000])