In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2 as cv
import torchvision.transforms as transforms
import clip
from transformers.models.clip import CLIPTokenizer

import pandas as pd
from utils import train_val_test_split

from mimic_dataset import MIMIC_DataSet
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from timm.models.vision_transformer import Block


In [2]:
class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding
    """
    def __init__(self, img_size=224, patch_size=16, in_c=3, embed_dim=768, norm_layer=None):
        super().__init__()
        img_size = (img_size, img_size)
        patch_size = (patch_size, patch_size)
        self.img_size = img_size
        self.patch_size = patch_size
        self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
        self.num_patches = self.grid_size[0] * self.grid_size[1]
        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."

        # flatten: [B, C, H, W] -> [B, C, HW]
        # transpose: [B, C, HW] -> [B, HW, C]
        x = self.proj(x).flatten(2).transpose(1, 2)
        x = self.norm(x)
        return x
    
def zero_padding(text_tensor, tar_dim, device=None):
    padding_size = tar_dim - text_tensor.shape[1]
    zero_tensor = torch.zeros((text_tensor.shape[0], padding_size), device=device)
    padded_tensor = torch.cat([text_tensor, zero_tensor], dim=1)
    return padded_tensor

In [3]:
class Data2Seq(nn.Module):

    def __init__(self,modality,dim):
        super().__init__()
        self.modality = modality
        self.embed_dim = dim
        if self.modality == 'image' or self.modality == 'infrared' or self.modality == 'x-ray':
            self.embed = PatchEmbed(embed_dim=self.embed_dim)
        elif self.modality == 'text':
            self.embed = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", truncation=True)

    def forward(self,data):
        if self.modality in ['image', 'text' ]:
            embeddings = self.embed(data)
        elif self.modality =='text':
            embeddings = self.embed(data)
            embeddings = zero_padding(text_tensor=embeddings, tar_dim = self.embed_dim)

        return embeddings

# Data

In [4]:
data_path = '/home/fe/baur/datasets/mimic-cxr-jpg-2.0.0-small/'

label_file = pd.read_csv(data_path + "mimic-cxr-2.0.0-chexpert.csv")

train, test_val = train_val_test_split(label_file, 0.2)
val, test = train_val_test_split(test_val, 0.5)

mean = 0.4992
std = 0.2600

train_transform = A.Compose([A.Resize(256, 256, always_apply=True),
                                 A.CenterCrop(224, 224, always_apply=True),
                                 A.Normalize(mean=mean, std=std),
                                 ToTensorV2()])

mimic_train = MIMIC_DataSet(data_path, train, train_transform, 'multilabel', 'PA', tokenize=False)
mimic_val = MIMIC_DataSet(data_path, train, train_transform, 'multilabel', 'PA', tokenize=False)


In [5]:
train_loader = DataLoader(dataset = mimic_train
                              , batch_size = 8
                              , shuffle=False
                              , num_workers=8)

In [6]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7f2283dc9010>

# Model

In [7]:

ckpt = torch.load('/home/fe/baur/Downloads/Meta-Transformer_base_patch16_encoder (1).pth')
encoder = nn.Sequential(*[
            Block(
                dim=768,
                num_heads=12,
                mlp_ratio=4.,
                qkv_bias=True,
                norm_layer=nn.LayerNorm,
                act_layer=nn.GELU
            )
            for i in range(12)])
encoder.load_state_dict(ckpt,strict=True)

<All keys matched successfully>

In [8]:
encoder

Sequential(
  (0): Block(
    (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (attn): Attention(
      (qkv): Linear(in_features=768, out_features=2304, bias=True)
      (q_norm): Identity()
      (k_norm): Identity()
      (attn_drop): Dropout(p=0.0, inplace=False)
      (proj): Linear(in_features=768, out_features=768, bias=True)
      (proj_drop): Dropout(p=0.0, inplace=False)
    )
    (ls1): Identity()
    (drop_path1): Identity()
    (norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (mlp): Mlp(
      (fc1): Linear(in_features=768, out_features=3072, bias=True)
      (act): GELU(approximate='none')
      (drop1): Dropout(p=0.0, inplace=False)
      (norm): Identity()
      (fc2): Linear(in_features=3072, out_features=768, bias=True)
      (drop2): Dropout(p=0.0, inplace=False)
    )
    (ls2): Identity()
    (drop_path2): Identity()
  )
  (1): Block(
    (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (attn): Attention(
      

# Image Embedding

In [19]:
auto_tokenizer_img = Data2Seq(modality='image',dim=768)

auto_tokenizer_img

Data2Seq(
  (embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
)

In [20]:
for x, y in train_loader:
    print(auto_tokenizer_img(x[0]).shape)

    break

torch.Size([8, 196, 768])


In [11]:
model, preprocess = clip.load('ViT-B/32', 'cuda')

In [12]:
model

CLIP(
  (visual): VisionTransformer(
    (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (transformer): Transformer(
      (resblocks): Sequential(
        (0): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): Sequential(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (gelu): QuickGELU()
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
        (1): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          

In [13]:
model.encode_image(x[0].to('cuda')).shape

torch.Size([8, 512])

# Text Embedding

In [14]:
x[1]

('                                 FINAL REPORT\n EXAMINATION:  CHEST (PA AND LAT)\n \n INDICATION:  ___F with new onset ascites  // eval for infection\n \n TECHNIQUE:  Chest PA and lateral\n \n COMPARISON:  None.\n \n FINDINGS: \n \n There is no focal consolidation, pleural effusion or pneumothorax.  Bilateral\n nodular opacities that most likely represent nipple shadows. The\n cardiomediastinal silhouette is normal.  Clips project over the left lung,\n potentially within the breast. The imaged upper abdomen is unremarkable.\n Chronic deformity of the posterior left sixth and seventh ribs are noted.\n \n IMPRESSION: \n \n No acute cardiopulmonary process.\n',
 '                                 FINAL REPORT\n EXAMINATION:  CHEST (PA AND LAT)\n \n INDICATION:  History: ___F with shortness of breath\n \n TECHNIQUE:  Chest PA and lateral\n \n COMPARISON:  ___\n \n FINDINGS: \n \n The cardiac, mediastinal and hilar contours are normal. Pulmonary vasculature\n is normal.  Lungs are clear. N

In [15]:
auto_tokenizer_text = Data2Seq(modality='text',dim=768)

auto_tokenizer_text

Data2Seq()

In [16]:
torch.tensor(auto_tokenizer_text(x[1][0])['input_ids']).shape

Token indices sequence length is longer than the specified maximum sequence length for this model (127 > 77). Running this sequence through the model will result in indexing errors


torch.Size([127])

In [17]:
zero_padding(torch.tensor(auto_tokenizer_text.embed(x[1])['input_ids'][0]).unsqueeze(dim=0), 768)

tensor([[49406.,  1755.,  2417., 20970.,   281., 10563.,   263.,  2217.,   537.,
         28437.,   264., 38539.,   281., 13530.,   325.,   593.,   686., 30527.,
         22720.,  2454.,  3502.,  9703.,   556., 14774.,  1782.,   697.,  2319.,
           281., 10563.,  2217.,   537., 26646., 14186.,   281.,  8906.,   269.,
         16529.,   281.,   997.,   533.,   871., 30934., 41111.,   267.,   926.,
         33948.,  1490.,  9364.,   541., 28714.,   617.,  4130.,  9203.,   269.,
         25599.,   578.,   691.,  1652.,   676.,   546.,  1480.,   682.,  1096.,
          5256.,  8406., 45987., 12971.,   269.,   518.,  6211.,  3693.,   570.,
          1761., 19178., 26149.,   533.,  5967.,   269., 16594.,  1965.,   962.,
           518.,  1823., 16271.,   267., 16508.,  4154.,   518.,  9475.,   269.,
           518.,  2316.,   538.,  7067.,   596.,  2164.,   576.,   533.,   569.,
         12404.,   269., 13677.,   561., 42290.,   539.,   518., 22881.,  2498.,
          1823., 12909.,   5

In [18]:
zero_padding(torch.tensor(auto_tokenizer_text.embed(x[1])['input_ids'][0]).unsqueeze(dim=0), 768).shape

torch.Size([1, 768])

In [19]:
def get_text_embeddings(text, tar_dim=768):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = clip.load('ViT-B/32', device)
    text_tensor = clip.tokenize(text, truncate=True)
    encoding = model.encode_text(text_tensor.to('cuda'))
    encoding = zero_padding(encoding, tar_dim, device)
    encoding = encoding.unsqueeze(dim=1)
    return encoding.to('cuda')

In [20]:
get_text_embeddings(x[1])

tensor([[[ 0.0035, -0.0186,  0.0605,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.1467,  0.0156,  0.0248,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.1428,  0.1348, -0.0100,  ...,  0.0000,  0.0000,  0.0000]],

        ...,

        [[ 0.0020,  0.0623,  0.0526,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0994, -0.1002,  0.1600,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0751,  0.1327, -0.2786,  ...,  0.0000,  0.0000,  0.0000]]],
       device='cuda:0', grad_fn=<UnsqueezeBackward0>)

In [21]:
get_text_embeddings(x[1]).shape

torch.Size([8, 1, 768])

# Forward Pass

In [22]:
encoder.to('cuda')

Sequential(
  (0): Block(
    (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (attn): Attention(
      (qkv): Linear(in_features=768, out_features=2304, bias=True)
      (q_norm): Identity()
      (k_norm): Identity()
      (attn_drop): Dropout(p=0.0, inplace=False)
      (proj): Linear(in_features=768, out_features=768, bias=True)
      (proj_drop): Dropout(p=0.0, inplace=False)
    )
    (ls1): Identity()
    (drop_path1): Identity()
    (norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (mlp): Mlp(
      (fc1): Linear(in_features=768, out_features=3072, bias=True)
      (act): GELU(approximate='none')
      (drop1): Dropout(p=0.0, inplace=False)
      (norm): Identity()
      (fc2): Linear(in_features=3072, out_features=768, bias=True)
      (drop2): Dropout(p=0.0, inplace=False)
    )
    (ls2): Identity()
    (drop_path2): Identity()
  )
  (1): Block(
    (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (attn): Attention(
      

In [23]:
# text
encoder(get_text_embeddings(x[1])).shape

torch.Size([8, 1, 768])

In [24]:
# image

encoder(auto_tokenizer_img(x[0].to('cuda'))).shape

RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

In [25]:
encoder.to('cuda')

Sequential(
  (0): Block(
    (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (attn): Attention(
      (qkv): Linear(in_features=768, out_features=2304, bias=True)
      (q_norm): Identity()
      (k_norm): Identity()
      (attn_drop): Dropout(p=0.0, inplace=False)
      (proj): Linear(in_features=768, out_features=768, bias=True)
      (proj_drop): Dropout(p=0.0, inplace=False)
    )
    (ls1): Identity()
    (drop_path1): Identity()
    (norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (mlp): Mlp(
      (fc1): Linear(in_features=768, out_features=3072, bias=True)
      (act): GELU(approximate='none')
      (drop1): Dropout(p=0.0, inplace=False)
      (norm): Identity()
      (fc2): Linear(in_features=3072, out_features=768, bias=True)
      (drop2): Dropout(p=0.0, inplace=False)
    )
    (ls2): Identity()
    (drop_path2): Identity()
  )
  (1): Block(
    (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (attn): Attention(
      

In [26]:
classification_head = nn.Linear(768, 14)
classification_head.to('cuda')
classification_head

Linear(in_features=768, out_features=14, bias=True)

In [27]:
classification_head(encoder(get_text_embeddings(x[1]))).squeeze().shape

torch.Size([8, 14])

In [28]:
classification_head(encoder(auto_tokenizer_img(x[0].to('cuda')))).shape



RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

# Meta Transformer Class

In [26]:
class Meta_Transformer(nn.Module):
    def __init__(self, num_classes, checkpoint_path):
        super().__init__()
        self.image_embedding = PatchEmbed().to('cuda')
        
        self.meta_encoder = load_meta_transformer(checkpoint_path).to('cuda')
        for param in self.meta_encoder.parameters():
                param.requires_grad = False
        
        self.global_avg_pooling = nn.AdaptiveAvgPool1d(1).to('cuda')
        self.classification_head = nn.Linear(768, num_classes).to('cuda')
        self.text_encoder, _ = clip.load('ViT-B/32')

    def forward(self, x):
        if isinstance(x, list):
            image, text = x[0], x[1]
        else:
            image = x
            text = None

        # Process the image
        image_embedding = self.image_embedding(image.to('cuda'))
        image_encoding = self.meta_encoder(image_embedding)
        image_encoding = self.global_avg_pooling(image_encoding.permute(0, 2, 1)).squeeze(dim=2)
        y_hat_img = self.classification_head(image_encoding)

        if text is not None:
            # Process the text
            text_embedding = get_text_embeddings(text=text, tar_dim=768, model=self.text_encoder)
            text_encoding = self.meta_encoder(text_embedding)
            y_hat_text = self.classification_head(text_encoding)
            return y_hat_img, y_hat_text.squeeze()

        return y_hat_img
    
class PatchEmbed(nn.Module):
    """
    2D Image to Patch Embedding
    """
    def __init__(self, img_size=224, patch_size=16, in_c=3, embed_dim=768, norm_layer=None):
        super().__init__()
        img_size = (img_size, img_size)
        patch_size = (patch_size, patch_size)
        self.img_size = img_size
        self.patch_size = patch_size
        self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
        self.num_patches = self.grid_size[0] * self.grid_size[1]
        self.proj = nn.Conv2d(in_c, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."

        # flatten: [B, C, H, W] -> [B, C, HW]
        # transpose: [B, C, HW] -> [B, HW, C]
        x = self.proj(x).flatten(2).transpose(1, 2)
        x = self.norm(x)
        return x    
    
def load_meta_transformer(checkpoint_path):
    ckpt = torch.load(checkpoint_path)
    encoder = nn.Sequential(*[
                Block(
                    dim=768,
                    num_heads=12,
                    mlp_ratio=4.,
                    qkv_bias=True,
                    norm_layer=nn.LayerNorm,
                    act_layer=nn.GELU
                )
                for i in range(12)])
    encoder.load_state_dict(ckpt,strict=True)
    print('Meta Transformer initilaized with pretrained weights.')
    return encoder

def get_text_embeddings(text, tar_dim=768, model=None):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    text_tensor = clip.tokenize(text, truncate=True)
    encoding = model.encode_text(text_tensor.to('cuda'))
    encoding = zero_padding(encoding, tar_dim, device)
    encoding = encoding.unsqueeze(dim=1)
    return encoding.to('cuda').detach()

def zero_padding(text_tensor, tar_dim, device=None):
    padding_size = tar_dim - text_tensor.shape[1]
    zero_tensor = torch.zeros((text_tensor.shape[0], padding_size), device=device)
    padded_tensor = torch.cat([text_tensor, zero_tensor], dim=1)
    return padded_tensor

In [27]:
for x, y in train_loader:
    print(x)
    print(y)
    break

[tensor([[[[-1.8597, -1.8295, -1.8144,  ..., -1.8597, -1.8597, -1.8597],
          [-1.7692, -1.7541, -1.7390,  ..., -1.7390, -1.7541, -1.7541],
          [-1.6485, -1.6636, -1.6938,  ..., -1.6938, -1.7239, -1.7088],
          ...,
          [-0.0045,  0.2670,  0.2821,  ..., -1.7692, -1.7541, -1.7541],
          [ 0.0257,  0.3274,  0.3274,  ..., -1.7692, -1.7541, -1.7541],
          [ 0.0257,  0.3575,  0.3877,  ..., -1.7692, -1.7692, -1.7541]],

         [[-1.8597, -1.8295, -1.8144,  ..., -1.8597, -1.8597, -1.8597],
          [-1.7692, -1.7541, -1.7390,  ..., -1.7390, -1.7541, -1.7541],
          [-1.6485, -1.6636, -1.6938,  ..., -1.6938, -1.7239, -1.7088],
          ...,
          [-0.0045,  0.2670,  0.2821,  ..., -1.7692, -1.7541, -1.7541],
          [ 0.0257,  0.3274,  0.3274,  ..., -1.7692, -1.7541, -1.7541],
          [ 0.0257,  0.3575,  0.3877,  ..., -1.7692, -1.7692, -1.7541]],

         [[-1.8597, -1.8295, -1.8144,  ..., -1.8597, -1.8597, -1.8597],
          [-1.7692, -1.7541, 

In [28]:
meta_transformer = Meta_Transformer(14, '/home/fe/baur/Downloads/Meta-Transformer_base_patch16_encoder (1).pth')

Meta Transformer initilaized with pretrained weights.


In [29]:
x

[tensor([[[[-1.8597, -1.8295, -1.8144,  ..., -1.8597, -1.8597, -1.8597],
           [-1.7692, -1.7541, -1.7390,  ..., -1.7390, -1.7541, -1.7541],
           [-1.6485, -1.6636, -1.6938,  ..., -1.6938, -1.7239, -1.7088],
           ...,
           [-0.0045,  0.2670,  0.2821,  ..., -1.7692, -1.7541, -1.7541],
           [ 0.0257,  0.3274,  0.3274,  ..., -1.7692, -1.7541, -1.7541],
           [ 0.0257,  0.3575,  0.3877,  ..., -1.7692, -1.7692, -1.7541]],
 
          [[-1.8597, -1.8295, -1.8144,  ..., -1.8597, -1.8597, -1.8597],
           [-1.7692, -1.7541, -1.7390,  ..., -1.7390, -1.7541, -1.7541],
           [-1.6485, -1.6636, -1.6938,  ..., -1.6938, -1.7239, -1.7088],
           ...,
           [-0.0045,  0.2670,  0.2821,  ..., -1.7692, -1.7541, -1.7541],
           [ 0.0257,  0.3274,  0.3274,  ..., -1.7692, -1.7541, -1.7541],
           [ 0.0257,  0.3575,  0.3877,  ..., -1.7692, -1.7692, -1.7541]],
 
          [[-1.8597, -1.8295, -1.8144,  ..., -1.8597, -1.8597, -1.8597],
           [-

In [30]:
a, b = meta_transformer(x)

In [31]:
a.shape

torch.Size([8, 14])

In [32]:
meta_transformer(x[0]).shape

torch.Size([8, 14])

In [33]:
b.shape

torch.Size([8, 14])

In [14]:
meta_transformer

Meta_Transformer(
  (image_embedding): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (meta_encoder): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop2):

In [15]:
a

tensor([[-1.2733, -0.1022,  1.7166,  1.5149,  0.9412,  0.1355,  1.5457, -1.8708,
         -0.4450, -0.0345,  0.0456, -0.8384,  0.1511, -2.0948],
        [-1.3504, -0.0524,  1.6573,  1.3680,  1.0730,  0.0771,  1.4828, -1.6735,
         -0.3572, -0.1226,  0.3301, -0.9064,  0.0337, -2.1048],
        [-0.9777, -0.2089,  1.5738,  1.4654,  1.0402, -0.3487,  1.6059, -1.5584,
         -0.2000,  0.1424,  0.0825, -0.5257, -0.1704, -2.0210],
        [-1.0008, -0.2901,  1.6252,  1.5551,  1.0797, -0.3242,  1.5591, -1.5426,
         -0.2182,  0.1646,  0.1738, -0.5412, -0.2003, -1.9446],
        [-1.0343, -0.2409,  1.7178,  1.6498,  1.0568, -0.3487,  1.6970, -1.6052,
         -0.2835,  0.1888,  0.1938, -0.5744, -0.1763, -1.9482],
        [-0.8761, -0.3927,  1.4990,  1.4338,  1.0742, -0.4382,  1.4491, -1.4982,
         -0.1571,  0.1150,  0.1875, -0.4960, -0.1933, -1.9997],
        [-1.2394, -0.3136,  1.6743,  1.5161,  1.0368, -0.0418,  1.5208, -1.8409,
         -0.2033,  0.2230,  0.3110, -0.6427,  0.1

In [16]:
b

tensor([[-1.7188, -0.4577,  1.6785,  0.8650,  0.9708,  0.8193,  1.2848, -2.9963,
         -0.7747, -0.9468,  0.2731, -1.8168, -0.3471, -2.3613],
        [-1.7156, -0.4407,  1.7851,  0.8381,  1.0102,  0.7418,  1.2789, -2.9622,
         -0.8063, -0.9077,  0.2275, -1.8123, -0.3934, -2.3239],
        [-1.8598, -0.6031,  1.7094,  0.6077,  1.2172,  0.7321,  1.1970, -3.0493,
         -0.6644, -0.6273,  0.3871, -1.5912, -0.3445, -2.4219],
        [-1.7303, -0.5009,  1.8373,  0.6384,  1.0309,  0.7276,  1.2096, -3.0642,
         -0.7292, -0.7264,  0.3220, -1.7644, -0.2355, -2.4804],
        [-1.7863, -0.4844,  1.7237,  0.7541,  1.0498,  0.7168,  1.1746, -2.9206,
         -0.6966, -0.7722,  0.3209, -1.7049, -0.4518, -2.4405],
        [-1.8125, -0.5053,  1.7951,  0.7229,  0.9733,  0.9157,  1.2940, -3.0284,
         -0.7660, -0.8660,  0.1919, -1.7711, -0.3461, -2.3082],
        [-1.8208, -0.4244,  1.7446,  0.7321,  1.0113,  0.7784,  1.0949, -3.0116,
         -0.8293, -0.8676,  0.1778, -1.7494, -0.4

In [17]:
import torch.nn as nn


In [20]:
sigmoid = nn.Sigmoid()

In [21]:
sigmoid(a)

tensor([[0.2187, 0.4745, 0.8477, 0.8198, 0.7193, 0.5338, 0.8243, 0.1335, 0.3905,
         0.4914, 0.5114, 0.3019, 0.5377, 0.1096],
        [0.2058, 0.4869, 0.8399, 0.7971, 0.7452, 0.5193, 0.8150, 0.1580, 0.4116,
         0.4694, 0.5818, 0.2877, 0.5084, 0.1086],
        [0.2733, 0.4480, 0.8283, 0.8124, 0.7389, 0.4137, 0.8328, 0.1739, 0.4502,
         0.5355, 0.5206, 0.3715, 0.4575, 0.1170],
        [0.2688, 0.4280, 0.8355, 0.8256, 0.7464, 0.4197, 0.8262, 0.1762, 0.4457,
         0.5411, 0.5433, 0.3679, 0.4501, 0.1251],
        [0.2622, 0.4401, 0.8478, 0.8389, 0.7421, 0.4137, 0.8451, 0.1673, 0.4296,
         0.5470, 0.5483, 0.3602, 0.4560, 0.1248],
        [0.2940, 0.4031, 0.8174, 0.8075, 0.7454, 0.3922, 0.8099, 0.1827, 0.4608,
         0.5287, 0.5467, 0.3785, 0.4518, 0.1192],
        [0.2245, 0.4222, 0.8421, 0.8200, 0.7382, 0.4896, 0.8207, 0.1369, 0.4493,
         0.5555, 0.5771, 0.3446, 0.5332, 0.1251],
        [0.3016, 0.3989, 0.8139, 0.8157, 0.7528, 0.3732, 0.8177, 0.1908, 0.4623,
  

In [22]:
sigmoid(b)

tensor([[0.1520, 0.3875, 0.8427, 0.7037, 0.7253, 0.6941, 0.7833, 0.0476, 0.3155,
         0.2795, 0.5679, 0.1398, 0.4141, 0.0862],
        [0.1524, 0.3916, 0.8563, 0.6981, 0.7331, 0.6774, 0.7823, 0.0492, 0.3087,
         0.2875, 0.5566, 0.1404, 0.4029, 0.0892],
        [0.1347, 0.3536, 0.8468, 0.6474, 0.7716, 0.6753, 0.7680, 0.0452, 0.3398,
         0.3481, 0.5956, 0.1692, 0.4147, 0.0815],
        [0.1505, 0.3773, 0.8626, 0.6544, 0.7371, 0.6743, 0.7702, 0.0446, 0.3254,
         0.3260, 0.5798, 0.1462, 0.4414, 0.0772],
        [0.1435, 0.3812, 0.8486, 0.6801, 0.7407, 0.6719, 0.7640, 0.0511, 0.3326,
         0.3160, 0.5795, 0.1538, 0.3889, 0.0801],
        [0.1403, 0.3763, 0.8576, 0.6732, 0.7258, 0.7142, 0.7848, 0.0462, 0.3173,
         0.2961, 0.5478, 0.1454, 0.4143, 0.0904],
        [0.1393, 0.3955, 0.8513, 0.6753, 0.7333, 0.6853, 0.7493, 0.0469, 0.3038,
         0.2957, 0.5443, 0.1481, 0.3992, 0.0841],
        [0.1429, 0.3899, 0.8669, 0.7066, 0.7171, 0.7104, 0.7879, 0.0485, 0.2957,
  

In [24]:
(sigmoid(a) + sigmoid(b))/2

tensor([[0.1854, 0.4310, 0.8452, 0.7617, 0.7223, 0.6140, 0.8038, 0.0905, 0.3530,
         0.3855, 0.5396, 0.2208, 0.4759, 0.0979],
        [0.1791, 0.4392, 0.8481, 0.7476, 0.7391, 0.5983, 0.7986, 0.1036, 0.3602,
         0.3784, 0.5692, 0.2140, 0.4557, 0.0989],
        [0.2040, 0.4008, 0.8375, 0.7299, 0.7552, 0.5445, 0.8004, 0.1096, 0.3950,
         0.4418, 0.5581, 0.2704, 0.4361, 0.0993],
        [0.2097, 0.4027, 0.8491, 0.7400, 0.7418, 0.5470, 0.7982, 0.1104, 0.3855,
         0.4335, 0.5616, 0.2571, 0.4457, 0.1012],
        [0.2029, 0.4106, 0.8482, 0.7595, 0.7414, 0.5428, 0.8046, 0.1092, 0.3811,
         0.4315, 0.5639, 0.2570, 0.4225, 0.1024],
        [0.2172, 0.3897, 0.8375, 0.7404, 0.7356, 0.5532, 0.7973, 0.1144, 0.3891,
         0.4124, 0.5473, 0.2619, 0.4331, 0.1048],
        [0.1819, 0.4089, 0.8467, 0.7476, 0.7358, 0.5874, 0.7850, 0.0919, 0.3766,
         0.4256, 0.5607, 0.2464, 0.4662, 0.1046],
        [0.2222, 0.3944, 0.8404, 0.7611, 0.7350, 0.5418, 0.8028, 0.1197, 0.3790,
  

In [26]:
import timm

# Get a list of all available models
model_names = timm.list_models(pretrained=True)

# Filter the model names to only include DenseNet models
densenet_models = [name for name in model_names if 'densenet' in name.lower()]

# Print the list of DenseNet models
for model in densenet_models:
    print(model)

densenet121.ra_in1k
densenet121.tv_in1k
densenet161.tv_in1k
densenet169.tv_in1k
densenet201.tv_in1k
densenetblur121d.ra_in1k
