In [1]:
from torchvision import models
from torchvision.models import (
    ResNet18_Weights, 
    MobileNet_V3_Large_Weights, 
    EfficientNet_B2_Weights, 
    DenseNet121_Weights, 
    MNASNet1_3_Weights, 
    RegNet_X_1_6GF_Weights,
    RegNet_Y_3_2GF_Weights,
)
from torchinfo import summary

In [2]:
# get the model
model = models.regnet_y_3_2gf(weights=RegNet_Y_3_2GF_Weights.IMAGENET1K_V2)

batch_size = 16
channels = 3
img_width = 224
img_height = 224

# Print a summary using torchinfo (uncomment for actual output)
summary(model=model, 
	input_size=(batch_size, channels, img_width, img_height), # make sure this is "input_size", not "input_shape"
	# col_names=["input_size"], # uncomment for smaller output
	col_names=["input_size", "output_size", "num_params", "trainable"],
	col_width=20,
	row_settings=["var_names"]
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
RegNet (RegNet)                                              [16, 3, 224, 224]    [16, 1000]           --                   True
├─SimpleStemIN (stem)                                        [16, 3, 224, 224]    [16, 32, 112, 112]   --                   True
│    └─Conv2d (0)                                            [16, 3, 224, 224]    [16, 32, 112, 112]   864                  True
│    └─BatchNorm2d (1)                                       [16, 32, 112, 112]   [16, 32, 112, 112]   64                   True
│    └─ReLU (2)                                              [16, 32, 112, 112]   [16, 32, 112, 112]   --                   --
├─Sequential (trunk_output)                                  [16, 32, 112, 112]   [16, 1512, 7, 7]     --                   True
│    └─AnyStage (block1)                                     [16, 32, 112, 112]   [16, 72, 56,

In [3]:
# get the model
model = models.regnet_x_1_6gf(weights=RegNet_X_1_6GF_Weights.IMAGENET1K_V1)

batch_size = 16
channels = 3
img_width = 224
img_height = 224

# Print a summary using torchinfo (uncomment for actual output)
summary(model=model, 
	input_size=(batch_size, channels, img_width, img_height), # make sure this is "input_size", not "input_shape"
	# col_names=["input_size"], # uncomment for smaller output
	col_names=["input_size", "output_size", "num_params", "trainable"],
	col_width=20,
	row_settings=["var_names"]
)

Layer (type (var_name))                            Input Shape          Output Shape         Param #              Trainable
RegNet (RegNet)                                    [16, 3, 224, 224]    [16, 1000]           --                   True
├─SimpleStemIN (stem)                              [16, 3, 224, 224]    [16, 32, 112, 112]   --                   True
│    └─Conv2d (0)                                  [16, 3, 224, 224]    [16, 32, 112, 112]   864                  True
│    └─BatchNorm2d (1)                             [16, 32, 112, 112]   [16, 32, 112, 112]   64                   True
│    └─ReLU (2)                                    [16, 32, 112, 112]   [16, 32, 112, 112]   --                   --
├─Sequential (trunk_output)                        [16, 32, 112, 112]   [16, 912, 7, 7]      --                   True
│    └─AnyStage (block1)                           [16, 32, 112, 112]   [16, 72, 56, 56]     --                   True
│    │    └─ResBottleneckBlock (block1-0)    

In [4]:
# get the model
model = models.mnasnet1_3(weights=MNASNet1_3_Weights.IMAGENET1K_V1)

batch_size = 16
channels = 3
img_width = 224
img_height = 224

# Print a summary using torchinfo (uncomment for actual output)
summary(model=model, 
	input_size=(batch_size, channels, img_width, img_height), # make sure this is "input_size", not "input_shape"
	# col_names=["input_size"], # uncomment for smaller output
	col_names=["input_size", "output_size", "num_params", "trainable"],
	col_width=20,
	row_settings=["var_names"]
)

Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
MNASNet (MNASNet)                             [16, 3, 224, 224]    [16, 1000]           --                   True
├─Sequential (layers)                         [16, 3, 224, 224]    [16, 1280, 7, 7]     --                   True
│    └─Conv2d (0)                             [16, 3, 224, 224]    [16, 40, 112, 112]   1,080                True
│    └─BatchNorm2d (1)                        [16, 40, 112, 112]   [16, 40, 112, 112]   80                   True
│    └─ReLU (2)                               [16, 40, 112, 112]   [16, 40, 112, 112]   --                   --
│    └─Conv2d (3)                             [16, 40, 112, 112]   [16, 40, 112, 112]   360                  True
│    └─BatchNorm2d (4)                        [16, 40, 112, 112]   [16, 40, 112, 112]   80                   True
│    └─ReLU (5)                               [16, 40, 112, 112]   [16, 40, 112, 112]

In [5]:
# get the model
model = models.densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)

batch_size = 16
channels = 3
img_width = 224
img_height = 224

# Print a summary using torchinfo (uncomment for actual output)
summary(model=model, 
	input_size=(batch_size, channels, img_width, img_height), # make sure this is "input_size", not "input_shape"
	# col_names=["input_size"], # uncomment for smaller output
	col_names=["input_size", "output_size", "num_params", "trainable"],
	col_width=20,
	row_settings=["var_names"]
)

Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
DenseNet (DenseNet)                           [16, 3, 224, 224]    [16, 1000]           --                   True
├─Sequential (features)                       [16, 3, 224, 224]    [16, 1024, 7, 7]     --                   True
│    └─Conv2d (conv0)                         [16, 3, 224, 224]    [16, 64, 112, 112]   9,408                True
│    └─BatchNorm2d (norm0)                    [16, 64, 112, 112]   [16, 64, 112, 112]   128                  True
│    └─ReLU (relu0)                           [16, 64, 112, 112]   [16, 64, 112, 112]   --                   --
│    └─MaxPool2d (pool0)                      [16, 64, 112, 112]   [16, 64, 56, 56]     --                   --
│    └─_DenseBlock (denseblock1)              [16, 64, 56, 56]     [16, 256, 56, 56]    --                   True
│    │    └─_DenseLayer (denselayer1)         [16, 64, 56, 56]     [16, 32, 56, 56]    

In [6]:
# get the model
model = models.efficientnet_b2(weights=EfficientNet_B2_Weights.IMAGENET1K_V1)

batch_size = 16
channels = 3
img_width = 224
img_height = 224

# Print a summary using torchinfo (uncomment for actual output)
summary(model=model, 
	input_size=(batch_size, channels, img_width, img_height), # make sure this is "input_size", not "input_shape"
	# col_names=["input_size"], # uncomment for smaller output
	col_names=["input_size", "output_size", "num_params", "trainable"],
	col_width=20,
	row_settings=["var_names"]
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [16, 3, 224, 224]    [16, 1000]           --                   True
├─Sequential (features)                                      [16, 3, 224, 224]    [16, 1408, 7, 7]     --                   True
│    └─Conv2dNormActivation (0)                              [16, 3, 224, 224]    [16, 32, 112, 112]   --                   True
│    │    └─Conv2d (0)                                       [16, 3, 224, 224]    [16, 32, 112, 112]   864                  True
│    │    └─BatchNorm2d (1)                                  [16, 32, 112, 112]   [16, 32, 112, 112]   64                   True
│    │    └─SiLU (2)                                         [16, 32, 112, 112]   [16, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [16, 32, 112, 112]   [16, 16, 112

In [7]:
# get the model
model = models.mobilenet_v3_large(weights=MobileNet_V3_Large_Weights.IMAGENET1K_V2)

batch_size = 16
channels = 3
img_width = 224
img_height = 224

# Print a summary using torchinfo (uncomment for actual output)
summary(model=model, 
	input_size=(batch_size, channels, img_width, img_height), # make sure this is "input_size", not "input_shape"
	# col_names=["input_size"], # uncomment for smaller output
	col_names=["input_size", "output_size", "num_params", "trainable"],
	col_width=20,
	row_settings=["var_names"]
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
MobileNetV3 (MobileNetV3)                                    [16, 3, 224, 224]    [16, 1000]           --                   True
├─Sequential (features)                                      [16, 3, 224, 224]    [16, 960, 7, 7]      --                   True
│    └─Conv2dNormActivation (0)                              [16, 3, 224, 224]    [16, 16, 112, 112]   --                   True
│    │    └─Conv2d (0)                                       [16, 3, 224, 224]    [16, 16, 112, 112]   432                  True
│    │    └─BatchNorm2d (1)                                  [16, 16, 112, 112]   [16, 16, 112, 112]   32                   True
│    │    └─Hardswish (2)                                    [16, 16, 112, 112]   [16, 16, 112, 112]   --                   --
│    └─InvertedResidual (1)                                  [16, 16, 112, 112]   [16, 16, 112

In [8]:
# get the model
model = models.resnet18(weights=ResNet18_Weights.DEFAULT)

batch_size = 16
channels = 3
img_width = 224
img_height = 224

# Print a summary using torchinfo (uncomment for actual output)
summary(model=model, 
	input_size=(batch_size, channels, img_width, img_height), # make sure this is "input_size", not "input_shape"
	# col_names=["input_size"], # uncomment for smaller output
	col_names=["input_size", "output_size", "num_params", "trainable"],
	col_width=20,
	row_settings=["var_names"]
)

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
ResNet (ResNet)                          [16, 3, 224, 224]    [16, 1000]           --                   True
├─Conv2d (conv1)                         [16, 3, 224, 224]    [16, 64, 112, 112]   9,408                True
├─BatchNorm2d (bn1)                      [16, 64, 112, 112]   [16, 64, 112, 112]   128                  True
├─ReLU (relu)                            [16, 64, 112, 112]   [16, 64, 112, 112]   --                   --
├─MaxPool2d (maxpool)                    [16, 64, 112, 112]   [16, 64, 56, 56]     --                   --
├─Sequential (layer1)                    [16, 64, 56, 56]     [16, 64, 56, 56]     --                   True
│    └─BasicBlock (0)                    [16, 64, 56, 56]     [16, 64, 56, 56]     --                   True
│    │    └─Conv2d (conv1)               [16, 64, 56, 56]     [16, 64, 56, 56]     36,864               True
│    │    └─BatchN

## Trying out the LSTM Model (from PyTorch Tutorial)

Link: [Sequence Models and Long Short-Term Memory Networks](https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html)

In [9]:
import torch
import torch.nn.functional as F

class LSTMTagger(torch.nn.Module):

	def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
		super(LSTMTagger, self).__init__()
		self.hidden_dim = hidden_dim

		self.word_embeddings = torch.nn.Embedding(vocab_size, embedding_dim)

		# The LSTM takes word embeddings as inputs, and outputs hidden states
		# with dimensionality hidden_dim.
		self.lstm = torch.nn.LSTM(embedding_dim * (vocab_size - 1), hidden_dim)

		# The linear layer that maps from hidden state space to tag space
		self.hidden2tag = torch.nn.Linear(hidden_dim, tagset_size)

	def forward(self, sentence):
		embeds = self.word_embeddings(sentence)
		print(embeds.shape) # batch size of 10, vocab size of 30, embedding dim of 20
		print(embeds.view(len(sentence), -1).shape)

		lstm_out, _ = self.lstm(embeds.view(len(sentence), -1))
		print("got here!")
		print(lstm_out.shape)
		print(lstm_out.view(len(sentence), -1).shape)

		tag_space = self.hidden2tag(lstm_out)
		tag_scores = F.log_softmax(tag_space, dim=1)
		return tag_scores

input_to_model = torch.randint(low=0, high=30, size=(10,30))
# print(input_to_model)
model = LSTMTagger(
	embedding_dim=20,
	hidden_dim=100,
	vocab_size=30+1,
	tagset_size=2,
)
outputs = model(input_to_model)

torch.Size([10, 30, 20])
torch.Size([10, 600])
got here!
torch.Size([10, 100])
torch.Size([10, 100])
