In [1]:
import torch
from transformers import AutoModel, AutoTokenizer

In [3]:
import torch
import torch.nn as nn
import requests
from PIL import Image

import warnings
# Ignore specific UserWarnings related to max_length in transformers
warnings.filterwarnings("ignore", 
    message=".*Using the model-agnostic default `max_length`.*")

class DummyModel(nn.Module):
  """
  Toy network used to inspect parameter dtypes: a token embedding,
  two (linear -> layer norm) blocks, and a final linear head.
  Every layer has width 2 and the embedding table has 2 entries.
  """
  def __init__(self):
    super().__init__()

    # Reseed torch's global RNG so every instance is initialised from
    # the same RNG state.
    torch.manual_seed(123)

    vocab_size = 2
    width = 2

    self.token_embedding = nn.Embedding(vocab_size, width)

    # Block 1
    self.linear_1 = nn.Linear(width, width)
    self.layernorm_1 = nn.LayerNorm(width)

    # Block 2
    self.linear_2 = nn.Linear(width, width)
    self.layernorm_2 = nn.LayerNorm(width)

    self.head = nn.Linear(width, width)

  def forward(self, x):
    """Embed the token ids in `x`, apply both blocks in order, and return the head's logits."""
    blocks = (
      (self.linear_1, self.layernorm_1),
      (self.linear_2, self.layernorm_2),
    )

    out = self.token_embedding(x)
    for linear, norm in blocks:
      out = norm(linear(out))

    return self.head(out)


def get_generation(model, processor, image, dtype):
  """Run `model.generate` on a processed image and return the decoded text.

  The image is passed to `processor` with `return_tensors="pt"`, the result
  is moved with `.to(dtype)`, and its entries are unpacked as keyword
  arguments to `model.generate`. Only the first generated sequence is
  decoded, with special tokens skipped.

  NOTE(review): presumably `model` and `processor` are a Hugging Face
  model/processor pair -- confirm against callers.
  """
  encoded = processor(image, return_tensors="pt")
  encoded = encoded.to(dtype)
  generated = model.generate(**encoded)
  first_sequence = generated[0]
  return processor.decode(first_sequence, skip_special_tokens=True)


def load_image(img_url, timeout=30):
    """Download the image at `img_url` and return it as an RGB PIL image.

    Args:
        img_url: URL of the image to fetch.
        timeout: Seconds passed to `requests.get` as its `timeout`
            argument. Without one, a stalled server would block the
            notebook indefinitely.

    Returns:
        The downloaded image converted to RGB mode.

    Raises:
        requests.exceptions.HTTPError: if the server answers with an
            error status (raised by `raise_for_status`).
        requests.exceptions.Timeout: if the server does not respond
            within `timeout` seconds.
    """
    # The context manager releases the streamed connection once we are done.
    with requests.get(img_url, stream=True, timeout=timeout) as response:
        # Fail on 4xx/5xx here instead of handing an error page to PIL.
        response.raise_for_status()
        # `convert` forces the pixel data to be read while the stream is
        # still open, so the returned image does not depend on the response.
        image = Image.open(response.raw).convert('RGB')

    return image



In [6]:
# Build the toy model. Its constructor reseeds torch's RNG with 123 before
# creating the layers.
dummymodel = DummyModel()

In [7]:
# Bare expression as the cell's last line so the notebook shows the module repr.
dummymodel

DummyModel(
  (token_embedding): Embedding(2, 2)
  (linear_1): Linear(in_features=2, out_features=2, bias=True)
  (layernorm_1): LayerNorm((2,), eps=1e-05, elementwise_affine=True)
  (linear_2): Linear(in_features=2, out_features=2, bias=True)
  (layernorm_2): LayerNorm((2,), eps=1e-05, elementwise_affine=True)
  (head): Linear(in_features=2, out_features=2, bias=True)
)

In [9]:
# List each named parameter of the model together with its dtype.
for param_name, param in dummymodel.named_parameters():
    print(f"In layer {param_name}: Dtype: {param.dtype}")

In layer token_embedding.weight: Dtype: torch.float32
In layer linear_1.weight: Dtype: torch.float32
In layer linear_1.bias: Dtype: torch.float32
In layer layernorm_1.weight: Dtype: torch.float32
In layer layernorm_1.bias: Dtype: torch.float32
In layer linear_2.weight: Dtype: torch.float32
In layer linear_2.bias: Dtype: torch.float32
In layer layernorm_2.weight: Dtype: torch.float32
In layer layernorm_2.bias: Dtype: torch.float32
In layer head.weight: Dtype: torch.float32
In layer head.bias: Dtype: torch.float32


In [12]:
# Convert a fresh DummyModel with `.to(torch.float16)` and list the dtype
# of each of its parameters.
model_fp_16 = DummyModel().to(torch.float16)
for param_name, param in model_fp_16.named_parameters():
    print(f"In layer {param_name}: Dtype: {param.dtype}")

In layer token_embedding.weight: Dtype: torch.float16
In layer linear_1.weight: Dtype: torch.float16
In layer linear_1.bias: Dtype: torch.float16
In layer layernorm_1.weight: Dtype: torch.float16
In layer layernorm_1.bias: Dtype: torch.float16
In layer linear_2.weight: Dtype: torch.float16
In layer linear_2.bias: Dtype: torch.float16
In layer layernorm_2.weight: Dtype: torch.float16
In layer layernorm_2.bias: Dtype: torch.float16
In layer head.weight: Dtype: torch.float16
In layer head.bias: Dtype: torch.float16


In [13]:
# Toy batch of token ids (int64), built with the torch.tensor factory.
dummy_input = torch.tensor([[1, 0], [0, 1]], dtype=torch.long)

In [14]:
# Forward pass of the unconverted model on the toy batch; the result is
# displayed as the cell output.
dummymodel(dummy_input)

tensor([[[-0.6872,  0.7132],
         [-0.6872,  0.7132]],

        [[-0.6872,  0.7132],
         [-0.6872,  0.7132]]], grad_fn=<ViewBackward0>)

In [15]:
# Same toy batch through the float16 copy of the model, for comparison.
model_fp_16(dummy_input)

tensor([[[-0.6870,  0.7134],
         [-0.6870,  0.7134]],

        [[-0.6870,  0.7134],
         [-0.6870,  0.7134]]], dtype=torch.float16, grad_fn=<ViewBackward0>)

In [18]:
# float 16
# Build a fresh model and convert it with `.half()`.
model_fp16 = DummyModel().half()
my_input = torch.LongTensor([[1, 0], [0, 1]])
# Feed the input built in this cell. `my_input` used to be created but never
# used, while the call relied on `dummy_input` defined in an earlier cell.
logits_fp16 = model_fp16(my_input)

In [19]:
# Display the logits stored in `logits_fp16`.
logits_fp16

tensor([[[-0.6870,  0.7134],
         [-0.6870,  0.7134]],

        [[-0.6870,  0.7134],
         [-0.6870,  0.7134]]], dtype=torch.float16, grad_fn=<ViewBackward0>)