In [1]:
import torch
import nestedtensor
from IPython.display import Markdown, display
def print_eval(s):
    """Show the expression string `s` as a bold, dark-red Markdown header, then print its value.

    `s` is evaluated with `eval`, so it must be a trusted expression string.
    """
    header = "<span style='color:darkred'>$ {}</span>".format(s)
    bold_header = '**{}**'.format(header)
    display(Markdown(bold_header))
    # Evaluate after rendering the header so the header appears above the result.
    value = eval(s)
    print('{}\n'.format(str(value)))

The code below implements a basic text classification (or related) model. Instead of worrying about padding, or about an offsets argument for something like `EmbeddingBag`, the user can simply pass a NestedTensor to the model and let the model deal with the variable lengths. This also opens the door to variably sized batches, where the batch size is determined simply by the desired number of tokens.

In [2]:
def generate_tensors(num_tensor, vocab_size, mean_length=75.0, std_length=10.0):
    """Create random token-id sequences of varying length.

    Args:
        num_tensor: Number of sequences to generate.
        vocab_size: Scale applied to the uniform [0, 1) samples before they are
            truncated to integers, i.e. the exclusive upper bound of the ids.
        mean_length: Mean of the normal distribution the lengths are drawn from.
        std_length: Standard deviation of that distribution.

    Returns:
        A list of `num_tensor` one-dimensional int64 tensors, each holding at
        least one token id.
    """
    sentence_lengths = torch.normal(
        float(mean_length), float(std_length), size=(num_tensor,)
    ).long()
    # A draw far below the mean truncates to zero or a negative number, and
    # torch.rand rejects negative sizes; keep at least one token per sequence.
    sentence_lengths = sentence_lengths.clamp(min=1).tolist()
    return [(torch.rand(length) * vocab_size).long() for length in sentence_lengths]

class TextModel(torch.nn.Module):
    """Embed token ids, project each embedding to class scores, pool and normalise.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Width of each embedding vector (input width of the linear layer).
        num_class: Output width of the linear layer.
    """

    def __init__(self, vocab_size, embed_dim, num_class):
        super().__init__()
        # sparse=True makes the embedding produce sparse gradients.
        self.embedding = torch.nn.Embedding(vocab_size, embed_dim, sparse=True)
        self.fc = torch.nn.Linear(embed_dim, num_class)
        self.init_weights()

    def init_weights(self):
        """Re-draw both weight matrices uniformly from [-0.5, 0.5] and zero the bias."""
        bound = 0.5
        # Order matters for reproducibility under a fixed seed: embedding first, then fc.
        for weight in (self.embedding.weight, self.fc.weight):
            torch.nn.init.uniform_(weight, -bound, bound)
        torch.nn.init.zeros_(self.fc.bias)

    def forward(self, text):
        """Return softmax-normalised scores for `text`.

        The per-token scores are summed along dim 1 and the result is
        softmax-normalised along dim 1 (presumably tokens, then classes --
        confirm for NestedTensor inputs).
        """
        token_vectors = self.embedding(text)
        token_scores = self.fc(token_vectors)
        pooled_scores = token_scores.sum(1)
        return pooled_scores.softmax(1)

# Demo hyperparameters, named once so the model and the generated data
# cannot drift apart (the vocabulary size must match in both places).
vocab_size = 10000
embed_dim = 256
num_class = 5
num_sentences = 4

model = TextModel(vocab_size, embed_dim, num_class)
# Wrap the variable-length token sequences in a single NestedTensor; no padding is applied here.
nt_text = nestedtensor.nested_tensor(generate_tensors(num_sentences, vocab_size), dtype=torch.int64)
print_eval("nt_text.nested_size()")
print_eval("model(nt_text).nested_size()")

**<span style='color:darkred'>$ nt_text.nested_size()</span>**

NestedSize([
	torch.Size([72]),
	torch.Size([78]),
	torch.Size([80]),
	torch.Size([79])
])



**<span style='color:darkred'>$ model(nt_text).nested_size()</span>**

NestedSize([
	torch.Size([5]),
	torch.Size([5]),
	torch.Size([5]),
	torch.Size([5])
])

