# Scratchpad

In [1]:
from tqdm import tqdm

In [2]:
stack = []

In [3]:
# popping from empty stack
stack.pop()

IndexError: pop from empty list

In [1]:
import torch

In [3]:
x = torch.zeros(3,3)
x[torch.randn(3,3) > 0.5] = 1
x

tensor([[0., 0., 1.],
        [1., 1., 0.],
        [1., 0., 0.]])

In [8]:
print(x.count_nonzero().dtype)
print(x.count_nonzero(dim=0))
print(x.count_nonzero(dim=1))
print(x.count_nonzero(dim=-1))

torch.int64
tensor([2, 1, 1])
tensor([1, 2, 1])
tensor([1, 2, 1])


In [13]:
y = torch.Tensor([0, 1, 2, 3, 4, 5]).int()

In [15]:
# so Tensor with shape (1,) can be used as an index
x[y[0]]

tensor([0., 0., 1.])

## testing walrus operator


In [19]:
s = "abc"
a = 2
b = 1
if (a < b and (l := len(s)) > 2):
    print("longer than 2")
    print(l)

In [20]:
# testing zip
l1 = [1, 2, 3]
l2 = ['a', 'b']
for i, j in zip(l1, l2):
    print(i, j)

1 a
2 b


In [21]:
t1 = torch.tensor([[1, 2, 3], [4, 5, 6]])
t1.sum(dim=-1)

tensor([ 6, 15])

In [24]:
t2 = torch.tensor([1, 2, 3])
t3 = torch.tensor([4, 5, 6])
t4 = torch.stack([t2, t3], dim=-1)
t4

tensor([[1, 4],
        [2, 5],
        [3, 6]])

## Sequence Probability Utilities Testing

In [26]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [27]:
small_model_hf_id = "microsoft/phi-1_5"
model = AutoModelForCausalLM.from_pretrained(small_model_hf_id)
tokenizer = AutoTokenizer.from_pretrained(small_model_hf_id)

In [45]:
tokenizer.pad_token_id  = tokenizer.eos_token_id

In [97]:
text_batch = [
    "Hello, my dog is cute\n",
    "Hi there!\n",
]
batch_enc = tokenizer(text_batch[0], return_tensors="pt", padding=True, add_special_tokens=True)
input_ids = batch_enc.input_ids

In [98]:
input_ids

tensor([[15496,    11,   616,  3290,   318, 13779,   198]])

In [99]:
for i in range(len(text_batch)):
    for tid in input_ids[i]:
        print(repr(tokenizer.decode(tid.item())) )

'Hello'
','
' my'
' dog'
' is'
' cute'
'\n'


IndexError: index 1 is out of bounds for dimension 0 with size 1

In [100]:
outputs = model(input_ids)

In [101]:
probs = torch.log_softmax(outputs.logits, dim=-1)

In [102]:
probs = probs[:, :-1, :]

In [103]:
probs.shape

torch.Size([1, 6, 51200])

In [70]:
input_ids = input_ids[:, 1:]

In [71]:
input_ids.shape

torch.Size([2, 6])

In [96]:
probs.shape

torch.Size([2, 6, 51200])

In [72]:
gen_probs = torch.gather(probs, -1, input_ids.unsqueeze(-1)).squeeze(-1)

In [73]:
gen_probs

tensor([[-0.4337, -2.0271, -8.8554, -2.5866, -5.5343, -7.0558],
        [-1.5266, -0.5090, -4.1932, -2.6174, -7.8948, -8.8712]],
       grad_fn=<SqueezeBackward1>)

In [82]:
termination_token_id = tokenizer.encode("\n")[-1]

In [83]:
# next step is to figure out how to mask out the padding tokens
mask = (input_ids == termination_token_id).byte()
mask

tensor([[0, 0, 0, 0, 0, 1],
        [0, 0, 1, 0, 0, 0]], dtype=torch.uint8)

In [84]:
mask.argmax(dim=-1)

tensor([5, 2])

In [91]:
gen_probs.cumsum(dim=-1)

tensor([[ -0.4337,  -2.4608, -11.3162, -13.9027, -19.4370, -26.4928],
        [ -1.5266,  -2.0355,  -6.2287,  -8.8461, -16.7409, -25.6120]],
       grad_fn=<CumsumBackward0>)

In [94]:
seq_probs = torch.gather(gen_probs.cumsum(dim=-1), 1, mask.argmax(dim=-1).unsqueeze(-1))
seq_probs.squeeze(-1)

tensor([-26.4928,  -6.2287], grad_fn=<SqueezeBackward1>)

In [92]:
better_mask = (input_ids == termination_token_id).cumsum(dim=-1) >= 1

In [93]:
better_mask

tensor([[False, False, False, False, False,  True],
        [False, False,  True,  True,  True,  True]])

## joint sorting

In [2]:
lst1 = [1, 9, 8, 5]
lst2 = ['a', 'b', 'c', 'd']
paired = list(zip(lst1, lst2))
paired.sort(key=lambda x: x[0])
lst3, lst4 = zip(*paired)

In [3]:
lst3, lst4

((1, 5, 8, 9), ('a', 'd', 'c', 'b'))

## lightning data module

In [None]:
from pytorch_lightning import LightningDataModule
class DummyDataModule(LightningDataModule):
    def __init__(self, train_size=0.95, limit_theorems=100):
        super().__init__()
    
    def dummy_method(self):
        print(self.h_params.train_size)