In [None]:
!pip install git+https://github.com/huggingface/transformers.git

In [None]:
import os

# Expose only GPU 0 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, StaticCache

checkpoint = "openai-community/gpt2"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

inputs = tokenizer(text="My name is GPT2", return_tensors="pt")

# Build a fixed-size cache and hand it to the model's forward pass.
# The cache is sized to hold the prompt plus 10 extra tokens, so forward can
# later be called iteratively up to 10 more times to generate.
prompt_length = inputs.input_ids.shape[1]
max_generated_length = prompt_length + 10
past_key_values = StaticCache(
    config=model.config,
    max_batch_size=1,
    max_cache_len=max_generated_length,
    device=model.device,
    dtype=model.dtype,
)
outputs = model(**inputs, use_cache=True, past_key_values=past_key_values)

# NOTE: despite its name, `past_kv_length` is the cache object returned by the
# forward pass (`outputs.past_key_values`), not a length.
past_kv_length = outputs.past_key_values

In [75]:
import torch

# A 2-D tensor of random normal samples: 3 rows, each holding 4 elements.
x = torch.randn(3, 4)

# `ndim` / `dim()` both report the number of dimensions;
# `shape` / `size()` both report the extent of every dimension.
report = [
    ("Dimension of tensor: ", x.ndim),
    ("Dimension of tensor: ", x.dim()),
    ("Size of the Tensor: ", x.shape),
    ("Size of the Tensor: ", x.size()),
    ("Dtype: ", x.dtype),
]
for label, value in report:
    print(label, value)

Dimension of tensor:  2
Dimension of tensor:  2
Size of the Tensor:  torch.Size([3, 4])
Size of the Tensor:  torch.Size([3, 4])
Dtype:  torch.float32


In [50]:
import torch

x = torch.tensor([[1, 2, 3]])
print(x.size())

# squeeze(0) removes dimension 0 because its size is one:
# torch.Size([1, 3]) -> torch.Size([3])
squeezed = torch.squeeze(x, 0)
print(squeezed.shape)

# unsqueeze(-1) inserts a new size-one dimension at the last position:
# torch.Size([1, 3]) -> torch.Size([1, 3, 1])
unsqueezed = torch.unsqueeze(x, -1)
print(unsqueezed.shape)

torch.Size([1, 3])
torch.Size([3])
torch.Size([1, 3, 1])


In [63]:
import torch

# torch.full() creates a tensor of
#   (i)   a given size, filled with
#   (ii)  a given fill value, of
#   (iii) a given dtype, on
#   (iv)  a given device.
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell also runs on machines without CUDA.
target_device = "cuda" if torch.cuda.is_available() else "cpu"
x = torch.full(size=(2, 3), fill_value=1, dtype=torch.float32, device=target_device)


torch.Size([2, 3])
torch.Size([2, 3, 1])


In [68]:
import torch

# Element-wise square root. The input holds integers; the result is
# floating point (4 -> 2., 9 -> 3.).
x = torch.tensor([4, 9])
x.sqrt()

tensor([2., 3.])

In [76]:
import torch

# A single argument is the (exclusive) end; counting starts at 0 in steps of 1.
first_five = torch.arange(5)
print(first_five)  # Output: tensor([0, 1, 2, 3, 4])

# Explicit start, end and step.
odd_numbers = torch.arange(start=1, end=6, step=2)
print(odd_numbers)  # Output: tensor([1, 3, 5])

# A floating-point step produces a floating-point tensor.
half_steps = torch.arange(start=1, end=3.5, step=0.5)
print(half_steps)  # Output: tensor([1.0000, 1.5000, 2.0000, 2.5000, 3.0000])


tensor([0, 1, 2, 3, 4])
tensor([1, 3, 5])
tensor([1.0000, 1.5000, 2.0000, 2.5000, 3.0000])


In [None]:
import torch

# Without `dim`, argmax gives the index of the largest value (8. sits at index 3).
t = torch.tensor([0., -1., 2., 8.])
print(t.argmax())  # Output: tensor(3)

# With dim=1 the maximum is searched along each row of a 2-D tensor,
# giving one index per row.
t_2d = torch.randn(3, 4)
print(t_2d.argmax(dim=1))


In [78]:
import torch

# A floating-point tensor (float32)
x = torch.tensor([1.5, 2.3, 3.7])

# Cast to 64-bit integers; the fractional part of each value is dropped.
x_long = x.to(torch.int64)

# Printed in order: the original tensor, its dtype (torch.float32),
# the converted dtype (torch.int64) and the converted tensor (tensor([1, 2, 3])).
for item in (x, x.dtype, x_long.dtype, x_long):
    print(item)


tensor([1.5000, 2.3000, 3.7000])
torch.float32
torch.int64
tensor([1, 2, 3])


In [91]:
import torch

# Arrange the six values 0..5 into 2 rows of 3 columns.
a = torch.arange(6)
reshaped = a.reshape(2, 3)
print(reshaped)



tensor([[0, 1, 2],
        [3, 4, 5]])


torch.float32

In [94]:
# The current default floating-point dtype.
default_dtype = torch.get_default_dtype()
print(default_dtype)

# `torch.dtype` itself is the class of all dtype objects.
print(torch.dtype)


torch.float32
<class 'torch.dtype'>


In [96]:
import torch

a = torch.tensor([[1, 2],
                  [3, 4]])
b = torch.tensor([[5, 6],
                  [7, 8]])

# Matrix product of two 2x2 matrices, written with the `@` operator.
result = a @ b
result
# Expected value:
# tensor([[19, 22],
#         [43, 50]])


tensor([[19, 22],
        [43, 50]])

In [105]:
import torch

# Two 1-D tensors to join end to end
t1 = torch.tensor([1, 2, 3])
t2 = torch.tensor([4, 5, 6])

# Concatenation along dimension 0, which is also the default
result = torch.cat([t1, t2], dim=0)
print(result)  # Output: tensor([1, 2, 3, 4, 5, 6])

# Two identical 2x2 tensors joined along dimension 1: the rows get longer
a = torch.reshape(torch.arange(4), (2, 2))
b = torch.reshape(torch.arange(4), (2, 2))
c = torch.cat([a, b], dim=1)
c

tensor([1, 2, 3, 4, 5, 6])


tensor([[0, 1, 0, 1],
        [2, 3, 2, 3]])

In [108]:
import torch

# 1-D input: repeat(4, 2) yields 4 rows, each containing the data twice.
x = torch.tensor([1, 2, 3])
x_repeated = x.repeat(4, 2)
print(x_repeated)
# Expected:
# tensor([[1, 2, 3, 1, 2, 3],
#         [1, 2, 3, 1, 2, 3],
#         [1, 2, 3, 1, 2, 3],
#         [1, 2, 3, 1, 2, 3]])

# 2-D input: repeat(2, 3) copies the block 2 times down and 3 times across.
y = torch.tensor([[1, 2],
                  [3, 4]])
y_repeated = y.repeat(2, 3)
print(y_repeated)
# Expected:
# tensor([[1, 2, 1, 2, 1, 2],
#         [3, 4, 3, 4, 3, 4],
#         [1, 2, 1, 2, 1, 2],
#         [3, 4, 3, 4, 3, 4]])


tensor([[1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3]])
tensor([[1, 2, 1, 2, 1, 2],
        [3, 4, 3, 4, 3, 4],
        [1, 2, 1, 2, 1, 2],
        [3, 4, 3, 4, 3, 4]])


In [109]:
import torch

# 3x3 example matrix
a = torch.tensor([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])

# diagonal=0 (default): keep the main diagonal and everything above it
upper = a.triu()
print(upper)
# Expected:
# tensor([[1, 2, 3],
#         [0, 5, 6],
#         [0, 0, 9]])

# diagonal=1: start one diagonal higher, so the main diagonal is zeroed too
upper_pos = a.triu(diagonal=1)
print(upper_pos)
# Expected:
# tensor([[0, 2, 3],
#         [0, 0, 6],
#         [0, 0, 0]])

# diagonal=-1: start one diagonal lower, so the first sub-diagonal is kept
upper_neg = a.triu(diagonal=-1)
print(upper_neg)
# Expected:
# tensor([[1, 2, 3],
#         [4, 5, 6],
#         [0, 8, 9]])


tensor([[1, 2, 3],
        [0, 5, 6],
        [0, 0, 9]])
tensor([[0, 2, 3],
        [0, 0, 6],
        [0, 0, 0]])
tensor([[1, 2, 3],
        [4, 5, 6],
        [0, 8, 9]])


In [111]:
import torch

# Random 3-D tensor of shape (2, 3, 5)
x = torch.randn(2, 3, 5)

# Reorder the axes: old dim 2 comes first, then old dim 0, then old dim 1.
new_order = (2, 0, 1)
y = x.permute(*new_order)

print(x.shape)  # Output: torch.Size([2, 3, 5])
print(y.shape)  # Output: torch.Size([5, 2, 3])
print(x)
print(y)

torch.Size([2, 3, 5])
torch.Size([5, 2, 3])
tensor([[[ 3.3826e-01, -1.3041e+00, -1.6007e+00, -1.6985e-02, -8.8019e-03],
         [ 6.7971e-01, -1.3054e+00,  1.1866e+00,  3.3573e-02,  1.6189e+00],
         [ 2.2187e-03, -3.9983e-01,  4.4733e-02,  2.2468e+00,  6.9697e-01]],

        [[-8.4144e-01,  1.5785e-01,  1.0823e+00, -1.7036e+00, -1.3872e-01],
         [-8.5134e-02, -6.4710e-01, -1.0148e+00,  1.2168e+00,  1.2317e+00],
         [-6.8671e-01, -1.7752e-01, -5.5077e-01, -5.9119e-01,  1.2234e+00]]])
tensor([[[ 3.3826e-01,  6.7971e-01,  2.2187e-03],
         [-8.4144e-01, -8.5134e-02, -6.8671e-01]],

        [[-1.3041e+00, -1.3054e+00, -3.9983e-01],
         [ 1.5785e-01, -6.4710e-01, -1.7752e-01]],

        [[-1.6007e+00,  1.1866e+00,  4.4733e-02],
         [ 1.0823e+00, -1.0148e+00, -5.5077e-01]],

        [[-1.6985e-02,  3.3573e-02,  2.2468e+00],
         [-1.7036e+00,  1.2168e+00, -5.9119e-01]],

        [[-8.8019e-03,  1.6189e+00,  6.9697e-01],
         [-1.3872e-01,  1.2317e+00,  1

In [112]:
import torch

# 1-D: running total of the elements
x = torch.tensor([1, 2, 3, 4])
print(x.cumsum(dim=0))
# Expected: tensor([1, 3, 6, 10])

# 2-D: the running total can go down the rows (dim=0) or across the columns (dim=1)
y = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])

print(y.cumsum(dim=0))
# Expected: tensor([[1, 2, 3],
#                   [5, 7, 9]])

print(y.cumsum(dim=1))
# Expected: tensor([[ 1,  3,  6],
#                   [ 4,  9, 15]])


tensor([ 1,  3,  6, 10])
tensor([[1, 2, 3],
        [5, 7, 9]])
tensor([[ 1,  3,  6],
        [ 4,  9, 15]])


In [113]:
import torch

# Example values; only the first lies outside [-0.5, 0.5]
a = torch.tensor([-1.7120, 0.1734, -0.0478, -0.0922])

# Two-sided clamp: values below -0.5 become -0.5, values above 0.5 become 0.5
clamped = a.clamp(min=-0.5, max=0.5)
print(clamped)
# Expected: tensor([-0.5000,  0.1734, -0.0478, -0.0922])

# One-sided clamp: only a lower bound, so every negative value becomes 0
lower_bound = a.clamp(min=0)
print(lower_bound)
# Expected: tensor([0.0000, 0.1734, 0.0000, 0.0000])


tensor([-0.5000,  0.1734, -0.0478, -0.0922])
tensor([0.0000, 0.1734, 0.0000, 0.0000])


In [121]:
import torch

x = torch.randn(4, 4)
y = x.view(16)     # all 16 elements in a single dimension
z = x.view(-1, 8)  # -1 is inferred from the other dimensions: 16 / 8 = 2

# Prints torch.Size([4, 4]), torch.Size([16]) and torch.Size([2, 8]) in turn.
for tensor in (x, y, z):
    print(tensor.size())


torch.Size([4, 4])
torch.Size([16])
torch.Size([2, 8])


In [None]:
# A simulated batch: 32 samples with 10 features each
batch = torch.randn(32, 10)
print("Batch shape:", batch.shape)

# Cut the batch along dimension 0 into consecutive chunks of 8 samples
mini_batches = batch.split(8)

print("\nMini-batch shapes:")
for i, mini_batch in enumerate(mini_batches):
    print(f"Mini-batch {i+1}:", mini_batch.shape)


In [125]:
import torch

x = torch.tensor([1, 2, 3])

# clone() returns a copy, so later writes to `x` do not show up in `y`.
y = torch.clone(x)

print("Original tensor:", x)
print("Cloned tensor:", y)

# Overwrite the first element of the original in place
x[0] = 5

print("Modified original:", x)
print("Cloned tensor (unchanged):", y)


Original tensor: tensor([1, 2, 3])
Cloned tensor: tensor([1, 2, 3])
Modified original: tensor([5, 2, 3])
Cloned tensor (unchanged): tensor([1, 2, 3])


In [5]:
import torch

t = torch.tensor([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])

indices = torch.tensor([[0, 2],
                        [1, 1]])

# Gathering along dim 0: result[i][j] = t[indices[i][j]][j],
# i.e. each index picks the row while the column stays j.
result = t.gather(dim=0, index=indices)
result

tensor([[1, 8],
        [4, 5]])

In [15]:
# 1-D tensor of shape (3,)
a = torch.tensor([1, 2, 3])
# Add a leading dimension of size 3; -1 keeps the existing size -> shape (3, 3)
b = a.expand(3, -1)
print(b)

# Column tensor of shape (3, 1)
x = torch.tensor([[1],
                  [2],
                  [3]])
# -1 keeps dim 0; the size-one dim 1 is expanded to 5 -> shape (3, 5)
y = x.expand(-1, 5)
print(y)

tensor([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3]])
tensor([[1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3]])


In [None]:
past_key_values?