In [3]:
import torch

# Sample a rank-3 tensor from the standard normal distribution (mean 0, std 1).
x = torch.randn(1, 3, 6)

# Reading the shape (1, 3, 6) axis by axis, using this notebook's labels:
#   axis 0, size 1 -> sequences (a singleton axis; often sequence length or layers)
#   axis 1, size 3 -> batch (number of samples processed together)
#   axis 2, size 6 -> features (size of the hidden state / embedding)

# Pair each axis label with the matching entry of x.shape.
dimensions = dict(
    zip(
        ('dim 0 (sequences)', 'dim 1 (batch)', 'dim 2 (features)'),
        x.shape,
    )
)

# Typical situations where a tensor like this is built:
#   * neural-network initialization
#   * dummy data for testing
#   * initial hidden states for an RNN/LSTM

# Display x, a 3D tensor of shape (1, 3, 6).
x

tensor([[[-1.4043, -0.4011,  1.6967,  2.4041, -0.2251,  1.0248],
         [-0.1186,  0.1278, -0.7602,  0.8845, -1.2966,  0.7585],
         [ 0.0783, -0.4681,  0.0772,  0.3696,  0.8051, -0.7423]]])

In [4]:
# squeeze(dim) drops the given axis when that axis has size 1.
# Axis 0 of x has size 1, so removing it turns the 3D tensor into a 2D tensor.
squeezed = torch.squeeze(x, dim=0)
squeezed

tensor([[-1.4043, -0.4011,  1.6967,  2.4041, -0.2251,  1.0248],
        [-0.1186,  0.1278, -0.7602,  0.8845, -1.2966,  0.7585],
        [ 0.0783, -0.4681,  0.0772,  0.3696,  0.8051, -0.7423]])

In [5]:
# Without a dim argument, squeeze drops every axis of size 1, not only the first one.
squeezed = torch.squeeze(x)
squeezed

tensor([[-1.4043, -0.4011,  1.6967,  2.4041, -0.2251,  1.0248],
        [-0.1186,  0.1278, -0.7602,  0.8845, -1.2966,  0.7585],
        [ 0.0783, -0.4681,  0.0772,  0.3696,  0.8051, -0.7423]])

In [6]:

# unsqueeze(dim) inserts a new axis of size 1 at position dim.
unsqueezed = torch.unsqueeze(x, dim=1)  # (1, 3, 6) -> (1, 1, 3, 6)

# Display the resulting 4D tensor of shape (1, 1, 3, 6).
unsqueezed

tensor([[[[-1.4043, -0.4011,  1.6967,  2.4041, -0.2251,  1.0248],
          [-0.1186,  0.1278, -0.7602,  0.8845, -1.2966,  0.7585],
          [ 0.0783, -0.4681,  0.0772,  0.3696,  0.8051, -0.7423]]]])

In [9]:
# Example 2: insert a sequence axis.

# hidden has shape [batch, hidden].
hidden = torch.randn(2, 4)

# A size-1 axis at position 1 gives shape [batch, 1, hidden].
sequence = torch.unsqueeze(hidden, dim=1)
sequence

tensor([[[-0.1988,  0.1294, -0.3280, -0.1420]],

        [[-1.3942, -1.5514, -0.7374, -0.6113]]])

In [6]:
import torch
# Example tensor laid out as [sequence_length=2, batch_size=3, vocab_size=5].
output = torch.randn((2, 3, 5))
output






tensor([[[-0.2523, -0.7688,  0.4002, -0.4287,  0.6104],
         [ 0.7961, -0.5849,  0.4927, -0.3052, -0.8675],
         [ 0.7898, -1.1308,  0.0450, -1.0427, -1.2169]],

        [[-0.5405,  1.3746,  0.4098, -1.1473, -0.0319],
         [-1.5157,  0.0699,  0.3763, -1.1690, -0.3738],
         [-0.1964,  0.9854,  0.8022, -0.2907, -1.3828]]])

In [7]:
# Size of the last axis of output (5 for a tensor of shape (2, 3, 5)).
output_dim = output.size(-1)
output_dim

5

In [None]:
# 1. Drop the first timestep: keep index 1 onward along axis 0 (the sequence
#    axis) and leave the batch and vocab axes untouched.
sliced = output[1:, :, :]
sliced

tensor([[[-0.5405,  1.3746,  0.4098, -1.1473, -0.0319],
         [-1.5157,  0.0699,  0.3763, -1.1690, -0.3738],
         [-0.1964,  0.9854,  0.8022, -0.2907, -1.3828]]])

In [12]:
# Inspect the shape of the sliced tensor.
sliced.size()

torch.Size([1, 3, 5])

In [10]:
# With no arguments, Tensor.type() returns the tensor's type name as a string.
sliced.type()

'torch.FloatTensor'

In [None]:
# 2. Collapse the 3D tensor into 2D.
# The last axis keeps size output_dim; -1 asks view to infer the other size
# so that the total number of elements stays the same.
reshaped = sliced.view((-1, output_dim))
reshaped

tensor([[-0.5405,  1.3746,  0.4098, -1.1473, -0.0319],
        [-1.5157,  0.0699,  0.3763, -1.1690, -0.3738],
        [-0.1964,  0.9854,  0.8022, -0.2907, -1.3828]])

In [13]:
# Inspect the shape of the reshaped tensor.
reshaped.size()

torch.Size([3, 5])

Here, we can experiment with PyTorch's `squeeze` and `unsqueeze`.

In [2]:
import torch
# Examples of handling torch tensors
# Tensors are built with torch.tensor(..., dtype=torch.long) rather than the
# legacy torch.LongTensor constructor; both yield 64-bit integer tensors.

# Example 1: Simple sequence
ids = [1, 2, 3, 4]  # sequence of token ids
tensor = torch.tensor(ids, dtype=torch.long)
print("Original:", tensor.shape)  # Shape: [4]
print(tensor)

# unsqueeze(-1) appends a trailing axis of size 1, turning the flat list of
# ids into a column.
tensor = tensor.unsqueeze(-1)
print("\nAfter unsqueeze:", tensor.shape)  # Shape: [4, 1]
print(tensor)

# Example 2: Sentence tokens
# Illustrative token ids only; no vocabulary is defined in this notebook, so
# they do not map to any particular words.
sentence_ids = [5, 2, 8, 1, 9]
tensor = torch.tensor(sentence_ids, dtype=torch.long)
print("\nOriginal sentence:", tensor.shape)  # Shape: [5]
print(tensor)

tensor = tensor.unsqueeze(-1)
print("\nAfter unsqueeze:", tensor.shape)  # Shape: [5, 1]
print(tensor)

# Example 3: Alternative ways to unsqueeze
# For a 1-D tensor, unsqueeze(-1) and unsqueeze(1) insert the new axis at the
# same position, so both give [3, 1]; unsqueeze(0) puts it in front: [1, 3].
tensor = torch.tensor([1, 2, 3], dtype=torch.long)
print("\nDifferent unsqueeze positions:")
print("Original:", tensor.shape)  # [3]
print("unsqueeze(-1):", tensor.unsqueeze(-1).shape)  # [3, 1]
print("unsqueeze(0):", tensor.unsqueeze(0).shape)    # [1, 3]
print("unsqueeze(1):", tensor.unsqueeze(1).shape)    # [3, 1]

Original: torch.Size([4])
tensor([1, 2, 3, 4])

After unsqueeze: torch.Size([4, 1])
tensor([[1],
        [2],
        [3],
        [4]])

Original sentence: torch.Size([5])
tensor([5, 2, 8, 1, 9])

After unsqueeze: torch.Size([5, 1])
tensor([[5],
        [2],
        [8],
        [1],
        [9]])

Different unsqueeze positions:
Original: torch.Size([3])
unsqueeze(-1): torch.Size([3, 1])
unsqueeze(0): torch.Size([1, 3])
unsqueeze(1): torch.Size([3, 1])
