In [1]:
import torch

In [2]:
# PyTorch is built on tensor operations.
# A tensor can have any number of dimensions:
# 0-D (scalar), 1-D (vector), 2-D (matrix), 3-D and higher.

# torch.empty(size) allocates memory WITHOUT initializing it, so the printed
# values are arbitrary -- they merely happen to be zeros in the output below.
x = torch.empty((1,))  # shape (1,): a 1-D tensor with one element, not a 0-D scalar
print(x)

tensor([0.])


In [3]:
x = torch.empty((3,))  # 1-D tensor (vector) with 3 uninitialized entries
print(x)

tensor([0., 0., 0.])


In [4]:
x = torch.empty((2, 3))  # 2-D tensor (matrix): 2 rows, 3 columns
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [5]:
x = torch.empty((2, 2, 3))  # 3-D tensor: two stacked 2x3 matrices
print(x)

tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])


In [6]:
# 4-D tensor. One possible reading of the axes, from left to right:
# 1) batch size, 2) per-sample depth (e.g. channels), 3) rows, 4) columns.
x = torch.empty((2, 2, 2, 3))
print(x)

tensor([[[[0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.]]],


        [[[0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.]]]])


In [7]:
# Fix the RNG seed so the random values below are reproducible.
torch.manual_seed(1)
# torch.rand(size): uniform random numbers in the half-open interval [0, 1)
x = torch.rand((5, 3))
print(x)

tensor([[0.7576, 0.2793, 0.4031],
        [0.7347, 0.0293, 0.7999],
        [0.3971, 0.7544, 0.5695],
        [0.4388, 0.6387, 0.5247],
        [0.6826, 0.3051, 0.4635]])


In [8]:
# torch.zeros(size) fills the tensor with 0; torch.ones(size) fills it with 1
x = torch.zeros((5, 3))
print(x)

# Inspect the tensor: size() and .shape report the same torch.Size,
# .dtype is the element type, and type() is the Python class.
print(x.size(), x.shape, x.dtype, type(x), sep="\n")

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
torch.Size([5, 3])
torch.Size([5, 3])
torch.float32
<class 'torch.Tensor'>


In [9]:
# Build a tensor directly from Python data (here: a list of numbers)
x = torch.tensor([5.5, 3])
print(x.shape)  # .shape reports the same torch.Size as .size()

# The same four values can be rearranged into a 2x2 matrix
x = torch.tensor([5.5, 3, 1, 3])
print(torch.reshape(x, (2, 2)))

torch.Size([2])
tensor([[5.5000, 3.0000],
        [1.0000, 3.0000]])


In [10]:
# Operations
y = torch.rand((2, 2))
x = torch.rand((2, 2))
# Element-wise addition: torch.add and the `+` operator give the same result
z = torch.add(x, y)
print(z)
x + y

tensor([[1.1106, 0.8863],
        [0.6960, 1.3533]])


tensor([[1.1106, 0.8863],
        [0.6960, 1.3533]])

In [11]:
# In-place addition: every method with a trailing underscore (e.g. add_)
# modifies the tensor it is called on instead of creating a new one.
print(y)          # y before the update
print(y.add_(x))  # add_ updates y in place and returns it, so this prints the new y

tensor([[0.4550, 0.5725],
        [0.4980, 0.9371]])
tensor([[1.1106, 0.8863],
        [0.6960, 1.3533]])


In [12]:
# Subtraction: torch.sub and the `-` operator are interchangeable
z = torch.sub(x, y)
print(z)
z = x - y
print(z)

tensor([[-0.4550, -0.5725],
        [-0.4980, -0.9371]])
tensor([[-0.4550, -0.5725],
        [-0.4980, -0.9371]])


In [13]:
# Element-wise multiplication: torch.mul and the `*` operator are interchangeable
z = torch.mul(x, y)
print(z)
z = x * y
print(z)

tensor([[0.7281, 0.2781],
        [0.1378, 0.5632]])
tensor([[0.7281, 0.2781],
        [0.1378, 0.5632]])


In [14]:
# Element-wise division: torch.div and the `/` operator are interchangeable
z = torch.div(x, y)
print(z)
z = x / y
print(z)

tensor([[0.5903, 0.3541],
        [0.2845, 0.3075]])
tensor([[0.5903, 0.3541],
        [0.2845, 0.3075]])


In [15]:
# Slicing: tensor[row_selector, column_selector]
x = torch.rand((5, 3))
print(x)
print(x[:, 0])  # every row, first column
print(x[1, :])  # second row, every column
print(x[1, 1])  # single element at row 1, column 1 (still wrapped in a tensor)

tensor([[0.2843, 0.3398, 0.5239],
        [0.7981, 0.7718, 0.0112],
        [0.8100, 0.6397, 0.9743],
        [0.8300, 0.0444, 0.0246],
        [0.2588, 0.9391, 0.4167]])
tensor([0.2843, 0.7981, 0.8100, 0.8300, 0.2588])
tensor([0.7981, 0.7718, 0.0112])
tensor(0.7718)


`torch.transpose(input, dim0, dim1)` (or the equivalent method `x.transpose(dim0, dim1)`) swaps two dimensions (axes) of a tensor. It generalizes matrix transposition: you can swap any two dimensions of a higher-dimensional tensor. (To reorder several dimensions at once, use `permute` instead.)

The sections below explain the results of the `transpose` cells that follow:


#### How `torch.transpose(dim0, dim1)` Works:
Transpose for 2D Matrices: For a 2D tensor (matrix), `transpose(0, 1)` is equivalent to the mathematical transpose, swapping rows and columns.

General Case for N-Dimensional Tensors:

Given a tensor of shape `(D0, D1, ..., DN)`, calling `transpose(dim0, dim1)` swaps the axes `dim0` and `dim1`.
Other dimensions remain unchanged.
Negative indices (e.g., `-1`, `-2`) count dimensions from the end: `-1` is the last dimension, `-2` is the second-to-last dimension, and so on.

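#### Why Do x.transpose(0, 1) and x.transpose(-2, -1) Work?
For the matrix `x = torch.rand(5, 3)`:

Shape of x is (5, 3) where:
- Dimension 0 corresponds to rows (5 rows).
- Dimension 1 corresponds to columns (3 columns).

Because `x` has only two dimensions, `-2` refers to dimension 0 and `-1` refers to dimension 1. Both calls therefore swap rows and columns and return the same `(3, 5)` tensor.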

#### Why Does x.transpose(0, 0) or x.transpose(dim, dim) Return the Original Matrix?
When the same dimension is specified for both arguments of transpose(dim0, dim1), no swapping occurs. Effectively, it’s a no-op because swapping a dimension with itself doesn't change the tensor.

For example:

- x.transpose(0, 0): Swaps dimension 0 with itself → No effect.
- x.transpose(1, 1): Swaps dimension 1 with itself → No effect.
- x.transpose(-1, -1): Swaps the last dimension with itself → No effect.
- x.transpose(-2, -2): Swaps the second-to-last dimension with itself → No effect.

In [16]:
print(x[1])  # indexing with a single integer selects a whole row (here: the second)
print(x.transpose(dim0=0, dim1=1))  # swap rows and columns

tensor([0.7981, 0.7718, 0.0112])
tensor([[0.2843, 0.7981, 0.8100, 0.8300, 0.2588],
        [0.3398, 0.7718, 0.6397, 0.0444, 0.9391],
        [0.5239, 0.0112, 0.9743, 0.0246, 0.4167]])


In [17]:
print(x.transpose(dim0=-2, dim1=-1))  # negative dims count from the end; also swaps rows and columns

tensor([[0.2843, 0.7981, 0.8100, 0.8300, 0.2588],
        [0.3398, 0.7718, 0.6397, 0.0444, 0.9391],
        [0.5239, 0.0112, 0.9743, 0.0246, 0.4167]])


In [18]:
print(x.transpose(dim0=0, dim1=0))  # dim 0 swapped with itself: matrix unchanged

tensor([[0.2843, 0.3398, 0.5239],
        [0.7981, 0.7718, 0.0112],
        [0.8100, 0.6397, 0.9743],
        [0.8300, 0.0444, 0.0246],
        [0.2588, 0.9391, 0.4167]])


In [19]:
print(x.transpose(dim0=1, dim1=1))  # dim 1 swapped with itself: matrix unchanged

tensor([[0.2843, 0.3398, 0.5239],
        [0.7981, 0.7718, 0.0112],
        [0.8100, 0.6397, 0.9743],
        [0.8300, 0.0444, 0.0246],
        [0.2588, 0.9391, 0.4167]])


In [20]:
print(x.transpose(dim0=-1, dim1=-1))  # last dim swapped with itself: matrix unchanged

tensor([[0.2843, 0.3398, 0.5239],
        [0.7981, 0.7718, 0.0112],
        [0.8100, 0.6397, 0.9743],
        [0.8300, 0.0444, 0.0246],
        [0.2588, 0.9391, 0.4167]])


In [21]:
print(x.transpose(dim0=-2, dim1=-2))  # second-to-last dim swapped with itself: matrix unchanged

tensor([[0.2843, 0.3398, 0.5239],
        [0.7981, 0.7718, 0.0112],
        [0.8100, 0.6397, 0.9743],
        [0.8300, 0.0444, 0.0246],
        [0.2588, 0.9391, 0.4167]])


In [22]:
# For a tensor holding exactly one element, .item() extracts it as a plain Python number
print(x[1, 1], x[1, 1].item(), sep="\n")

tensor(0.7718)
0.77176833152771


In [23]:
# Reshaping with Tensor.view(): start from a 4x4 tensor of random values
x = torch.randn((4, 4))
print(x)

tensor([[ 0.1530, -0.4757, -1.8821, -0.7765],
        [ 2.0242, -0.0865,  0.0981, -1.2150],
        [ 1.5748, -0.6298,  2.4070,  0.2786],
        [ 0.2468,  1.1843, -0.7282,  1.1633]])


In [24]:
y = x.view((16,))  # flatten the 4x4 tensor into a 1-D tensor of 16 elements
print(y)

tensor([ 0.1530, -0.4757, -1.8821, -0.7765,  2.0242, -0.0865,  0.0981, -1.2150,
         1.5748, -0.6298,  2.4070,  0.2786,  0.2468,  1.1843, -0.7282,  1.1633])


1. `Tensor.view()` (a tensor method, e.g. `x.view(16)`)
- Purpose: Returns a new tensor that shares the same underlying data but has a different shape.
- Key Constraint: The requested shape must be compatible with the tensor's existing size and stride. A contiguous tensor (data stored in one continuous block of memory) always qualifies; otherwise `view()` may raise a `RuntimeError`.
- Operation Type: Lightweight operation that never copies data.

2. torch.reshape()
- Purpose: Returns a new tensor with the same data but a different shape. It is more flexible because it works even if the input tensor is not contiguous.
- Key Advantage: If the input tensor is non-contiguous, reshape() will automatically create a copy of the data (if needed) to ensure the requested shape is valid.
- Operation Type: May involve copying data if contiguity is not satisfied.

In [25]:
# A size of -1 tells PyTorch to infer that dimension from the others:
# 16 elements arranged in 8 columns -> 2 rows.
z = x.view((-1, 8))
print(z)

tensor([[ 0.1530, -0.4757, -1.8821, -0.7765,  2.0242, -0.0865,  0.0981, -1.2150],
        [ 1.5748, -0.6298,  2.4070,  0.2786,  0.2468,  1.1843, -0.7282,  1.1633]])


In [26]:
torch.reshape(x, (-1, 8))  # reshape supports the same -1 size inference as view

tensor([[ 0.1530, -0.4757, -1.8821, -0.7765,  2.0242, -0.0865,  0.0981, -1.2150],
        [ 1.5748, -0.6298,  2.4070,  0.2786,  0.2468,  1.1843, -0.7282,  1.1633]])

**Contiguity** in PyTorch refers to how a tensor's data is stored in memory. A tensor is contiguous if its elements are laid out in a single, uninterrupted block of memory in the same order as they are indexed (row-major), without any gaps or reordering.

Examples of a contiguous and a non-contiguous tensor:

In [27]:
# torch is already imported in the first cell, so it is not re-imported here.
# A tensor built directly from nested lists is reported as contiguous.
x = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(x.is_contiguous())  # True

True


In [28]:
# Start from a contiguous (2, 3) tensor holding the values 0..5
x = torch.arange(6).reshape((2, 3))
# Swapping the two axes gives shape (3, 2); the result is no longer contiguous
y = x.permute((1, 0))
print(y.is_contiguous())  # False

False


In [29]:
# `y` is the non-contiguous permuted tensor from the previous cell, so `view`
# cannot flatten it. The failure is the point of this cell: catch and print it
# so that "Restart Kernel -> Run All" still reaches the cells below.
try:
    print(y.view(-1))
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

In [None]:
torch.reshape(y, (-1,))  # unlike view, reshape succeeds on the non-contiguous tensor

tensor([0, 3, 1, 4, 2, 5])