# Notebook for experimenting and learning

In [2]:
# understanding torch.transpose()
import torch

A = torch.randn(1, 3, 4)
B = torch.randn(1, 3, 4)

print(A)
print(B)


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.5 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/rcss/Documents/master/math-reasoning/.venv/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/rcss/Documents/master/math-reasoning/.venv/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/rcss/Documents/master/math-reasoning/.venv/lib/python3.12/site-packages/ipykern

tensor([[[-0.5826,  2.5874,  0.9559,  0.9259],
         [ 0.1693, -0.4877,  0.6334,  1.2879],
         [-0.1584, -0.2066, -0.7949,  1.7970]]])
tensor([[[ 0.4281, -0.7625, -0.9006,  0.0649],
         [-0.7586,  0.3312, -0.7936,  0.9443],
         [-0.5790, -1.2730,  1.3174,  0.7216]]])


In [16]:
print(A.shape)

torch.Size([1, 3, 4])


In [11]:
# trying to multiply these
# The main difference lies in robustness:
# First case: transposes first dim, transposes 2nd dim
# Second case: GENERAL -> Will ALWAYS transpose the last two dims
# Third case: Same thing as first. Swapping dims change nothing
C = A @ B.transpose(dim0=1, dim1=2)
D = A @ B.transpose(dim0=-2, dim1=-1)
E = A @ B.transpose(dim0=2, dim1=1)

print(C) 
print(D)
print(E)


tensor([[[-1.1571,  1.0569, -2.6868],
         [ 2.2595, -4.0820, -0.8928],
         [ 1.1520, -0.8884,  0.9871]]])
tensor([[[-1.1571,  1.0569, -2.6868],
         [ 2.2595, -4.0820, -0.8928],
         [ 1.1520, -0.8884,  0.9871]]])
tensor([[[-1.1571,  1.0569, -2.6868],
         [ 2.2595, -4.0820, -0.8928],
         [ 1.1520, -0.8884,  0.9871]]])


In [17]:
print(D.shape)

torch.Size([1, 3, 3])


In [19]:
Z = D @ A
print(Z)
print(Z.shape)

tensor([[[   -inf,     nan,     nan,     nan],
         [   -inf,     inf,     inf,     inf],
         [-1.2432, -0.2798, -0.4479,  0.8259]]])
torch.Size([1, 3, 4])


In [15]:
# torch.tril()
B, T, C = A.shape
# 1. First, create a T x T tensor of ones
tril = torch.ones(T, T).tril()
D = D.masked_fill(tril == 0, float("-inf"))

D

tensor([[[-1.1571,    -inf,    -inf],
         [ 2.2595, -4.0820,    -inf],
         [ 1.1520, -0.8884,  0.9871]]])

In [None]:
# Understanding view
# torch.Tensor.view(-1) is a method in PyTorch that reshapes a tensor into a one-dimensional (1D) tensor \ 
# while automatically inferring the total number of elements. 
A = torch.randn(1, 2, 3)
A.view(-1)
A

tensor([[[-0.0019, -0.6673, -0.0354],
         [ 0.9114,  1.0840,  0.1493]]])