# Setup

In [1]:
pip install torch

Collecting torch
  Using cached torch-2.3.1-cp311-cp311-manylinux1_x86_64.whl.metadata (26 kB)
Collecting filelock (from torch)
  Using cached filelock-3.15.4-py3-none-any.whl.metadata (2.9 kB)
Collecting sympy (from torch)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Using cached networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)
Collecting fsspec (from torch)
  Using cached fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manyl

In [2]:
import torch

if torch.cuda.is_available():
    print("CUDA is available. GPU is present.")
    print("Device name:", torch.cuda.get_device_name(0))
else:
    print("CUDA is not available. No GPU detected.")


CUDA is available. GPU is present.
Device name: NVIDIA GeForce RTX 2080 Ti


In [3]:
!pip install numpy

Collecting numpy
  Using cached numpy-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Using cached numpy-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.3 MB)
Installing collected packages: numpy
Successfully installed numpy-2.0.0


In [4]:
!pip install matplotlib

Collecting matplotlib
  Using cached matplotlib-3.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Using cached fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (162 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.4 kB)
Collecting pillow>=8 (from matplotlib)
  Using cached pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.2 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.1.2-py3-none-any.whl.metadata (5.1 kB)
Using cached matplotlib-3.9.1-cp311-cp311-manylinux_2_17_x86_6

In [5]:
import numpy as np
import matplotlib.pyplot as plt

# Initialise tensors

In [6]:
x = torch.ones(3,2)
print(x)
x = torch.zeros(3,2)
print(x)
x = torch.rand(3,2)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.0191, 0.0739],
        [0.7551, 0.3018],
        [0.1049, 0.8480]])


In [7]:
x = torch.empty(3,2)
print(x)
y = torch.zeros_like(x)
print(y)

tensor([[7.0374e+22, 1.3563e-19],
        [2.7953e+20, 7.1321e+28],
        [1.5791e-19, 2.7381e+20]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [8]:
x = torch.tensor([[1, 2],
                  [3, 4],
                  [5,6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


# Slicing

In [9]:
print(x.size())
print(x[:,1])
print(x[0:2,:])

torch.Size([3, 2])
tensor([2, 4, 6])
tensor([[1, 2],
        [3, 4]])


In [10]:
y = x[1,1]
print(y.item())
y

4


tensor(4)

# Reshaping tensors

In [11]:
print(x)
y = x.view(2,3)
print(y)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [12]:
y = x.view(6,-1)
y

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])

In [13]:
x = torch.ones(3,2)
y = torch.ones(3,2)
z1 = x+y
z2 = x-y
z3 = x*y
print(z1,z2, z3)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]]) tensor([[0., 0.],
        [0., 0.],
        [0., 0.]]) tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [14]:
z = y.add(x)
print(z, y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]]) tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [15]:
z = y.add_(x)
print(z, y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]]) tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


In [16]:
x_np = x.numpy()
print(type(x), type(x_np))

<class 'torch.Tensor'> <class 'numpy.ndarray'>


In [19]:
a = np.random.randn(5)
a_pt = torch.from_numpy(a)
print(type(a), type(a_pt))

<class 'numpy.ndarray'> <class 'torch.Tensor'>


In [22]:
np.add(a, 1,out=a)
print(a)
print(a_pt)

[3.38664202 0.34430232 2.34207494 2.22866643 3.10133463]
tensor([3.3866, 0.3443, 2.3421, 2.2287, 3.1013], dtype=torch.float64)


# CUDA Support

In [23]:
print(torch.cuda.device_count())

1


In [25]:
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x7acfa4e268d0>
NVIDIA GeForce RTX 2080 Ti


In [26]:
cuda0 = torch.device('cuda:0')

In [28]:
a = torch.ones(3,2,device=cuda0)
b = torch.ones(3,2,device=cuda0)
c = a+b
print(c)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]], device='cuda:0')


# Audo diff

In [52]:
x = torch.ones([3,2], requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [53]:
y = x+5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [54]:
z = y*y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [55]:
t = torch.sum(z)

In [56]:
t.backward()

In [57]:
print(x.grad)

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


$t = \sum_i z_i, z_i = y_i^2 + 1, y_i = x_i + 5$

$\frac{\partial t}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2y_i \times 1$

At x = 1, y = 6, $\frac{\partial t}{\partial x_i} = 12$

In [87]:
x = torch.ones([3,2], requires_grad=True)
y = x + 5
y.retain_grad()
r = 1/(1 + torch.exp(-y))
r.retain_grad()
print(r)
s = torch.sum(r)
s.backward()
print(y.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [89]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
a = torch.ones([3, 2])
r.backward(a)
print(x.grad)

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


# Example

In [93]:
x = torch.randn([20,1], requires_grad=True)
y = 3*x - 2

w = torch.tensor([1.0], requires_grad=True)
b = torch.tensor([1.0], requires_grad=True)

y_hat = w*x + b

loss = torch.sum((y_hat-y)**2)

print(loss)

loss.backward()

print(w.grad, b.grad)

tensor(166.5213, grad_fn=<SumBackward0>)
tensor([-11.6889]) tensor([103.2216])


In [100]:
learning_rate = 0.01

w = torch.tensor([1.0], requires_grad=True)
b = torch.tensor([1.0], requires_grad=True)

print(w.item(), b.item())

for i in range(100):
    x = torch.randn([20, 1])
    y = 3*x-2

    y_hat = w*x + b
    loss = torch.sum((y_hat-y)**2)
    print(loss)

    loss.backward()

    with torch.no_grad():
        w -= learning_rate*w.grad
        b -= learning_rate*b.grad

        w.grad.zero_()
        b.grad.zero_()
    
    print(w.item(), b.item())


1.0 1.0
tensor(256.9986, grad_fn=<SumBackward0>)
1.8257310390472412 -0.16283631324768066
tensor(119.5798, grad_fn=<SumBackward0>)
2.4599051475524902 -1.0592750310897827
tensor(32.5803, grad_fn=<SumBackward0>)
2.853238344192505 -1.526116132736206
tensor(5.3117, grad_fn=<SumBackward0>)
2.9382731914520264 -1.7239577770233154
tensor(1.7207, grad_fn=<SumBackward0>)
2.9848151206970215 -1.838219165802002
tensor(0.5243, grad_fn=<SumBackward0>)
2.98714280128479 -1.9028141498565674
tensor(0.2029, grad_fn=<SumBackward0>)
3.000384569168091 -1.9428268671035767
tensor(0.0655, grad_fn=<SumBackward0>)
2.9974100589752197 -1.9657145738601685
tensor(0.0245, grad_fn=<SumBackward0>)
3.001366138458252 -1.9796901941299438
tensor(0.0080, grad_fn=<SumBackward0>)
3.0027287006378174 -1.9876906871795654
tensor(0.0030, grad_fn=<SumBackward0>)
3.002480983734131 -1.9924695491790771
tensor(0.0010, grad_fn=<SumBackward0>)
3.0027222633361816 -1.9952675104141235
tensor(0.0008, grad_fn=<SumBackward0>)
3.000872850418091 -

In [111]:
learning_rate = 0.001
N = 10
epochs = 1000

w = torch.randn([N], requires_grad=True)
print(w.size())
x = torch.ones([N], requires_grad=True)
print(x.size())
b = torch.randn([1], requires_grad=True)

for i in range(epochs):

    x = torch.ones([N], requires_grad=True)
    print
    y = 3*x - 2
    print(y.size())

    y_hat = w*x + b 
    print(y_hat.size())

    loss = torch.sum((y_hat-y)**2)
    print('loss',loss)

    loss.backward()

    with torch.no_grad():
        w -= learning_rate*w.grad
        b -= learning_rate*b.grad

        w.grad.zero_()
        b.grad.zero_()



torch.Size([10])
torch.Size([10])
torch.Size([10])
torch.Size([10])
loss tensor(31.6901, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(30.7045, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(29.7603, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(28.8556, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(27.9886, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(27.1579, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(26.3618, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(25.5987, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(24.8674, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(24.1663, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(23.4942, grad_fn=<SumBackward0>)
torch.Size([10])
torch.Size([10])
loss tensor(22.8499, grad_fn=<SumBackward0>)
torch.Size([10])
t

In [2]:
print('hii')

hii
