# Speed Matters!

In [91]:
import torch
torch.cuda.is_available()

True

In [92]:
device = torch.device('cuda')

In [93]:
# creating a new tensor on gpu
x = torch.rand((2,3), device=device)

In [94]:
x

tensor([[0.4919, 0.1002, 0.9868],
        [0.0402, 0.2585, 0.9422]], device='cuda:0')

In [95]:
# moving an existing tensor to gpu
a = torch.rand(2,3)

In [96]:
a

tensor([[0.8849, 0.5107, 0.3129],
        [0.3162, 0.4545, 0.2683]])

In [97]:
b = a.to(device)

In [98]:
b + 5

tensor([[5.8849, 5.5107, 5.3129],
        [5.3162, 5.4545, 5.2683]], device='cuda:0')

In [99]:
import torch
import time

# Define the size of the matrices
size = 10000

# Create random matrices on CPU
matrix_cpu1 = torch.rand(size, size)
matrix_cpu2 = torch.rand(size, size)

# Measure time on CPU
start_time = time.time()
result_cpu = torch.matmul(matrix_cpu1, matrix_cpu2)
cpu_time = time.time() - start_time

print(f"Time on CPU: {cpu_time:.4f} seconds")

# Create random matrices on GPU
matrix_gpu1 = matrix_cpu1.to('cuda')
matrix_gpu2 = matrix_cpu2.to('cuda')

# Measure time on GPU
start_time = time.time()
result_gpu = torch.matmul(matrix_gpu1, matrix_gpu2)
gpu_time = time.time() - start_time

print(f"Time on GPU: {gpu_time:.4f} seconds")

# Compare results
print("\nSpeedup (CPU time / GPU time):", cpu_time / gpu_time)

Time on CPU: 15.6669 seconds
Time on GPU: 0.0007 seconds

Speedup (CPU time / GPU time): 23048.627148368992


# Reshaping Tensors

In [100]:
a = torch.ones(4,4)
a

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [101]:
# reshape
a.reshape(2,2,2,2)

tensor([[[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]],


        [[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]]])

In [102]:
# flatten
a.flatten()

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [103]:
b = torch.rand(2,3,4)
b

tensor([[[0.9746, 0.5044, 0.5705, 0.5033],
         [0.7057, 0.3649, 0.0180, 0.9518],
         [0.3307, 0.8733, 0.8304, 0.5196]],

        [[0.5580, 0.5552, 0.2712, 0.4427],
         [0.6963, 0.8689, 0.0097, 0.3795],
         [0.3424, 0.7465, 0.4281, 0.9706]]])

In [104]:
# permute
b.permute(2,0,1).shape

torch.Size([4, 2, 3])

In [105]:
b.shape

torch.Size([2, 3, 4])

In [106]:
# unsqueeze
c = torch.rand(226,226,3)
c.unsqueeze(1).shape

torch.Size([226, 1, 226, 3])

In [107]:
# squeeze
d = torch.rand(1,20)
d.squeeze(0).shape

torch.Size([20])

# NumPy and PyTorch

In [108]:
import numpy as np

In [109]:
a = torch.tensor([1,2,3])
a

tensor([1, 2, 3])

In [110]:
b = a.numpy()
b

array([1, 2, 3])

In [111]:
type(b)

numpy.ndarray

In [112]:
c = np.array([1,2,3])
c

array([1, 2, 3])

In [113]:
id(c)

133296610783856

In [114]:
d = torch.from_numpy(c)

In [115]:
id(d)

133296610781776

# AutoGrad (Automatic Differentiation)

In [116]:
def dy_dx(x):
  return 2*x

In [117]:
dy_dx(3)

6

In [118]:
import torch

In [119]:
x = torch.tensor(3.0, requires_grad=True)

In [120]:
y = x**2

In [121]:
x

tensor(3., requires_grad=True)

In [122]:
y

tensor(9., grad_fn=<PowBackward0>)

In [123]:
y.backward()

In [124]:
x.grad

tensor(6.)

In [134]:
import math

def dz_dx(x):
    return 2 * x * math.cos(x**2)

In [126]:
dz_dx(4)

-7.661275842587077

In [135]:
x = torch.tensor(4.0, requires_grad=True)

In [141]:
y = torch.sin(x)

In [142]:
z = y**2

In [143]:
x

tensor(4., requires_grad=True)

In [144]:
y

tensor(-0.7568, grad_fn=<SinBackward0>)

In [145]:
z

tensor(0.5728, grad_fn=<PowBackward0>)

In [146]:
z.backward()

In [147]:
x.grad

tensor(0.9894)

In [148]:
y.grad

  y.grad


In [149]:
import torch

# Inputs
x = torch.tensor(6.7)  # Input feature
y = torch.tensor(0.0)  # True label (binary)

w = torch.tensor(1.0)  # Weight
b = torch.tensor(0.0)  # Bias

In [150]:
# Binary Cross-Entropy Loss for scalar
def binary_cross_entropy_loss(prediction, target):
    epsilon = 1e-8  # To prevent log(0)
    prediction = torch.clamp(prediction, epsilon, 1 - epsilon)
    return -(target * torch.log(prediction) + (1 - target) * torch.log(1 - prediction))

In [151]:
# Forward pass
z = w * x + b  # Weighted sum (linear part)
y_pred = torch.sigmoid(z)  # Predicted probability

# Compute binary cross-entropy loss
loss = binary_cross_entropy_loss(y_pred, y)

In [152]:
loss

tensor(6.7012)

In [153]:
# Derivatives:
# 1. dL/d(y_pred): Loss with respect to the prediction (y_pred)
dloss_dy_pred = (y_pred - y)/(y_pred*(1-y_pred))

# 2. dy_pred/dz: Prediction (y_pred) with respect to z (sigmoid derivative)
dy_pred_dz = y_pred * (1 - y_pred)

# 3. dz/dw and dz/db: z with respect to w and b
dz_dw = x  # dz/dw = x
dz_db = 1  # dz/db = 1 (bias contributes directly to z)

dL_dw = dloss_dy_pred * dy_pred_dz * dz_dw
dL_db = dloss_dy_pred * dy_pred_dz * dz_db

In [154]:
print(f"Manual Gradient of loss w.r.t weight (dw): {dL_dw}")
print(f"Manual Gradient of loss w.r.t bias (db): {dL_db}")

Manual Gradient of loss w.r.t weight (dw): 6.691762447357178
Manual Gradient of loss w.r.t bias (db): 0.998770534992218


In [155]:
x = torch.tensor(6.7)
y = torch.tensor(0.0)

In [156]:
w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)

In [157]:
w

tensor(1., requires_grad=True)

In [158]:
b

tensor(0., requires_grad=True)

In [159]:
z = w*x + b
z

tensor(6.7000, grad_fn=<AddBackward0>)

In [160]:
y_pred = torch.sigmoid(z)
y_pred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [161]:
loss = binary_cross_entropy_loss(y_pred, y)
loss

tensor(6.7012, grad_fn=<NegBackward0>)

In [162]:
loss.backward()

In [163]:
print(w.grad)
print(b.grad)

tensor(6.6918)
tensor(0.9988)


In [164]:
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)

In [165]:
x

tensor([1., 2., 3.], requires_grad=True)

In [166]:
y = (x**2).mean()
y

tensor(4.6667, grad_fn=<MeanBackward0>)

In [167]:
y.backward()

In [168]:
x.grad

tensor([0.6667, 1.3333, 2.0000])

In [169]:
# clearing grad
x = torch.tensor(2.0, requires_grad=True)
x

tensor(2., requires_grad=True)

In [170]:
y = x ** 2
y

tensor(4., grad_fn=<PowBackward0>)

In [171]:
y.backward()

In [172]:
x.grad

tensor(4.)

In [173]:
x.grad.zero_()

tensor(0.)

In [174]:
# disable gradient tracking
x = torch.tensor(2.0, requires_grad=True)
x

tensor(2., requires_grad=True)

In [None]:
y = x ** 2
y

In [None]:
y.backward()

In [None]:
x.grad

In [None]:
# option 1 - requires_grad_(False)
# option 2 - detach()
# option 3 - torch.no_grad()

In [None]:
x.requires_grad_(False)

In [None]:
x

In [None]:
y = x ** 2

In [None]:
y

In [None]:
y.backward()

In [None]:
x = torch.tensor(2.0, requires_grad=True)
x

In [None]:
z = x.detach()
z

In [None]:
y = x ** 2

In [None]:
y

In [None]:
y1 = z ** 2
y1

In [None]:
y.backward()

In [None]:
y1.backward()

In [None]:
x = torch.tensor(2.0, requires_grad=True)
x

In [None]:
y = x ** 2

In [None]:
y

In [None]:
y.backward()

In [None]:
x.grad