# PyTorch Tensors and Autograd Tutorial

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/maheshghanta/Codes/blob/master/PyTorch_Tutorials/Tutorial1_Solutions/1.Tensors_and_Autograd.ipynb)

In [None]:
%pip install torch torchvision
%pip install ipywidgets
%pip install matplotlib
%pip install numpy
%pip install pandas
%pip install scikit-learn
%pip install scipy

## Overview: Scalars, Vectors, and Tensors

### **Scalar**
- Single value (0D): `5`
- Use: loss, learning rate

### **Vector**
- 1D array: `[1,2,3]`
- Use: embeddings, features

### **Matrix**
- 2D array: `[[1,2],[3,4]]`
- Use: weights, batch data

### **Tensor**
- ND array: generalizes all above
- 4D example: `(batch, channels, height, width)`
- Use: images, video, any ND data

**In PyTorch, everything is a tensor!**

## Setup and Imports

In [None]:
import torch
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler


from torch import nn
import numpy as np
import time
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter

import os
from datetime import datetime

# Report the library versions in use, one per line
print(f"PyTorch: {torch.__version__}", f"NumPy: {np.__version__}", sep="\n")

## Tensor Operations: PyTorch vs NumPy

### 1. Creating Tensors

In [None]:
# The same Python list turned into a NumPy array and into a PyTorch tensor
np_arr = np.array([1, 2, 3])
torch_t = torch.tensor([1, 2, 3])
print("NumPy:", np_arr)
print("PyTorch:", torch_t)

# Factory functions: constant-filled tensors and samples from a standard normal
print("\nZeros:", torch.zeros((2, 3)).shape)
print("Ones:", torch.ones((2, 3)).shape)
print("Random:", torch.randn((2, 2)))

### 2. NumPy ↔ PyTorch

In [None]:
# NumPy -> PyTorch: the tensor is built on top of the existing array
np_a = np.array([[1, 2], [3, 4]])
torch_a = torch.from_numpy(np_a)
print("NumPy→PyTorch:", torch_a)
print(torch_a.dtype)

# PyTorch -> NumPy
torch_b = torch.tensor([[5, 6], [7, 8]])
np_b = torch_b.numpy()
print("PyTorch→NumPy:", np_b)
print(np_b.dtype)

# The array and the tensor created from it share memory, so a write through
# the NumPy side shows up when the tensor is printed
np_a[0][0] = 999
print("Modified NumPy affects PyTorch:", torch_a)

### 3. Basic Operations

In [None]:
# Two 2x2 integer matrices to operate on
a = torch.tensor([[1, 2], [3, 4]])
b = torch.tensor([[5, 6], [7, 8]])

# Element-wise arithmetic
print("Add:", a + b)
print("Multiply:", a * b)
# True matrix product (rows of a times columns of b)
print("Matmul:", a @ b)
# Swap the two axes
print("Transpose:", a.transpose(0, 1))
# Reduce to a single Python number
print("Sum:", a.sum().item())

### 4. Reshaping

In [None]:
# Twelve consecutive integers, rearranged into different 2-D layouts
t = torch.arange(12)
print("Original:", t.shape)
print("Reshaped 3x4:\n", t.reshape((3, 4)))
print("View 2x6:\n", t.view((2, 6)))
# Row 0, column 1 of the 3x4 layout
print("Index [0,1]:", t.reshape((3, 4))[0][1].item())

### 5. Performance (GPU)

In [None]:
# Time one dense size x size matrix multiplication on the CPU and, when CUDA
# is available, on the GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

size = 1000
x_cpu = torch.randn(size, size)
y_cpu = torch.randn(size, size)

# Warm-up: run the operation once untimed so one-time start-up costs of the
# first call are not reported as compute time.
torch.matmul(x_cpu, y_cpu)

# perf_counter() is the clock intended for measuring short durations;
# time.time() is wall-clock time and can be adjusted while we are measuring.
start = time.perf_counter()
result = torch.matmul(x_cpu, y_cpu)
print(f"CPU time: {time.perf_counter()-start:.4f}s")

if torch.cuda.is_available():
    x_gpu = x_cpu.to(device)
    y_gpu = y_cpu.to(device)
    # Warm-up on the GPU too: the first CUDA matmul also pays for library
    # initialisation, which would otherwise dominate the measurement.
    torch.matmul(x_gpu, y_gpu)
    # CUDA work is queued asynchronously, so wait for the device to be idle
    # before starting the clock and again before stopping it.
    torch.cuda.synchronize()
    start = time.perf_counter()
    result_gpu = torch.matmul(x_gpu, y_gpu)
    torch.cuda.synchronize()
    print(f"GPU time: {time.perf_counter()-start:.4f}s")

## Manual Backpropagation

### Function: $f(x,y) = x^2 + 2xy + y^2$

**Derivatives:**
- $\frac{\partial f}{\partial x} = 2x + 2y$
- $\frac{\partial f}{\partial y} = 2x + 2y$

In [None]:
# Prepare meshgrid for x and y in reasonable range
x_vals = np.linspace(0, 6, 100)
y_vals = np.linspace(0, 6, 100)
X, Y = np.meshgrid(x_vals, y_vals)
F = X**2 + 2*X*Y + Y**2  # The function f = x**2 + 2*x*y + y**2

fig = plt.figure(figsize=(7,5))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, F, cmap='viridis', alpha=0.7)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('f(x, y)')
ax.set_title(r"$f(x, y) = x^2 + 2xy + y^2$")
plt.show()

In [None]:
def forward(x, y):
    """Evaluate f(x, y) = x^2 + 2xy + y^2 at the given point."""
    x_squared = x**2
    cross_term = 2*x*y
    y_squared = y**2
    return x_squared + cross_term + y_squared

def backward(x, y):
    """Return the hand-derived pair (df/dx, df/dy) for f = x^2 + 2xy + y^2.

    Both partial derivatives have the same closed form, 2x + 2y.
    """
    df_dx = 2*x + 2*y
    df_dy = 2*x + 2*y
    return df_dx, df_dy

# Evaluate the function and its hand-written gradient at the point (3, 4)
x = 3.0
y = 4.0
out = forward(x, y)
gx, gy = backward(x, y)

print(f"f({x},{y}) = {out}")
print(f"∂f/∂x = {gx}")
print(f"∂f/∂y = {gy}")

### Complex Example: $y = \sigma(Wx + b)$

In [None]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated with NumPy."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator


def sigmoid_grad(z):
    """Derivative of the logistic function at z: sigmoid(z) * (1 - sigmoid(z))."""
    activation = sigmoid(z)
    return activation * (1 - activation)

# Weights, bias and input of a single layer with 2 inputs and 2 outputs
W = np.array([[0.5, -0.3],
              [0.2, 0.8]])
b = np.array([0.1, -0.2])
x = np.array([1.0, 2.0])

# Forward pass: affine map followed by the sigmoid non-linearity
z = np.matmul(W, x) + b
y = sigmoid(z)
print("Forward:", y)

# Backward pass. grad_z is the element-wise sigmoid derivative at z, which is
# dL/dz when the loss is taken to be L = sum(y) (upstream gradient of ones).
grad_z = sigmoid_grad(z)
grad_b = grad_z                       # z depends on b with unit slope
grad_W = np.outer(grad_z, x)          # entry (i, j) is grad_z[i] * x[j]
grad_x = np.matmul(W.T, grad_z)       # chain rule back through W
print("∂L/∂W:\n", grad_W)
print("∂L/∂b:", grad_b)

## PyTorch Autograd

Automatic differentiation - no manual gradient calculation needed!

### 1. Simple Function

In [None]:
# Scalar leaf tensors that autograd is asked to track
x = torch.tensor(3.0, requires_grad=True)
y = torch.tensor(4.0, requires_grad=True)

# Forward pass: f = x^2 + 2xy + y^2
f = x.pow(2) + 2 * x * y + y.pow(2)
print(f"f = {f.item()}")

# Backward pass stores df/dx and df/dy on the leaves' .grad attributes
f.backward()
print(f"∂f/∂x = {x.grad.item()}")
print(f"∂f/∂y = {y.grad.item()}")

### 2. Neural Network Layer

In [None]:
# Same layer as the manual NumPy example, now as tracked float32 tensors
W = torch.tensor([[0.5, -0.3], [0.2, 0.8]], dtype=torch.float32, requires_grad=True)
b = torch.tensor([0.1, -0.2], dtype=torch.float32, requires_grad=True)
x = torch.tensor([1.0, 2.0], dtype=torch.float32, requires_grad=True)

# Forward pass, reduced to a scalar so backward() needs no explicit gradient
z = W @ x + b
y = z.sigmoid()
loss = y.sum()

# Autograd fills .grad on every leaf that requires gradients
loss.backward()
print("∂L/∂W:\n", W.grad.numpy())
print("∂L/∂b:", b.grad.numpy())


In [None]:
class MyModel(nn.Module):
    """One fully connected layer (2 inputs, 2 outputs) followed by a sigmoid."""

    def __init__(self):
        super().__init__()
        # nn.Linear holds the weight matrix and the bias as its parameters
        self.linear = nn.Linear(in_features=2, out_features=2)

    def forward(self, x):
        """Apply the linear layer, then the element-wise sigmoid."""
        return torch.sigmoid(self.linear(x))


simple_model = MyModel()

In [None]:
# One timestamped log directory per run, all under runs/
run_dir = 'runs/simple_model_' + datetime.now().strftime("%Y%m%d_%H%M%S")
writer = SummaryWriter(log_dir=run_dir)
print(f"TensorBoard logs saved to: {run_dir}")
print("View with: tensorboard --logdir=runs")


In [None]:
# Example input handed to add_graph together with the model
x = torch.tensor([1.0, 2.0], dtype=torch.float32, requires_grad=True)
# Log the graph a single time, then close the writer
writer.add_graph(simple_model, input_to_model=x)
writer.close()
print("Graph added successfully!")


### 3. Autograd Features

In [None]:
# 1) Repeated backward() calls add into x.grad instead of overwriting it
print("1. Accumulation:")
x = torch.tensor(2.0, requires_grad=True)
for i in range(3):
    x.pow(2).backward()
    print(f"  Iter {i+1}: grad = {x.grad.item()}")
print("  Gradients accumulate!\n")

# 2) Reset the stored gradient in place
x.grad.zero_()
print("2. After zeroing:", x.grad.item())

# 3) detach() gives a tensor that no longer requires gradients
print("\n3. Detach:")
x = torch.tensor(3.0, requires_grad=True)
y = x.pow(2)
z = y.detach()
print(f"  y.requires_grad: {y.requires_grad}")
print(f"  z.requires_grad: {z.requires_grad}")

# 4) Results computed inside no_grad() do not require gradients
print("\n4. No grad (inference):")
with torch.no_grad():
    y = x.pow(2)
    print(f"  requires_grad: {y.requires_grad}")

## Summary: PyTorch NN Layers

### Linear
- `nn.Linear(in, out)` - Fully connected
- `nn.Bilinear()` - Bilinear transformation

### Convolutional
- `nn.Conv1d/2d/3d()` - 1D/2D/3D convolution
- `nn.ConvTranspose2d()` - Upsampling

### Pooling
- `nn.MaxPool2d()` - Max pooling
- `nn.AvgPool2d()` - Average pooling
- `nn.AdaptiveAvgPool2d()` - Adaptive pooling

### Activation
- `nn.ReLU()`, `nn.LeakyReLU()`, `nn.GELU()`
- `nn.Sigmoid()`, `nn.Tanh()`, `nn.Softmax()`

### Normalization
- `nn.BatchNorm2d()` - Batch normalization
- `nn.LayerNorm()` - Layer normalization
- `nn.GroupNorm()` - Group normalization

### Recurrent
- `nn.RNN()`, `nn.LSTM()`, `nn.GRU()`

### Transformer
- `nn.Transformer()` - Full transformer
- `nn.TransformerEncoder/Decoder()`
- `nn.MultiheadAttention()`

### Regularization
- `nn.Dropout()`, `nn.Dropout2d()`

### Embedding
- `nn.Embedding()` - Lookup table

### Loss Functions
- `nn.CrossEntropyLoss()` - Classification
- `nn.MSELoss()` - Regression
- `nn.BCEWithLogitsLoss()` - Binary classification

### Utility
- `nn.Sequential()` - Chain layers
- `nn.ModuleList/Dict()` - Dynamic layers
- `nn.Flatten()` - Flatten dimensions

### Example: Simple CNN

In [None]:
class SimpleCNN(nn.Module):
    """Small image classifier: two conv/batch-norm/ReLU stages and an MLP head.

    Args:
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        feature_layers = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            # Pool every channel down to 1x1 so the head always sees 64 values
            nn.AdaptiveAvgPool2d(output_size=1),
        ]
        head_layers = [
            nn.Flatten(),
            nn.Linear(in_features=64, out_features=256),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256, out_features=num_classes),
        ]
        self.features = nn.Sequential(*feature_layers)
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the convolutional stages, then the classifier head."""
        pooled = self.features(x)
        logits = self.classifier(pooled)
        return logits

# Build the network with its default number of classes and show its layers
model = SimpleCNN()
print(model)
# Total element count over every parameter tensor of the model
params = sum(tensor.numel() for tensor in model.parameters())
print(f"\nTotal parameters: {params:,}")

In [None]:
# CIFAR-10 training split stored under ./data (downloaded when requested);
# keep the first (image, label) sample for the cells below
image_data = datasets.CIFAR10(root='data', train=True, download=True)
image, label = image_data[0]

In [None]:
# Create a unique run directory with timestamp to avoid multiple graph events
run_dir = f'runs/simple_cnn_model_{datetime.now().strftime("%Y%m%d_%H%M%S")}'
writer = SummaryWriter(run_dir)
print(f"TensorBoard logs saved to: {run_dir}")

# Convert CIFAR10 image to correct PyTorch format
# CIFAR10 images are (H, W, C) format, but PyTorch CNNs need (B, C, H, W)
x = np.asarray(image)  # Shape: (32, 32, 3)
x = torch.tensor(x, dtype=torch.float32)
print(f"Original shape: {x.shape}")

# Use .permute() to rearrange dimensions from (H, W, C) to (C, H, W)
x = x.permute(2, 0, 1)  # Now shape: (3, 32, 32)
print(f"After permute: {x.shape}")

# Add batch dimension
x = x.unsqueeze(0)  # Now shape: (1, 3, 32, 32)
x.requires_grad = True
print(f"Final shape: {x.shape}")

# Add graph only once
writer.add_graph(model, x)
writer.close()
print("CNN graph added successfully!")