In [1]:
import torch
import numpy as np 
import torch.nn as nn
import torch.nn.functional as F

### Simple Neural Network Training Step

- Initialize weights
- Compute forward pass
- Compute MSE loss
- Backpropagate using `.backward()`
- Manually update weights

In [2]:
# --- Toy regression data: four points on the line y = 2x ---
X = torch.tensor([1.0, 2.0, 3.0, 4.0]).reshape(-1, 1)   # inputs, shape (4, 1)
y = 2 * X                                                # targets, shape (4, 1)

# --- Trainable parameters of a 1 -> 4 -> 1 network ---
w1 = torch.randn(1, 4, requires_grad=True)   # hidden-layer weights
b1 = torch.randn(4, requires_grad=True)      # hidden-layer bias
w2 = torch.randn(4, 1, requires_grad=True)   # output-layer weights
b2 = torch.randn(1, requires_grad=True)      # output-layer bias

lr = 0.01        # step size for gradient descent
num_epochs = 5   # number of passes over the four samples

# --- Gradient descent with hand-written parameter updates ---
for epoch in range(num_epochs):

    # Forward pass: linear -> ReLU -> linear
    h = torch.matmul(X, w1) + b1
    h_relu = h.relu()
    y_pred = torch.matmul(h_relu, w2) + b2

    # Mean squared error over the batch
    loss = ((y_pred - y) ** 2).mean()

    # Backward pass: fills .grad on every tensor created with requires_grad=True
    loss.backward()

    for param in (w1, b1, w2, b2):
        # The update itself must not be recorded by autograd
        with torch.no_grad():
            param.sub_(lr * param.grad)
        # .backward() accumulates into .grad, so reset it before the next epoch
        param.grad.zero_()

    # The printed loss is the value computed before this epoch's update
    print(f"Epoch {epoch}, Loss = {loss.item():.4f}")


Epoch 0, Loss = 39.6845
Epoch 1, Loss = 6.5552
Epoch 2, Loss = 1.1174
Epoch 3, Loss = 0.3105
Epoch 4, Loss = 0.2368


### Neural Network Using Optimizer (SGD)

In [3]:
# --- Toy regression data: y = 2x ---
X = torch.tensor([1.0, 2.0, 3.0, 4.0]).reshape(-1, 1)   # shape (4, 1)
y = 2 * X                                                # shape (4, 1)

# --- Parameters of the 1 -> 4 -> 1 network ---
w1 = torch.randn(1, 4, requires_grad=True)   # hidden-layer weights
b1 = torch.randn(4, requires_grad=True)      # hidden-layer bias
w2 = torch.randn(4, 1, requires_grad=True)   # output-layer weights
b2 = torch.randn(1, requires_grad=True)      # output-layer bias

# The optimizer needs to know which tensors it is responsible for updating
params = [w1, b1, w2, b2]
optimizer = torch.optim.SGD(params, lr=0.01)

num_epochs = 5

# --- Training: the optimizer replaces the hand-written update ---
for epoch in range(num_epochs):

    # Drop gradients left over from the previous epoch
    optimizer.zero_grad()

    # Forward pass: linear -> ReLU -> linear
    h = torch.matmul(X, w1) + b1
    h_relu = h.relu()
    y_pred = torch.matmul(h_relu, w2) + b2

    # Mean squared error over the batch
    loss = ((y_pred - y) ** 2).mean()

    loss.backward()    # compute fresh gradients
    optimizer.step()   # apply the SGD update to every tensor in `params`

    # The printed loss is the value computed before this epoch's update
    print(f"Epoch {epoch}, Loss = {loss.item():.4f}")


Epoch 0, Loss = 31.0984
Epoch 1, Loss = 0.3288
Epoch 2, Loss = 0.1788
Epoch 3, Loss = 0.1720
Epoch 4, Loss = 0.1676


### What is `nn.Linear`?

`nn.Linear(in_features, out_features)` performs a **linear transformation**:

$$
y = xW^{T} + b
$$

* **W** → weight matrix of size `(out_features × in_features)`
* **b** → bias vector of size `(out_features)`

So if you call:

```python
layer = nn.Linear(3, 2)
```

Then:

* Input shape must be: `[batch_size, 3]`
* Output shape will be: `[batch_size, 2]`

PyTorch handles:
✔ Creating weights
✔ Creating bias
✔ Tracking gradients
✔ Updating parameters during training

---

### Example: Using `nn.Linear` in Functional Style

**No classes, everything step-by-step.**

We build a tiny model:

```
X → Linear(1→4) → ReLU → Linear(4→1) → Output
```

---

### **Code: Functional neural network using nn.Linear**





In [4]:
# === 1. Data: four samples of y = 2x ===
X = torch.tensor([1.0, 2.0, 3.0, 4.0]).reshape(-1, 1)   # inputs, shape (4, 1)
y = 2 * X                                                # targets, shape (4, 1)

# === 2. Layers, created as free-standing objects (no model class) ===
layer1 = nn.Linear(1, 4)   # 1 input feature  -> 4 hidden units
layer2 = nn.Linear(4, 1)   # 4 hidden units   -> 1 output

# The optimizer must be handed the parameters of *both* layers
trainable = [*layer1.parameters(), *layer2.parameters()]
optimizer = torch.optim.SGD(trainable, lr=0.01)

num_epochs = 5

# === 3. Training loop ===
for epoch in range(num_epochs):

    # Forward pass, one explicit step at a time
    h = layer1(X)          # affine transform into the hidden space
    h = F.relu(h)          # non-linearity
    y_pred = layer2(h)     # affine transform to the prediction

    # Mean squared error between prediction and target
    loss = F.mse_loss(y_pred, y)

    # Backward pass and parameter update
    optimizer.zero_grad()  # discard gradients from the previous epoch
    loss.backward()        # populate .grad on every layer parameter
    optimizer.step()       # SGD update

    # The printed loss is the value computed before this epoch's update
    print(f"Epoch {epoch}, Loss = {loss.item():.4f}")


Epoch 0, Loss = 39.7125
Epoch 1, Loss = 26.2883
Epoch 2, Loss = 17.5546
Epoch 3, Loss = 10.1455
Epoch 4, Loss = 4.5563


### What this example shows

##### ✔ Functional-style usage (no class)

You manually create each layer:

```python
layer1 = nn.Linear(1, 4)
layer2 = nn.Linear(4, 1)
```

##### ✔ Forward pass is explicit

```python
h = layer1(X)
h = F.relu(h)
y_pred = layer2(h)
```

##### ✔ Loss calculation

```python
loss = F.mse_loss(y_pred, y)
```

##### ✔ Backprop + update

```python
optimizer.zero_grad()
loss.backward()
optimizer.step()
```

Everything happens step-by-step so you can clearly see how the network works.

---

### Optional: Print weights and bias

```python
print(layer1.weight)
print(layer1.bias)
```

You'll see they update each epoch.

### What is `nn.Sequential`?

`nn.Sequential` lets you **stack layers in order** like this:

```python
model = nn.Sequential(
    nn.Linear(1, 4),
    nn.ReLU(),
    nn.Linear(4, 1)
)
```

It automatically feeds each layer's output into the next layer as its input.

You only define the architecture once, and call:

```python
y_pred = model(X)
```

---

### Example: Neural Network with `nn.Sequential` (Functional Style)

We build:

```
X → Linear(1→4) → ReLU → Linear(4→1) → Output
```

---

### **Code**

In [5]:
# === 1. Data: four samples of y = 2x ===
X = torch.tensor([1.0, 2.0, 3.0, 4.0]).reshape(-1, 1)   # inputs, shape (4, 1)
y = 2 * X                                                # targets, shape (4, 1)

# === 2. Model: layers are applied in the order they are listed ===
model = nn.Sequential(
    nn.Linear(1, 4),   # 1 input feature -> 4 hidden units
    nn.ReLU(),         # non-linearity
    nn.Linear(4, 1),   # 4 hidden units  -> 1 output
)

# A single .parameters() call now covers every layer in the container
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

num_epochs = 5

# === 3. Training loop ===
for epoch in range(num_epochs):

    # Discard gradients from the previous epoch
    optimizer.zero_grad()

    # Forward pass through the whole stack, then mean squared error
    y_pred = model(X)
    loss = F.mse_loss(y_pred, y)

    loss.backward()    # populate .grad on every model parameter
    optimizer.step()   # SGD update

    # The printed loss is the value computed before this epoch's update
    print(f"Epoch {epoch}, Loss = {loss.item():.4f}")


Epoch 0, Loss = 30.1275
Epoch 1, Loss = 23.3221
Epoch 2, Loss = 17.4129
Epoch 3, Loss = 11.5881
Epoch 4, Loss = 6.3862



### Why `nn.Sequential` is helpful?

##### ✔ Cleaner model definition

No need to manually call each layer.

##### ✔ Easy to add/remove layers

Just modify the list.

##### ✔ Perfect for simple feedforward networks

Especially MLPs, CNN blocks, etc.

Example:

```python
model = nn.Sequential(
    nn.Linear(10, 32),
    nn.ReLU(),
    nn.Linear(32, 16),
    nn.ReLU(),
    nn.Linear(16, 1)
)
```


Below are **three clean, minimal, class-based PyTorch model examples**, one for each style:

1. **Very simple NN (fully manual forward calculations)**
2. **Class-based model using `nn.Linear` layers**
3. **Class-based model using `nn.Sequential`**

All examples include:

* A class defining the model
* A forward function
* Simple training loop template

This will give you a **clear understanding of how PyTorch models are structured under the hood**.

---

### **1. Class-Based Model: Very First Simple NN (Manual Weights)**

Here we **manually create weights and do matrix multiplications ourselves**.

##### 👉 Best for: understanding how PyTorch really works under the hood.


In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# ---------------------------------------------------
# 1. Simple model with manually created parameters
# ---------------------------------------------------
class SimpleManualNN(nn.Module):
    """Two-layer network (linear -> ReLU -> linear) built from raw parameters.

    The weights and biases are plain ``nn.Parameter`` tensors and the linear
    layers are written out as explicit matrix multiplications.

    Args:
        in_features: Number of features in each input sample. Defaults to 1.
        hidden_features: Width of the hidden layer. Defaults to 4.
        out_features: Number of values produced per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 1, hidden_features: int = 4,
                 out_features: int = 1):
        super().__init__()

        # Wrapping a tensor in nn.Parameter registers it on the module, which
        # is what makes it show up in model.parameters() for the optimizer.
        # Weights are stored as (fan_in, fan_out) so forward() can use `x @ w`.
        self.w1 = nn.Parameter(torch.randn(in_features, hidden_features))
        self.b1 = nn.Parameter(torch.randn(hidden_features))

        self.w2 = nn.Parameter(torch.randn(hidden_features, out_features))
        self.b2 = nn.Parameter(torch.randn(out_features))

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: Tensor whose last dimension is ``in_features``,
                e.g. shape ``(batch_size, in_features)``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``out_features``.
        """
        h = x @ self.w1 + self.b1    # Linear layer 1
        h = F.relu(h)                # Activation
        out = h @ self.w2 + self.b2  # Linear layer 2
        return out


# Smoke test: push four inputs through a freshly initialised (untrained) network
model = SimpleManualNN()
X = torch.tensor([1.0, 2.0, 3.0, 4.0]).reshape(-1, 1)   # shape (4, 1)
y_pred = model(X)                                        # shape (4, 1)
print(y_pred)

tensor([[-2.0261],
        [-2.0660],
        [-2.1126],
        [-2.3648]], grad_fn=<AddBackward0>)


✔ Manually implemented linear layers
✔ Useful for learning core mechanics

---

### **2. Class-Based Model Using `nn.Linear` (Proper PyTorch Way)**

The cleanest and most common architecture structure.

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# ---------------------------------------------------
# 2. Class model using nn.Linear layers
# ---------------------------------------------------
class LinearNN(nn.Module):
    """Two-layer network (linear -> ReLU -> linear) built from ``nn.Linear``.

    Args:
        in_features: Number of features in each input sample. Defaults to 1.
        hidden_features: Width of the hidden layer. Defaults to 4.
        out_features: Number of values produced per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 1, hidden_features: int = 4,
                 out_features: int = 1):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)    # input -> hidden
        self.fc2 = nn.Linear(hidden_features, out_features)   # hidden -> output

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: Tensor whose last dimension is ``in_features``,
                e.g. shape ``(batch_size, in_features)``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``out_features``.
        """
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Smoke test: push four inputs through a freshly initialised (untrained) network
model = LinearNN()
X = torch.tensor([1.0, 2.0, 3.0, 4.0]).reshape(-1, 1)   # shape (4, 1)
y_pred = model(X)                                        # shape (4, 1)
print(y_pred)

tensor([[-0.5326],
        [-0.6009],
        [-0.6692],
        [-0.7375]], grad_fn=<AddmmBackward0>)


✔ Clean and flexible
✔ Easy to expand with more layers

---

### **3. Class-Based Model Using `nn.Sequential`**

Here, the entire network is wrapped inside one Sequential block.

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# ---------------------------------------------------
# 3. Class model using nn.Sequential
# ---------------------------------------------------
class SequentialNN(nn.Module):
    """Two-layer network (linear -> ReLU -> linear) held in one ``nn.Sequential``.

    Args:
        in_features: Number of features in each input sample. Defaults to 1.
        hidden_features: Width of the hidden layer. Defaults to 4.
        out_features: Number of values produced per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 1, hidden_features: int = 4,
                 out_features: int = 1):
        super().__init__()
        # The container applies its children in the order they are listed.
        self.model = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features)
        )

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: Tensor whose last dimension is ``in_features``,
                e.g. shape ``(batch_size, in_features)``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``out_features``.
        """
        return self.model(x)


# Smoke test: push four inputs through a freshly initialised (untrained) network
model = SequentialNN()
X = torch.tensor([1.0, 2.0, 3.0, 4.0]).reshape(-1, 1)   # shape (4, 1)
y_pred = model(X)                                        # shape (4, 1)
print(y_pred)

tensor([[0.5038],
        [0.6392],
        [0.7709],
        [0.8968]], grad_fn=<AddmmBackward0>)


✔ Very compact
✔ Perfect for simple feedforward architectures

---

### **Training Loop Template (Works for All 3 Models)**

In [9]:
# Toy regression data: four samples of y = 2x
X = torch.tensor([1.0, 2.0, 3.0, 4.0]).reshape(-1, 1)   # inputs, shape (4, 1)
y = 2 * X                                                # targets, shape (4, 1)

# Any of the three classes can be dropped in here:
# SimpleManualNN(), LinearNN() or SequentialNN()
model = LinearNN()
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

num_epochs = 5

# Training loop: forward -> loss -> backward -> update
for epoch in range(num_epochs):
    y_pred = model(X)
    loss = loss_fn(y_pred, y)

    optimizer.zero_grad()   # discard gradients from the previous epoch
    loss.backward()         # populate .grad on every model parameter
    optimizer.step()        # SGD update

    # The printed loss is the value computed before this epoch's update
    print(f"Epoch {epoch} Loss: {loss.item():.4f}")

Epoch 0 Loss: 43.9718
Epoch 1 Loss: 27.0567
Epoch 2 Loss: 16.9035
Epoch 3 Loss: 8.8359
Epoch 4 Loss: 3.4723


### Summary Table (Easy Understanding)

| Model Type         | How It's Built                                  | Best For                          |
| ------------------ | ----------------------------------------------- | --------------------------------- |
| **SimpleManualNN** | Manual weights (`nn.Parameter`) + manual matmul | Understanding internals           |
| **LinearNN**       | Uses `nn.Linear` layers                         | Standard choice for most networks |
| **SequentialNN**   | Uses `nn.Sequential` container                  | Very compact CNN/MLP blocks       |
