# PyTorch Fundamentals - Assignment 1

<a target="_blank" href="https://colab.research.google.com/github/sham-nlp/2026nlp-1-pytorch-fundamentals/blob/main/01_pytorch_assignment_student.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

**Name:** `Your Name Here`

**Date:** `Insert Date`

---

## Instructions

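Complete the code sections marked with `# YOUR CODE HERE`. Run the cells in order from top to bottom: later parts reuse variables and functions (such as `forward` and `mse_loss`) defined in earlier cells.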

**Submission:** Submit this notebook with all cells executed and outputs visible.

---

## Part 1: Tensor Basics

In [None]:
import torch

# TODO: Create a 2x3 tensor (2 rows, 3 columns) with values 1 through 6
# Hint: Use torch.tensor() or torch.arange() with .reshape()
# Use integer values so the dtype check below reports torch.int64.
x = # YOUR CODE HERE

# Show the tensor itself, followed by a blank separator line.
print("Tensor x:")
print(x)
print()

# Inspect the tensor's metadata; each comment gives the value you should see.
print(f"Shape:   {x.shape}")      # Expected: torch.Size([2, 3])
print(f"Dtype:   {x.dtype}")      # Expected: torch.int64
print(f"Device:  {x.device}")     # Expected: cpu
# Stride = how many elements to skip in memory to advance one step along each
# dimension: 3 to reach the next row, 1 to reach the next column.
print(f"Stride:  {x.stride()}")   # Expected: (3, 1)
print(f"Is contiguous: {x.is_contiguous()}")  # Expected: True

In [None]:
# TODO: Create the same 2x3 tensor (values 1 through 6) but with dtype float32
# Hint: Pass the keyword argument dtype=torch.float32 when creating the tensor,
#       e.g. torch.tensor(..., dtype=torch.float32)
x_float = # YOUR CODE HERE

print(f"Float tensor dtype: {x_float.dtype}")  # Expected: torch.float32

---

## Part 2: Building a Linear Model

Implement y = wx + b using PyTorch tensors with gradient tracking.

In [None]:
# Input: negative news ratio (one feature per sample, one row per sample)
x = torch.tensor([[0.3], [0.7], [0.1], [0.5]])

# Target: exchange rate increases (one target value per sample)
y_true = torch.tensor([[12.0], [29.0], [4.0], [23.0]])

print(f"Input shape:  {x.shape}")    # (4, 1) - 4 samples, 1 feature
print(f"Target shape: {y_true.shape}")  # (4, 1) - 4 samples, 1 output

# TODO: Initialize weight w and bias b
# Use requires_grad=True so PyTorch tracks gradients
# Initialize w to 0.5 and b to 0.0
# Shape hint: the forward pass computes `x @ w + b`, so w must be a matrix of
# shape (1, 1) to map the (4, 1) input to a (4, 1) output; b can be a
# one-element tensor of shape (1,). Each must hold exactly one value because
# .item() is called on them below.
w = # YOUR CODE HERE
b = # YOUR CODE HERE

print(f"\nInitial w: {w.item():.2f}")  # Expected: 0.50
print(f"Initial b: {b.item():.2f}")  # Expected: 0.00

In [None]:
# TODO: Define the forward function: y_pred = x @ w + b
def forward(x, w, b):
    """Linear model forward pass (student TODO).

    Should return the prediction ``x @ w + b`` for input ``x``, weight ``w``
    and bias ``b``. Until the placeholder is filled in, the bare ``return``
    yields ``None``.
    """
    return # YOUR CODE HERE

# Test it: run the model once with the initial parameters.
# The result should have one prediction per input sample.
y_pred = forward(x, w, b)
print("Predictions with initial w, b:")
print(y_pred)

---

## Part 3: Loss and Gradients

In [None]:
# TODO: Define MSE loss function
def mse_loss(y_pred, y_true):
    """Mean squared error between predictions and targets (student TODO).

    Should return a single-element tensor: callers read the value with
    ``.item()``. Until the placeholder is filled in, the bare ``return``
    yields ``None``.
    """
    return # YOUR CODE HERE

# Compute loss for the predictions made with the initial w and b
loss = mse_loss(y_pred, y_true)
print(f"Initial loss: {loss.item():.2f}")

# TODO: Compute gradients using autograd
# Hint: Call something on the loss [ loss.???() ]
# This is what fills in w.grad and b.grad; the prints below read them, so
# they only work after this step has run.
# YOUR CODE HERE

print(f"\nGradients computed!")
print(f"dw (gradient of loss w.r.t. w): {w.grad.item():.2f}")
print(f"db (gradient of loss w.r.t. b): {b.grad.item():.2f}")

---

## Part 4: Manual Gradient Descent

In [None]:
# Step size used to scale each gradient before it is subtracted.
learning_rate = 0.1

# TODO: Update parameters using gradient descent
# w_new = w_old - learning_rate * gradient
# Use torch.no_grad() context so this update isn't tracked
# The gradients computed in Part 3 are available as w.grad and b.grad.
with torch.no_grad():
    w -= # YOUR CODE HERE
    b -= # YOUR CODE HERE

# TODO: Zero the gradients for next iteration
# Hint: Set .grad to ???
# Do this for both w and b.
# YOUR CODE HERE

# Report the parameters after a single gradient-descent step.
print("After one update:")
print(f"w: {w.item():.4f}")
print(f"b: {b.item():.4f}")

# Check new predictions: recompute the loss with the updated parameters and
# compare it with the loss from before the update.
y_pred_new = forward(x, w, b)
loss_new = mse_loss(y_pred_new, y_true)
print(f"\nNew loss: {loss_new.item():.2f}")
print(f"Old loss: {loss.item():.2f}")
# Compare plain Python floats so this prints True/False rather than a
# tensor repr such as "tensor(True)".
print(f"Loss decreased? {loss_new.item() < loss.item()}")

---

## Part 5: Full Training Loop

In [None]:
# Reset parameters to their starting values so training begins from scratch.
# w has shape (1, 1) and b has shape (1,).
w = torch.tensor([[0.5]], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

learning_rate = 0.1
epochs = 1000

# Each epoch performs one gradient-descent step on the full data set.
for epoch in range(epochs):
    # Step 1: Forward pass (hint: reuse forward() from Part 2)
    y_pred = # YOUR CODE HERE
    
    # Step 2: Compute loss (hint: reuse mse_loss() from Part 3)
    loss = # YOUR CODE HERE
    
    # Step 3: Backward pass (populates w.grad and b.grad)
    # YOUR CODE HERE
    
    # Step 4: Update parameters
    # Done under torch.no_grad() so the update itself isn't tracked.
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= # YOUR CODE HERE
    
    # Step 5: Zero gradients (for both w and b) before the next epoch
    # YOUR CODE HERE
    
    # Print progress every 100 epochs
    # Note: `loss` was computed in Step 2, i.e. before this epoch's update,
    # while w and b shown here are the values after the update.
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1:4d} | Loss: {loss.item():.6f} | w: {w.item():.4f} | b: {b.item():.4f}")

# Report the learned parameters and the loss they achieve.
# For this data set the closed-form least-squares fit of y = w*x + b is
# w = 43.0, b = -0.2, which is what gradient descent should approach here.
print("\n" + "="*50)
print("Training complete!")
print(f"Final w: {w.item():.4f} (expected: ~43.0)")
print(f"Final b: {b.item():.4f} (expected: ~-0.2)")
print(f"Final loss: {mse_loss(forward(x, w, b), y_true).item():.6f}")

---

## Part 6: Test Your Model

In [None]:
# TODO: Make a prediction for a news ratio of 0.1 using the trained w and b
# Hint: wrap the input value in a tensor, e.g. torch.tensor([??])
test_input = # YOUR CODE HERE

# Hint: reuse forward() with test_input and the trained w, b.
# The result must contain a single value, because .item() is called on it below.
prediction = # YOUR CODE HERE

print(f"Predicted exchange rate increase: {prediction.item():.2f}%")

---

## Part 7: Training with `nn.Linear` and Manual SGD

In [None]:
import torch.nn as nn

# Same training data as in Part 2: 4 samples, 1 feature / 1 target each.
x = torch.tensor([[0.3], [0.7], [0.1], [0.5]])
y_true = torch.tensor([[12.0], [29.0], [4.0], [23.0]])

# TODO: Create an nn.Linear layer with 1 input and 1 output
# Hint: nn.Linear(in_features, out_features)
model = # YOUR CODE HERE

# The layer's parameters are available as model.weight and model.bias.
# With 1 input and 1 output each holds a single value, so .item() works.
print(f"Model: {model}")
print(f"Initial weight: {model.weight.item():.4f}")  # Random value
print(f"Initial bias: {model.bias.item():.4f}")    # Random value

---

### Method 1: Update Each Parameter Directly

Update `model.weight` and `model.bias` separately.

In [None]:
learning_rate = 0.01
epochs = 500

# Print a table header; the loop below adds one row every 50 epochs.
print("Training with Method 1: Direct parameter update\n")
print(f"{'Epoch':>6} | {'Loss':>10} | {'Weight':>8} | {'Bias':>8}")
print("-" * 42)

for epoch in range(epochs):
    # Step 1: Forward pass
    y_pred = # YOUR CODE HERE (hint: call model like a function)
    
    # Step 2: Compute loss
    loss = # YOUR CODE HERE (hint: reuse mse_loss we coded earlier)
    
    # Step 3: Backward pass (populates model.weight.grad and model.bias.grad)
    # YOUR CODE HERE
    
    # Step 4: Update parameters (IMPORTANT: use torch.no_grad()!)
    # Hint: model.weight -= learning_rate * model.weight.grad
    #       same for bias
    # Replace both placeholders below: the context manager on the `with`
    # line, and the two update statements in its body.
    with # YOUR CODE HERE :
        # YOUR CODE HERE
    
    # Step 5: Zero the gradients
    # Hint: model.weight.grad.zero_()
    #       model.bias.grad.zero_()
    # YOUR CODE HERE
    
    # Print progress every 50 epochs
    # `loss` is the value from Step 2 (before this epoch's update).
    if (epoch + 1) % 50 == 0:
        print(f"{epoch+1:6d} | {loss.item():10.6f} | {model.weight.item():8.4f} | {model.bias.item():8.4f}")

# Report the learned parameters next to the best possible linear fit.
# For this data the least-squares solution is w = 43.0, b = -0.2.
# NOTE: with learning_rate = 0.01 and only 500 epochs, plain gradient descent
# is typically still well short of that optimum, so the printed values can
# differ noticeably from it; train longer or raise the learning rate to get
# closer.
print("\n" + "="*42)
print(f"Final weight: {model.weight.item():.4f} (least-squares optimum: ~43.0)")
print(f"Final bias:   {model.bias.item():.4f} (least-squares optimum: ~-0.2)")

---

### Method 2: Update Using `model.parameters()`

Loop through all parameters automatically - more scalable for larger models!

In [None]:
# Start from a fresh layer so Method 2 does not continue from Method 1's result.
model = nn.Linear(1, 1)

learning_rate = 0.01
epochs = 500

# Print a table header; the loop below adds one row every 50 epochs.
print("Training with Method 2: Using model.parameters()\n")
print(f"{'Epoch':>6} | {'Loss':>10} | {'Weight':>8} | {'Bias':>8}")
print("-" * 42)

for epoch in range(epochs):
    # Step 1: Forward pass
    y_pred = model(x)
    
    # Step 2: Compute loss (mean squared error, written out inline)
    loss = ((y_pred - y_true) ** 2).mean()
    
    # Step 3: Backward pass
    loss.backward()
    
    # Step 4: Update parameters using model.parameters()
    # This loops through BOTH weight and bias automatically!
    # Hint: for param in model.parameters():
    #           param -= learning_rate * param.grad
    # Put the loop inside the `with` block below so the update isn't tracked.
    with torch.no_grad():
        # YOUR CODE HERE
    
    # Step 5: Zero the gradients using model.parameters()
    # Hint: for param in model.parameters():
    #           param.grad.zero_()
    # YOUR CODE HERE
    
    # Print progress every 50 epochs
    # `loss` is the value from Step 2 (before this epoch's update).
    if (epoch + 1) % 50 == 0:
        print(f"{epoch+1:6d} | {loss.item():10.6f} | {model.weight.item():8.4f} | {model.bias.item():8.4f}")

# Report the learned parameters next to the best possible linear fit.
# For this data the least-squares solution is w = 43.0, b = -0.2.
# NOTE: with learning_rate = 0.01 and only 500 epochs, plain gradient descent
# is typically still well short of that optimum, so the printed values can
# differ noticeably from it; train longer or raise the learning rate to get
# closer.
print("\n" + "="*42)
print(f"Final weight: {model.weight.item():.4f} (least-squares optimum: ~43.0)")
print(f"Final bias:   {model.bias.item():.4f} (least-squares optimum: ~-0.2)")

---

## Part 8: Challenge (Optional)

Explore what happens with a different relationship.

Generate data where y = 2x² + 3x + 1 and try to fit it.

Hint: You'll need to create features [x, x²] and use a weight vector with 2 elements.

In [None]:
# Your code here (optional challenge)
#
# Goal: fit y = 2*x² + 3*x + 1 with a model that is linear in the features
# [x, x²], i.e. y_pred = w1*x + w2*x² + b.
#
# Steps:
# 1. Generate x values (e.g., torch.linspace(-2, 2, 50))
# 2. Compute y = 2*x² + 3*x + 1
# 3. Create feature matrix: [x, x²] (one row per sample, two columns)
# 4. Initialize weights [w1, w2] and bias b (with requires_grad=True)
# 5. Train using the same loop as above
#    (forward -> loss -> backward -> update under torch.no_grad() -> zero grads)
#
# If training works, the learned values should approach w1 ≈ 3, w2 ≈ 2, b ≈ 1.

# YOUR CODE HERE