<a href="https://colab.research.google.com/github/kdmalc/intro-computer-vision/blob/main/HW4_2025.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Name: Kai Malcolm

NetID: km82

Collaborators: NA

## General instructions
Please copy this colab notebook into your own Drive to edit. This notebook will also serve as your final submission report - please ensure that code cells run correctly, and that all non-code (text/latex) blocks are rendered correctly before submissing the file. Feel free to add any additional cells (code or text) you need. Please follow good coding, markdown, and presentation etiquette.

__Please do not use any AI tools for this assignment.__


## Submission instructions

- Before submitting, please `run-all` the code. This will re-render your entire jupyter file cell by cell to produce all the outputs.

- You are required to download the colab notebook as a `.ipynb` file and submit it to canvas. Please name your `.ipynb` file as `netid.ipynb`

- Modify the text cell on top to include your name and the names of any collaborators from this class you worked with on this assignment.

- Download a pdf of the executed colab notebook. You can use print -> save as pdf. Please name your `.pdf` file as `netid.pdf`.

- Any extra images used in the homework should also be uploaded to canvas.

- For simplicity, you can also upload a `netid.zip` file to canvas containing all solution files.

In [None]:
import math
import numpy as np
import imageio.v2 as imageio
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
from torch.utils.data import Dataset
from torchvision import transforms as T

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Problem 1

### Problem 1.1: Basics of Autograd

In [25]:
"""
Taylor approximation to sin(x).
x: (Tensor[float]) input value(s)
n: (int) number of terms in Taylor approximation

Output:
(Tensor[float]) Taylor approximation to sin(x)
"""
def sin_taylor(x, n=10):
  # f(x) ~ \sum f^n(a)/(n!) * (x-a)^n
  # Let a = 0 (assuming we are evaluating at 0)
  # f(x) is just sin(x)
  # n is the number of terms to use in the approx
  result = 0
  for k in range(n):
      sign = 1 if k % 4 != 3 else -1
      if k%2==0:
          deriv = np.sin(0)
      else:
          deriv = np.cos(0)
      print(f"k: {k}; sign: {sign}, deriv: {deriv}")
      term = sign * deriv / math.factorial(k) * (x ** k)
      result += term
  return result

# Your code here for 1.1b-d goes here

In [27]:
print(sin_taylor(np.pi / 2, 10))  # Should be close to 1

k: 0; sign: 1, deriv: 0.0
k: 1; sign: 1, deriv: 1.0
k: 2; sign: 1, deriv: 0.0
k: 3; sign: -1, deriv: 1.0
k: 4; sign: 1, deriv: 0.0
k: 5; sign: 1, deriv: 1.0
k: 6; sign: 1, deriv: 0.0
k: 7; sign: -1, deriv: 1.0
k: 8; sign: 1, deriv: 0.0
k: 9; sign: 1, deriv: 1.0
1.0000035425842861


In [None]:
import torch

# 1. Create x with requires_grad=True
x = torch.tensor(np.pi / 4, requires_grad=True)

# 2. Define the sin_taylor approximation function
def sin_taylor(x, num_terms=10):
    result = 0
    for n in range(num_terms):
        # Use cycle: 0 → 0, 1 → 1, 2 → 0, 3 → -1 ...
        mod = n % 4
        if mod == 0 or mod == 2:
            coeff = 0
        elif mod == 1:
            coeff = 1
        else:  # mod == 3
            coeff = -1
        term = coeff * x ** n / math.factorial(n)
        result = result + term
    return result

# 3. Compute y = sin_taylor(x)
y = sin_taylor(x)

# 4. Use autograd to compute dy/dx
y.backward()

# 5. Compare gradient to exact derivative (cos(π/4) ≈ 0.7071)
print("Taylor approximation of sin(π/4):", y.item())
print("Autograd-computed derivative:", x.grad.item())
print("Exact derivative (cos(π/4)):", math.cos(math.pi / 4))


### Problem 1.2: Image Denoising

In [None]:
"""
Returns the x and y gradient images for input image I.
Input:
I: (Tensor) Image of shape (H, W, 3)

Output:
(Ix, Iy): (Tensor) Gradient images each of shape (H, W, 3)
"""
def get_spatial_gradients(I):
  I = I.permute(2, 0, 1).unsqueeze(0) # Change I's shape from (H, W, 3) to (1, 3, H, W)
  kx = torch.zeros(I.shape[1], I.shape[1], 3, 3).to(I.device)
  ky = torch.zeros(I.shape[1], I.shape[1], 3, 3).to(I.device)

  for i in range(3):
    kx[i, i, 1, 1] = -1
    kx[i, i, 1, 2] = 1
    ky[i, i, 1, 1] = -1
    ky[i, i, 2, 1] = 1

  Ix = F.conv2d(I, kx, padding=1)
  Iy = F.conv2d(I, ky, padding=1)
  return Ix[0,...].permute(1,2,0), Iy[0,...].permute(1,2,0)

"""
Denoising objective function.
Input:
I, J: (Tensor) Images of shape (H, W, 3)
alpha: (float) Regularization hyperparameter

Output:
loss: (Tensor[float])
"""
def denoising_loss(I, J, alpha):
  return None # Replace with your code for 1.2a.

In [None]:
img = imageio.imread('parrot_noisy.png') / 255.0
I = torch.tensor(img, dtype=torch.float32)
I = I.to('cuda')

lr = None # Learning rate
alpha = None # alpha
n_iter = None # Number of iterations

for i in range(n_iter):
  # Your code for 1.2b goes here

  with torch.no_grad():
    pass # Replace with your code for 1.2b

## Problem 2: Training an Image Classifier

### 2a: Finish implementing dataset class

In [None]:
class CIFARDataset(Dataset):
  def __init__(self, images, labels, mode, transform):
    self.transform = transform

    if mode == 'train':
      pass # Your code here. If training, use examples [0,40000) of the
      # entire dataset

    elif mode == 'val':
      pass # Your code here. If validation, use examples [40,000-50,000)
      # of the entire dataset

    elif mode == 'test':
      pass # Your code here. If testing, use examples [50,000-60,000) of the
      # entire dataset

    else:
      raise ValueError('Invalid mode!')

  def __getitem__(self, idx):
    # Do the following:
    # 1. Get the image and label from the dataset corresponding to index idx.
    # 2. Convert the label to a LongTensor (needs to be of this type because it
    # is an integer value and PyTorch will throw an error otherwise)
    # 3. Transform the image using self.transform. This will convert the image
    # into a tensor, scale it to [0,1], and apply data augmentations.
    # 4. Return the image and label.
    return None

  def __len__(self):
    return None # Replace with your code.

### 2b: Write transforms

In [None]:
cifar = np.load('/content/drive/MyDrive/CIFAR.npz')
X, y, label_names = cifar['X'], cifar['y'] * 1.0, cifar['label_names']
print("Read in CIFAR10 dataset with %d examples, and labels:\n %s" % (X.shape[0], label_names))

batch_size = 64

transform = None # Add transforms.
train_dataset = CIFARDataset(X, y, "train", transform)
train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)

transform = None # No transforms, but should still include ToTensor().
val_dataset = CIFARDataset(X, y, "val", transform)
val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=1)

transform = None # No transforms, but should still include ToTensor().
test_dataset = CIFARDataset(X, y, "test", transform)
test_dataloader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=1)

### 2c: Implement the model

In [None]:
class Model(nn.Module):
  def __init__(self):
    super().__init__()
    pass #Replace with your code

  def forward(self, x):
    return x #Replace with your code

### 2d/2e: Implement the training loop, and train your model.

In [None]:
# Your code here. Don't forget to call model.train() before training!

### 2f: Choose the best model based on overall accuracy
using the validation dataset.

In [None]:
# Your code here. Don't forget to call model.eval() first!

### 2g: Implement code for computing overall accuracy, accuracy per class, and the confusion matrix on the test set.

In [None]:
# Your code here. Don't forget to call model.eval() first!

## Problem 3: Run Mask R-CNN with Detectron2

In [None]:
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).

import sys, os, distutils.core
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

In [None]:
# import libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

import detectron2
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [None]:
# load image
im = cv2.imread("image path")
cv2_imshow(im)

In [None]:
# Load pre-trained model
# You can find the model cofig from the following url, or other config of your choice
# https://github.com/facebookresearch/detectron2/tree/main/configs/COCO-Detection
# https://github.com/facebookresearch/detectron2/tree/main/configs/COCO-InstanceSegmentation

##### Your code #####
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("your choice of pretrained model"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("Your choice of pretrained model")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST =   # set threshold for this model (0-1)

In [None]:
# Create predictor
predictor = DefaultPredictor(cfg)
# Run inference on the sample image
outputs = predictor(im)

In [None]:
# Visualize results with "Visualizer"
v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2_imshow(out.get_image()[:, :, ::-1])

## Problem 4: Adversarial attacks for trained networks. Wite code below

In [None]:
def fgsm_attack(model, image, eps):
  # Your code here.

In [None]:
# Your code here for creating and displaying adversarial images.