# Exercise 6

In [1]:
import os
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data.dataloader import DataLoader

import torchvision
from torchvision.io import read_image
from torchvision.transforms import Compose, RandomCrop, ColorJitter, Resize
from torchvision.io import write_png

In [2]:
# Check if GPU is available
if torch.cuda.is_available():
    device = torch.device("cuda")  # Use GPU
else:
    device = torch.device("cpu")  # Use CPU

# Print the device being used
print("Device:", device)

# Example usage: create a tensor on the selected device
tensor = torch.tensor([1, 2, 3], device=device)
print("Tensor device:", tensor.device)


Device: cuda
Tensor device: cuda:0


In [3]:
# Define class SRDataset
class SRDataset(Dataset):
    def __init__(self, folder_path):
        self.folder_path = folder_path
        self.image_filenames = os.listdir(folder_path)
        self.transform_rc = Compose([RandomCrop(64)])
        self.transform_ds = Compose([
            ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
            Resize((32, 32), interpolation=torchvision.transforms.InterpolationMode.BILINEAR, antialias=True)
        ])
        
    def __len__(self):
        return len(self.image_filenames)
    
    def __getitem__(self, index):
        image_path = os.path.join(self.folder_path, self.image_filenames[index])
        hr_image = self.transform_rc(read_image(image_path) / 255.0)  # Convert to float between 0 and 1
        
        # Apply image augmentation transforms
        lr_image = self.transform_ds(hr_image)
        
        # Return the augmented image and its corresponding label if applicable
        return lr_image, hr_image


In [4]:
# Load and initialize the train_dataset
train_datapath = os.path.join(os.path.abspath(''), 'data/train')
train_dataset = SRDataset(train_datapath)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0,
    drop_last=True,
    pin_memory=True,
    )

In [5]:
# Check Dataset initialization
print(f" * Dataset contains {len(train_dataset)} image(s).")
for _, batch in enumerate(train_dataloader, 0):
    lr_image, hr_image = batch
    write_png(lr_image[0, ...].mul(255).byte(), "lr_image.png")
    write_png(hr_image[0, ...].mul(255).byte(), "hr_image.png")
    break # we deliberately break after one batch as this is just a test

 * Dataset contains 301 image(s).


In [16]:
class BasicSRModel(nn.Module):
    def __init__(self, num_inter_blocks):
        super(BasicSRModel, self).__init__()
        
        self.conv_blocks = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=True),
        )
        
        for i in range(num_inter_blocks):  # Number of intermediate blocks
            self.conv_blocks.add_module(
                f"conv_{i+1}",
                nn.Sequential(
                    nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
                    nn.LeakyReLU(inplace=True),
                )
            )
        
        self.conv_blocks.add_module(
            "last_conv",
            nn.Conv2d(64, 3, kernel_size=3, stride=1, padding=1)
        )
    
    def forward(self, x):
        x = self.conv_blocks(x)
        return x

In [17]:
# Create an instance of the model
model = BasicSRModel(10)
model.to(device)

# Define optimizer
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Define loss function
loss_function = torch.nn.L1Loss()
loss_function.to(device)

# # Print the model architecture
# # print(model)

# # Check number of parameters in model
num_params = 0
for param in model.parameters():
    num_params += param.numel()
print("num_params: " + str(num_params))


num_params: 372803


In [15]:
number_of_epochs = 5
for epoch in range(number_of_epochs):
    with tqdm(train_dataloader, desc=f'Epoch {epoch + 1}/{number_of_epochs}', unit='batch') as tqdm_train_dataloader:
        for _, batch in enumerate(tqdm_train_dataloader):
            lr_image, hr_image = batch
            lr_image, hr_image = lr_image.to(device), hr_image.to(device)
            # reset the gradient
            optimizer.zero_grad()
            # forward pass through the model
            hr_prediction = model(lr_image)
            # compute the loss
            loss = loss_function(hr_prediction, hr_image)
            # backpropagation
            loss.backward()
            # update the model parameters
            optimizer.step()
            # log the metrics, images, etc

            # Update the progress bar description
            t.set_postfix({'loss': loss.item()})


Epoch 1/5:   0%|          | 0/75 [00:00<?, ?batch/s]


RuntimeError: Given transposed=1, weight of size [64, 3, 3, 3], expected input[4, 3, 32, 32] to have 64 channels, but got 3 channels instead