## Exercise 1.4 Hotdog -- no hotdog
This is the poster hand-in project for the course. Please see the associated PDF for instructions.

In [2]:
import torch
from torch.utils.data import DataLoader


We always check whether the code is running on a GPU, and select the device accordingly.

In [3]:
# Resolve the compute device once, then report which one was selected.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print("The code will run on GPU.")
else:
    print("The code will run on CPU. Go to Edit->Notebook Settings and choose GPU as the hardware accelerator")

The code will run on CPU. Go to Edit->Notebook Settings and choose GPU as the hardware accelerator


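We provide you with a class that can load the *hotdog/not hotdog* dataset you should use. On the DTU cluster the data is located in `/dtu/datasets1/02516/`; note that the class below reads it from a `hotdog_nothotdog` folder relative to the current working directory.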

In [4]:
from torch.utils.data import Dataset
import os
import glob
from PIL import Image
from torchvision import transforms
from torch import Tensor
from dataclasses import dataclass

@dataclass()
class BaseBatch:
    """Plain container pairing a batch of network inputs with its targets."""

    # Tensor that is fed to the model.
    input: Tensor
    # Tensor holding the corresponding ground-truth values.
    target: Tensor

class Hotdog_NotHotdog(Dataset):
    """Image-folder dataset for the hotdog / not-hotdog classification task.

    Expects the directory layout ``<data_root>/<train|test>/<class_name>/*.jpg``.
    Class names are the sub-directory names of the chosen split; each class is
    assigned the integer label given by its position in the sorted name list.

    Args:
        train: Load the ``train`` split if true, otherwise the ``test`` split.
        image_size: Every image is resized to ``image_size x image_size``.
        data_root: Directory containing the ``train`` and ``test`` folders.
            Defaults to ``'hotdog_nothotdog'`` relative to the working directory.
    """

    def __init__(self, train : bool, image_size : int = 32, data_root : str = 'hotdog_nothotdog'):
        self.transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
        ])
        data_path = os.path.join(data_root, 'train' if train else 'test')
        image_classes = sorted(
            os.path.basename(d)
            for d in glob.glob(os.path.join(data_path, '*'))
            if os.path.isdir(d)
        )
        self.name_to_label = {name: label for label, name in enumerate(image_classes)}
        # glob returns matches in arbitrary (filesystem-dependent) order; sorting
        # makes the index -> sample mapping reproducible across runs and machines.
        self.image_paths = sorted(glob.glob(os.path.join(data_path, '*', '*.jpg')))

    def __len__(self):
        """Return the total number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load one sample and return ``(image_tensor, label_tensor)``."""
        image_path = self.image_paths[idx]
        # The class name is the name of the directory that holds the image.
        class_name = os.path.basename(os.path.dirname(image_path))
        y = self.name_to_label[class_name]
        # Close the file handle deterministically, and force 3-channel RGB so
        # grayscale / RGBA / CMYK files yield tensors of the same shape as the
        # rest (otherwise batching samples of mixed channel count fails).
        with Image.open(image_path) as image:
            X = self.transform(image.convert('RGB'))
        return X, torch.tensor(y)

Below is the simple way of converting the images to something that can be fed through a network.
Feel free to use something other than $128\times128$ images.

In [5]:
# Side length (in pixels) the images are resized to, and the mini-batch size
# shared by both loaders.
size = 128
batch_size = 64

# Build the two dataset splits first ...
trainset = Hotdog_NotHotdog(train=True, image_size=size)
testset = Hotdog_NotHotdog(train=False, image_size=size)

# ... then wrap them in loaders. Only the training loader shuffles.
train_loader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
test_loader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

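Let's look at some images from our data. First we time how quickly individual samples can be loaded, then we fetch one batch from the training loader.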

In [6]:
from tqdm import tqdm
# Time per-sample loading on (at most) the first 1000 training images.
# The count is clamped to the dataset size so that a training set with fewer
# than 1000 images does not raise an IndexError.
for i in tqdm(range(min(1000, len(trainset)))):
    trainset[i]

100%|██████████| 1000/1000 [00:05<00:00, 180.79it/s]


In [7]:
# Fetch a single batch from the training loader; as the last expression of the
# cell, its value is what gets displayed as the cell output.
next(iter(train_loader))

[tensor([[[[0.3333, 0.3569, 0.3608,  ..., 0.0157, 0.0157, 0.0118],
           [0.3333, 0.3451, 0.3412,  ..., 0.0118, 0.0118, 0.0157],
           [0.2078, 0.1843, 0.1451,  ..., 0.0118, 0.0157, 0.0157],
           ...,
           [0.3490, 0.3569, 0.3529,  ..., 0.3922, 0.4039, 0.4000],
           [0.3451, 0.3451, 0.3529,  ..., 0.3843, 0.3961, 0.3961],
           [0.3373, 0.3255, 0.3255,  ..., 0.3922, 0.3922, 0.3804]],
 
          [[0.1529, 0.1686, 0.1686,  ..., 0.0157, 0.0157, 0.0118],
           [0.1373, 0.1608, 0.1686,  ..., 0.0118, 0.0118, 0.0157],
           [0.1059, 0.0941, 0.0745,  ..., 0.0118, 0.0157, 0.0157],
           ...,
           [0.1686, 0.1686, 0.1569,  ..., 0.2000, 0.2157, 0.2078],
           [0.1569, 0.1569, 0.1647,  ..., 0.1922, 0.2118, 0.2039],
           [0.1490, 0.1451, 0.1451,  ..., 0.2000, 0.2078, 0.2078]],
 
          [[0.1608, 0.1804, 0.1804,  ..., 0.0157, 0.0157, 0.0118],
           [0.1490, 0.1725, 0.1804,  ..., 0.0118, 0.0118, 0.0157],
           [0.1020, 0.09