### Creating a Custom Dataset 

We load the FashionMNIST Dataset with the following parameters: <br>
- **root** is the path where the train/test data is stored,
- **train** specifies training or test dataset,
- **download=True** downloads the data from the internet if it’s not available at root.
- **transform** and **target_transform** specify the feature and label transformations


In [8]:
import torch
from torch import nn
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
import matplotlib.pyplot as plt

In [2]:
#Download training data from open datasets
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=False,
    transform=ToTensor(),
)

#Download the test data from open datasets
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=False,
)

**Iterating and Visualizing the Dataset**

In [3]:
labels_map = {
    0: "T-shirt/top",
    1: "Trouser",
    2: "Pullover",
    3: "Dress",
    4: "Coat",
    5: "Sandal",
    6: "Shirt",
    7: "Sneaker",
    8: "Bag",
    9: "Ankle boot",
}

"""
figure = plt.figure(figsize=(8,8))
cols, rows = 3,3
for i in range(1, cols*rows+1):
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, label = training_data[sample_idx]
    figure.add_subplot(rows, cols, i)
    plt.title(labels_map[label])
    plt.axis("off")
   # plt.imshow(img.squeeze(), cmap="gray")
plt.show()
"""

'\nfigure = plt.figure(figsize=(8,8))\ncols, rows = 3,3\nfor i in range(1, cols*rows+1):\n    sample_idx = torch.randint(len(training_data), size=(1,)).item()\n    img, label = training_data[sample_idx]\n    figure.add_subplot(rows, cols, i)\n    plt.title(labels_map[label])\n    plt.axis("off")\n   # plt.imshow(img.squeeze(), cmap="gray")\nplt.show()\n'

A custom Dataset class must implement three functions, `__init__`, `__len__`, and `__getitem__`. 

In [207]:
import os
import pandas as pd
from torchvision.io import read_image
import re

class CustomImageDataset(Dataset):
    def __init__(self, img_dir, labels_file, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(labels_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        
        
    def __len__(self):
        return len(self.img_labels)
    
    def __getitem__(self, idx):
        sub_directory = self.img_labels.folder.iloc[idx]
        img_path = os.path.join(self.img_dir, self.img_labels.folder.iloc[idx], self.img_labels.image_name.iloc[idx])
        
        image = read_image(img_path)
    
        label = self.img_labels.coins_count.iloc[idx]
        if self.transform:
            image = self.transform(image)
            
        if self.target_transform:
            label = self.target_transform(label)
        
        
        #sample = {"image": image, "label": label}
       
        return image, torch.tensor([float(label)])
        

### Preparing data for training with DataLoaders

The `Dataset` retrieves our dataset's features and labels one sample at a time. While training a model, we typically want to pass samples <br> 
in "minibatches", reshuffle the data at every epoch to reduce model overfitting, and use Python's `multiprocessing` to speed up data retrieval. 
<br> <br>
`DataLoader` is an iterable that abstracts this complexity for us in an easy API.

In [208]:
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import ToTensor, Lambda, Resize


batch_size = 36

transform = transforms.Compose(
        [transforms.ToPILImage(),
        transforms.Resize((256,256)),
        transforms.ToTensor()
        ])



dataset = CustomImageDataset(img_dir = "coins_images", labels_file="coins_count_values.csv", transform=transform)

data_loader = DataLoader(dataset,  batch_size=batch_size, shuffle=True)



In [209]:
for (X, y) in data_loader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("shape of y: ", y.shape)
    break


Shape of X [N, C, H, W]:  torch.Size([36, 3, 256, 256])
shape of y:  torch.Size([36, 1])


In [225]:
#Get cpu or gpu device for training
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

#Define model
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Sequential(
            nn.Linear( 65*65*32, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
       
        )
   
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.shape[0],-1)
        out = self.fc(out)
        return out

model = ConvNet().to(device)
print(model)


Using cuda device
ConvNet(
  (layer1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=135200, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=256, bias=True)
    (5): ReLU()
    (6): Linear(in_features=256, out_features=1, bias=True)
  )
)


In [226]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [227]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        
        if batch != 5:
            #compute the prediction error
            pred = model(X)
            loss = loss_fn(pred, y)
        
        
            #Backpropagtion
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        if batch % 2 == 0:
            loss, current = loss.item(), batch * len(X)
            loss /= 36
            print(f"Training loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
        
        if batch == 5:
            model.eval()
            test_loss, correct = 0, 0
            with torch.no_grad():
            
                pred = model(X)
                test_loss += loss_fn(pred, y).item()
            
            
                test_loss /= 36
        
                print(f"Test Loss: {test_loss:>8f} \n")

In [228]:
epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n -------------------------------")
    train(data_loader, model, loss_fn, optimizer)
print("Done")

Epoch 1
 -------------------------------
Training loss: 2.568112 [    0/  215]
Training loss: 3.164476 [   72/  215]
Training loss: 2.048724 [  144/  215]
Test Loss: 3.420418 

Epoch 2
 -------------------------------
Training loss: 2.471473 [    0/  215]
Training loss: 2.466288 [   72/  215]
Training loss: 4.169981 [  144/  215]
Test Loss: 2.055943 

Epoch 3
 -------------------------------
Training loss: 1.049859 [    0/  215]
Training loss: 2.259311 [   72/  215]
Training loss: 2.567061 [  144/  215]
Test Loss: 2.843059 

Epoch 4
 -------------------------------
Training loss: 3.077779 [    0/  215]
Training loss: 2.748300 [   72/  215]
Training loss: 2.398762 [  144/  215]
Test Loss: 2.679975 

Epoch 5
 -------------------------------
Training loss: 2.489185 [    0/  215]
Training loss: 2.048046 [   72/  215]
Training loss: 4.839386 [  144/  215]
Test Loss: 5.136639 

Epoch 6
 -------------------------------
Training loss: 3.287800 [    0/  215]
Training loss: 4.761718 [   72/  215