In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

import torchvision

from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

Image data comes from the [Animal Faces dataset on Kaggle](https://www.kaggle.com/andrewmvd/animal-faces)
- Animal Faces: face images labeled `cat`, `dog`, or `wild`

## Images

In [None]:
# What's in this dataset?
import os
# os.listdir returns names in arbitrary order; sort so the displayed listing is reproducible
sorted(os.listdir('course_data/afhq'))

In [None]:
# three labels (one sub-folder per label)
# sorted because os.listdir makes no promise about ordering
sorted(os.listdir('course_data/afhq/train'))

In [None]:
# within each folder are the images
# sort before slicing: os.listdir order is arbitrary, so an unsorted [:5]
# could show a different sample of files on every machine
sorted(os.listdir('course_data/afhq/train/cat'))[:5]

In [None]:
# create a dataframe for our data: one row per image file found under
# <data_path>/<dataset>/<label>/
data_path = 'course_data/afhq'

rows = []
# Directory listings are sorted because os.listdir returns entries in arbitrary
# order; sorting keeps the row order (and therefore df.iloc[0]) reproducible.
for dataset in sorted(os.listdir(data_path)):
    dataset_dir = f'{data_path}/{dataset}'
    # skip stray files (e.g. .DS_Store) that would make the nested listdir fail
    if not os.path.isdir(dataset_dir):
        continue
    for label in sorted(os.listdir(dataset_dir)):
        label_dir = f'{dataset_dir}/{label}'
        if not os.path.isdir(label_dir):
            continue
        for image in sorted(os.listdir(label_dir)):
            rows.append({
                'image_file': image,
                'label': label,
                'dataset': dataset,
                # a bit redundant, could build from other data in __getitem__ if wanted
                'image_path': label_dir,
            })

# explicit columns so that an empty scan still yields a frame with the expected schema
df = pd.DataFrame(rows, columns=['image_file', 'label', 'dataset', 'image_path'])
print(len(df))
df.head()

In [None]:
# training and validation data: select the rows of each split and renumber them from 0
df_train, df_val = (
    df.loc[df['dataset'].eq(split)].reset_index(drop=True)
    for split in ('train', 'val')
)
(len(df_train), len(df_val))

Before creating a Dataset class, let's think about what we want as the input to the network.

In [None]:
import cv2

# pull up an image: the first row of the dataframe gives its folder and file name
row = df.iloc[0]
image_path = row['image_path']
fname = row['image_file']
path = image_path+'/'+fname
img = cv2.imread(path)

# cv2.imread reports failure by returning None instead of raising,
# so fail loudly here rather than in a later cell
if img is None:
    raise OSError(f'could not read image file: {path}')

# what is an image?
img

In [None]:
# array shape: 512x512 image with 3 channels
print(img.shape)

# largest and smallest pixel intensity (pixel intensity goes from 0 to 255)
print(img.max(), img.min())

In [None]:
# look at the image
# (the array is handed to matplotlib unchanged; the AxesImage returned by
# imshow is the cell's last expression, so its repr is shown with the plot)
plt.imshow(img)

In [None]:
# why is it weird? cv2 opens in BGR instead of RGB
# reorder the channels first, then hand the converted array to matplotlib
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)

## Convolutional Layers
- The [documentation](https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html) for `Conv2d` is a must-read

In [None]:
# number of channels of the input
in_channels = 3
# number of filters (hence number of output channels)
out_channels = 32
# filter size
kernel_size = 3 # equivalent to (3,3)

# define the layer
conv = nn.Conv2d(in_channels, out_channels, kernel_size)

# why error? (two reasons!)
# The failure is the point of this cell, so catch it and show the message
# instead of letting the exception stop a "Restart & Run All".
try:
    conv(torch.tensor(img))
except RuntimeError as err:
    print(f'{type(err).__name__}: {err}')

In [None]:
# let's try again
# add a leading batch axis, move the channel axis in front of height/width,
# then convert the array to a float tensor
img2 = torch.tensor(np.expand_dims(img, axis=0).transpose(0, 3, 1, 2)).float()

output = conv(img2)

# why this shape?
output.shape

Think: How can we change the layer so that the output keeps the same height and width as the input?

## Dataset and Model

In [None]:
# Let's create a Dataset for our animal faces!
class AnimalFacesDataset(Dataset):
    """Map-style dataset that loads the images listed in a DataFrame.

    Every row of ``df`` must provide three columns: ``image_path`` (the folder
    holding the file), ``image_file`` (the file name) and ``label`` (a key of
    ``label_dict``).

    Args:
        df: DataFrame with one row per image.
        label_dict: optional mapping from label string to integer class index.
            Defaults to ``{'cat': 0, 'dog': 1, 'wild': 2}``.
    """

    def __init__(self, df, label_dict=None):
        self.df = df

        # label dictionary
        if label_dict is None:
            label_dict = {'cat':0, 'dog':1, 'wild':2}
        self.label_dict = label_dict

    def __len__(self):
        """Return the number of images (rows of the DataFrame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at position ``idx``.

        Returns:
            ``(img, label)``: ``img`` is a float tensor laid out channels-first
            (RGB order) and scaled by 1/255; ``label`` is the class index as a
            tensor.

        Raises:
            OSError: if ``cv2.imread`` cannot read the image file.
        """
        row = self.df.iloc[idx]

        # get ingredients for retrieving image
        path = row['image_path'] + '/' + row['image_file']

        # read the img; cv2.imread returns None (no exception) when the file is
        # missing or cannot be decoded, so check before using the result
        img = cv2.imread(path)
        if img is None:
            raise OSError(f'could not read image file: {path}')

        # convert to RGB (cv2 loads BGR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # move color channels to correct spot: (H, W, C) -> (C, H, W)
        img = np.transpose(img, (2, 0, 1))

        # convert to [0,1] scale
        img = torch.tensor(img / 255.).float()

        label = torch.tensor(self.label_dict[row['label']])

        return img, label

In [None]:
# wrap the training frame in the Dataset, then batch it (2 images per batch, reshuffled each pass)
ds_train = AnimalFacesDataset(df=df_train)
dl_train = DataLoader(
    dataset=ds_train,
    batch_size=2,
    shuffle=True,
)

In [None]:
# make sure our recipe works!
# notice the time...
# (one full pass over the loader that only exercises loading; note that the
# loop targets rebind the notebook-level names `img` and `label`)
for img, label in tqdm(dl_train):
    pass

We have to sketch out the tensor dimensions after every layer while constructing the network!

Input: (3, 512, 512)

Conv1 -> (32, 512, 512)

Pool -> (32, 256, 256)

Conv2 -> (64, 256, 256)

Pool -> (64, 128, 128)

Conv3 -> (128, 128, 128)

Pool -> (128, 64, 64)

Conv4 -> (1, 64, 64)

Flatten -> (4096), since 1 × 64 × 64 = 4096 (the input size of the first linear layer)

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier that produces 3 scores per image.

    Three conv -> ReLU -> max-pool stages are followed by a fourth conv with a
    single output channel and two linear layers. The first linear layer takes
    4096 features, which matches a 3x512x512 input: the three pools reduce
    512 -> 256 -> 128 -> 64, and 1 * 64 * 64 = 4096.
    """

    def __init__(self):
        super().__init__()

        # 3x3 kernels with padding=1 leave height and width unchanged ("same" padding)
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)

        # collapse to one channel so the flattened feature vector is small enough
        self.conv4 = nn.Conv2d(128, 1, 3, padding=1)

        self.linear1 = nn.Linear(4096, 100)

        # one output per class; read the CrossEntropyLoss documentation!
        self.linear2 = nn.Linear(100, 3)

        # shared parameter-free layers: 2x2 max-pool, activation, flatten for the FC layers
        self.pool = nn.MaxPool2d(2)
        self.relu = nn.ReLU()
        self.unroll = nn.Flatten()

    def forward(self, x):
        """Run the network on a batch ``x`` and return the output of ``linear2``."""
        # tip: print(x.shape) between stages when debugging dimensions

        # conv -> relu -> pool, three times
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(self.relu(conv(x)))

        # last conv has no pooling after it
        x = self.relu(self.conv4(x))

        # unroll x for the FC layers
        x = self.unroll(x)
        x = self.relu(self.linear1(x))
        return self.linear2(x)


model = CNN()

In [None]:
from torchsummary import summary

# smoke test: push the single-image batch through the network (result is not displayed)
model(img2)

# per-layer summary for a 3x512x512 input, computed on the CPU
summary(
    model,
    input_size=(3, 512, 512),
    device='cpu',
)