In [313]:
import os, torch, torch.nn as nn, torchvision, matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from torchvision.io import read_image
import PIL.Image as Image

img_folder = '/Users/liam/Documents/Python/working/oxford-iiit-pet/images/'
# ann_folder = '/Users/liam/Documents/Python/working/oxford-iiit-pet/annotations/annotations/trimaps'

In [314]:
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print(device)

mps


In [315]:
filenames = []
for filename in os.listdir(img_folder):
    filenames.append(filename)

# get the labels from the image filenames
# format: <class>_<color>_<number>.jpg
# e.g. british_shorthair_100.jpg
labels = []
for filename in filenames:
    labels.append(filename.split('_')[0])

# combine the filenames and labels into a dictionary
data = dict(zip(filenames, labels))

'''
# tag the filenames and labels with either cat or dog
# cats start with a capital letter, dogs start with a lowercase letter
for key, value in data.items():
    # regex: if the first letter is uppercase, it's 
    if value[0].isupper():
        data[key] = 'cat'
    else:
        data[key] = 'dog'
'''

data = dict(sorted(data.items()))


In [316]:
# create a dictionary that maps the labels to integers
# e.g. Bengal = 0, Bombay = 1, etc.
# this is necessary for the loss function
label_to_int = {}
for i, label in enumerate(set(labels)):
    label_to_int[label] = i

# create a dictionary that maps the integers back to the labels
# e.g. 0 = Bengal, 1 = Bombay, etc.
# this is necessary for the prediction step
int_to_label = {}
for i, label in enumerate(set(labels)):
    int_to_label[i] = label

# convert the labels to integers
for key, value in data.items():
    data[key] = label_to_int[value]

data

{'Abyssinian_1.jpg': 17,
 'Abyssinian_10.jpg': 17,
 'Abyssinian_100.jpg': 17,
 'Abyssinian_100.mat': 17,
 'Abyssinian_101.jpg': 17,
 'Abyssinian_101.mat': 17,
 'Abyssinian_102.jpg': 17,
 'Abyssinian_102.mat': 17,
 'Abyssinian_103.jpg': 17,
 'Abyssinian_104.jpg': 17,
 'Abyssinian_105.jpg': 17,
 'Abyssinian_106.jpg': 17,
 'Abyssinian_107.jpg': 17,
 'Abyssinian_108.jpg': 17,
 'Abyssinian_109.jpg': 17,
 'Abyssinian_11.jpg': 17,
 'Abyssinian_110.jpg': 17,
 'Abyssinian_111.jpg': 17,
 'Abyssinian_112.jpg': 17,
 'Abyssinian_113.jpg': 17,
 'Abyssinian_114.jpg': 17,
 'Abyssinian_115.jpg': 17,
 'Abyssinian_116.jpg': 17,
 'Abyssinian_117.jpg': 17,
 'Abyssinian_118.jpg': 17,
 'Abyssinian_119.jpg': 17,
 'Abyssinian_12.jpg': 17,
 'Abyssinian_120.jpg': 17,
 'Abyssinian_121.jpg': 17,
 'Abyssinian_122.jpg': 17,
 'Abyssinian_123.jpg': 17,
 'Abyssinian_124.jpg': 17,
 'Abyssinian_125.jpg': 17,
 'Abyssinian_126.jpg': 17,
 'Abyssinian_127.jpg': 17,
 'Abyssinian_128.jpg': 17,
 'Abyssinian_129.jpg': 17,
 'Abys

In [317]:
class OxfordDataset(Dataset):
    def __init__(self, filenames, labels, transform=None):
        self.filenames = filenames
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        image = Image.open(img_folder+self.filenames[idx]).convert('RGB')
        # normalize the image
        label = self.labels[idx]
        label = label_to_int[label]
        if self.transform:
            image = self.transform(image)
        return image, label

transform=torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Resize((224, 224), antialias=True),
    torchvision.transforms.RandomHorizontalFlip(0.5),
    torchvision.transforms.ColorJitter(brightness=0.05, contrast=0.1, saturation=0.05, hue=0.05),
    # random erasing
    torchvision.transforms.RandomErasing(p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3), value=0, inplace=False),
    torchvision.transforms.RandomRotation(degrees=10),
])

In [318]:
X_train, X_test, y_train, y_test = train_test_split(filenames, labels, test_size=0.2, random_state=42)

In [319]:
class OxfordCNN(nn.Module):
    def __init__(self):
        super(OxfordCNN, self).__init__()
        self.layer1=torch.nn.Sequential(
            torch.nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(32),
            torch.nn.Dropout(p)
        ) # 32x112x112
        self.layer2=torch.nn.Sequential(
            torch.nn.Conv2d(32, 128, kernel_size=3, stride=2, padding=1),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(128),
            torch.nn.Dropout(p)
        ) # 128x28x28
        self.layer3=torch.nn.Sequential(
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(256),
            torch.nn.Dropout(p)
        ) # 256x7x7
        self.layer4=torch.nn.Sequential(
            torch.nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(512),
            torch.nn.Dropout(p)
        ) # 512x2x2
        self.fc1=torch.nn.Sequential(
            torch.nn.Linear(512*2*2, 1000),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(1000)
        ) # 1000
        self.fc2=torch.nn.Sequential(
            torch.nn.Linear(1000, 2),
            torch.nn.Softmax(dim=1)
        ) # 2

    def forward(self, xb):
        out=self.layer1(xb) # 32x224x224
        out=self.layer2(out) # 128x56x56
        out=self.layer3(out) # 256x14x14
        out=self.layer4(out) # 512x7x7
        out=out.reshape(out.size(0), -1) # 512x2x2 -> 2048
        out=self.fc1(out) # 1000
        out=self.fc2(out) # 2
        return out

In [320]:
def get_accuracy(model, loader):
    correct=0
    print('xb: '+type(xb)+', yb: '+type(yb))
    for xb, yb in loader:
        xb=xb.to(device)
        yb=yb.to(device)
        y_hat=model(xb)
        y_pred=torch.argmax(y_hat, dim=1)
        correct+=(y_pred==yb).sum().item()
    return correct/len(loader)

In [321]:
# convert yb to one-hot encoding
def one_hot(yb, num_classes):
    yb_onehot=torch.zeros(yb.shape[0], num_classes)
    yb_onehot[torch.arange(yb.shape[0]), yb]=1
    return yb_onehot

batch_size=128

train_dataset=OxfordDataset(X_train, y_train, transform=transform)
train_dataloader=DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset=OxfordDataset(X_test, y_test, transform=transform)
test_dataloader=DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [322]:
# hyperparameters
p=0.2
EPOCHS=20
batch_size=10

model=OxfordCNN().to(device)
loss_fn=torch.nn.CrossEntropyLoss()

optimizer=torch.optim.Adam(model.parameters(), lr=lr)

def train_batch(model, optimizer, xb, yb):
    xb=xb.to(device)
    yb=yb.to(device)
    y_hat=model(xb)
    loss=loss_fn(y_hat, yb)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

def train_epoch(model, optimizer,  x, y):
    losses=[]
    for xb, yb in train_dataloader:
        losses.append(train_batch(model, optimizer, xb, yb))
    return losses, get_accuracy(model, test_dataloader)


In [323]:
accs=[]
bar=tqdm(range(EPOCHS))
for i in bar:
    losses, acc=train_epoch(model, optimizer, X_train, y_train)
    accs.append(acc)
    bar.set_description(f"Accuracy: {acc:.3f}")
plt.plot(accs) #

  0%|          | 0/20 [00:00<?, ?it/s]

UnidentifiedImageError: cannot identify image file '/Users/liam/Documents/Python/working/oxford-iiit-pet/images/Abyssinian_100.mat'