In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import os

In [None]:
# root folder of the image dataset; it is handed to ImageFolder further down
IMG_DIR = "zappos_dataset"

In [None]:
# Preprocessing applied to every image as it is loaded.
# Resize runs first, on the image object produced by the dataset loader, so the
# pipeline does not rely on tensor support in Resize (only present in newer
# torchvision releases, and whose antialias default has changed between versions).
# ToTensor then converts to a float tensor, and Normalize standardizes each of the
# three channels with the given per-channel mean / std (the values documented for
# torchvision's pretrained models).
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# dataset reading images from IMG_DIR and applying the pipeline above to each one
data = datasets.ImageFolder(IMG_DIR, data_transforms)

In [None]:
# total number of samples in the dataset; the train/test split sizes are derived from it
num_img = len(data)

In [None]:
# 80/20 train/test split.
# The split is driven by a seeded generator so that re-running this cell (or
# restarting the kernel) always produces the same partition. Without it, images
# that were used for training in one run can end up in the test set of the next,
# which makes the reported accuracy meaningless across re-runs.
SPLIT_SEED = 42
num_train = int(num_img * 0.8)
num_test = num_img - num_train  # remainder, so both sizes always sum to num_img
train_data, test_data = torch.utils.data.random_split(
    data,
    [num_train, num_test],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [None]:
# sanity check: show how many samples ended up in each split
split_sizes = (len(train_data), len(test_data))
display(*split_sizes)

In [None]:
# Loaders that feed the network in batches of 20 images.
# Only the training loader shuffles; the test loader keeps a fixed order.
# On Windows, set num_workers to 0.
loader_kwargs = dict(batch_size=20, num_workers=4)

train_loader = torch.utils.data.DataLoader(train_data, shuffle=True, **loader_kwargs)

test_loader = torch.utils.data.DataLoader(test_data, shuffle=False, **loader_kwargs)

In [None]:
# map every class index to its class name.
# Built from the dataset's own class list rather than a hard-coded range(4), so it
# neither raises IndexError when fewer classes are present nor silently drops
# classes (and later raises KeyError on lookup) when there are more.
class_names = dict(enumerate(data.classes))

In [None]:
def imshow(inp, title=None):
    """Display a normalized image tensor with matplotlib.

    Args:
        inp: tensor with three axes whose first axis holds the 3 colour channels;
            it is converted with ``.numpy()`` and rearranged so channels come last.
        title: optional title for the plot; nothing is set when it is ``None``.

    The per-channel normalization (same mean / std as the dataset transform) is
    undone and the result is clipped to ``[0, 1]`` before plotting.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # move the channel axis from first to last position, as plt.imshow expects
    channels_last = np.transpose(inp.numpy(), (1, 2, 0))

    # invert the normalization, then clamp to the valid display range
    restored = np.clip(channel_std * channels_last + channel_mean, 0, 1)

    plt.imshow(restored)
    if title is not None:
        plt.title(title)
    # short pause so the figure gets a chance to be drawn
    plt.pause(0.001)


# Pull one batch from the training loader for a visual sanity check
inputs, classes = next(iter(train_loader))

# Tile all images of the batch into a single grid image
out = torchvision.utils.make_grid(inputs)

# Use the class name of every image in the batch as the plot title
batch_titles = [class_names[label] for label in classes.tolist()]
imshow(out, title=batch_titles)

In [None]:
# load the pretrained VGG16 and freeze it: requires_grad_(False) turns off
# gradient tracking for every parameter of the module and returns the module
model = models.vgg16(pretrained=True).requires_grad_(False)

In [None]:
class ZapposModel(nn.Module):
    """Classifier that reuses an existing backbone and adds a new classification head.

    The ``features`` and ``avgpool`` sub-modules are taken from the model passed in
    (the notebook passes a VGG16); only ``classifier`` is created here.

    Args:
        new_model: object exposing ``features`` and ``avgpool`` modules.
        num_classes: number of output logits. Defaults to 4, the value that was
            previously hard-coded, so existing ``ZapposModel(model)`` calls are
            unaffected.
    """

    def __init__(self, new_model, num_classes=4):
        """Build the network from the backbone's feature extractor / avgpool and a new classifier."""
        super().__init__()
        self.features = new_model.features
        self.avgpool = new_model.avgpool
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # the head expects 25088 flattened features per sample
            # (presumably 512 * 7 * 7 from the VGG16 avgpool — verify if the backbone changes)
            nn.Linear(25088, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5, inplace=False),
            nn.Linear(2048, num_classes)
        )

    def forward(self, x):
        """Run the forward pass and return one row of ``num_classes`` logits per sample."""
        x = self.features(x)
        x = self.avgpool(x)
        x = self.classifier(x)
        return x

In [None]:
# use the first CUDA device when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# instantiate our model around the frozen backbone and move it to that device
net = ZapposModel(model)
net.to(device)

In [None]:
# loss function: cross entropy between the network outputs and the integer labels
criterion = nn.CrossEntropyLoss()

# SGD with momentum; only the classifier's parameters are handed to the optimizer,
# so nothing else in the network is updated
head_params = net.classifier.parameters()
optimizer = optim.SGD(head_params, lr=0.001, momentum=0.9)

In [None]:
# train step
epochs = 1

# number of batches (not samples) produced by each loader per pass
len_train_data = len(train_loader)
len_test_data = len(test_loader)

# the training loss is printed on every LOG_EVERY-th batch
LOG_EVERY = 20

for epoch in range(epochs):

    net.train()

    running_loss = 0.0       # sum of batch losses since the last print
    batches_in_window = 0    # number of batches contributing to running_loss
    samples_seen = 0         # images processed so far in this epoch

    # NOTE: the loop variable is deliberately not named `data`; that name holds the
    # dataset, and overwriting it breaks earlier cells when they are re-run.
    for i, batch in enumerate(train_loader):

        inputs, labels = batch[0].to(device), batch[1].to(device)

        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_in_window += 1
        samples_seen += labels.size(0)  # actual batch size; the last batch may be smaller

        if i % LOG_EVERY == 0:
            # report the mean loss over the batches seen since the previous print,
            # then start a new window (the reset belongs inside this branch)
            print('[%d, %5d] loss: %.3f' % (epoch + 1, samples_seen, running_loss / batches_in_window))
            running_loss = 0.0
            batches_in_window = 0

    # the loader stops by itself once every image has been used; no manual break needed

    # we evaluate our model accuracy on the test split
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for batch in test_loader:
            images, labels = batch[0].to(device), batch[1].to(device)
            outputs = net(images)
            # index of the largest logit along dim 1 is the predicted class
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # guard against an empty test loader to avoid a ZeroDivisionError
    accuracy = 100 * correct / total if total else 0.0
    print('Accuracy of the network on the %d test images: %d %%' % (total, accuracy))

print('Finished Training')

In [None]:
# free cached CUDA memory if necessary
# torch.cuda.empty_cache()