In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import torch.nn.functional as F
import numpy as np
import os

In [2]:
# Sample-count placeholders for the train/val splits; both start at zero.
NUM_TRAIN, NUM_VAL = 0, 0

class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the image's file path.

    Every item is whatever ``datasets.ImageFolder.__getitem__`` returns, with
    the path of the underlying image file appended as one extra element.
    """

    def __getitem__(self, index):
        # DataLoader fetches samples through this hook, so the path travels
        # along with every sample it collates.
        sample = super().__getitem__(index)
        image_path = self.imgs[index][0]
        return sample + (image_path,)

# Per-channel normalization statistics shared by every split.
# NOTE(review): presumably the dataset's RGB mean/std -- confirm how these
# values were derived.
NORM_MEAN = (0.768986940, .6641706, 0.5923363)
NORM_STD = (0.18613161, 0.22524446, 0.23932885)


def _build_transforms(augment=False):
    """Build the image preprocessing pipeline for one data split.

    Args:
        augment: when True, a random horizontal flip is inserted after the
            center crop (used for the training split only).

    Returns:
        A ``transforms.Compose`` that resizes to 255, center-crops to 224,
        optionally flips, converts to a tensor and normalizes with
        ``NORM_MEAN`` / ``NORM_STD``.
    """
    steps = [transforms.Resize(255), transforms.CenterCrop(224)]
    if augment:
        steps.append(transforms.RandomHorizontalFlip())
    steps += [transforms.ToTensor(), transforms.Normalize(NORM_MEAN, NORM_STD)]
    return transforms.Compose(steps)


train_transforms = _build_transforms(augment=True)
val_transforms = _build_transforms()
test_transforms = _build_transforms()

train_data = datasets.ImageFolder("train", transform=train_transforms)
val_data = datasets.ImageFolder("val", transform=val_transforms)
test_data = ImageFolderWithPaths("test", transform=test_transforms)
batch_size = 16

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
# Accuracy does not depend on sample order, so validation is not shuffled;
# this keeps evaluation runs deterministic.
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)
# One image per batch so each prediction can be paired with its file path.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1)


In [3]:
# Set to False to force CPU execution even when CUDA is available.
USE_GPU = True

# Input tensors are cast to 32-bit floats before they reach the model.
dtype = torch.float32

# CUDA is used only when it is both requested and actually present.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

# Iteration interval at which train() prints the loss and runs validation.
print_every = 11800

print('using device:', device)

using device: cpu


In [4]:
def check_accuracy(loader, model):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    The model is switched to evaluation mode (and left in it), every batch is
    run with gradient tracking disabled, and a summary line of the form
    ``Got <correct> / <total> correct (<percent>)`` is printed.

    Args:
        loader: iterable yielding ``(inputs, labels)`` batches.
        model: callable mapping an input batch to per-class scores; the
            predicted class is the argmax over dimension 1.

    Returns:
        float: fraction of correctly classified samples, or ``0.0`` when the
        loader yields no samples.
    """
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            # .item() keeps the running count a plain Python int rather than
            # a tensor living on the compute device.
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)
    # An empty loader would otherwise raise ZeroDivisionError.
    acc = num_correct / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

def train(model, optimizer, epochs):
    """Fit ``model`` on ``train_loader`` for ``epochs`` passes with ``optimizer``.

    The loss is the cross-entropy between the model's scores and the integer
    labels.  The current loss is printed every 100 iterations; every
    ``print_every`` iterations it is printed again, followed by a
    ``check_accuracy`` run on ``val_loader``.

    Args:
        model: network to optimize; moved to the global ``device`` first.
        optimizer: optimizer used to clear gradients and apply updates.
        epochs: number of full passes over ``train_loader``.
    """
    model = model.to(device=device)
    for _ in range(epochs):
        for step, (inputs, targets) in enumerate(train_loader):
            # check_accuracy() switches the model to eval mode, so training
            # mode is re-asserted at the start of every iteration.
            model.train()
            inputs = inputs.to(device=device, dtype=dtype)
            targets = targets.to(device=device, dtype=torch.long)

            logits = model(inputs)
            loss = F.cross_entropy(logits, targets)

            # Standard update: clear stale gradients, backpropagate, step.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if not step % 100:
                print(step, loss.item())
            if not step % print_every:
                print('Iteration %d, loss = %.4f' % (step, loss.item()))
                check_accuracy(val_loader, model)
                print()

In [3]:
# Load torchvision's ResNet-101 with pretrained weights.
# NOTE(review): the saved output of the following cell shows a VGG-style
# `classifier` Sequential, so that output was presumably produced with a
# different architecture -- confirm which backbone is intended here.
model = models.resnet101(pretrained=True)

In [5]:
# Show the model's final classification head.  VGG-style torchvision models
# expose it as `model.classifier`, whereas ResNets expose `model.fc`; handle
# both so the cell does not raise AttributeError for a ResNet backbone.
print(model.classifier if hasattr(model, 'classifier') else model.fc)

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)


In [18]:
# Number of output classes for the new head (value kept from the original cell).
NUM_CLASSES = 251

# Swap the pretrained output layer for a NUM_CLASSES-way one.  VGG-style
# models keep it at `classifier[6]`, ResNets at `fc`; the input width is read
# from the layer being replaced instead of being hard-coded.
if hasattr(model, 'classifier'):
    model.classifier[6] = nn.Linear(model.classifier[6].in_features, NUM_CLASSES)
else:
    model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

# Build the optimizer only after the head swap so it tracks the new layer's
# parameters as well.
optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=.9, nesterov=True)

train(model, optimizer, 5)

0 5.78176736831665
Iteration 0, loss = 5.7818
Got 99 / 11994 correct (0.83)

100 5.588245391845703
200 5.472803592681885
300 5.566573619842529
400 5.382837772369385
500 5.363278388977051
600 5.529534816741943
700 4.957097053527832
800 5.195234775543213
900 4.512930393218994
1000 4.73974609375
1100 4.964661121368408
1200 4.2095184326171875
1300 4.465920448303223
1400 4.78986120223999
1500 4.721585750579834
1600 4.524926662445068
1700 4.249468803405762
1800 4.3668532371521


KeyboardInterrupt: 

In [6]:
# Report validation accuracy, then write the top-3 predicted labels for every
# test image to the submission file.
check_accuracy(val_loader, model)
model.eval()
model.to(device=device)
with open('submission2.txt', 'w') as file:
    # NOTE(review): this header has no comma while every row below is
    # "<file name>,<label> <label> <label>" -- confirm the expected format.
    file.write("label1 label2 label3\n")
    with torch.no_grad():
        for image_batch, _, paths in test_loader:
            # test_loader uses batch_size=1, so element 0 is the only image.
            # basename() extracts the file name without assuming its length.
            name = os.path.basename(paths[0])
            image_batch = image_batch.to(device=device, dtype=dtype)
            scores = model(image_batch)
            # Indices of the three highest scores, best first.
            top3 = torch.topk(scores, 3)[1][0].tolist()
            file.write(name + "," + " ".join(str(label) for label in top3) + "\n")
print("done writing")

Got 4462 / 11994 correct (37.20)
done writing


In [None]:
# torch.save requires a destination as its second argument; without it the
# call raises TypeError.  The whole model object is serialized.
CHECKPOINT_PATH = "model_checkpoint.pth"
torch.save(model, CHECKPOINT_PATH)

In [5]:
# Estimate per-channel mean/std from the first batch of the test loader.
# NOTE(review): test_transforms already applies Normalize, so these are
# statistics of the normalized tensors -- confirm that is what is wanted.
# The builtin next() is used because the legacy `.next()` iterator alias is
# not available on every PyTorch version.
first_batch = next(iter(test_loader))
# test_loader batches carry the file paths as a third element; only the
# images and labels are needed here.
images, labels = first_batch[0], first_batch[1]
numpy_images = images.numpy()

# Reduce over the two spatial axes of each image.
per_image_mean = np.mean(numpy_images, axis=(2, 3))  # shape (batch, channels)
per_image_std = np.std(numpy_images, axis=(2, 3))  # shape (batch, channels)

pop_channel_mean = np.mean(per_image_mean, axis=0)  # shape (channels,)
# Average of the per-image standard deviations (not a pooled std).
pop_channel_std = np.mean(per_image_std, axis=0)
print(pop_channel_mean)
print(pop_channel_std)

[0.7097613  0.59887534 0.54972327]
[0.19917071 0.22445115 0.24484591]


In [13]:
# Report train and validation accuracy for each saved checkpoint.
# NOTE: torch.load unpickles arbitrary objects -- only load trusted files.
for path in ("vgg11_2epochs_1.pth", "resnet101_2epochs.pth", "resnet152_2epochs.pth"):
    print(path)
    # Deserialize onto the CPU first, then move to the active device.
    model = torch.load(path, map_location=torch.device('cpu'))
    model = model.to(device=device)
    check_accuracy(train_loader, model)  # training accuracy
    check_accuracy(val_loader, model)  # validation accuracy

vgg11_2epochs_1.pth


KeyboardInterrupt: 

In [8]:
# List the contents of the notebook's working directory.
!ls

Untitled.ipynb              submission.csv
[34mVisualization[m[m               submission1.csv
Visualization .ipynb        submission2.csv
[34m__pycache__[m[m                 [34mtest[m[m
classifier.py               test.py
data_utils.py               [34mtrain[m[m
epoch2.txt                  [31mtrain_032521.jpg[m[m
iFood.ipynb                 [31mtrain_032523.jpg[m[m
[34mifood-2019-fgvc6[m[m            [31mtrain_032524.jpg[m[m
loss.txt                    [31mtrain_032526.jpg[m[m
optvis.py                   [31mtrain_032531.jpg[m[m
[34mpictures[m[m                    train_labels.csv
resnet100_loss.txt          [34mtrain_partial[m[m
resnet101_2epochs.pth       [34mval[m[m
resnet152_2epochs.pth       val_images.csv
resnet152_loss.txt          val_labels.csv
resnet152_train4.pth        vgg11_2epochs.onnx
resnet50_epoch20_batch16.h5 vgg11_2epochs_1.pth
rurs_poster_template.pptx   vgg11_loss.txt
separate.py                 [34mxgb