In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import torch.nn.functional as F
import numpy as np
import os

In [2]:
# Sample-count placeholders; no cell visible in this notebook reads them.
NUM_TRAIN = NUM_VAL = 0

class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose items also carry the image's file path.

    Every item is whatever ``datasets.ImageFolder`` returns for that index,
    with the source file path appended as one extra trailing element.
    """

    def __getitem__(self, index):
        # Item exactly as the parent ImageFolder would return it.
        base_item = super().__getitem__(index)
        # self.imgs has one entry per image; element 0 of an entry is its path.
        image_path = self.imgs[index][0]
        return base_item + (image_path,)


def _build_pipeline(*extra_steps):
    """Compose resize -> center-crop -> extra_steps -> tensor -> normalize.

    ``extra_steps`` are inserted between the crop and the tensor conversion,
    which is where augmentation transforms belong in this pipeline.
    """
    return transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        *extra_steps,
        transforms.ToTensor(),
        # Per-channel (mean, std) used to normalize every split identically.
        transforms.Normalize(
            (0.768986940, .6641706, 0.5923363),
            (0.18613161, 0.22524446, 0.23932885),
        ),
    ])


# Only training gets the random horizontal flip; validation and test pipelines
# contain no random steps.
train_transforms = _build_pipeline(transforms.RandomHorizontalFlip())
val_transforms = _build_pipeline()
test_transforms = _build_pipeline()

# Datasets are read from the "train", "val" and "test" folders, resolved
# relative to the current working directory. Only the test set is wrapped in
# ImageFolderWithPaths, so only test batches carry a file path as a third item.
train_data = datasets.ImageFolder("train", transform=train_transforms)
val_data = datasets.ImageFolder("val", transform=val_transforms)
test_data = ImageFolderWithPaths("test", transform=test_transforms)

batch_size = 12

# Only the training loader shuffles. Accuracy does not depend on batch order,
# and a shuffling validation loader would draw from the global RNG on every
# evaluation pass, making the training order depend on how often we validate.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)
# One image per batch so each prediction maps to exactly one file path.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1)


In [3]:
# Set to False to stay on the CPU even when CUDA is available.
USE_GPU = True

# Floating-point type that image batches are cast to before the forward pass.
dtype = torch.float32

# CUDA is chosen only when it is both requested and present; otherwise CPU.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

# Iteration interval at which train() reports the loss and runs validation.
print_every = 11800

print('using device:', device)

using device: cuda


In [4]:
def check_accuracy(loader, model):
    """Print and return the model's top-1 accuracy over ``loader``.

    Args:
        loader: iterable yielding ``(x, y)`` batches, where ``x`` is the model
            input and ``y`` holds the integer class targets.
        model: module whose output has one score per class along dimension 1.

    Returns:
        Fraction of correctly classified samples as a float in ``[0, 1]``;
        ``0.0`` when the loader yields no samples.

    Side effects:
        Leaves ``model`` in evaluation mode. Uses the module-level ``device``
        and ``dtype`` to place and cast the batches.
    """
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            # Stays a tensor while accumulating; converted to int once below.
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
    num_correct = int(num_correct)
    # Guard against an empty loader instead of raising ZeroDivisionError.
    acc = num_correct / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

def train(model, optimizer, epochs):
    """Optimize ``model`` on the module-level ``train_loader``.

    Args:
        model: module to train; it is moved to the module-level ``device``.
        optimizer: optimizer already bound to ``model``'s parameters.
        epochs: number of full passes over ``train_loader``.

    The loss is printed every 100 iterations. Every ``print_every`` iterations
    the loss is printed again and validation accuracy is reported through
    ``check_accuracy(val_loader, model)``. Nothing is returned.
    """
    model = model.to(device=device)
    for _ in range(epochs):
        for step, (inputs, targets) in enumerate(train_loader):
            # Re-enter training mode every step: check_accuracy() switches the
            # model to eval mode whenever it runs.
            model.train()
            inputs = inputs.to(device=device, dtype=dtype)
            targets = targets.to(device=device, dtype=torch.long)

            loss = F.cross_entropy(model(inputs), targets)

            # Clear stale gradients, backpropagate, then apply the update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                print(step, loss.item())
            if step % print_every != 0:
                continue
            print('Iteration %d, loss = %.4f' % (step, loss.item()))
            check_accuracy(val_loader, model)
            print()

In [5]:
# Start from torchvision's pretrained VGG-11 (batch-norm variant) and replace
# the last classifier layer with a freshly initialized 251-output linear head.
model = models.vgg11_bn(pretrained=True)
model.classifier[6] = nn.Linear(in_features=4096, out_features=251)

# SGD with Nesterov momentum over every parameter of the network, so the
# pretrained layers are fine-tuned together with the new head.
optimizer = optim.SGD(
    model.parameters(),
    lr=1e-2,
    momentum=.9,
    nesterov=True,
)

# Two passes over the training set.
train(model, optimizer, epochs=2)

0 5.629727840423584
Iteration 0, loss = 5.6297
Got 51 / 11994 correct (0.43)

100 5.6182122230529785
200 5.466123104095459
300 4.838940143585205
400 4.867872714996338
500 5.372663497924805
600 4.668335914611816
700 4.513436794281006
800 4.877199172973633
900 4.866020679473877
1000 4.656344890594482
1100 3.896261215209961
1200 3.7679102420806885
1300 4.67205810546875
1400 3.9424381256103516
1500 4.714105129241943
1600 4.459527969360352
1700 3.8977839946746826
1800 4.569260597229004
1900 4.1941633224487305
2000 4.760339736938477
2100 3.9242098331451416
2200 3.87227463722229
2300 3.698213815689087
2400 4.435128688812256
2500 3.567122220993042
2600 4.389989376068115
2700 4.440151214599609
2800 4.191368579864502
2900 3.3194189071655273
3000 3.649888753890991
3100 3.4508800506591797
3200 3.9935734272003174
3300 4.125541687011719
3400 3.128769636154175
3500 3.641315460205078
3600 4.828465938568115
3700 3.4607436656951904
3800 3.5418968200683594
3900 3.639989137649536
4000 3.5609219074249268
4

In [6]:
# Report validation accuracy, then write the top-3 predicted classes for every
# test image to the submission file, one image per line.
check_accuracy(val_loader, model)
model.eval()
model.to(device=device)
with open('submission2.txt', 'w') as submission_file:
    submission_file.write("label1 label2 label3\n")
    with torch.no_grad():
        for x, y, path in test_loader:
            # test_loader uses batch_size=1, so `path` holds a single entry.
            # Take the file name itself rather than a fixed-width slice, which
            # only worked when every name was exactly 15 characters long.
            name = os.path.basename(path[0])
            x = x.to(device=device, dtype=dtype)
            scores = model(x)
            # Indices of the three highest scores for the only image in the batch.
            out_labels = torch.topk(scores, 3)[1][0].tolist()
            # NOTE(review): these are ImageFolder class indices (position of the
            # class folder in sorted order), not the folder names -- confirm
            # that this is what the submission format expects.
            submission_file.write(name + "," + " ".join(str(label) for label in out_labels) + "\n")
print("done writing")

Got 4462 / 11994 correct (37.20)
done writing


In [None]:
# torch.save() needs a destination as its second argument; calling it with the
# model alone raises TypeError. Persist the weights (state_dict) rather than
# the pickled module; reload them on an identically constructed model with
# model.load_state_dict(torch.load('vgg11_bn_finetuned.pt')).
torch.save(model.state_dict(), 'vgg11_bn_finetuned.pt')

In [5]:
# Estimate per-channel mean and std from one batch of test images.
#
# The statistics are taken through a dedicated loader without Normalize:
# test_loader already normalizes its images, yields one image per batch and
# returns (image, label, path) triples, so it cannot be unpacked into two names
# and its values would not be usable as Normalize parameters.
STATS_BATCH_SIZE = 32
stats_transforms = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])
stats_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder("test", transform=stats_transforms),
    batch_size=STATS_BATCH_SIZE,
)

# next(iter(...)) is the Python 3 way to pull a single batch.
images, labels = next(iter(stats_loader))
numpy_images = images.numpy()

# Reduce over the two spatial axes: one value per image and channel.
per_image_mean = np.mean(numpy_images, axis=(2, 3))  # shape (batch, 3)
per_image_std = np.std(numpy_images, axis=(2, 3))  # shape (batch, 3)

pop_channel_mean = np.mean(per_image_mean, axis=0)  # shape (3,)
# Average of the per-image stds: an approximation, not the pooled std.
pop_channel_std = np.mean(per_image_std, axis=0)
print(pop_channel_mean)
print(pop_channel_std)

[0.7097613  0.59887534 0.54972327]
[0.19917071 0.22445115 0.24484591]
