In [299]:
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.autograd import Variable
import numpy as np
import random
from PIL import Image
from ipywidgets import FloatProgress
from IPython.display import display
from __future__ import print_function

In [178]:
def readImages(imageFile="imList.txt", size=(299,299), openAll=True):
    """
        Read a list of image paths and optionally load the images.

        args :
            imageFile = file with one image path per line (blank lines are skipped)
            size = (width, height) every loaded image is resized to;
                   unused when openAll is False
            openAll = bool : load images in memory or not
        ret :
            with openAll : <image path list>, <image list>
            without openAll : <image path list>
    """
    # Read the paths first so the list file is closed before the (slow) decoding.
    with open(imageFile) as f:
        imList = [line for line in f.read().splitlines() if line.strip()]

    if not openAll:
        return imList

    imOpen = []
    for path in imList:
        # The context manager releases the underlying file handle right away;
        # convert()/resize() return new in-memory images that stay valid.
        with Image.open(path) as img:
            # Force 3 channels: the statistics and Normalize steps of this
            # notebook index exactly the R, G and B planes.
            imOpen.append(img.convert("RGB").resize(size, Image.BILINEAR))
    return imList, imOpen

In [203]:
def ComputeMean(imagesList):
    """
        Compute the per-channel mean over all pixels of all images.

        args :
            imagesList = non-empty sequence of 3-channel images; each item is
                         either a PIL image (converted with
                         transforms.ToTensor) or an already converted
                         C x H x W tensor
        ret :
            (mean_r, mean_g, mean_b) as Python floats
        raises :
            ValueError if imagesList is empty
    """
    if len(imagesList) == 0:
        raise ValueError("ComputeMean needs at least one image")

    sums = [0.0, 0.0, 0.0]
    nbPixels = 0
    for im in imagesList:
        t = im if torch.is_tensor(im) else transforms.ToTensor()(im)
        for c in range(3):
            # One tensor reduction per channel instead of a Python loop over
            # every pixel; accumulate in double to limit rounding error.
            sums[c] += float(t[c].double().sum())
        # Count pixels per image so differently sized images are weighted correctly.
        nbPixels += t[0].numel()
    return tuple(total / nbPixels for total in sums)

In [214]:
def ComputeStdDev(imagesList, mean):
    """
        Compute the per-channel (population) standard deviation over all
        pixels of all images.

        args :
            imagesList = non-empty sequence of 3-channel images; each item is
                         either a PIL image (converted with
                         transforms.ToTensor) or an already converted
                         C x H x W tensor
            mean = per-channel means, indexable as mean[0], mean[1], mean[2]
        ret :
            (std_r, std_g, std_b) as Python floats
        raises :
            ValueError if imagesList is empty
    """
    if len(imagesList) == 0:
        raise ValueError("ComputeStdDev needs at least one image")

    sqSums = [0.0, 0.0, 0.0]
    nbPixels = 0
    for im in imagesList:
        t = im if torch.is_tensor(im) else transforms.ToTensor()(im)
        for c in range(3):
            # Sum of squared deviations for the whole channel in one reduction.
            sqSums[c] += float(((t[c].double() - mean[c]) ** 2).sum())
        nbPixels += t[0].numel()
    return tuple((total / nbPixels) ** 0.5 for total in sqSums)

Read the dataset and compute the per-channel mean and standard deviation:

In [300]:
# Load the training paths and their resized images once, then derive the
# per-channel statistics that later cells read as `m` and `s`.
trainset, imagesList = readImages(imageFile="CliList.txt")

m = ComputeMean(imagesList)
s = ComputeStdDev(imagesList, mean=m)

print("Mean : ", m)
print("std dev : ", s)

Mean :  (0.42602490885018174, 0.4269285229908378, 0.418182238544934)
std dev :  (0.20014586928330125, 0.17607878531703874, 0.17040668227814146)


Define the network as a class (subclassing nn.Module):

In [297]:
class maxnet(nn.Module):
    """
        Five-convolution feature extractor followed by a two-layer
        fully connected classifier.

        The classifier expects the feature extractor to output 256 maps of
        6 x 6, which is what a 3 x 225 x 225 input produces.

        args :
            nbClass = number of output scores (classes)
    """

    def __init__(self, nbClass=464):
        super(maxnet, self).__init__()

        def pool():
            # Every pooling stage of the network uses the same 3x3 / stride 2 window.
            return nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1))

        # Layers are created in forward order so parameter indices
        # (features.0, features.3, ...) stay the same.
        convStack = [
            nn.Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),
            nn.ReLU(inplace=True),
            pool(),
            nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
            nn.ReLU(inplace=True),
            pool(),
            nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            pool(),
        ]
        self.features = nn.Sequential(*convStack)

        denseStack = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, nbClass),
        ]
        self.classifier = nn.Sequential(*denseStack)

    def forward(self, x):
        """Return the class scores (batch x nbClass) for a batch of images."""
        featureMaps = self.features(x)
        # Flatten everything but the batch dimension for the linear layers.
        flat = featureMaps.view(featureMaps.size(0), -1)
        return self.classifier(flat)

Training

In [347]:
# --- model, loss, optimizer ---------------------------------------------
mymodel = maxnet().cuda()

# Use the public torch.nn name; the `nn.loss` submodule attribute is not
# part of the documented API.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(mymodel.parameters(), lr=0.01, momentum=0.9)

# --- data ------------------------------------------------------------------
# trainset / imagesList and the statistics m, s come from the cell that reads
# "CliList.txt". The test split is loaded here because the evaluation below
# needs it and no other cell defines it.
testset, imagesTest = readImages("CliListTest.txt")
with open("CliConcept.txt") as f:
    labels = f.read().splitlines()

# Class name -> class index. setdefault keeps the first occurrence, which is
# what labels.index() returned, without a linear scan per lookup.
labelIndex = {}
for idx, name in enumerate(labels):
    labelIndex.setdefault(name, idx)

cropSize = 225  # spatial input size; maxnet's classifier expects 6x6 feature maps
batchSize = 64

imageTransform = transforms.Compose( (transforms.RandomCrop(cropSize), transforms.ToTensor(), transforms.Normalize(m,s)) )
testTransform = transforms.Compose( (transforms.Scale(cropSize), transforms.ToTensor(), transforms.Normalize(m,s)))


def _conceptOf(path):
    """Return the class name encoded in an image path ('<dirs>/<concept>-<rest>')."""
    return path.split('/')[-1].split('-')[0]


def _evaluate(model, paths, images, transform, batchSize):
    """
        Count correct top-1 predictions over a whole image list.

        Images whose concept is not in labelIndex are ignored. The last batch
        may be smaller than batchSize; an empty list yields (0, 0).

        ret : <number correct>, <number of images counted>
    """
    model.eval()  # Dropout must be disabled while measuring accuracy
    correct, tot = 0, 0
    for start in range(0, len(paths), batchSize):
        stop = min(start + batchSize, len(paths))
        # Fill the batch on the CPU and transfer it to the GPU once.
        inp = torch.Tensor(stop - start, 3, cropSize, cropSize)
        for k in range(start, stop):
            inp[k - start] = transform(images[k])
        outputs = model(Variable(inp.cuda()))
        _, predicted = torch.max(outputs.data, 1)
        # Flatten so indexing does not depend on whether max() kept the
        # reduced dimension.
        predicted = predicted.view(-1).tolist()
        for k in range(start, stop):
            concept = _conceptOf(paths[k])
            if concept in labelIndex:
                correct += int(predicted[k - start] == labelIndex[concept])
                tot += 1
    model.train()  # restore training behaviour for the caller
    return correct, tot


# --- training --------------------------------------------------------------
for epoch in range(50): # loop over the dataset multiple times
    running_loss = 0.0
    # `//` keeps the batch count an integer on Python 2 and 3 alike.
    for i in range(len(trainset) // batchSize):
        # get the inputs: batchSize images drawn at random, with replacement
        elIndex = [random.randrange(0, len(trainset)) for j in range(batchSize)]
        inputs = torch.Tensor(batchSize, 3, cropSize, cropSize)
        for j, idx in enumerate(elIndex):
            inputs[j] = imageTransform(imagesList[idx])
        inputs = Variable(inputs.cuda())
        lab = Variable(torch.LongTensor([labelIndex[_conceptOf(trainset[idx])] for idx in elIndex]).cuda())

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = mymodel(inputs)
        loss = criterion(outputs, lab)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
        if i % 10 == 9: # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss / 10))
            running_loss = 0.0
        if i % 50 == 49: # test every 50 mini-batches
            print('test :')
            correct, tot = _evaluate(mymodel, testset, imagesTest, testTransform, batchSize)
            print("Correct : ", correct, "/", tot)


print('Finished Training')

[1,    10] loss: 6.141
[1,    20] loss: 6.139
[1,    30] loss: 6.137
[1,    40] loss: 6.135
[1,    50] loss: 6.129
test :
Correct :  1 / 165
[2,    10] loss: 6.116
[2,    20] loss: 6.068
[2,    30] loss: 6.090
[2,    40] loss: 6.037
[2,    50] loss: 6.050
test :
Correct :  1 / 165
[3,    10] loss: 6.029
[3,    20] loss: 5.988
[3,    30] loss: 5.975
[3,    40] loss: 5.964
[3,    50] loss: 5.969
test :
Correct :  0 / 165
[4,    10] loss: 5.917
[4,    20] loss: 5.918
[4,    30] loss: 5.959
[4,    40] loss: 5.888
[4,    50] loss: 5.859
test :
Correct :  3 / 165
[5,    10] loss: 5.836
[5,    20] loss: 5.800
[5,    30] loss: 5.828
[5,    40] loss: 5.787
[5,    50] loss: 5.697
test :
Correct :  0 / 165
[6,    10] loss: 5.641
[6,    20] loss: 5.683
[6,    30] loss: 5.580
[6,    40] loss: 5.638
[6,    50] loss: 5.619
test :
Correct :  0 / 165
[7,    10] loss: 5.556
[7,    20] loss: 5.496
[7,    30] loss: 5.431
[7,    40] loss: 5.451
[7,    50] loss: 5.439
test :
Correct :  3 / 165
[8,    10] lo

KeyboardInterrupt: 

Variable containing:
-7.5819
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
-1.4579
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
-5.7211
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
-3.3315
[torch.cuda.FloatTensor of size 1 (GPU 0)]

