In [1]:
import torch.nn as nn
import torch
from torchvision import transforms, datasets
import numpy as np
import math

## Image Preprocessing

They only mean center so we found the mean pixel value of faces and normalize with that.

In [2]:
data_transform = transforms.Compose([transforms.Grayscale(),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.5089547997389491],
                                     std=[1])])
allImages = datasets.ImageFolder(root='./training',transform = data_transform)
label_mapping = torch.FloatTensor([float(clazz) for clazz in allImages.classes])
# label_mappin
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# make it output from [0,1] rather than [1900,2010]
label_mapping_scaled = (label_mapping - label_mapping.min())/(label_mapping.max() - label_mapping.min())

In [4]:
dataloader = torch.utils.data.DataLoader(allImages,batch_size = 16, shuffle=True)

In [5]:
class ResNet(nn.Module):
    
    def __init__(self, n_layers, final_output):
        super(ResNet,self).__init__()
        self.conv_params = {'kernel_size': 3, 'padding': 1}
        self.width = 186
        self.height = 171
        self.layer_dict = {
            18: [2,2,2,2],
            34: [3,4,6,3],
            50: [3,4,6,3],
            101: [3,4,23,3]
        }
        self.bottleneck = True if n_layers in [50,101] else False
        self.layers = {}
        
        in_channels = 1
        out_channels = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size = 7, stride = 2, padding = 3),
            nn.BatchNorm2d(num_features = out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1, dilation = 1)
        )
        self.width = self.width / 2
        self.width = math.floor( (self.width - 1) / 2 + 1 )
        self.height = self.height / 2
        self.height = math.floor( (self.height - 1) / 2 + 1)
        print("Height is now:", self.height, "Width is now:", self.width)
        
        
        in_channels = 64
        
        num_repeat = self.layer_dict[n_layers]
        for i in range(2,6):
            self.res_layer = i
            # [ [blocks], transition ]
            self.layers[self.res_layer] = [[], None]
            for j in range(num_repeat[i-2]):
                self.create_block(in_channels, out_channels, j)
                if j == 0:
                    self.add_transition(in_channels, out_channels)
                if self.bottleneck:
                    in_channels = out_channels * 4
                else:
                    in_channels = out_channels
            out_channels = out_channels * 2
        self.relu = nn.ReLU(inplace=True)
        # global average pooling
        self.global_avg = nn.AvgPool2d(kernel_size = (self.width,self.height), stride = 1)
        # fully connected to final
        self.output = nn.Linear(in_channels,1)
        
    def create_block(self, in_channels, out_channels, block_num):
        if self.bottleneck:
            block = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, stride = 1, kernel_size = 1),
                nn.BatchNorm2d(num_features = out_channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, stride = 1, **self.conv_params),
                nn.BatchNorm2d(num_features = out_channels),
                nn.ReLU(inplace=True), 
                nn.Conv2d(out_channels, out_channels*4, stride = 2 if (block_num == 0 and self.res_layer > 2) else 1, kernel_size = 1),
                nn.BatchNorm2d(num_features = out_channels*4),
            )
            print("Added", "conv_bottleneck" + str(self.res_layer) + "_" + str(block_num), "input: " + str(in_channels), "output: " + str(out_channels*4))
        else:
            block = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, stride = 1, **self.conv_params),
                nn.BatchNorm2d(num_features = out_channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, stride = 2 if block_num == 0 else 1, **self.conv_params),
                nn.BatchNorm2d(num_features = out_channels)
            )
            print("Added", "conv" + str(self.res_layer) + "_" + str(block_num), "input: " + str(in_channels), "output: " + str(out_channels))
        self.add_module("conv" + str(self.res_layer) + "_" + str(block_num), block)
        self.layers[self.res_layer][0].append(block)
        
        if block_num == 0 and self.res_layer > 2:
            self.height = math.floor( (self.height - 1) / 2 + 1)
            self.width = math.floor( (self.width - 1) / 2 + 1)
            print("Height is now:", self.height, "Width is now:", self.width)
        
    def add_transition(self, in_channel, out_channel):
        transition = nn.Sequential(
            nn.Conv2d(in_channel, out_channel * 4, stride = 2 if self.res_layer > 2 else 1, kernel_size = 1),
            nn.BatchNorm2d(num_features = out_channel*4),
        )
#         transition = nn.AvgPool2d(kernel_size = 3, stride = 2 if self.res_layer > 2 else 1, padding = 1)
        self.add_module("transition"+ str(self.res_layer), transition)
        self.layers[self.res_layer][1] = transition
    
    def forward(self, X):
        # go through conv1
        X = self.conv1(X)
        # go through residuals
        for i in range(2,self.res_layer + 1):
            layers,transition = self.layers[i]
            for j,layer in enumerate(layers):
                identity = X
                if j == 0:
                    identity = transition(X)
                X = layer(X) + identity
                X = self.relu(X)
        X = self.global_avg(X)
        X = X.view(X.shape[0],-1)
        X = self.output(X)
        print(X.shape)
        return X.view(-1)
        

In [6]:
class ResNeXt(nn.Module):
    
    def __init__(self, n_layers, final_output):
        super(ResNeXt,self).__init__()
        self.conv_params = {'kernel_size': 3, 'padding': 1}
        self.width = 186
        self.height = 171
        self.layer_dict = {
            50: [3,4,6,3],
            101: [3,4,23,3]
        }
        self.bottleneck = True if n_layers in [50,101] else False
        self.layers = {}
        self.C = 32
        
        in_channels = 1
        out_channels = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size = 7, stride = 2, padding = 3),
            nn.BatchNorm2d(num_features = out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1, dilation = 1)
        )
        self.width = self.width / 2
        self.width = math.floor( (self.width - 1) / 2 + 1 )
        self.height = self.height / 2
        self.height = math.floor( (self.height - 1) / 2 + 1)
        print("Height is now:", self.height, "Width is now:", self.width)
        
        
        in_channels = 64
        out_channels = 128
        
        num_repeat = self.layer_dict[n_layers]
        for i in range(2,6):
            self.res_layer = i
            # [ [blocks], transition ]
            self.layers[self.res_layer] = [[], None]
            for j in range(num_repeat[i-2]):
                blocks = []
                for k in range(self.C):
                    blocks.append(self.create_block(in_channels, out_channels//self.C, j, k))
                if j == 0:
                    self.add_transition(in_channels, out_channels)
                in_channels = out_channels * 2
                self.layers[self.res_layer][0].append(blocks)
            out_channels = out_channels * 2
        self.relu = nn.ReLU(inplace=True)
        # global average pooling
        print(self.width,self.height)
        self.global_avg = nn.AvgPool2d(kernel_size = (self.width,self.height), stride = 1)
        # fully connected to final
        self.output = nn.Linear(in_channels,1)
        
    def create_block(self, in_channels, out_channels, block_num, k):
        if self.bottleneck:
            block = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, stride = 1, kernel_size = 1),
                nn.BatchNorm2d(num_features = out_channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, stride = 1, **self.conv_params),
                nn.BatchNorm2d(num_features = out_channels),
                nn.ReLU(inplace=True), 
                nn.Conv2d(out_channels, out_channels*self.C*2, stride = 2 if (block_num == 0 and self.res_layer > 2) else 1, kernel_size = 1),
                nn.BatchNorm2d(num_features = out_channels*self.C*2),
            )
            print("Added", "conv_bottleneck" + str(self.res_layer) + "_" + str(block_num) + "_" + str(k), "input: " + str(in_channels), "output: " + str(out_channels*self.C*2))
        else:
            block = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, stride = 1, **self.conv_params),
                nn.BatchNorm2d(num_features = out_channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, stride = 2 if block_num == 0 else 1, **self.conv_params),
                nn.BatchNorm2d(num_features = out_channels)
            )
            print("Added", "conv" + str(self.res_layer) + "_" + str(block_num) + "_" + str(k), "input: " + str(in_channels), "output: " + str(out_channels))
        self.add_module("conv" + str(self.res_layer) + "_" + str(block_num) + "_" + str(k), block)
        
        if block_num == 0 and self.res_layer > 2 and k == 0:
            self.height = math.floor( (self.height - 1) / 2 + 1)
            self.width = math.floor( (self.width - 1) / 2 + 1)
            print("Height is now:", self.height, "Width is now:", self.width)
        
        return block
        
    def add_transition(self, in_channel, out_channel):
        transition = nn.Sequential(
            nn.Conv2d(in_channel, out_channel * 2, stride = 2 if self.res_layer > 2 else 1, kernel_size = 1),
            nn.BatchNorm2d(num_features = out_channel*2),
        )
#         transition = nn.AvgPool2d(kernel_size = 3, stride = 2 if self.res_layer > 2 else 1, padding = 1)
        self.add_module("transition"+ str(self.res_layer), transition)
        self.layers[self.res_layer][1] = transition
    
    def forward(self, X):
        # go through conv1
        X = self.conv1(X)
        # go through residuals
        for i in range(2,self.res_layer + 1):
            layers,transition = self.layers[i]
            for j,block in enumerate(layers):
                identity = X
                res = None
                for k,layer in enumerate(block):
                    if res is None:
                        res = layer(X)
                    else:
                        res = res + layer(X)
                if j == 0:
                    identity = transition(X)
                X = res + identity
                X = self.relu(X)
        X = self.global_avg(X)
        X = X.view(X.shape[0],-1)
        X = self.output(X)
        return X.view(-1)
        

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [8]:
resnet = ResNeXt(50,1).to(device)

Height is now: 43 Width is now: 47
Added conv_bottleneck2_0_0 input: 64 output: 256
Added conv_bottleneck2_0_1 input: 64 output: 256
Added conv_bottleneck2_0_2 input: 64 output: 256
Added conv_bottleneck2_0_3 input: 64 output: 256
Added conv_bottleneck2_0_4 input: 64 output: 256
Added conv_bottleneck2_0_5 input: 64 output: 256
Added conv_bottleneck2_0_6 input: 64 output: 256
Added conv_bottleneck2_0_7 input: 64 output: 256
Added conv_bottleneck2_0_8 input: 64 output: 256
Added conv_bottleneck2_0_9 input: 64 output: 256
Added conv_bottleneck2_0_10 input: 64 output: 256
Added conv_bottleneck2_0_11 input: 64 output: 256
Added conv_bottleneck2_0_12 input: 64 output: 256
Added conv_bottleneck2_0_13 input: 64 output: 256
Added conv_bottleneck2_0_14 input: 64 output: 256
Added conv_bottleneck2_0_15 input: 64 output: 256
Added conv_bottleneck2_0_16 input: 64 output: 256
Added conv_bottleneck2_0_17 input: 64 output: 256
Added conv_bottleneck2_0_18 input: 64 output: 256
Added conv_bottleneck2_0_

Added conv_bottleneck3_2_0 input: 512 output: 512
Added conv_bottleneck3_2_1 input: 512 output: 512
Added conv_bottleneck3_2_2 input: 512 output: 512
Added conv_bottleneck3_2_3 input: 512 output: 512
Added conv_bottleneck3_2_4 input: 512 output: 512
Added conv_bottleneck3_2_5 input: 512 output: 512
Added conv_bottleneck3_2_6 input: 512 output: 512
Added conv_bottleneck3_2_7 input: 512 output: 512
Added conv_bottleneck3_2_8 input: 512 output: 512
Added conv_bottleneck3_2_9 input: 512 output: 512
Added conv_bottleneck3_2_10 input: 512 output: 512
Added conv_bottleneck3_2_11 input: 512 output: 512
Added conv_bottleneck3_2_12 input: 512 output: 512
Added conv_bottleneck3_2_13 input: 512 output: 512
Added conv_bottleneck3_2_14 input: 512 output: 512
Added conv_bottleneck3_2_15 input: 512 output: 512
Added conv_bottleneck3_2_16 input: 512 output: 512
Added conv_bottleneck3_2_17 input: 512 output: 512
Added conv_bottleneck3_2_18 input: 512 output: 512
Added conv_bottleneck3_2_19 input: 512 ou

Added conv_bottleneck4_2_29 input: 1024 output: 1024
Added conv_bottleneck4_2_30 input: 1024 output: 1024
Added conv_bottleneck4_2_31 input: 1024 output: 1024
Added conv_bottleneck4_3_0 input: 1024 output: 1024
Added conv_bottleneck4_3_1 input: 1024 output: 1024
Added conv_bottleneck4_3_2 input: 1024 output: 1024
Added conv_bottleneck4_3_3 input: 1024 output: 1024
Added conv_bottleneck4_3_4 input: 1024 output: 1024
Added conv_bottleneck4_3_5 input: 1024 output: 1024
Added conv_bottleneck4_3_6 input: 1024 output: 1024
Added conv_bottleneck4_3_7 input: 1024 output: 1024
Added conv_bottleneck4_3_8 input: 1024 output: 1024
Added conv_bottleneck4_3_9 input: 1024 output: 1024
Added conv_bottleneck4_3_10 input: 1024 output: 1024
Added conv_bottleneck4_3_11 input: 1024 output: 1024
Added conv_bottleneck4_3_12 input: 1024 output: 1024
Added conv_bottleneck4_3_13 input: 1024 output: 1024
Added conv_bottleneck4_3_14 input: 1024 output: 1024
Added conv_bottleneck4_3_15 input: 1024 output: 1024
Add

Added conv_bottleneck5_1_23 input: 2048 output: 2048
Added conv_bottleneck5_1_24 input: 2048 output: 2048
Added conv_bottleneck5_1_25 input: 2048 output: 2048
Added conv_bottleneck5_1_26 input: 2048 output: 2048
Added conv_bottleneck5_1_27 input: 2048 output: 2048
Added conv_bottleneck5_1_28 input: 2048 output: 2048
Added conv_bottleneck5_1_29 input: 2048 output: 2048
Added conv_bottleneck5_1_30 input: 2048 output: 2048
Added conv_bottleneck5_1_31 input: 2048 output: 2048
Added conv_bottleneck5_2_0 input: 2048 output: 2048
Added conv_bottleneck5_2_1 input: 2048 output: 2048
Added conv_bottleneck5_2_2 input: 2048 output: 2048
Added conv_bottleneck5_2_3 input: 2048 output: 2048
Added conv_bottleneck5_2_4 input: 2048 output: 2048
Added conv_bottleneck5_2_5 input: 2048 output: 2048
Added conv_bottleneck5_2_6 input: 2048 output: 2048
Added conv_bottleneck5_2_7 input: 2048 output: 2048
Added conv_bottleneck5_2_8 input: 2048 output: 2048
Added conv_bottleneck5_2_9 input: 2048 output: 2048
Add

In [9]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [10]:
count_parameters(resnet)

24414465

### Training

In [9]:
optim = torch.optim.Adam(resnet.parameters(),lr = 0.001, betas = (0.9,0.999))
loss_metric = nn.L1Loss()
n_epochs = 100
iteration = 0
for e in range(n_epochs):
    losses = []
    for batch_input, batch_labels in dataloader:
        if iteration % 1 == 0:
            print(iteration)
        # make sure to zero out gradient
        resnet.zero_grad()
        
        # move to gpu + get correct labels
        batch_input = batch_input.to(device)
        batch_labels = label_mapping_scaled[batch_labels].to(device)
        
        loss = loss_metric(resnet(batch_input),batch_labels)
        losses.append(loss.data)
        loss.backward()
        optim.step()
        iteration += 1
        del batch_input
        del batch_labels
#         break
    print("Epoch %d: Training Loss: %0.3f" % (e,np.mean(losses)))

0


RuntimeError: cuda runtime error (2) : out of memory at c:\programdata\miniconda3\conda-bld\pytorch_1524549877902\work\aten\src\thc\generic/THCStorage.cu:58

## Evaluation

In [40]:
data_transform = transforms.Compose([transforms.Grayscale(),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.5089547997389491],
                                     std=[1])])
valImages = datasets.ImageFolder(root='./validation',transform = data_transform)
label_mapping_v = torch.FloatTensor([float(clazz) for clazz in valImages.classes])

# label_mapping
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [41]:
label_mapping

tensor([ 1905.,  1906.,  1908.,  1909.,  1910.,  1911.,  1912.,  1913.,
         1914.,  1915.,  1916.,  1919.,  1922.,  1923.,  1924.,  1925.,
         1926.,  1927.,  1928.,  1929.,  1930.,  1931.,  1932.,  1933.,
         1934.,  1935.,  1936.,  1937.,  1938.,  1939.,  1940.,  1941.,
         1942.,  1943.,  1944.,  1945.,  1946.,  1947.,  1948.,  1949.,
         1950.,  1951.,  1952.,  1953.,  1954.,  1955.,  1956.,  1957.,
         1958.,  1959.,  1960.,  1961.,  1962.,  1963.,  1964.,  1965.,
         1966.,  1967.,  1968.,  1969.,  1970.,  1971.,  1972.,  1973.,
         1974.,  1975.,  1976.,  1977.,  1978.,  1979.,  1980.,  1981.,
         1982.,  1983.,  1984.,  1985.,  1986.,  1987.,  1988.,  1989.,
         1990.,  1991.,  1992.,  1993.,  1994.,  1995.,  1996.,  1997.,
         1998.,  1999.,  2000.,  2001.,  2002.,  2003.,  2004.,  2005.,
         2006.,  2007.,  2008.,  2009.,  2010.,  2011.,  2012.,  2013.])

In [42]:
label_mapping_v

tensor([ 1933.,  1935.,  1936.,  1940.,  1944.,  1945.,  1946.,  1947.,
         1949.,  1950.,  1951.,  1952.,  1954.,  1955.,  1959.,  1961.,
         1962.,  1963.,  1965.,  1967.,  1968.,  1970.,  1972.,  1973.,
         1975.,  1976.,  1977.,  1978.,  1979.,  1981.,  1983.,  1984.,
         1990.,  1991.,  1992.,  2000.,  2001.,  2002.,  2005.,  2008.,
         2011.,  2012.])

In [43]:
label_mapping_scaled_v = (label_mapping_v - label_mapping.min())/(label_mapping.max() - label_mapping.min())

In [23]:
label_mapping_scaled_v

tensor([ 0.0000,  0.0253,  0.0380,  0.0886,  0.1392,  0.1519,  0.1646,
         0.1772,  0.2025,  0.2152,  0.2278,  0.2405,  0.2658,  0.2785,
         0.3291,  0.3544,  0.3671,  0.3797,  0.4051,  0.4304,  0.4430,
         0.4684,  0.4937,  0.5063,  0.5316,  0.5443,  0.5570,  0.5696,
         0.5823,  0.6076,  0.6329,  0.6456,  0.7215,  0.7342,  0.7468,
         0.8481,  0.8608,  0.8734,  0.9114,  0.9494,  0.9873,  1.0000])

In [25]:
valDataloader = torch.utils.data.DataLoader(valImages,batch_size = 128,shuffle=True)

In [46]:
# to turn off running averages in batch norm
resnet34.eval()
losses = []
for batch_input,batch_labels in valDataloader:
    batch_input = batch_input.to(device)
    batch_labels = label_mapping_scaled_v[batch_labels].to(device)
    res = resnet34(batch_input)
#     print(res)
#     print(batch_labels)
    loss = loss_metric(res,batch_labels)
#     print(loss.data)
    losses.append(loss.data)
print(np.mean(losses))

0.046557505


In [27]:
print(losses)

[tensor(0.1226, device='cuda:0'), tensor(0.1452, device='cuda:0'), tensor(0.1358, device='cuda:0'), tensor(0.1300, device='cuda:0'), tensor(0.1321, device='cuda:0'), tensor(0.1246, device='cuda:0'), tensor(0.1419, device='cuda:0'), tensor(0.1292, device='cuda:0'), tensor(0.1356, device='cuda:0'), tensor(0.1464, device='cuda:0'), tensor(0.1410, device='cuda:0'), tensor(0.1308, device='cuda:0'), tensor(0.1389, device='cuda:0'), tensor(0.1282, device='cuda:0'), tensor(0.1357, device='cuda:0'), tensor(0.1299, device='cuda:0'), tensor(0.1335, device='cuda:0'), tensor(0.1398, device='cuda:0'), tensor(0.1365, device='cuda:0'), tensor(0.1391, device='cuda:0'), tensor(0.1293, device='cuda:0'), tensor(0.1508, device='cuda:0'), tensor(0.1500, device='cuda:0'), tensor(0.1600, device='cuda:0'), tensor(0.1442, device='cuda:0'), tensor(0.1385, device='cuda:0'), tensor(0.1298, device='cuda:0'), tensor(0.1410, device='cuda:0'), tensor(0.1505, device='cuda:0'), tensor(0.1444, device='cuda:0'), tensor(0.

In [28]:
batch_labels

tensor([ 0.2785,  0.8608,  0.8481,  0.7468,  0.3291,  0.2152,  0.4304,
         0.3797,  0.5823,  0.1392,  0.7468,  0.5443,  0.0380,  0.2785,
         0.4684,  0.4430,  0.6076], device='cuda:0')