In [3]:
import torch
from torchvision import transforms, datasets

In [4]:
# Preprocessing applied to every image: grayscale conversion, tensor conversion,
# then mean-centering (a std of 1 means the values are shifted but not rescaled).
# NOTE(review): the mean constant is presumably the training-set pixel mean — confirm.
data_transform = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5089547997389491], std=[1]),
    ]
)

# Dataset backed by the image files found under ./train.
allImages = datasets.ImageFolder(root='./train', transform=data_transform)

In [15]:
# Batches of 128 samples, served in dataset order (no shuffling is requested).
dataloader = torch.utils.data.DataLoader(
    dataset=allImages,
    batch_size=128,
)

In [16]:
import torch.nn as nn
import math

In [17]:
class VGG(nn.Module):
    """VGG-style convolutional network for single-channel images.

    The network is assembled in ``__init__`` from three kinds of stages:

    * ``conv``    - 3x3 convolution (stride 1, padding 1) + BatchNorm + LeakyReLU(0.2)
    * ``maxpool`` - 2x2 max pooling (stride 2, padding 1)
    * ``fc``      - fully connected layer followed by a sigmoid

    All ``fc`` stages come last; the feature map is flattened right before the
    first of them. The flattened size is derived from a hard-coded 186 x 171
    input, so inputs of a different spatial size fail at the first linear layer.

    Stages are kept in ``self.layers`` as an ``nn.ModuleList`` so that their
    parameters and buffers are registered with the module (visible to
    ``parameters()``, ``state_dict()``, ``.to()``, ``train()`` / ``eval()``).
    """

    def __init__(self, init_output = 64):
        """Build the layer stack.

        Args:
            init_output: stored on the instance as ``self.init_output``. The layer
                plan below uses fixed channel counts and does not read it.
        """
        super(VGG,self).__init__()
        self.conv_params = {'kernel_size': 3, 'stride': 1, 'padding': 1}
        self.maxpool_params = {'kernel_size': 2, 'stride': 2, 'padding': 1, 'dilation': 1}
        self.init_output = init_output

        # A ModuleList (not a plain list) so every stage is registered as a sub-module.
        self.layers = nn.ModuleList()
        # Number of trailing fully connected stages; forward() flattens before them.
        self.num_fc_layers = 0

        # Running shape bookkeeping, updated as stages are appended.
        self.in_channels = 1
        self.maxpool_out = -1   # flattened feature count after the latest maxpool
        self.in_features = -1   # output width of the latest fc stage
        self.width = 186
        self.height = 171

        # this assumes you can mix conv and maxpools, with all fc at the end
        def conv(out_channels):
            """Append a conv + batch-norm + leaky-ReLU stage (spatial size unchanged)."""
            self.layers.append(nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, **self.conv_params),
                nn.BatchNorm2d(num_features = out_channels),
                nn.LeakyReLU(0.2)
            ))
            self.in_channels = out_channels
            print("applied conv: image is now ", self.width, " by ", self.height, " by ", self.in_channels)

        def maxpool_size(x):
            """Return the pooled length of a dimension of length ``x``."""
            kernel_size = self.maxpool_params['kernel_size']
            padding = self.maxpool_params['padding']
            stride = self.maxpool_params['stride']
            dilation = self.maxpool_params['dilation']
            return math.floor((x + 2 * padding - dilation * (kernel_size - 1) - 1) / stride  + 1)

        def maxpool():
            """Append a max-pooling stage and update the tracked spatial size."""
            self.layers.append(nn.MaxPool2d(**self.maxpool_params))
            self.width = maxpool_size(self.width)
            self.height = maxpool_size(self.height)

            self.maxpool_out = self.width * self.height * self.in_channels
            print("applied maxpool: image is now ", self.width, " by ", self.height, " by ", self.in_channels)

        def fc(out_features, first=False):
            """Append a linear + sigmoid stage.

            The first fc stage consumes the flattened maxpool output; later ones
            consume the previous fc stage's output.
            """
            in_features = self.maxpool_out if first else self.in_features

            self.layers.append(nn.Sequential(
                nn.Linear(in_features, out_features),
                nn.Sigmoid()
            ))
            self.in_features = out_features
            self.num_fc_layers += 1

        conv(64)
        maxpool()
        conv(128)
        maxpool()
        conv(256)
        conv(256)
        maxpool()
        conv(512)
        conv(512)
        maxpool()
        conv(512)
        conv(512)
        maxpool()
        fc(4096, first=True)
        fc(4096)
        fc(100)


    def forward(self,X):
        """Run ``X`` through the conv/pool stages, flatten, then the fc stages.

        Args:
            X: batch of images; the first dimension is treated as the batch
                dimension. NOTE(review): expected to be single-channel with the
                spatial size assumed in ``__init__`` - confirm against the data.

        Returns:
            The output of the last fc stage (one row per input sample).
        """
        # Index of the first fully connected stage, derived from what was built
        # rather than assuming a fixed count.
        first_fc = len(self.layers) - self.num_fc_layers

        for layer in self.layers[:first_fc]:
            X = layer(X)

        # Debug trace of the feature-map shape just before flattening.
        print(X.shape)

        # Flatten per sample. Keeping the batch dimension fixed means a wrong input
        # size raises at the first linear layer instead of silently changing the
        # number of rows.
        X = X.view(X.size(0), -1)

        for layer in self.layers[first_fc:]:
            X = layer(X)

        return X

In [18]:
# Instantiate the network; the constructor prints the tracked shape after each stage.
vgg = VGG(init_output=64)

applied conv: image is now  186  by  171  by  64
applied maxpool: image is now  94  by  86  by  64
applied conv: image is now  94  by  86  by  128
applied maxpool: image is now  48  by  44  by  128
applied conv: image is now  48  by  44  by  256
applied conv: image is now  48  by  44  by  256
applied maxpool: image is now  25  by  23  by  256
applied conv: image is now  25  by  23  by  512
applied conv: image is now  25  by  23  by  512
applied maxpool: image is now  13  by  12  by  512
applied conv: image is now  13  by  12  by  512
applied conv: image is now  13  by  12  by  512
applied maxpool: image is now  7  by  7  by  512


In [None]:
# Smoke test: push only the first batch through the network.
batch = next(iter(dataloader), None)
if batch is not None:
    # batch[0] is the first element of the batch the loader yields.
    vgg(batch[0])