In [1]:
import numpy as np
import pandas as pd
import torch.nn.functional as F
import pdb
from torch import nn
import math
from torch.optim import lr_scheduler
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn import metrics
import torch
import itertools
import seaborn as sns
sns.set(style="darkgrid")
from torch.autograd import Variable
from torch.optim import Adam
from torchvision.utils import make_grid
from torchvision.datasets.mnist import MNIST
from tqdm import tqdm
from torchvision import models
import torch.optim as optim
import torchvision
from matplotlib.ticker import MaxNLocator
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch import nn
from torch.utils.data import Dataset, DataLoader
import os
from torch.nn import MaxPool2d
!pip install torchsummary
from torchsummary import summary
import chainer.links as L
from PIL import Image
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")
plt.ion()

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


--------------------------------------------------------------------------------
CuPy (cupy-cuda101) version 8.1.0 may not be compatible with this version of Chainer.
Please consider installing the supported version by running:
  $ pip install 'cupy-cuda101>=7.7.0,<8.0.0'

See the following page for more details:
  https://docs-cupy.chainer.org/en/latest/install.html
--------------------------------------------------------------------------------

  requirement=requirement, help=help))


In [2]:
#Check if gpu is available
torch.cuda.is_available()

True

In [3]:
data_path = "/home/trojan/Desktop/dimentia/data_10slices/dataset with PGGAN/train"

In [4]:
transform = transforms.Compose([
                       transforms.Resize((28,28)),
                       transforms.Grayscale(num_output_channels=1),
                       transforms.ToTensor(),
                       transforms.Normalize(mean=(0.5,), std=(0.5,))
                   ])
dataset = torchvision.datasets.ImageFolder(root=data_path, transform=transform)
train_set, val_set = torch.utils.data.random_split(dataset, [15743, 3934])

train_loader = torch.utils.data.DataLoader(train_set, batch_size=32,
                                          shuffle=True, num_workers=4)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=32,
                                          shuffle=True, num_workers=4)


In [5]:
class CapsuleLevel(nn.Module):
    def __init__(self, in_channels, out_channels, capsule_dimension = 8, num_capsules = 10, num_capsule_units = 6 * 6 * 32, routing = False, iterations=3):
        super(CapsuleLevel, self).__init__()
        self.routing = routing
        self.iterations = iterations
        self.num_capsules = num_capsules
        self.num_capsule_units = num_capsule_units
        if routing == True:
            self.route_weights = nn.Parameter(torch.randn(num_capsule_units, num_capsules, out_channels, in_channels))
        else:
            self.capsules = nn.ModuleList(
                [nn.Conv2d(in_channels, out_channels, kernel_size=(9, 9), stride=(2, 2), padding=0) for _ in
                 range(capsule_dimension)])

            
    # The squash function
    def squash(self, s, dim=-1):
        norm = torch.sum(s**2, dim=dim, keepdim=True)
        return norm / (1 + norm) * s / (torch.sqrt(norm) + 1e-8)

    def forward(self, x):
        if self.routing == True:          # Routing by Agreement
            batch_size = x.size(0)
            
            route_weights = torch.stack([self.route_weights] * batch_size, dim = 0) #dim:([1152, 10, 16, 8])
            x = torch.stack([x] * self.num_capsules, dim=2).unsqueeze(4) #dim:([128, 1152, 10, 8, 1])
            u_hat = torch.matmul(self.route_weights, x)  #dim:([128, 1152, 10, 16, 1])
            u_hat = u_hat.squeeze(-1)   #dim([128, 1152, 10, 16])
            temp_u_hat = u_hat.detach()
            
            #b_ij dim:([128, 1152, 10, 1])
            b_ij = Variable(torch.zeros(batch_size, self.num_capsule_units, self.num_capsules, 1).cuda()) 
            for iteration in range(self.iterations):
                c_ij = F.softmax(b_ij, dim=1)   # Equation 1 # c_ij dim:([128, 1152, 10, 1])
                s_ij = (c_ij * temp_u_hat).sum(dim=1)  # Equation 2 # s_ij dim:([128, 10, 16])
                temp_u_hat = temp_u_hat.unsqueeze(3)  # temp_u_hat dim:([128, 1152, 10, 16, 1])
                v_j = self.squash(s_ij, dim=2) # Equation 3  # v_j dim:([128, 10, 16])
                v_j_i = torch.stack([v_j] * self.num_capsule_units, dim = 1).unsqueeze(-1) # v_j_i dim:([128, 10, 16, 1])
                v_j_i = torch.matmul(temp_u_hat, v_j_i).squeeze(3)  
                temp_u_hat = temp_u_hat.squeeze(3)
                b_ij = b_ij + v_j_i # Equation 4
        else:
            v_j = [capsule(x).view(x.size(0), -1, 1) for capsule in self.capsules]   
            v_j = torch.cat(v_j, dim=-1)
            v_j = self.squash(v_j)

        return v_j

In [6]:
class CapsuleNetwork(nn.Module):  
    def __init__(self):
        super(CapsuleNetwork, self).__init__()
        self.batch_size = 32
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=256, kernel_size=(9, 9), stride=(1, 1), padding=0)
        self.primaryCaps = CapsuleLevel(in_channels=256, out_channels=32, capsule_dimension=8)
        self.digitCaps   = CapsuleLevel(in_channels=8, out_channels=16, num_capsules=2, routing=True)
        self.decoder     = nn.Sequential(
            nn.Linear(16 * 2, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 784),
            nn.Sigmoid()
        )
    def forward(self, x, y=None):
        x = F.relu(self.conv1(x), inplace=True)
        x = self.primaryCaps(x)
        x = self.digitCaps(x)
        classes = (x ** 2).sum(dim=-1) ** 0.5
        classes = F.softmax(classes, dim=-1)
        if y is None:
            # Get most active capsule
            _, max_length_indices = classes.max(dim=1)
            y = Variable(torch.eye(2)).cuda().index_select(dim=0, index=Variable(max_length_indices.data))
        reconstructions = self.decoder((x * y[:, :, None]).view(x.size(0), -1))
        return classes, reconstructions

In [7]:
class LossFunction(nn.Module):
    def __init__(self):
        super(LossFunction, self).__init__()
        self.reconstruction_loss = nn.MSELoss(size_average=False)

    def forward(self, img, target, classes, reconstructions):
        fn_1 = F.relu(0.9 - classes, inplace=True) ** 2  # Calculated for correct digit cap
        fn_2 = F.relu(classes - 0.1, inplace=True) ** 2  # Calculated for incorrect digit cap
        margin_loss = target * fn_1 + 0.5 * (1. - target) * fn_2
        margin_loss = margin_loss.sum()
        img = img.view(reconstructions.size()[0], -1)
        reconstruction_loss = self.reconstruction_loss(reconstructions, img)
        return (margin_loss + 0.0005 * reconstruction_loss) / img.size(0)

In [8]:
train_loss = []
test_loss = []
def train(train_loader, epoch):
    global model
    model.train()
    if torch.cuda.is_available():
        model = model.cuda()
    tr_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        batch_size = data.size(0)
        labels = target
        data, target = Variable(data), Variable(target)
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        optimizer.zero_grad()
        capsule_loss = LossFunction()
        labels = torch.LongTensor(labels)
        labels = torch.eye(2).index_select(dim=0, index=labels)
        labels = Variable(labels).cuda()
        optimizer.zero_grad()
        classes, reconstructions = model(data, labels)
        loss = capsule_loss(data, labels, classes, reconstructions)
        loss.backward()
        optimizer.step()
        tr_loss += loss.item()
        pred = classes.data.max(1, keepdim=True)[1]
        if (batch_idx + 1)% 500 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))
    train_loss.append(tr_loss / len(train_loader))

In [9]:
def evaluate(data_loader):
    global model
    model.eval()
    loss = 0
    
    for data, target in data_loader:
        labels = target
        data, target = Variable(data, volatile=True), Variable(target)
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        labels = torch.LongTensor(labels)
        labels = torch.eye(10).index_select(dim=0, index=labels)
        labels = Variable(labels).cuda()
        classes, reconstructions = model(data)
        capsule_loss = LossFunction()
        loss += capsule_loss(data, labels, classes, reconstructions).item()
        pred = classes.data.max(1, keepdim=True)[1]
        
    loss /= len(data_loader.dataset)
    test_loss.append(loss)
    print('\nAverage Validation loss: {:.6f}\n'.format(loss))

In [10]:
summary(CapsuleNetwork().cuda(), input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 256, 20, 20]          20,992
            Conv2d-2             [-1, 32, 6, 6]         663,584
            Conv2d-3             [-1, 32, 6, 6]         663,584
            Conv2d-4             [-1, 32, 6, 6]         663,584
            Conv2d-5             [-1, 32, 6, 6]         663,584
            Conv2d-6             [-1, 32, 6, 6]         663,584
            Conv2d-7             [-1, 32, 6, 6]         663,584
            Conv2d-8             [-1, 32, 6, 6]         663,584
            Conv2d-9             [-1, 32, 6, 6]         663,584
     CapsuleLevel-10              [-1, 1152, 8]               0
     CapsuleLevel-11                [-1, 2, 16]               0
           Linear-12                  [-1, 512]          16,896
             ReLU-13                  [-1, 512]               0
           Linear-14                 [-

In [11]:
model = CapsuleNetwork()
optimizer = optim.Adam(model.parameters(), lr=0.01)
n_epochs = 15
for epoch in range(n_epochs):
    train(train_loader, epoch)  #Training the model
    evaluate(val_loader)  #evaluating the model

RuntimeError: The size of tensor a (10) must match the size of tensor b (2) at non-singleton dimension 1