In [1]:
import cv

In [2]:
# List the names exposed by the cv package namespace.
cv.__dir__()

['__name__',
 '__doc__',
 '__package__',
 '__loader__',
 '__spec__',
 '__path__',
 '__file__',
 '__cached__',
 '__builtins__',
 'cnn',
 'CNN',
 'vgg',
 'VGG',
 'vgg11',
 'vgg13',
 'vgg16',
 'vgg19',
 'inception',
 'Inception',
 'inception_v3',
 'resnet',
 'ResNet',
 'resnet18',
 'resnet34',
 'resnet50',
 'resnet101',
 'resnet152',
 'resnext',
 'nn',
 'F',
 'IdentityLayers_C',
 'ResNeXt',
 'cfg',
 'resnext26',
 'resnext50',
 'resnext101',
 'capsule_network',
 'squash',
 'CapsuleNetwork',
 'CapsuleLoss']

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

# cnn

In [4]:
# Instantiate the package's CNN, move it to the GPU, and display its layer structure.
model = cv.cnn().cuda()
model

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
  )
  (fc): Linear(in_features=1568, out_features=10, bias=True)
)

In [5]:
# Convert each image to a tensor, then standardise it with one-channel mean/std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training split: 256-image mini-batches, reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(root='../CV/data', train=True, download=False, transform=transform),
    batch_size=256,
    shuffle=True,
)

# Held-out split, loaded with the same preprocessing.
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(root='../CV/data', train=False, transform=transform),
    batch_size=256,
    shuffle=True,
)

In [6]:
# Adam with its default hyper-parameters over all model weights;
# the cross-entropy loss module is moved to the GPU alongside the model.
optimizer = optim.Adam(model.parameters())
loss_func = nn.CrossEntropyLoss().cuda()

def train(train_loader):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``loss_func``.
    Every batch is moved to the GPU before the forward pass.

    Returns:
        The mean of the per-batch losses (sum of batch losses divided by
        the number of batches).
    """
    model.train()
    running_loss = 0
    for inputs, targets in train_loader:
        inputs = Variable(inputs).cuda()
        targets = Variable(targets).cuda()

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        batch_loss = loss_func(model(inputs), targets)
        running_loss += batch_loss.data[0]

        batch_loss.backward()
        optimizer.step()

    return running_loss / len(train_loader)

def valid(test_loader):
    """Evaluate the notebook-level ``model`` on ``test_loader``.

    Inputs are wrapped as volatile Variables and moved to the GPU.

    Returns:
        A ``(val_loss, val_acc)`` tuple: the mean per-batch loss and the
        fraction of samples whose highest-scoring class equals the label.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0
    for inputs, targets in test_loader:
        inputs = Variable(inputs, volatile=True).cuda()
        targets = Variable(targets, volatile=True).cuda()

        logits = model(inputs)
        loss_sum += loss_func(logits, targets).data[0]

        # Index of the max along dim 1; keepdim so the labels can be
        # reshaped to the same layout for the element-wise comparison.
        predicted = logits.data.max(1, keepdim=True)[1]
        n_correct += predicted.eq(targets.data.view_as(predicted)).cpu().sum()
        n_seen += targets.size(0)

    return loss_sum / len(test_loader), n_correct / n_seen

In [7]:
# Train for a fixed number of epochs, keeping the per-epoch metrics.
epochs = 10
loss_list, val_loss_list, val_acc_list = [], [], []
for epoch in range(epochs):
    loss = train(train_loader)
    val_loss, val_acc = valid(test_loader)

    # logging
    loss_list.append(loss)
    val_loss_list.append(val_loss)
    val_acc_list.append(val_acc)

    print('epoch %d, loss: %.4f val_loss: %.4f val_acc: %.4f'
          % (epoch, loss, val_loss, val_acc))

epoch 0, loss: 0.4013 val_loss: 0.1018 val_acc: 0.9714
epoch 1, loss: 0.0927 val_loss: 0.0659 val_acc: 0.9797
epoch 2, loss: 0.0653 val_loss: 0.0464 val_acc: 0.9846
epoch 3, loss: 0.0533 val_loss: 0.0400 val_acc: 0.9868
epoch 4, loss: 0.0443 val_loss: 0.0406 val_acc: 0.9858
epoch 5, loss: 0.0377 val_loss: 0.0376 val_acc: 0.9876
epoch 6, loss: 0.0338 val_loss: 0.0339 val_acc: 0.9889
epoch 7, loss: 0.0312 val_loss: 0.0321 val_acc: 0.9895
epoch 8, loss: 0.0267 val_loss: 0.0317 val_acc: 0.9900
epoch 9, loss: 0.0246 val_loss: 0.0387 val_acc: 0.9869


# vgg

In [4]:
# Build VGG-11 for 1-channel input and 10 output classes on the GPU, then display its layers.
model = cv.vgg11(in_channels=1, num_classes=10).cuda()
model

VGG(
  (convs): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace)
    (15): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (16): Conv2d(5

In [4]:
# MNIST images have a single channel, so normalise with one-channel statistics
# (the same values the CNN section uses). The previous 3-channel ImageNet
# mean/std does not match a 1-channel tensor: older torchvision silently used
# only the first entry, newer versions raise a shape error.
normalize = transforms.Normalize(mean=[0.1307], std=[0.3081])

# Upscale the digits to the 224x224 resolution this section feeds the model.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../CV/data', train=True, download=False, transform=transform),
    batch_size=64, num_workers=2, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../CV/data', train=False, download=False, transform=transform),
    batch_size=64, num_workers=2, shuffle=True)

In [6]:
# Plain SGD with lr=0.001; the loss module is not moved to the GPU in this cell.
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

def train(train_loader):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``loss_func``.
    Every batch is moved to the GPU before the forward pass.

    Returns:
        The mean of the per-batch losses (sum of batch losses divided by
        the number of batches).
    """
    model.train()
    running_loss = 0
    for inputs, targets in train_loader:
        inputs = Variable(inputs).cuda()
        targets = Variable(targets).cuda()

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        batch_loss = loss_func(model(inputs), targets)
        running_loss += batch_loss.data[0]

        batch_loss.backward()
        optimizer.step()

    return running_loss / len(train_loader)

def valid(test_loader):
    """Evaluate the notebook-level ``model`` on ``test_loader``.

    Inputs are wrapped as volatile Variables and moved to the GPU.

    Returns:
        A ``(val_loss, val_acc)`` tuple: the mean per-batch loss and the
        fraction of samples whose highest-scoring class equals the label.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0
    for inputs, targets in test_loader:
        inputs = Variable(inputs, volatile=True).cuda()
        targets = Variable(targets, volatile=True).cuda()

        logits = model(inputs)
        loss_sum += loss_func(logits, targets).data[0]

        # Index of the max along dim 1; keepdim so the labels can be
        # reshaped to the same layout for the element-wise comparison.
        predicted = logits.data.max(1, keepdim=True)[1]
        n_correct += predicted.eq(targets.data.view_as(predicted)).cpu().sum()
        n_seen += targets.size(0)

    return loss_sum / len(test_loader), n_correct / n_seen

In [7]:
# Single-epoch run, keeping the per-epoch metrics.
epochs = 1
loss_list, val_loss_list, val_acc_list = [], [], []
for epoch in range(epochs):
    loss = train(train_loader)
    val_loss, val_acc = valid(test_loader)

    # logging
    loss_list.append(loss)
    val_loss_list.append(val_loss)
    val_acc_list.append(val_acc)

    print('epoch %d, loss: %.4f val_loss: %.4f val_acc: %.4f'
          % (epoch, loss, val_loss, val_acc))

Process Process-1:
Process Process-2:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/lapis/.pyenv/versions/anaconda3-4.4.0/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/lapis/.pyenv/versions/anaconda3-4.4.0/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/lapis/.pyenv/versions/anaconda3-4.4.0/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/lapis/.pyenv/versions/anaconda3-4.4.0/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/lapis/.pyenv/versions/anaconda3-4.4.0/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = index_queue.get()
  File "/home/lapis/.pyenv/versions/anaconda3-4.4.0/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = 

KeyboardInterrupt: 

In [8]:
# Pull a single mini-batch and move it to the GPU for a quick forward-pass check.
# The built-in next() drives the iterator protocol directly instead of relying
# on a Python-2-style .next() alias on the loader's iterator.
x, y = next(iter(train_loader))
b_x = Variable(x).cuda()
b_y = Variable(y).cuda()

In [10]:
# Forward pass only (no backward/step): compute the loss of the current model
# on the batch held in b_x / b_y and display it.
model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.3045
[torch.cuda.FloatTensor of size 1 (GPU 0)]

In [5]:
# Pull a single mini-batch and keep it on the CPU for the forward-pass checks.
# The built-in next() drives the iterator protocol directly instead of relying
# on a Python-2-style .next() alias on the loader's iterator.
x, y = next(iter(train_loader))
b_x = Variable(x)
b_y = Variable(y)

In [9]:
# Forward-pass check for VGG-11 with batch_norm=True on the CPU.
# b_x / b_y must already exist from an earlier cell. With 10 classes, a loss
# near ln(10) ~= 2.30 is what an untrained classifier is expected to give.
model = cv.vgg11(in_channels=1, num_classes=10, batch_norm=True)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.3037
[torch.FloatTensor of size 1]

In [10]:
# Forward-pass check for VGG-13 (default batch_norm setting) on the CPU.
# b_x / b_y must already exist from an earlier cell.
model = cv.vgg13(in_channels=1, num_classes=10)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.3027
[torch.FloatTensor of size 1]

In [11]:
# Forward-pass check for VGG-13 with batch_norm=True on the CPU.
# b_x / b_y must already exist from an earlier cell.
model = cv.vgg13(in_channels=1, num_classes=10, batch_norm=True)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.3074
[torch.FloatTensor of size 1]

In [7]:
# Forward-pass check for VGG-16 with batch_norm=True on the CPU.
# b_x / b_y must already exist from an earlier cell.
model = cv.vgg16(in_channels=1, num_classes=10, batch_norm=True)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.3056
[torch.FloatTensor of size 1]

In [8]:
# Forward-pass check for VGG-16 (default batch_norm setting) on the CPU.
# b_x / b_y must already exist from an earlier cell.
model = cv.vgg16(in_channels=1, num_classes=10)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.3007
[torch.FloatTensor of size 1]

In [6]:
# Forward-pass check for VGG-19 (default batch_norm setting) on the CPU.
# b_x / b_y must already exist from an earlier cell.
model = cv.vgg19(in_channels=1, num_classes=10)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.3029
[torch.FloatTensor of size 1]

In [7]:
# Forward-pass check for VGG-19 with batch_norm=True on the CPU.
# b_x / b_y must already exist from an earlier cell.
model = cv.vgg19(in_channels=1, num_classes=10, batch_norm=True)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.3076
[torch.FloatTensor of size 1]

# inception

In [4]:
# Build Inception-v3 for 1-channel input and 10 output classes on the GPU, then display its layers.
model = cv.inception_v3(in_channels=1, num_classes=10).cuda()
model

Inception(
  (base): BASE(
    (conv2d_bn_1): BNConv2d(
      (conv): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True)
    )
    (conv2d_bn_2): BNConv2d(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True)
    )
    (conv2d_bn_3): BNConv2d(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True)
    )
    (conv2d_bn_4): BNConv2d(
      (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True)
    )
    (conv2d_bn_5): BNConv2d(
      (conv): Conv2d(80, 192, kernel_size=(3, 3), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True)
    )
  )
  (mixed0): NIN_A(
    (branch1x1): BNConv2d(
      (conv): Conv2d(

In [5]:
# MNIST images have a single channel, so normalise with one-channel statistics
# (the same values the CNN section uses). The previous 3-channel ImageNet
# mean/std does not match a 1-channel tensor: older torchvision silently used
# only the first entry, newer versions raise a shape error.
normalize = transforms.Normalize(mean=[0.1307], std=[0.3081])

# Random augmentation is for training images only.
# NOTE(review): horizontal flips mirror the digits - confirm this is intended.
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Deterministic resize + centre crop for the held-out split, so evaluation
# does not depend on random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../CV/data', train=True, download=False, transform=transform),
    batch_size=64, num_workers=2, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../CV/data', train=False, download=False, transform=test_transform),
    batch_size=64, num_workers=2, shuffle=True)

In [6]:
# Forward-pass check for the Inception model on one GPU batch.
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()
loss_func.cuda()

# The built-in next() drives the iterator protocol directly instead of relying
# on a Python-2-style .next() alias on the loader's iterator.
x, y = next(iter(train_loader))
b_x = Variable(x).cuda()
b_y = Variable(y).cuda()
model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 3.9429
[torch.cuda.FloatTensor of size 1 (GPU 0)]

# resnet

In [4]:
# MNIST images have a single channel, so normalise with one-channel statistics
# (the same values the CNN section uses). The previous 3-channel ImageNet
# mean/std does not match a 1-channel tensor: older torchvision silently used
# only the first entry, newer versions raise a shape error.
normalize = transforms.Normalize(mean=[0.1307], std=[0.3081])

# Random augmentation is for training images only.
# NOTE(review): horizontal flips mirror the digits - confirm this is intended.
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Deterministic resize + centre crop for the held-out split, so evaluation
# does not depend on random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../CV/data', train=True, download=False, transform=transform),
    batch_size=64, num_workers=2, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../CV/data', train=False, download=False, transform=test_transform),
    batch_size=64, num_workers=2, shuffle=True)

In [5]:
# Build ResNet-18 for 1-channel input and 10 output classes (kept on the CPU), then display its layers.
model = cv.resnet18(in_channels=1, num_classes=10)
model

ResNet(
  (conv1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU(inplace)
  )
  (conv2): Sequential(
    (0): IdentityLayers(
      (conv): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU(inplace)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): IdentityLayers(
      (conv): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU(inplace)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e

In [6]:
# Forward-pass check for the current model on one CPU batch.
# (A stray bare `loss_func` expression, which had no effect, was removed.)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

# The built-in next() drives the iterator protocol directly instead of relying
# on a Python-2-style .next() alias on the loader's iterator.
x, y = next(iter(train_loader))
b_x = Variable(x)
b_y = Variable(y)
model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.3934
[torch.FloatTensor of size 1]

In [8]:
# Build ResNet-34 for 1-channel input and 10 output classes (kept on the CPU), then display its layers.
model = cv.resnet34(in_channels=1, num_classes=10)
model

ResNet(
  (conv1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU(inplace)
  )
  (conv2): Sequential(
    (0): IdentityLayers(
      (conv): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU(inplace)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): IdentityLayers(
      (conv): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU(inplace)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e

In [9]:
# Forward-pass check for the current model on one CPU batch.
# (A stray bare `loss_func` expression, which had no effect, was removed.)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

# The built-in next() drives the iterator protocol directly instead of relying
# on a Python-2-style .next() alias on the loader's iterator.
x, y = next(iter(train_loader))
b_x = Variable(x)
b_y = Variable(y)
model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.4719
[torch.FloatTensor of size 1]

In [10]:
# Build ResNet-50 for 1-channel input and 10 output classes (kept on the CPU), then display its layers.
model = cv.resnet50(in_channels=1, num_classes=10)
model

ResNet(
  (conv1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU(inplace)
  )
  (conv2): Sequential(
    (0): IdentityLayers(
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
      (conv): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU(inplace)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (5): ReLU(inplace)
        (6): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): IdentityLayers(


In [11]:
# Forward-pass check for the current model on one CPU batch.
# (A stray bare `loss_func` expression, which had no effect, was removed.)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

# The built-in next() drives the iterator protocol directly instead of relying
# on a Python-2-style .next() alias on the loader's iterator.
x, y = next(iter(train_loader))
b_x = Variable(x)
b_y = Variable(y)
model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.5462
[torch.FloatTensor of size 1]

In [12]:
# Build ResNet-101 for 1-channel input and 10 output classes (kept on the CPU), then display its layers.
model = cv.resnet101(in_channels=1, num_classes=10)
model

ResNet(
  (conv1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU(inplace)
  )
  (conv2): Sequential(
    (0): IdentityLayers(
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
      (conv): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU(inplace)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (5): ReLU(inplace)
        (6): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): IdentityLayers(


In [13]:
# Forward-pass check for the current model on one CPU batch.
# (A stray bare `loss_func` expression, which had no effect, was removed.)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

# The built-in next() drives the iterator protocol directly instead of relying
# on a Python-2-style .next() alias on the loader's iterator.
x, y = next(iter(train_loader))
b_x = Variable(x)
b_y = Variable(y)
model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.5142
[torch.FloatTensor of size 1]

In [14]:
# Build ResNet-152 for 1-channel input and 10 output classes (kept on the CPU), then display its layers.
model = cv.resnet152(in_channels=1, num_classes=10)
model

ResNet(
  (conv1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU(inplace)
  )
  (conv2): Sequential(
    (0): IdentityLayers(
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
      (conv): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU(inplace)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (5): ReLU(inplace)
        (6): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): IdentityLayers(


In [15]:
# Forward-pass check for the current model on one CPU batch.
# (A stray bare `loss_func` expression, which had no effect, was removed.)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

# The built-in next() drives the iterator protocol directly instead of relying
# on a Python-2-style .next() alias on the loader's iterator.
x, y = next(iter(train_loader))
b_x = Variable(x)
b_y = Variable(y)
model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

Variable containing:
 2.4110
[torch.FloatTensor of size 1]

# resnext

In [9]:
# Per-channel (3-channel) mean/std shared by both CIFAR-10 pipelines.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])


# Training pipeline: random 224-pixel crop plus random horizontal flip.
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        '../cv/data', train=True, download=False,
        transform=transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
    batch_size=65, num_workers=2, shuffle=True)

# Evaluation pipeline: resize to 256, then a fixed 224-pixel centre crop.
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        '../cv/data', train=False,
        transform=transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
    batch_size=65, num_workers=2, shuffle=True)

In [4]:
# Build ResNeXt-50 with cardinality=32 and base_width=4 (kept on the CPU), then display its layers.
model = cv.resnext50(cardinality=32, base_width=4)
model

ResNeXt(
  (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (residual1): Sequential(
    (0): IdentityLayers_C(
      (conv): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU(inplace)
        (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
        (5): ReLU(inplace)
        (6): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): IdentityLayers_C(
      (conv): Sequential

In [11]:
# Forward-pass check for the current model on one CPU batch.
# (A stray bare `loss_func` expression, which had no effect, was removed.)
optimizer = optim.SGD(model.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

# The built-in next() drives the iterator protocol directly instead of relying
# on a Python-2-style .next() alias on the loader's iterator.
x, y = next(iter(train_loader))
b_x = Variable(x)
b_y = Variable(y)
model.train()
optimizer.zero_grad()
outputs = model(b_x)

loss = loss_func(outputs, b_y)
loss

KeyboardInterrupt: 

In [6]:
# Build ResNeXt-26 with cardinality=8 and base_width=16 (kept on the CPU), then display its layers.
model = cv.resnext26(cardinality=8, base_width=16)
model

ResNeXt(
  (conv): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (residual1): Sequential(
    (0): IdentityLayers_C(
      (conv): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
        (2): ReLU(inplace)
        (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
        (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
        (5): ReLU(inplace)
        (6): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (1): IdentityLayers_C(
      (conv): Sequential(

# capsule net

In [4]:
# Build the capsule network with use_gpu=True, move it to the GPU, and display its layers.
model = cv.capsule_network(use_gpu=True).cuda()
model

CapsuleNetwork(
  (conv): Conv2d(1, 256, kernel_size=(9, 9), stride=(1, 1))
  (primary_caps): PrimaryCaps(
    (capsules): ModuleList(
      (0): Conv2d(256, 32, kernel_size=(9, 9), stride=(2, 2))
      (1): Conv2d(256, 32, kernel_size=(9, 9), stride=(2, 2))
      (2): Conv2d(256, 32, kernel_size=(9, 9), stride=(2, 2))
      (3): Conv2d(256, 32, kernel_size=(9, 9), stride=(2, 2))
      (4): Conv2d(256, 32, kernel_size=(9, 9), stride=(2, 2))
      (5): Conv2d(256, 32, kernel_size=(9, 9), stride=(2, 2))
      (6): Conv2d(256, 32, kernel_size=(9, 9), stride=(2, 2))
      (7): Conv2d(256, 32, kernel_size=(9, 9), stride=(2, 2))
    )
  )
  (disit_caps): DigitCaps(
  )
  (decoder): Sequential(
    (0): Linear(in_features=160, out_features=512, bias=True)
    (1): ReLU(inplace)
    (2): Linear(in_features=512, out_features=1024, bias=True)
    (3): ReLU(inplace)
    (4): Linear(in_features=1024, out_features=784, bias=True)
    (5): Sigmoid()
  )
)

In [5]:
# Convert each image to a tensor, then standardise it with one-channel mean/std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training split: 256-image mini-batches, two loader workers, reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(root='../CV/data', train=True, download=False, transform=transform),
    batch_size=256,
    num_workers=2,
    shuffle=True,
)

# Held-out split, loaded with the same preprocessing.
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(root='../CV/data', train=False, transform=transform),
    batch_size=256,
    num_workers=2,
    shuffle=True,
)

In [8]:
# numpy is used by the accuracy print-outs in train()/valid() below.
import numpy as np
# Adam with its default hyper-parameters; the package's capsule loss is moved to the GPU.
optimizer = optim.Adam(model.parameters())
loss_func = cv.CapsuleLoss().cuda()

def train(train_loader):
    """Run one optimisation pass of the capsule network over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``loss_func``.
    Integer labels are one-hot encoded (10 classes) before being passed to
    both the model and the loss. Every 100th batch, the accuracy of that
    batch is printed.

    Returns:
        The mean of the per-batch losses.
    """
    model.train()
    train_loss = 0
    for batch_idx, (x, y) in enumerate(train_loader):
        b_x = Variable(x).cuda()
        # Row i of the 10x10 identity matrix is the one-hot vector for class i.
        b_y = Variable(torch.eye(10).index_select(dim=0, index=y)).cuda()

        optimizer.zero_grad()
        outputs, reconstr = model(b_x, b_y)

        loss = loss_func(b_x, b_y, outputs, reconstr)
        train_loss += loss.data[0]

        loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            predicted = np.argmax(outputs.data.cpu().numpy(), 1)
            expected = np.argmax(b_y.data.cpu().numpy(), 1)
            # Average over the actual batch size rather than a hard-coded 256,
            # so a smaller final batch (or another batch_size) is not
            # under-reported.
            print("train accuracy:", np.mean(predicted == expected))

    train_loss = train_loss / len(train_loader)
    return train_loss

def valid(test_loader):
    """Evaluate the capsule network on ``test_loader``.

    Uses the notebook-level ``model`` and ``loss_func``. Inputs are wrapped
    as volatile Variables; labels are one-hot encoded (10 classes) for the
    loss. Every 100th batch, the accuracy of that batch is printed.

    Returns:
        The mean of the per-batch losses.
    """
    model.eval()
    test_loss = 0
    for batch_idx, (x, y) in enumerate(test_loader):
        b_x = Variable(x, volatile=True).cuda()
        # Row i of the 10x10 identity matrix is the one-hot vector for class i.
        b_y = Variable(torch.eye(10).index_select(dim=0, index=y), volatile=True).cuda()

        # No labels are passed to the model here, unlike in train().
        outputs, reconstr = model(b_x)
        loss = loss_func(b_x, b_y, outputs, reconstr)
        test_loss += loss.data[0]

        if batch_idx % 100 == 0:
            predicted = np.argmax(outputs.data.cpu().numpy(), 1)
            expected = np.argmax(b_y.data.cpu().numpy(), 1)
            # Average over the actual batch size rather than a hard-coded 256,
            # so a smaller final batch (or another batch_size) is not
            # under-reported.
            print("test accuracy:", np.mean(predicted == expected))

    val_loss = test_loss / len(test_loader)
    return val_loss

In [9]:
# Train for a fixed number of epochs, keeping the per-epoch losses.
epochs = 3
loss_list, val_loss_list = [], []
for epoch in range(epochs):
    loss = train(train_loader)
    val_loss = valid(test_loader)

    # logging
    loss_list.append(loss)
    val_loss_list.append(val_loss)

    print('epoch %d, loss: %.4f val_loss: %.4f'
          % (epoch, loss, val_loss))

train accuracy: 0.28125
train accuracy: 0.921875
train accuracy: 0.9609375
test accuracy: 0.953125
epoch 0, loss: 0.8429 val_loss: 0.7816
train accuracy: 0.94921875
train accuracy: 0.9765625
train accuracy: 0.95703125
test accuracy: 0.9921875
epoch 1, loss: 0.7654 val_loss: 0.7554
train accuracy: 0.9609375
train accuracy: 0.9765625
train accuracy: 0.97265625
test accuracy: 0.96875
epoch 2, loss: 0.7480 val_loss: 0.7424
