In [1]:
import torch
x = torch.randn(3, requires_grad=True)
y = x * 16
# for i in range(3):
# while y.data.norm() < 1000:
#     y = y * 2

print x,y
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(gradients)

print gradients
print x.grad

tensor([ 0.7338, -0.1399, -0.7515]) tensor([ 11.7402,  -2.2390, -12.0240])
tensor([ 1.0000e-01,  1.0000e+00,  1.0000e-04])
tensor([  1.6000,  16.0000,   0.0016])


In [1]:
# prepare training/testing data
import torch
from torchvision import datasets, transforms
from os.path import join
mnist_path = join('./','data','mnist')
batch_size = 128
# batch_size = 2
train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(mnist_path, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(mnist_path, train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)



In [2]:
# construct neuron network
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        ker_size = 3
        padding = ker_size-1


#         self.conv_lays = []
#         in_ch = 1
#         for i in range(3):
#             out_ch = in_ch *2
#             conv_lay = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, stride=2, padding=padding)
#             in_ch = out_ch
            
        
        out_ch = 16
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=out_ch, kernel_size=3, stride=2, padding=padding)
        
        out_ch = out_ch*2
        self.conv2 = nn.Conv2d(in_channels=out_ch/2, out_channels=out_ch, kernel_size=3, stride=2, padding=padding)
        
        out_ch = out_ch*2
        self.conv3 = nn.Conv2d(in_channels=out_ch/2, out_channels=out_ch, kernel_size=3, stride=2, padding=padding)
        
            
        self.conv4 = nn.Conv2d(in_channels=out_ch, out_channels=1, kernel_size=3, stride=1, padding=0)
        
#         self.fc = nn.Linear(out_ch*6*6, 10)
#         Fully convolution layer

    def forward(self, x):
        # Max pooling over a (2, 2) window
#         for conv_lay in self.conv_lays:
#             x = conv_lay(x)
#             x = F.relu(x)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        rest_dim = self.num_flat_features(x)
        x = x.view(-1, rest_dim)
#         x = nn.AlphaDropout(0.5)(x)
#         x = self.fc(x)
#         x = F.softmax(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for dim in size:
            num_features *= dim
        return num_features
net = Net()
print net

Net(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
  (conv4): Conv2d(64, 1, kernel_size=(3, 3), stride=(1, 1))
)


In [3]:
# loss function
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=0.001)



In [4]:
# training setting

with_gpu = torch.cuda.is_available()
device = torch.device("cuda:0" if with_gpu else "cpu")
model = net.to(device)

def acc(net, data):
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    with torch.no_grad():
        outputs = net(inputs)
        max_vals, max_indices = torch.max(outputs,1)
        acc = (max_indices == labels).sum().cpu().data.numpy()/ float(max_indices.size()[0])
    return acc



epoch_num = 100
for epoch in range(epoch_num):
    running_loss = 0.0
    i = 1
    for data, test_data in zip(train_loader, train_loader):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
#         print inputs.shape

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_acc = acc(net, data)
        test_acc = acc(net, test_data)
        
        # print statistics
        running_loss += loss.item()
        if i % 500 == 0:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f train_acc: %.3f test_acc: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000, train_acc, test_acc))
            running_loss = 0.0
        i+=1



# environment setting




[1,    11] loss: 0.013 train_acc: 0.227 test_acc: 0.211
[1,    21] loss: 0.011 train_acc: 0.289 test_acc: 0.281
[1,    31] loss: 0.009 train_acc: 0.367 test_acc: 0.383
[1,    41] loss: 0.009 train_acc: 0.391 test_acc: 0.398
[1,    51] loss: 0.008 train_acc: 0.523 test_acc: 0.555
[1,    61] loss: 0.008 train_acc: 0.617 test_acc: 0.531
[1,    71] loss: 0.008 train_acc: 0.523 test_acc: 0.539
[1,    81] loss: 0.007 train_acc: 0.555 test_acc: 0.453
[1,    91] loss: 0.007 train_acc: 0.562 test_acc: 0.531
[1,   101] loss: 0.007 train_acc: 0.531 test_acc: 0.477
[1,   111] loss: 0.007 train_acc: 0.469 test_acc: 0.633
[1,   121] loss: 0.007 train_acc: 0.602 test_acc: 0.594
[1,   131] loss: 0.007 train_acc: 0.555 test_acc: 0.508
[1,   141] loss: 0.007 train_acc: 0.609 test_acc: 0.609
[1,   151] loss: 0.007 train_acc: 0.477 test_acc: 0.594
[1,   161] loss: 0.007 train_acc: 0.555 test_acc: 0.609
[1,   171] loss: 0.007 train_acc: 0.516 test_acc: 0.484
[1,   181] loss: 0.007 train_acc: 0.539 test_acc

KeyboardInterrupt: 