In [1]:
import torch
x = torch.randn(3, requires_grad=True)
y = x * 16
# for i in range(3):
# while y.data.norm() < 1000:
#     y = y * 2

print x,y
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(gradients)

print gradients
print x.grad

tensor([ 0.7338, -0.1399, -0.7515]) tensor([ 11.7402,  -2.2390, -12.0240])
tensor([ 1.0000e-01,  1.0000e+00,  1.0000e-04])
tensor([  1.6000,  16.0000,   0.0016])


In [1]:
# prepare training/testing data
import torch
from torchvision import datasets, transforms
from os.path import join
mnist_path = join('./','data','mnist')
batch_size = 128

train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(mnist_path, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(mnist_path, train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)



In [2]:
# construct neuron network
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        ker_size = 3
        padding = ker_size-1


#         self.conv_lays = []
#         in_ch = 1
#         for i in range(3):
#             out_ch = in_ch *2
#             conv_lay = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, stride=2, padding=padding)
#             in_ch = out_ch
            
        
        out_ch = 16
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=out_ch, kernel_size=3, stride=2, padding=padding)
        
        out_ch = out_ch*2
        self.conv2 = nn.Conv2d(in_channels=out_ch/2, out_channels=out_ch, kernel_size=3, stride=2, padding=padding)
        
        out_ch = out_ch*2
        self.conv3 = nn.Conv2d(in_channels=out_ch/2, out_channels=out_ch, kernel_size=3, stride=2, padding=padding)
            
        
        self.fc = nn.Linear(out_ch*6*6, 10)
#         Fully convolution layer

    def forward(self, x):
        # Max pooling over a (2, 2) window
#         for conv_lay in self.conv_lays:
#             x = conv_lay(x)
#             x = F.relu(x)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        rest_dim = self.num_flat_features(x)
        x = x.view(-1, rest_dim)
        x = self.fc(x)
#         x = F.softmax(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for dim in size:
            num_features *= dim
        return num_features
net = Net()
print net

Net(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(2, 2))
  (fc): Linear(in_features=2304, out_features=10, bias=True)
)


In [3]:
# loss function
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=0.01)



In [5]:
# training setting
with_gpu = torch.cuda.is_available()
device = torch.device("cuda:0" if with_gpu else "cpu")
model = Net().to(device)

epoch_num = 100
for epoch in range(epoch_num):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
#         print inputs.shape

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 1 == 0:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0



# environment setting




[1,     1] loss: 0.001
[1,     2] loss: 0.001
[1,     3] loss: 0.001
[1,     4] loss: 0.001
[1,     5] loss: 0.000
[1,     6] loss: 0.000
[1,     7] loss: 0.000
[1,     8] loss: 0.001
[1,     9] loss: 0.001
[1,    10] loss: 0.000
[1,    11] loss: 0.000
[1,    12] loss: 0.000
[1,    13] loss: 0.000
[1,    14] loss: 0.000
[1,    15] loss: 0.000
[1,    16] loss: 0.000
[1,    17] loss: 0.000
[1,    18] loss: 0.000
[1,    19] loss: 0.000
[1,    20] loss: 0.000
[1,    21] loss: 0.000
[1,    22] loss: 0.000
[1,    23] loss: 0.000
[1,    24] loss: 0.000
[1,    25] loss: 0.000
[1,    26] loss: 0.000
[1,    27] loss: 0.000
[1,    28] loss: 0.000
[1,    29] loss: 0.000
[1,    30] loss: 0.000
[1,    31] loss: 0.000
[1,    32] loss: 0.000
[1,    33] loss: 0.000
[1,    34] loss: 0.000
[1,    35] loss: 0.000
[1,    36] loss: 0.000
[1,    37] loss: 0.000
[1,    38] loss: 0.000
[1,    39] loss: 0.000
[1,    40] loss: 0.000
[1,    41] loss: 0.000
[1,    42] loss: 0.000
[1,    43] loss: 0.000
[1,    44] 

KeyboardInterrupt: 

In [15]:
from torchvision import datasets, transforms
from os.path import join
mnist_path = join('./','data','mnist')
batch_size = 128

train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(mnist_path, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(mnist_path, train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)


In [None]:
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [4]:
a = [1,2,3,4]
for i,b in enumerate(a,0):
    print i,b

0 1
1 2
2 3
3 4
