In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
# use cuda or not
root = './data'
download = './data'
use_cuda = torch.cuda.is_available()
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
train_set = dset.MNIST(root=root, train=True, transform=trans, download=download)
test_set = dset.MNIST(root=root, train=False, transform=trans)
batch_size = 128
train_loader = torch.utils.data.DataLoader(
                 dataset=train_set,
                 batch_size=batch_size,
                 shuffle=True)
test_loader = torch.utils.data.DataLoader(
                dataset=test_set,
                batch_size=batch_size,
                shuffle=False)

In [2]:
class MLPNet(nn.Module):
    def __init__(self):
        super(MLPNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 10)
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [3]:
# With square kernels and equal stride
filters = Variable(torch.randn(8,4,3,3))
inputs = Variable(torch.randn(1,4,5,5))
F.conv2d(inputs, filters, padding=1)

tensor([[[[  2.6334,   3.0732,   3.5752,  -2.8558,  -0.6678],
          [ -6.6182,   2.8962,   2.3551,  -1.1548,  10.1742],
          [  3.7924,  -5.0099,  -1.5768,  -3.5562,  -4.3213],
          [  1.7063,  -7.2806,  -5.5358,  -3.2389,  -0.7981],
          [  0.7166,  -0.8292,   1.8475,   0.1187,  -4.3306]],

         [[  9.7406,  10.6250,  -3.2297,   5.2186,  -1.4527],
          [  2.8501,   0.2087,   6.6916,   8.8027,  -9.1573],
          [ -3.0890,  -0.4009,   6.0380,   0.1441,   1.6997],
          [ -7.2276,   2.5110,  -1.1458,  -0.5283,   2.8640],
          [  2.5072,   2.2631,   0.7670,  -3.1104,   0.8933]],

         [[ -0.1179,   1.1403,   5.9147,   7.7415,   2.2282],
          [ -0.7132,  -8.7781,  -1.0048, -10.1007,  -0.5111],
          [  2.3851,  -2.3365,  -0.6726,  -1.4656,  -2.8438],
          [  3.5161,   4.7662,  -0.4206,   6.2132,  -3.2529],
          [ -4.1244,   0.8008,  -0.2242,   6.7110,   2.4739]],

         [[  0.0834,   8.8400,   4.5404,   9.8741,  -1.5970],
  

In [4]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [7]:
## training
model = LeNet()
if use_cuda:
    model = model.cuda()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
ceriation = nn.CrossEntropyLoss()
for epoch in range(10):
    # trainning
    ave_loss = 0
    for batch_idx, (x, target) in enumerate(train_loader):
        optimizer.zero_grad()
        if use_cuda:
            x, target = x.cuda(), target.cuda()
        x, target = Variable(x), Variable(target)
        out = model(x)
        loss = ceriation(out, target)
        ave_loss = ave_loss * 0.9 + loss.item() * 0.1
        loss.backward()
        optimizer.step()
        if (batch_idx+1) % 100 == 0 or (batch_idx+1) == len(train_loader):
            print('==>>> epoch: {}, batch index: {}, train loss: {:.6f}'.format(
                epoch, batch_idx+1, ave_loss))
    # testing
    correct_cnt, ave_loss = 0, 0
    total_cnt = 0
    for batch_idx, (x, target) in enumerate(test_loader):
        if use_cuda:
            x, targe = x.cuda(), target.cuda()
        x, target = Variable(x, volatile=True), Variable(target, volatile=True)
        out = model(x).cuda()
        loss = ceriation(out, target)
        _, pred_label = torch.max(out.data, 1)
        total_cnt += x.data.size()[0]
        correct_cnt += (pred_label == target.data).sum()
        # smooth average
        ave_loss = ave_loss * 0.9 + loss.item() * 0.1
        
        if(batch_idx+1) % 100 == 0 or (batch_idx+1) == len(test_loader):
            print('==>>> epoch: {}, batch index: {}, test loss: {:.6f}, acc: {:.3f}'.format(
                epoch, batch_idx+1, ave_loss, correct_cnt * 1.0 / total_cnt))

==>>> epoch: 0, batch index: 100, train loss: 0.616393
==>>> epoch: 0, batch index: 200, train loss: 0.250498
==>>> epoch: 0, batch index: 300, train loss: 0.181325
==>>> epoch: 0, batch index: 400, train loss: 0.141158
==>>> epoch: 0, batch index: 469, train loss: 0.121895




RuntimeError: Expected object of backend CUDA but got backend CPU for argument #2 'target'

In [None]:
torch.save(model.state_dict(), './')   #保存网络参数
the_model = TheModelClass(*args, **kwargs)
the_model.load_state_dict(torch.load('./'))  #读取网络参数