### import packages

In [31]:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
import os
import time
import copy

### data preprocess

In [2]:
# 数据预处理，做一些scale，随机crop，normalizaiotn到0-1之间
img_transform = {
    'train': transforms.Compose([
            transforms.Scale(300),
            transforms.RandomCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(0, 255)
        ]),
    'val': transforms.Compose([
            transforms.Scale(300),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(0, 255)
        ])
}

In [3]:
root_path = '../data'

In [4]:
#读取数据文件夹
dset = {
    x: ImageFolder(os.path.join(root_path, x), transform=img_transform[x])
    for x in ['train', 'val']
}

In [5]:
#读取数据
dataloader = {
    'train': DataLoader(dset['train'], batch_size=32, shuffle=True, num_workers=4),
    'val': DataLoader(dset['val'], batch_size=32, num_workers=4)
}

In [6]:
#确定数据size
data_size = {
    x: len(dataloader[x])
    for x in ['train', 'val']
}

In [7]:
print('size of train data set: {}'.format(data_size['train']))# 每个文件夹放了一张图片做一个示例
print('size of validation data set: {}'.format(data_size['val']))

size of train data set: 1
size of validation data set: 1


In [8]:
img_classes = dataloader['train'].dataset.classes

In [9]:
# 创建了三个省份作为示例，顺序按照文件夹的顺序
print('class of province: {}'.format(img_classes))

class of province: ['四川', '安徽', '浙江']


In [10]:
#判断能不能用cuda，增加鲁棒性
use_gpu = torch.cuda.is_available()

In [11]:
print("in my computer, cuda availabel? \n{}".format(use_gpu))

in my computer, cuda availabel? 
True


## build vgg net
之所以选择vgg因为这个问题不需要太复杂的网络，所以选择了一个相对简单的网络结构  
详细网络结构见下面网址
http://ethereon.github.io/netscope/#/gist/dc5003de6943ea5a6b8b

In [20]:
class vgg16(nn.Module):
    def __init__(self, in_c, out_class):
        super(vgg16, self).__init__()
        self.conv1 = nn.Sequential(
                nn.Conv2d(in_c, 64, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(64, 64, 3, stride=1, padding=1),
                nn.ReLU()
            )
        self.conv2 = nn.Sequential(
                nn.Conv2d(64, 128, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(128, 128, 3, stride=1, padding=1),
                nn.ReLU()
            )
        self.conv3 = nn.Sequential(
                nn.Conv2d(128, 256, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(256, 256, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(256, 256, 3, stride=1, padding=1),
                nn.ReLU()
            )
        self.conv4 = nn.Sequential(
                nn.Conv2d(256, 512, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU()
            )
        self.conv5 = nn.Sequential(
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU()
            )
        self.fc1 = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(4096, 1000),
                nn.ReLU(),
                nn.Dropout(0.5)
            )
        self.fc2 = nn.Sequential(
                nn.Linear(1000, 500),
                nn.ReLU(),
                nn.Dropout(0.5)
            )
        self.fc3 = nn.Linear(500, out_class)
        
    def forward(self, x):
        out = self.conv1(x)
        out = F.max_pool2d(out, 2, stride=2)
        out = self.conv2(out)
        out = F.max_pool2d(out, 2, stride=2)
        out = self.conv3(out)
        out = F.max_pool2d(out, 2, stride=2)
        out = self.conv4(out)
        out = F.max_pool2d(out, 2, stride=2)
        out = self.conv5(out)
        out = F.max_pool2d(out, 2, stride=2)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [21]:
mynet = vgg16(1, 6)

In [22]:
if use_gpu:
    mynet = mynet.cuda()

In [25]:
print('network structure:')
mynet

network structure:


vgg16 (
  (conv1): Sequential (
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU ()
  )
  (conv2): Sequential (
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU ()
  )
  (conv3): Sequential (
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU ()
    (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU ()
  )
  (conv4): Sequential (
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU ()
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU ()
    (4): Conv2d(512, 512, kernel_size=(3, 3)

In [27]:
# define optimizer and loss
optimizer = optim.SGD(mynet.parameters(), lr=1e-3, momentum=0.9) # 随机梯度下降，之后可以选择别的速度更快的如rmsprop
criterion = nn.CrossEntropyLoss()

### begin train

In [28]:
epoches = 100

In [32]:
def train_model(model, criterion, optimizer, num_epoch=epoches):
    since = time.time() # 取得当前时间
    
    best_model = model
    best_acc = 0.0
        
    for epoch in range(num_epoch): # 开始每个epoch
        print('{}/{}'.format(epoch+1, num_epoch))
        print('-'*10)
    
        for phase in ['train', 'val']: # 判断是train还是validation
            if phase == 'train':
                optimizer.zero_grad() # 将梯度归零
            
            running_loss = 0.0
            running_acc = 0.0
            for data in dataloader[phase]:
                img, label = data
                if use_gpu:
                    img = Variable(img).cuda()
                    label = Variable(label).cuda()
                else:
                    img = Variable(img)
                    label = Variable(label)
                
                # forward
                output = model(img)
                _, pred = torch.max(output, 1)
                loss = criterion(output, label)
                # backward
                if phase == 'train': # 如果是train，则反向传播更新参数
                    loss.backward()
                    optimizer.step()
                # log statistics
                running_loss += loss.data[0] * label.size(0)
                num_correct = torch.sum(pred == label)
                running_acc += num_correct.data[0]

            running_loss /= datasize[phase]
            running_acc /= datasize[phase]
            print('{} Loss:{:.4f} Acc:{:.4f}'.format(phase, running_loss, running_acc))
            if phase == 'val' and running_acc > best_acc: # 根据validation 判断更新之后的model是否更好
                best_acc = running_acc
                best_model = copy.deepcopy(model)

        print()
    time_eplise = time.time() - since
    print('Traing comlete in {:.0f}m{:.0f}s'.format(time_eplise//60, time_eplise%60))
    print('Best Acc:{:.4f}'.format(best_acc))
    return best_model