### import packages

In [1]:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
import os
import time
import tqdm

### data preprocess

In [2]:
# 数据预处理，做一些scale，随机crop，normalizaiotn到0-1之间
img_transform = {
    'train': transforms.Compose([
            transforms.Scale(300),
            transforms.RandomCrop(224),
            transforms.ToTensor()
#             transforms.Normalize((0, 0, 0), (255, 255, 255))
        ]),
    'val': transforms.Compose([
            transforms.Scale(300),
            transforms.CenterCrop(224),
            transforms.ToTensor()
#             transforms.Normalize((0, 0, 0), (255, 255, 255))
        ])
}

In [3]:
root_path = '../data'

In [4]:
#读取数据文件夹
dset = {
    x: ImageFolder(os.path.join(root_path, x), transform=img_transform[x])
    for x in ['train', 'val']
}

In [5]:
#读取数据
dataloader = {
    'train': DataLoader(dset['train'], batch_size=16, shuffle=True, num_workers=4),
    'val': DataLoader(dset['val'], batch_size=16, num_workers=4)
}

In [6]:
#确定数据size
data_size = {
    x: len(dataloader[x].dataset.imgs)
    for x in ['train', 'val']
}

In [7]:
print('size of train data set: {}'.format(data_size['train']))# 每个文件夹放了一张图片做一个示例
print('size of validation data set: {}'.format(data_size['val']))

size of train data set: 2121
size of validation data set: 400


In [8]:
img_classes = dataloader['train'].dataset.classes

In [9]:
# 创建了三个省份作为示例，顺序按照文件夹的顺序
print('class of province: {}'.format(img_classes))

class of province: ['cat', 'dog']


In [10]:
#判断能不能用cuda，增加鲁棒性
use_gpu = torch.cuda.is_available()

In [11]:
print("in my computer, cuda availabel? \n{}".format(use_gpu))

in my computer, cuda availabel? 
True


## build vgg net
之所以选择vgg因为这个问题不需要太复杂的网络，所以选择了一个相对简单的网络结构  
详细网络结构见下面网址
http://ethereon.github.io/netscope/#/gist/dc5003de6943ea5a6b8b

In [12]:
class vgg16(nn.Module):
    def __init__(self, in_c, out_class):
        super(vgg16, self).__init__()
        self.conv1 = nn.Sequential(
                nn.Conv2d(in_c, 64, 3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(64, 64, 3, stride=1, padding=1),
                nn.ReLU(True)
            )
        self.conv2 = nn.Sequential(
                nn.Conv2d(64, 128, 3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(128, 128, 3, stride=1, padding=1),
                nn.ReLU(True)
            )
        self.conv3 = nn.Sequential(
                nn.Conv2d(128, 256, 3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(256, 256, 3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(256, 256, 3, stride=1, padding=1),
                nn.ReLU(True)
            )
        self.conv4 = nn.Sequential(
                nn.Conv2d(256, 512, 3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU(True)
            )
        self.conv5 = nn.Sequential(
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(512, 512, 3, stride=1, padding=1),
                nn.ReLU(True)
            )
        self.fc1 = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(0.5),
                nn.Linear(4096, 1000),
                nn.ReLU(True),
                nn.Dropout(0.5)
            )
        self.fc2 = nn.Sequential(
                nn.Linear(1000, 500),
                nn.ReLU(True),
                nn.Dropout(0.5)
            )
        self.fc3 = nn.Linear(500, out_class)
        
    def forward(self, x):
        out = self.conv1(x)
        out = F.max_pool2d(out, 2, stride=2)
        out = self.conv2(out)
        out = F.max_pool2d(out, 2, stride=2)
        out = self.conv3(out)
        out = F.max_pool2d(out, 2, stride=2)
        out = self.conv4(out)
        out = F.max_pool2d(out, 2, stride=2)
        out = self.conv5(out)
        out = F.max_pool2d(out, 2, stride=2)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [13]:
# mynet = vgg16(3, 2)
mynet = torchvision.models.vgg16()

In [14]:
mynet.classifier = nn.Sequential(nn.Linear(25088, 4096), 
                                 nn.ReLU(True), 
                                 nn.Dropout(0.5),
                                 nn.Linear(4096, 400),
                                 nn.ReLU(True),
                                 nn.Dropout(0.5),
                                 nn.Linear(400, 2),
                                 nn.Softmax()
                                 )

In [15]:
if use_gpu:
    mynet = mynet.cuda()

In [16]:
#print('network structure:')
#mynet

In [17]:
# define optimizer and loss
optimizer = optim.SGD(mynet.parameters(), lr=1e-2, momentum=0.9, nesterov=True) 
# 随机梯度下降，之后可以选择别的速度更快的如rmsprop
criterion = nn.CrossEntropyLoss()

### begin train

In [18]:
num_epoch = 10

In [19]:
for epoch in range(num_epoch): # 开始每个epoch
    since = time.time() # 取得当前时间
    print('{}/{}'.format(epoch+1, num_epoch))
    print('-'*10)

#     for phase in ['train', 'val']: # 判断是train还是validation
#     if phase == 'train':
    optimizer.zero_grad() # 将梯度归零

    running_loss = 0.0
    running_acc = 0.0
    for i, data in enumerate(dataloader['train'], 1):
        img, label = data
        if use_gpu:
            img = Variable(img).cuda()
            label = Variable(label).cuda()
        else:
            img = Variable(img)
            label = Variable(label)

        # forward
        output = mynet(img)
        _, pred = torch.max(output, 1)
        loss = criterion(output, label)
        # backward
#         if phase == 'train': # 如果是train，则反向传播更新参数
        loss.backward()
        optimizer.step()
        # log statistics
        running_loss += loss.data[0] * label.size(0)
        num_correct = torch.sum(pred == label)
        running_acc += num_correct.data[0]
        if i % 10 == 0:
            print('{}/{}, Loss: {:.4f}, Acc:{:.4f}'.format(i//10, data_size['train']//(10*16), 
                                                   running_loss/(i*16), running_acc/(i*16)))
        
    running_loss /= data_size['train']
    running_acc /= data_size['train']
    print('{} Loss:{:.4f} Acc:{:.4f}'.format('train', running_loss, running_acc))
    time_eplise = time.time() - since
    print('complete in {:.0f}m {:.0f} s'.format(time_eplise//60, time_eplise%60))
#             if phase == 'val' and running_acc > best_acc: # 根据validation 判断更新之后的model是否更好
#                 best_acc = running_acc
#                 best_model = copy.deepcopy(model)

    print()


1/10
----------
1/13, Loss: 0.6912, Acc:0.5625
2/13, Loss: 0.6981, Acc:0.5500
3/13, Loss: 0.7121, Acc:0.5104
4/13, Loss: 0.7071, Acc:0.5188
5/13, Loss: 0.7116, Acc:0.5025
6/13, Loss: 0.7096, Acc:0.5062
7/13, Loss: 0.7081, Acc:0.5080
8/13, Loss: 0.7079, Acc:0.5094
9/13, Loss: 0.7072, Acc:0.5132
10/13, Loss: 0.7093, Acc:0.5081
11/13, Loss: 0.7207, Acc:0.5034
12/13, Loss: 0.7259, Acc:0.5062
13/13, Loss: 0.7357, Acc:0.5010
train Loss:0.7355 Acc:0.5021
complete in 1m 52 s

2/10
----------
1/13, Loss: 0.8169, Acc:0.4938
2/13, Loss: 0.8370, Acc:0.4750
3/13, Loss: 0.8332, Acc:0.4792
4/13, Loss: 0.8173, Acc:0.4953
5/13, Loss: 0.8227, Acc:0.4900
6/13, Loss: 0.8087, Acc:0.5042
7/13, Loss: 0.8120, Acc:0.5009
8/13, Loss: 0.8145, Acc:0.4984
9/13, Loss: 0.8164, Acc:0.4965
10/13, Loss: 0.8099, Acc:0.5031
11/13, Loss: 0.8079, Acc:0.5051
12/13, Loss: 0.8089, Acc:0.5042
13/13, Loss: 0.8087, Acc:0.5043
train Loss:0.8081 Acc:0.5050
complete in 1m 53 s

3/10
----------
1/13, Loss: 0.8133, Acc:0.5000
2/13, L

In [None]:

    
#     best_model = model
#     best_acc = 0.0
        
for epoch in range(num_epoch): # 开始每个epoch
    since = time.time() # 取得当前时间
    print('{}/{}'.format(epoch+1, num_epoch))
    print('-'*10)

    for phase in ['train', 'val']: # 判断是train还是validation
        if phase == 'train':
            optimizer.zero_grad() # 将梯度归零

        running_loss = 0.0
        running_acc = 0.0
        for data in dataloader[phase]:
            img, label = data
            if use_gpu:
                img = Variable(img).cuda()
                label = Variable(label).cuda()
            else:
                img = Variable(img)
                label = Variable(label)

            # forward
            output = mynet(img)
            _, pred = torch.max(output, 1)
            loss = criterion(output, label)
            # backward
            if phase == 'train': # 如果是train，则反向传播更新参数
                loss.backward()
                optimizer.step()
            # log statistics
            running_loss += loss.data[0] * label.size(0)
            num_correct = torch.sum(pred == label)
            running_acc += num_correct.data[0]

        running_loss /= data_size[phase]
        running_acc /= data_size[phase]
        print('{} Loss:{:.4f} Acc:{:.4f}'.format(phase, running_loss, running_acc))
    time_eplise = time.time() - since
    print('comlete in {:.0f}m{:.0f}s'.format(epoch+1, num_epoch,time_eplise//60, time_eplise%60))
#             if phase == 'val' and running_acc > best_acc: # 根据validation 判断更新之后的model是否更好
#                 best_acc = running_acc
#                 best_model = copy.deepcopy(model)

    print()
