### 1 什么是迁移学习？
参考论文：  
http://www.jos.org.cn/jos/ch/reader/view_abstract.aspx?file_no=4631&flag=1    
https://journalofbigdata.springeropen.com/articles/10.1186/s40537-016-0043-6  
https://ieeexplore.ieee.org/abstract/document/5288526  
迁移学习是运用已存有的知识对不同但相关领域问题进行求解的一种新的机器学习方法.  
迁移学习的做法：先在一个数据集上训练网络，得到网络参数(即预训练模型)，再把该预训练模型用于第二个数据集；前提是这两个数据集必须相关。  

### 2 为什么要使用迁移学习？
总结原因：  
(1) 避免标注大量的训练样本(标注样本非常耗时)  
(2) 避免从头构建自定义神经网络(构建神经网络需要精心的结构设计和相关的计算)  
(3) 避免长时间的模型训练(迁移学习直接使用预训练的模型，实际中只需要微调)  

### 3 pytorch图像处理中有哪些预训练模型？
第14节进行过介绍，具体请前往参考。

### 4 pytorch迁移学习入门小案例  
实际做法是重写全连接层部分，同时保持卷积层的权重不变，训练时只更新重写后的全连接层参数。

In [1]:
from torchvision import models
import torch.nn as nn

In [2]:
model = models.alexnet() # AlexNet built with torchvision's default arguments (no pretrained/weights argument is passed here)

In [4]:
model  # display the network structure

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
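    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)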

解释下网络的结构：  
1 该网络有3个属性即网络分3个阶段：features、avgpool、classifier   
2 第一部分(features)是卷积部分，用于特征提取；第二部分(avgpool)是自适应平均池化，把特征图统一为6×6，便于展平后接入全连接层；第三部分(classifier)是全连接部分，用于分类   
3 其他网络结构可以自行分析   

In [5]:
# Replace the fully connected (classifier) part of the network.
# The new head keeps the 9216 input features of the original first Linear
# layer and ends in a 2-output Linear layer.
model.classifier = nn.Sequential(
    nn.Dropout(p=0.5, inplace=False),
    nn.Linear(in_features=9216, out_features=100, bias=True),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5, inplace=False),
    nn.Linear(in_features=100, out_features=100, bias=True),
    nn.ReLU(inplace=True),
    nn.Linear(in_features=100, out_features=2, bias=True)
  )

In [6]:
model  # display the network structure again after replacing the classifier

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
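    (1): Linear(in_features=9216, out_features=100, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=100, out_features=100, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=100, out_features=2, bias=True)
  )
)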

###  如何设置只训练重写的全连接层参数呢？
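第一步：冻结所有已有参数：`for param in model.parameters(): param.requires_grad = False`  
第二步：重写全连接层(必须在第一步之后进行：新建的层默认 `requires_grad=True`，因此仍然可以训练)   
第三步：只把全连接层的参数交给优化器：`optimizer = optim.Adam(model.classifier.parameters(), lr=0.01)`(需要先 `from torch import optim`)   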

### 5 迁移学习实现猫狗分类 

In [9]:
# 1 导入需要的模块
from torchvision import models
import torch
import torch.utils.data as data
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn as nn
from torch.optim import sgd, adam
from PIL import Image
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil

In [10]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Training pipeline: resize, then random crop + random horizontal flip as
# data augmentation, then tensor conversion and per-channel normalisation.
transform_train = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.RandomCrop((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    ]
)

# Evaluation pipeline: same resize / output size / normalisation as training,
# but with a deterministic centre crop and no flip, so that the reported test
# loss and accuracy do not change from run to run because of augmentation.
transform_test = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.CenterCrop((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    ]
)


In [11]:
# Root folder of the image data; CatDogProcessor appends "/sdh_train/" or
# "/sdh_test/" to this path depending on its mode.
image_path = "/content/drive/My Drive"

In [12]:
class CatDogProcessor(data.Dataset):
    """Cat-vs-dog image dataset read from a flat folder of files.

    A file is labelled by the part of its name before the first ".":
    "cat" -> 0 and "dog" -> 1.  Files with any other prefix are skipped, so
    ``list_img``, ``list_label`` and ``data_size`` always describe the same
    set of samples.

    Args:
        mode: "train" reads ``directory + "/sdh_train/"`` and applies the
            module-level ``transform_train``; "test" reads
            ``directory + "/sdh_test/"`` and applies ``transform_test``.
            Any other value produces an empty dataset.
        directory: Root folder that contains the two sub-folders.
    """

    # File-name prefix -> integer class label.
    _LABELS = {"cat": 0, "dog": 1}

    def __init__(self, mode, directory):
        self.mode = mode
        self.list_img = []
        self.list_label = []
        self.data_size = 0
        self.transform = None

        if self.mode == "train":
            self.transform = transform_train
            folder = directory + "/sdh_train/"
        elif self.mode == "test":
            self.transform = transform_test
            folder = directory + "/sdh_test/"
        else:
            # Unknown mode: keep the dataset empty.
            return

        self.list_img, self.list_label = self._collect_labeled_files(folder)
        self.data_size = len(self.list_img)

    @classmethod
    def _collect_labeled_files(cls, folder):
        """Return ``(paths, labels)`` for the cat/dog files found in *folder*.

        *folder* must end with a path separator because file names are
        appended to it directly.  Entries are visited in sorted order so the
        index -> sample mapping is the same on every run.
        """
        paths = []
        labels = []
        for file_name in sorted(os.listdir(folder)):
            label = cls._LABELS.get(file_name.split(".")[0])
            if label is None:
                # Not a cat/dog file; skipping it keeps paths and labels aligned.
                continue
            paths.append(folder + file_name)
            labels.append(label)
        return paths, labels

    def __getitem__(self, item):
        """Return ``(transformed image, tensor([label]))`` for index *item*.

        Returns ``None`` when the dataset was created with an unknown mode.
        """
        if self.mode not in ("train", "test"):
            return None
        # The context manager closes the file handle; convert("RGB") forces
        # three channels so the 3-channel Normalize in the transform pipelines
        # also works for grayscale / RGBA / palette images.
        with Image.open(self.list_img[item]) as raw:
            img = raw.convert("RGB")
        return self.transform(img), torch.tensor([self.list_label[item]])

    def __len__(self):
        """Return the number of labelled samples."""
        return self.data_size

In [13]:
def pre_trained_model(num_classes=2):
    """Build a ResNet-34 with a frozen backbone and a new trainable head.

    The network is created with ``pretrained=True``, every existing parameter
    gets ``requires_grad = False``, and ``model.fc`` is then replaced by a
    freshly created linear layer, which is therefore the only trainable part.

    Args:
        num_classes: Number of outputs of the new head (default 2).

    Returns:
        The modified model.
    """
    model = models.resnet34(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False

    # Read the feature width from the layer being replaced instead of
    # hard-coding it.
    in_features = model.fc.in_features
    # The nn.Sequential wrapper is kept so the saved state_dict keys under
    # "fc" stay the same as before.
    model.fc = nn.Sequential(
        nn.Linear(in_features=in_features, out_features=num_classes, bias=True)
    )

    return model

In [14]:
def plot_image_train(acces, train_losses):
    """Show two figures in turn: training accuracy, then training loss.

    Each series is drawn against its 0-based position in the sequence.
    """
    for curve, caption in ((acces, 'train acc'), (train_losses, 'train loss')):
        steps = np.arange(len(curve))
        plt.plot(steps, curve)
        plt.title(caption)
        plt.show()

def plot_image_test(acces, test_losses):
    """Show two figures in turn: test accuracy, then test loss.

    Each series is drawn against its 0-based position in the sequence.
    """
    for curve, caption in ((acces, 'test acc'), (test_losses, 'test loss')):
        steps = np.arange(len(curve))
        plt.plot(steps, curve)
        plt.title(caption)
        plt.show()

In [15]:
def _run_epoch(model, loader, criterion, run_device, optimizer=None):
    """Run one pass over *loader* and return ``(mean loss, mean accuracy)``.

    With an optimizer the model is put in train mode and updated after every
    batch; without one it is put in eval mode and gradients are disabled.
    Both returned values are averages of the per-batch values.
    """
    training = optimizer is not None
    if training:
        model.train()
    else:
        model.eval()

    loss_sum = 0.0
    acc_sum = 0.0
    # Gradients are only tracked while training, so evaluation does not build
    # autograd graphs.
    with torch.set_grad_enabled(training):
        for im, label in loader:
            # Move the batch to the selected device rather than calling
            # .cuda(), so the CPU fallback works.
            im = im.to(run_device)
            # The dataset yields labels as tensor([label]); flatten to one
            # dimension.  reshape(-1) keeps that dimension even for a batch
            # of one sample, where squeeze() would produce a 0-dim tensor.
            label = label.to(run_device).reshape(-1)

            out = model(im)
            loss = criterion(out, label)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            _, pred = torch.max(out, 1)
            acc_sum += (pred == label).sum().item() / im.shape[0]

    batches = len(loader)
    return loss_sum / batches, acc_sum / batches


def train(epochs=100, batch_size=16, lr=0.001, momentum=0.9, save_path="model4.pth"):
    """Fine-tune the new head of the pretrained model and evaluate each epoch.

    Builds the train/test datasets from ``image_path``, trains only
    ``model.fc`` with SGD, prints the metrics of every epoch, plots the four
    metric curves and finally saves the model's ``state_dict``.

    Args:
        epochs: Number of passes over the training data.
        batch_size: Batch size of both data loaders.
        lr: SGD learning rate.
        momentum: SGD momentum.
        save_path: File the final ``state_dict`` is written to.
    """
    train_image_data_set = CatDogProcessor("train", image_path)
    train_data = DataLoader(train_image_data_set, batch_size=batch_size, shuffle=True)
    print("train_data loaded", len(train_data))

    test_image_data_set = CatDogProcessor("test", image_path)
    # Shuffling is not needed for evaluation.
    test_data = DataLoader(test_image_data_set, batch_size=batch_size, shuffle=False)
    print("test_data loaded", len(test_data))

    model = pre_trained_model()
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    # Only the parameters of the replaced head are handed to the optimizer.
    optimizer = sgd.SGD(model.fc.parameters(), lr=lr, momentum=momentum)

    train_losses = []
    train_acces = []
    test_losses = []
    test_acces = []

    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(model, train_data, criterion, device, optimizer)
        test_loss, test_acc = _run_epoch(model, test_data, criterion, device)

        train_losses.append(train_loss)
        train_acces.append(train_acc)
        test_losses.append(test_loss)
        test_acces.append(test_acc)

        print("epoch: %d, train loss: %.6f, train acc: %.6f, test loss: %.6f, test acc: %.6f" % (
            epoch, train_loss, train_acc, test_loss, test_acc))

    print("train and test-----------------Done!")

    plot_image_train(train_acces, train_losses)
    plot_image_test(test_acces, test_losses)

    torch.save(model.state_dict(), save_path)

![jupyter](./demo35.png)

### 6 关于卷积神经网络的原型的说明 
(1) 详细原理和网络结构参考论文，原始论文自行搜索。   
(2) 关于网络模型的coding，可以从头构建，遇到问题可以参考GitHub中的相关内容。   
(3) 更直接的方法，B站看大神的视频。   