# 模型轻量化

## 模型加载

In [1]:
import torch
import torch.nn as nn
from torchsummary import summary
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
import os
from PIL import Image
import numpy as np

In [2]:
class DogVsCatDataset(Dataset):
    def __init__(self, data_path, train=True, transform=None):
        self.data_path=data_path
        self.img_path = []
        self.label = []
        self.train = train
        if train:
            img_path_names = os.listdir(os.path.join(self.data_path, 'train'))
            for img_path_name in img_path_names:
                temp_list = os.listdir(os.path.join(self.data_path, 'train', img_path_name))
                self.img_path.extend(temp_list)
                if img_path_name == 'cat':
                    self.label.extend([0]*len(temp_list))
                elif img_path_name == 'dog':
                    self.label.extend([1]*len(temp_list))
        else:
            img_path_names = os.listdir(os.path.join(self.data_path, 'val'))
            for img_path_name in img_path_names:
                temp_list = os.listdir(os.path.join(self.data_path, 'val', img_path_name))
                self.img_path.extend(temp_list)
                if img_path_name == 'cat':
                    self.label.extend([0]*len(temp_list))
                elif img_path_name == 'dog':
                    self.label.extend([1]*len(temp_list))
        self.transform = transform
        
    def __len__(self):
        return len(self.img_path)
    
    def __getitem__(self, idx):
        label = self.label[idx]
        img_type = 'cat' if label == 0 else 'dog'
        model_type = 'train' if self.train else 'val'
        if self.train:
            image = Image.open(os.path.join(self.data_path, model_type, img_type, self.img_path[idx]))
        else:
            image = Image.open(os.path.join(self.data_path, model_type, img_type, self.img_path[idx]))
        if self.transform:
            image = self.transform(image)
        label = torch.from_numpy(np.array(label))
        return image, label

In [3]:
test_dataset = DogVsCatDataset(
                        data_path = r'D:\泰迪智能科技有限公司\09课程\02案例课程\03 计算机视觉案例\图像分类\猫狗大战\demo',
                        train=False,
                        transform = transforms.Compose([          
                                        transforms.Resize([150, 150]),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                             std=[0.229, 0.224, 0.225])]))
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=True)

In [4]:
def test(model, device, test_loader):
    model.eval()  # 声明验证函数，禁止所有梯度进行更新
    test_loss = 0
    correct = 0
    # 强制后面的计算不生成计算图，加快测试效率
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target.long(), reduction='sum').item()  # 对每个batch的loss进行求和
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    
    print('\nTest: average loss: {:.4f}, accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))  

In [5]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, 1)
        self.max_pool = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 32, 3, 1)
        self.conv3 = nn.Conv2d(32, 64, 3, 1)
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.5)
        self.line1 = nn.Linear(18496, 512)
        self.line2 = nn.Linear(512, 128)
        self.line3 = nn.Linear(128, 2)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.max_pool(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.max_pool(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.max_pool(x)
        x = self.relu(x)
        x = self.flatten(x)
        x = self.line1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.line2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.line3(x)
        output = F.log_softmax(x, dim=1)
        return output

In [6]:
model = CNNModel()
parms = torch.load('cnn_model_parms.pth')
model.load_state_dict(parms)

<All keys matched successfully>

In [7]:
file_size = os.path.getsize('cnn_model_parms.pth')
print('文件大小:\n\t{}字节\n\t{}M'.format(file_size, round(file_size/1024**2, 2)))

文件大小:
	38241888字节
	36.47M


In [8]:
summary(model.to('cuda'), (3, 150, 150))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 148, 148]             448
         MaxPool2d-2           [-1, 16, 74, 74]               0
              ReLU-3           [-1, 16, 74, 74]               0
            Conv2d-4           [-1, 32, 72, 72]           4,640
         MaxPool2d-5           [-1, 32, 36, 36]               0
              ReLU-6           [-1, 32, 36, 36]               0
            Conv2d-7           [-1, 64, 34, 34]          18,496
         MaxPool2d-8           [-1, 64, 17, 17]               0
              ReLU-9           [-1, 64, 17, 17]               0
          Flatten-10                [-1, 18496]               0
           Linear-11                  [-1, 512]       9,470,464
             ReLU-12                  [-1, 512]               0
          Dropout-13                  [-1, 512]               0
           Linear-14                  [

In [9]:
import time
start = time.time()
test(model, 'cuda', test_loader)
end = time.time()
print('Cost:', end-start)


Test: average loss: 0.5234, accuracy: 2063/2500 (83%)
Cost: 5.509939432144165


## 模型剪枝

### 层剪枝

<b><font color='red'>将每层中较小的权重置零

In [10]:
def weight_prune(model, pruning_perc):
    '''
    Prune pruning_perc % weights layer-wise
    '''
    threshold_list = []
    for p in model.parameters():
        if len(p.data.size()) != 1: # bias
            weight = p.cpu().data.abs().numpy().flatten()
            threshold = np.percentile(weight, pruning_perc)
            threshold_list.append(threshold)

    # generate mask
    masks = []
    idx = 0
    for p in model.parameters():
        if len(p.data.size()) != 1:
            pruned_inds = p.data.abs() > threshold_list[idx]
            masks.append(pruned_inds.float())
            idx += 1
    return masks

In [11]:
from copy import deepcopy

pruned_model = deepcopy(model)

In [12]:
mask = weight_prune(pruned_model, 80)  # 剪枝60%

In [13]:
weight_layer_name = [i for i in list(parms.keys()) if 'weight' in i]
for i in range(len(weight_layer_name)):
    parms[weight_layer_name[i]] *= mask[i]

In [14]:
pruned_model.load_state_dict(parms)

<All keys matched successfully>

In [15]:
parms

OrderedDict([('conv1.weight',
              tensor([[[[-0.0000,  0.0000, -0.0000],
                        [-0.0000, -0.0000, -0.0000],
                        [-0.5159, -0.0000, -0.5250]],
              
                       [[ 0.0000,  0.4308, -0.0000],
                        [-0.0000, -0.0000,  0.0000],
                        [-0.0000, -0.0000, -0.4818]],
              
                       [[ 0.6757,  0.7788,  0.4527],
                        [ 0.0000,  0.6251,  0.0000],
                        [-0.0000, -0.0000, -0.0000]]],
              
              
                      [[[-0.0000, -0.6090, -0.5513],
                        [ 0.0000,  0.0000,  0.0000],
                        [ 0.4241,  0.6808,  0.4858]],
              
                       [[-0.0000, -0.4679, -0.4809],
                        [ 0.0000,  0.0000,  0.0000],
                        [ 0.0000,  0.0000,  0.0000]],
              
                       [[ 0.0000, -0.0000, -0.4427],
                        [ 

In [16]:
torch.save(pruned_model.state_dict(), 'pruned_model1.pth')

In [17]:
file_size = os.path.getsize('pruned_model1.pth')
print('{}文件大小:\n\t{}字节\n\t{}M'.format('pruned_model1.pth', file_size, round(file_size/1024**2, 2)))

pruned_model1.pth文件大小:
	38241888字节
	36.47M


In [18]:
import time
start = time.time()
test(pruned_model, 'cuda', test_loader)
end = time.time()
print('Cost:', end-start)


Test: average loss: 0.5400, accuracy: 1854/2500 (74%)
Cost: 6.404947519302368


<b><font color='red'>可见这样处理后，模型无论在大小上还是在推理速度上均无明显提升，原始是因为经过剪枝操作后虽然将大部分的权重都变为0了，但是计算机在存储的时候还是使用的相同大小的空间存储0。要想真正压缩模型还需要进一步处理。

In [19]:
for i in range(len(mask)):
    # 将mask值由float转化为bool
    mask[i] = mask[i] > 0

In [20]:
torch.save(mask, 'mask.pth')

In [21]:
file_size = os.path.getsize('mask.pth')
print('{}文件大小:\n\t{}字节\n\t{}M'.format('mask.pth', file_size, round(file_size/1024**2, 2)))

mask.pth文件大小:
	9560134字节
	9.12M


#### 小例子（稀疏编码）

In [22]:
a = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [23]:
a

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [24]:
mask = torch.tensor([[False, True, False], [True, True, False], [True, False, True]])

In [25]:
save_a = torch.masked_select(a, mask)
save_a

tensor([2, 4, 5, 7, 9])

In [26]:
index = mask.nonzero()
index

tensor([[0, 1],
        [1, 0],
        [1, 1],
        [2, 0],
        [2, 2]])

In [27]:
parms = torch.tensor(mask, dtype=torch.float32)

  parms = torch.tensor(mask, dtype=torch.float32)


In [28]:
for i in range(len(index)):
    r, c = index[i]
    parms[r, c] *= save_a[i]

In [29]:
parms

tensor([[0., 2., 0.],
        [4., 5., 0.],
        [7., 0., 9.]])

#### 对剪枝后的模型采用稀疏编码

In [30]:
mask = torch.load('mask.pth')

In [31]:
parms = torch.load('pruned_model1.pth')

In [32]:
weight_layer_name = [i for i in list(parms.keys()) if 'weight' in i]
for i in range(len(weight_layer_name)):
    parms[weight_layer_name[i]] = torch.masked_select(parms[weight_layer_name[i]], mask[i])

In [33]:
torch.save(parms, 'new_parms.pth')

In [34]:
file_size1 = os.path.getsize('mask.pth')
print('{}文件大小:\n\t{}字节\n\t{}M'.format('mask.pth', file_size1, round(file_size1/1024**2, 2)))
file_size2 = os.path.getsize('new_parms.pth')
print('{}文件大小:\n\t{}字节\n\t{}M'.format('new_parms.pth', file_size2, round(file_size2/1024**2, 2)))
print('一共{}M'.format(round((file_size1+file_size2)/1024**2, 2)))

mask.pth文件大小:
	9560134字节
	9.12M
new_parms.pth文件大小:
	7652338字节
	7.3M
一共16.42M


<b><font color='red'>经过稀疏编码后，模型参数下降到了16.42M，相比原始的36.47M缩减了一半多

#### 稀疏编码还原

In [35]:
mask = torch.load('mask.pth')  
parms = torch.load('new_parms.pth')

In [36]:
weight_layer_name

['conv1.weight',
 'conv2.weight',
 'conv3.weight',
 'line1.weight',
 'line2.weight',
 'line3.weight']

In [37]:
weight_layer_name = [i for i in list(parms.keys()) if 'weight' in i]
new_parms = deepcopy(parms)

In [38]:
parms[weight_layer_name[i]]

tensor([ 0.1782, -0.1238, -0.1828, -0.1475,  0.1324,  0.1354, -0.1275,  0.1769,
         0.1471, -0.1322, -0.1181, -0.1453, -0.1188,  0.1220,  0.1211, -0.1556,
        -0.1472,  0.1176,  0.1339, -0.1293, -0.1453,  0.1524,  0.1176, -0.1281,
        -0.1660, -0.1254,  0.1212,  0.1212,  0.1356,  0.1510,  0.1376, -0.2217,
        -0.1588,  0.1254, -0.1629,  0.1447,  0.1552,  0.1208, -0.1206, -0.1332,
         0.1329, -0.1383,  0.1252,  0.1304, -0.1855, -0.1621, -0.1439,  0.1249,
        -0.1336, -0.1252,  0.1270], device='cuda:0')

In [39]:
new_parms[weight_layer_name[i]] = torch.tensor(mask[i], dtype=torch.float32)

  new_parms[weight_layer_name[i]] = torch.tensor(mask[i], dtype=torch.float32)


In [40]:
for i in range(len(weight_layer_name)):
    new_parms[weight_layer_name[i]] = torch.tensor(mask[i], dtype=torch.float32)
    for j in range(len(index)):
        new_parms[weight_layer_name[i]][new_parms[weight_layer_name[i]] != 0] = parms[weight_layer_name[i]].to('cpu')

  new_parms[weight_layer_name[i]] = torch.tensor(mask[i], dtype=torch.float32)


#### 使用还原的权重进行模型预测

In [41]:
pruned_model.load_state_dict(new_parms)

<All keys matched successfully>

In [42]:
summary(pruned_model, (3, 150, 150))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 148, 148]             448
         MaxPool2d-2           [-1, 16, 74, 74]               0
              ReLU-3           [-1, 16, 74, 74]               0
            Conv2d-4           [-1, 32, 72, 72]           4,640
         MaxPool2d-5           [-1, 32, 36, 36]               0
              ReLU-6           [-1, 32, 36, 36]               0
            Conv2d-7           [-1, 64, 34, 34]          18,496
         MaxPool2d-8           [-1, 64, 17, 17]               0
              ReLU-9           [-1, 64, 17, 17]               0
          Flatten-10                [-1, 18496]               0
           Linear-11                  [-1, 512]       9,470,464
             ReLU-12                  [-1, 512]               0
          Dropout-13                  [-1, 512]               0
           Linear-14                  [

In [43]:
import time
start = time.time()
test(pruned_model, 'cuda', test_loader)
end = time.time()
print('Cost:', end-start)


Test: average loss: 0.5400, accuracy: 1854/2500 (74%)
Cost: 5.528990983963013


### 通道剪枝

<b><font color='red'>将不重要的卷积核得到的特征图去掉

## 模型量化

<b><font color='red'>将模型参数的精度类型减小（float32 -> int8）
<br>狭义上的量化：连续 -> 离散
<br>神经网络的量化：float32 -> int8, int4, int2等，缩小可表示的空间大小 

## 知识蒸馏

<b><font color='red'>老师教学生<br>知识蒸馏通常用于模型压缩，用一个已经训练好的模型A去“教”另外一个模型B。这两个模型称为老师-学生模型。<br>通常模型A比模型B更强。在模型A的帮助下，模型B可以突破自我，学得更好。