In [1]:
# Load (or reload, if already loaded) the two IPython extensions used below.
%reload_ext watermark
%reload_ext autoreload
# autoreload mode 2: re-import modules before each cell executes, so edits to
# library code are picked up without restarting the kernel.
%autoreload 2
%matplotlib inline
# Print the interpreter version (-v) and the versions of the listed packages (-p).
%watermark -v -p numpy,pandas,matplotlib,sklearn,torch,torchvision,pytorch_lightning

CPython 3.6.9
IPython 7.16.1

numpy 1.18.5
pandas 1.0.4
matplotlib 3.2.1
sklearn 0.23.1
torch 1.6.0.dev20200609+cu101
torchvision 0.7.0.dev20200609+cu101
pytorch_lightning 0.8.5


In [2]:
import warnings

import json
import os
from collections import OrderedDict
import numpy as np
import pandas as pd
import torch
import torchvision
from PIL import Image

import torch.nn.functional as F
from torch.optim import SGD, Adam
from torch.nn import NLLLoss, MSELoss, CrossEntropyLoss
from torch.nn import Module, Conv2d, Dropout2d, Linear
from torch.optim.lr_scheduler import ExponentialLR, StepLR
from torch.utils.data import (Dataset, DataLoader)
from torchvision import transforms


warnings.filterwarnings('ignore')

In [3]:
from k12libs.utils.nb_easy import (EasyaiClassifier, EasyaiTrainer)

---------------------------------------

## 简单实例

该实例没有实际意义, 默认配置的是 resnet18网络, 使用rmnist数据集

In [None]:
# Quick smoke run: two epochs with the classifier's default configuration.
trainer = EasyaiTrainer(max_epochs=2)
default_classifier = EasyaiClassifier()
trainer.fit(default_classifier)
# test() is called without a model argument, exactly as in the fit-then-test flow above.
trainer.test()

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11 M  


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

## 自定义: 修改默认预置模型和预置数据集

继承EasyaiClassifier, 实现prepare_dataset和build_model.

In [None]:
class CustomClassifier(EasyaiClassifier):
    """Classifier that swaps in a different preset dataset and preset model."""

    def prepare_dataset(self):
        """Load the preset dataset registered under the name 'chestxray'.

        Per the original note, the result is a dict of the form
        ``{'train': Dataset, 'val': Dataset, 'test': Dataset}``.
        """
        # NOTE(review): the original comment called this dataset 'rchestxray'
        # while the code requests 'chestxray' -- confirm which name is correct.
        preset_datasets = self.load_presetting_dataset_('chestxray')
        return preset_datasets

    def build_model(self):
        """Load the preset 'shufflenet_v2_x0_5' model with ``num_classes=2``."""
        network = self.load_pretrained_model_('shufflenet_v2_x0_5', num_classes=2)
        return network

In [None]:
# Ten-epoch run of the preset-based CustomClassifier.
trainer = EasyaiTrainer(max_epochs=10)
preset_classifier = CustomClassifier()
# Train
trainer.fit(preset_classifier)
# Evaluate
trainer.test()

## 自定义: 构建用户模型, 加载用户数据集

继承EasyaiClassifier, 实现build_model(不使用预置模型)和prepare_dataset(自定义数据集解析处理)

In [None]:
class CustomClassifier(EasyaiClassifier):
    """Classifier with a user-defined dataset loader and a user-defined network.

    NOTE(review): this reuses the name ``CustomClassifier`` from the earlier
    preset-based cell and shadows it once this cell has been executed.
    """

    # Dataset preparation
    def prepare_dataset(self):
        """Build the train/val/test datasets from JSON manifests.

        Returns:
            dict: ``{'train': Dataset, 'val': Dataset, 'test': Dataset}``.
        """

        class JsonfileDataset(Dataset):
            """Image dataset described by the manifest ``<root>/<phase>.json``.

            The manifest is a JSON list; each item provides an ``image_path``
            (joined onto ``root``) and a ``label``.
            """

            def __init__(self, root, phase, info):
                self.root = root
                self.info = info
                # JSON is UTF-8 by specification; do not depend on the locale default.
                manifest_path = os.path.join(self.root, f'{phase}.json')
                with open(manifest_path, encoding='utf-8') as f:
                    items = json.load(f)
                self.image_list = [os.path.join(self.root, item['image_path']) for item in items]
                self.label_list = [item['label'] for item in items]

                # Optional augmentation pipeline, installed later via data_augment().
                self.augtrans = None
                # Always-on conversion: PIL image -> tensor -> per-channel normalisation.
                self.imgtrans = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize(mean=info['mean'], std=info['std'])
                ])

            def data_augment(self, augtrans):
                """Install a list of transforms to run before the tensor conversion."""
                self.augtrans = transforms.Compose(augtrans)

            def __getitem__(self, index):
                # Context manager closes the underlying file as soon as the
                # pixel data has been converted into a new RGB image.
                with Image.open(self.image_list[index]) as raw:
                    img = raw.convert('RGB')
                if self.augtrans is not None:
                    img = self.augtrans(img)
                img = self.imgtrans(img)
                return img, self.label_list[index]

            def __len__(self):
                return len(self.image_list)

        # Directory holding the dataset and its info.json (mean/std statistics).
        dataroot = os.path.join('/data/datasets/cv/', 'rmnist')
        with open(os.path.join(dataroot, 'info.json'), encoding='utf-8') as f:
            info = json.load(f)

        return {
            'train': JsonfileDataset(dataroot, 'train', info),
            'val': JsonfileDataset(dataroot, 'val', info),
            'test': JsonfileDataset(dataroot, 'test', info),
        }

    # Model construction
    def build_model(self):
        """Return a small two-convolution CNN producing 10 output scores."""

        class CustomNet(Module):
            """conv -> conv -> max-pool -> dropout -> fc -> dropout -> fc."""

            def __init__(self):
                super().__init__()
                self.conv1 = Conv2d(3, 32, 3, 1)   # convolution: image feature extraction
                self.conv2 = Conv2d(32, 64, 3, 1)
                self.dropout1 = Dropout2d(0.25)    # channel-wise dropout on the feature maps
                # 9216 = 64 channels * 12 * 12, i.e. this assumes 28x28 inputs
                # (28 -> 26 -> 24 after the two 3x3 convs, -> 12 after pooling).
                self.fc1 = Linear(9216, 128)
                # Element-wise dropout: the tensor is already flattened to
                # (batch, features) here, so Dropout2d is not the right layer.
                self.dropout2 = torch.nn.Dropout(0.5)
                self.fc2 = Linear(128, 10)         # fully connected output layer

            def forward(self, x):
                x = F.relu(self.conv1(x))
                x = F.relu(self.conv2(x))
                x = F.max_pool2d(x, 2)
                x = self.dropout1(x)
                x = torch.flatten(x, 1)
                x = F.relu(self.fc1(x))
                x = self.dropout2(x)
                # Raw scores are returned without log_softmax; presumably the
                # configured criterion applies it itself -- confirm.
                return self.fc2(x)

        return CustomNet()

In [None]:
# Ten-epoch run of the classifier with the user-defined dataset and network.
trainer = EasyaiTrainer(max_epochs=10)
custom_classifier = CustomClassifier()
# Train
trainer.fit(custom_classifier)
# Evaluate
trainer.test()

## 自定义: 用户自定义loss, optimizer, scheduler

继承EasyaiClassifier, 实现configure_criterion, configure_optimizer, configure_scheduler

In [None]:
class CustomClassifier2(EasyaiClassifier):
    """Classifier that overrides the loss, the optimizer and the LR schedule."""

    def configure_criterion(self):
        """Return the loss function: cross-entropy with mean reduction."""
        return CrossEntropyLoss(reduction='mean')

    def configure_optimizer(self):
        """Return an SGD optimizer over the parameters that require gradients."""
        # self.model is produced by build_model; per the original note, the
        # default preset model is used when build_model is not overridden.
        trainable_params = (p for p in self.model.parameters() if p.requires_grad)
        return SGD(
            trainable_params,
            lr=0.01,            # base learning rate
            momentum=0.9,       # momentum factor
            nesterov=True,      # use Nesterov momentum
            weight_decay=1e-6,  # L2 penalty on the weights
        )

    def configure_scheduler(self, optimizer):
        """Return a step schedule for ``optimizer`` (the one from configure_optimizer)."""
        # Every 2 epochs the learning rate is multiplied by 0.6.
        return StepLR(optimizer, step_size=2, gamma=0.6)

In [None]:
# Ten-epoch run with the custom loss / optimizer / scheduler configuration.
trainer = EasyaiTrainer(max_epochs=10)
tuned_classifier = CustomClassifier2()
# Train
trainer.fit(tuned_classifier)
# Evaluate
trainer.test()

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

## 自定义: 数据增强

继承EasyaiClassifier, 实现train_dataloader, val_dataloader, test_dataloader

In [None]:
class CustomClassifier3(EasyaiClassifier):
    """Classifier that customises the train / val / test data loaders.

    Every split is resized to the same resolution so that evaluation sees
    inputs at the scale the model was trained on; only the training split
    additionally receives a random horizontal flip.
    """

    _INPUT_SIZE = (64, 64)  # (height, width) applied to every split
    _BATCH_SIZE = 64        # number of images per batch

    def train_dataloader(self) -> DataLoader:
        """Return the shuffled training loader with resize + random flip."""
        # self.datasets is populated by the dataset-preparation step; per the
        # original note, the default preset dataset is used when not overridden.
        dataset = self.datasets['train']
        dataset.data_augment([
            transforms.Resize(self._INPUT_SIZE),   # bring images to a common size
            transforms.RandomHorizontalFlip(),     # random left-right flip
        ])
        return DataLoader(
            dataset,
            batch_size=self._BATCH_SIZE,
            drop_last=False,  # keep the final, possibly smaller, batch
            num_workers=2,    # worker processes for loading; keep this small
            shuffle=True,     # reshuffle the samples every epoch
        )

    def val_dataloader(self) -> DataLoader:
        """Return the validation loader: resize only, no random transforms."""
        dataset = self.datasets['val']
        dataset.data_augment([
            transforms.Resize(self._INPUT_SIZE),
        ])
        return DataLoader(dataset, batch_size=self._BATCH_SIZE)

    def test_dataloader(self) -> DataLoader:
        """Return the test loader: resize only, no random transforms.

        The resize is deterministic preprocessing rather than augmentation and
        must match train/val; otherwise the model is evaluated at a different
        input resolution than it was trained on.
        """
        dataset = self.datasets['test']
        dataset.data_augment([
            transforms.Resize(self._INPUT_SIZE),
        ])
        return DataLoader(dataset, batch_size=self._BATCH_SIZE)

In [None]:
# Ten-epoch run with the customised data loaders.
trainer = EasyaiTrainer(max_epochs=10)
augmented_classifier = CustomClassifier3()
# Train
trainer.fit(augmented_classifier)
# Evaluate
trainer.test()