<a href="https://colab.research.google.com/github/njucs/med/blob/master/PM/PALM_PaddleX_10_Yifu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**本方案使用TNT模型进行训练与预测，在有限训练次数下，取得了较稳定的成绩：0.99515**

**采取的训练图像预处理方案：**

- 随机垂直翻转
- 随机角度旋转--±20度以内
- 缩放大小--（520，520）
- 归一化--mean:[0.2, 0.3, 0.5], std:[1., 1., 1.]（代码中未指定std，使用默认值1）

**采取的验证与预测图像预处理方案：**

- 缩放大小--（520，520）
- 归一化--mean:[0.2, 0.3, 0.5], std:[1., 1., 1.]（代码中未指定std，使用默认值1）
- 数据集划分比例：0.8

**TNT模型比较：**

- 微调参数： tnt_s_patch16_224 得分 > tnt_b_patch16_224 得分

**针对Yifu数据集：**

数据预处理：要处理成正方形，切除外围的黑色区域

数据增强：（N/H增加1+1+1倍，P增加1+12+12倍）
- 水平翻转
- 随机向外缩放20%以内
- 随机旋转+/-10度以内
- 高斯噪声（opt）
（填充都是黑色）


## PaddleX配置

### paddlex安装

In [None]:
# install PaddlePaddle-GPU
!python -m pip install paddlepaddle-gpu==2.2.2 -i https://mirror.baidu.com/pypi/simple

In [None]:
!pip install paddlex==1.3.11 -i https://mirror.baidu.com/pypi/simple

In [None]:
import pandas as pd
import os
import time
from tqdm import tqdm
import cv2 as cv
import numpy as np
import math

import paddle
from paddle import nn
from paddle import optimizer
from paddle import regularizer
from paddle import metric
from paddle.nn import loss
from paddle.nn import Layer

from paddle.io import Dataset, DataLoader
from paddle.vision import transforms

## 准备数据集

### 准备数据

In [None]:
# Authorize Colab to access Google Drive
from google.colab import drive
drive.mount('/content/drive')

# IPython magic: switch the working directory to the project folder on the mounted drive
%cd drive/MyDrive/'Colab Notebooks'/Ophthalmology/PathologicMyopia/

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/Ophthalmology/PathologicMyopia


In [None]:
!ls dataset-yifu | wc -w

2
800
400


In [None]:
########## Rewritten: build the labelled frame (shuffled) and the test frame

# Labels come from the Excel sheet; column 0 is prefixed with Image_path below.
Image_path = 'dataset-yifu'
Train_data = pd.read_excel('dataset/Train/Classification.xlsx')
Train_data.head()

# Turn the first column into a path under Image_path, then shuffle all rows.
for i in range(len(Train_data)):
    Train_data.iloc[i, 0] = os.path.join(Image_path, Train_data.iloc[i, 0])
Train_data = Train_data.sample(frac=1.0).reset_index(drop=True)
Train_data.head()

# Collect every file name found under Test_path with a placeholder label of 0.
# NOTE(review): os.walk also descends into sub-directories, but the names are
# later joined with Test_path only — confirm the test folder is flat.
Test_data = []
Test_path = 'dataset/Test'
for _, _, files in os.walk(Test_path):
    for i in files:
        Test_data.append([i, 0])
# Going through np.asarray makes both columns strings; the dataset classes
# convert the label back with int().
Test_data = np.asarray(Test_data)
Test_data = pd.DataFrame(Test_data)
# Sort by file name so the prediction order is deterministic.
Test_data = Test_data.sort_values(by=0, ascending=True).reset_index(drop=True)
for i in range(len(Test_data)):
    Test_data.iloc[i, 0] = os.path.join(Test_path, Test_data.iloc[i, 0])
Test_data.head()

Unnamed: 0,imgName,Label
0,V0001.jpg,0
1,V0002.jpg,1
2,V0003.jpg,1
3,V0004.jpg,0
4,V0005.jpg,0


In [None]:
class _FrameImageDataset(Dataset):
    """Shared implementation for datasets backed by a DataFrame.

    Column 0 of the frame holds the image path and column 1 the label.
    Subclasses only provide the default transform pipeline, so the training
    and evaluation/test datasets always decode images the same way.

    Args:
        df: DataFrame with the image path in column 0 and the label in column 1.
        trans: optional transform applied to the loaded image; when ``None``
            the subclass default pipeline is used.
    """

    def __init__(self, df, trans=None):
        super(_FrameImageDataset, self).__init__()

        self.df = df
        self.trans = self._default_transform() if trans is None else trans
        self.lens = len(df)

    def _default_transform(self):
        """Return the transform pipeline used when the caller passes none."""
        raise NotImplementedError

    def __getitem__(self, indexs):
        """Return ``(transformed image, label tensor)`` for row ``indexs``."""
        im_data, im_label = self._load_img_and_label(self.df, indexs)
        im_data = self.trans(im_data)
        return im_data, paddle.to_tensor(im_label)

    def _load_img_and_label(self, df, index):
        """Read one row of ``df`` as an RGB float32 image and an int label.

        Args:
            df: source DataFrame.
            index: positional row index.

        Returns:
            Tuple of (image as float32 array in RGB channel order, int label).

        Raises:
            IndexError: if ``index`` is not below the dataset length.
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        if index >= self.lens:
            raise IndexError(
                'please check the index, which has more than the dataset length!')

        path = df.iloc[index, 0]
        # The second argument of imread is a read flag, not a colour-conversion
        # code, so the conversion to RGB has to be a separate cvtColor call.
        im_data = cv.imread(path, cv.IMREAD_COLOR)
        if im_data is None:
            # imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f'could not read image: {path}')
        im_data = cv.cvtColor(im_data, cv.COLOR_BGR2RGB)
        # NOTE(review): the pixel values keep the range produced by imread when
        # they reach ToTensor/Normalize — confirm whether ToTensor rescales
        # float input, and whether the mean [0.2, 0.3, 0.5] was meant for RGB
        # or BGR channel order.
        im_label = int(df.iloc[index, 1])
        return im_data.astype('float32'), im_label

    def __len__(self):
        return self.lens


class Train_Dataset(_FrameImageDataset):
    """Training dataset: random flip/rotation, resize, tensor conversion, normalization."""

    def _default_transform(self):
        return transforms.Compose([
            transforms.RandomVerticalFlip(),
            transforms.RandomRotation(20),
            transforms.Resize(size=(520, 520)),
            transforms.ToTensor(),
            transforms.Normalize([0.2, 0.3, 0.5])
        ])


class Test_Dataset(_FrameImageDataset):
    """Evaluation/test dataset: deterministic resize, tensor conversion, normalization."""

    def _default_transform(self):
        return transforms.Compose([
            # Same input size as in training so the feature shapes agree.
            transforms.Resize(size=(520, 520)),
            transforms.ToTensor(),
            transforms.Normalize([0.2, 0.3, 0.5])
        ])

## 训练

In [None]:
# Training hyper-parameters, collected in one dict
Train_Paramdict = {
    'data_length':len(Train_data),  # number of labelled samples
    'train_frac':0.8,              # fraction of the samples used for fitting
    'num_class':2,                  # number of classes
    'epoches':20,                   # number of training epochs
    'batchsize':8,                 # batch size
    'lr':0.0001,                      # learning rate
    'l2':0.0005                    # L2 regularization coefficient
}

In [None]:
# Split the (already shuffled) labelled frame into a fitting part and an evaluation part
_split_at = int(Train_Paramdict['data_length'] * Train_Paramdict['train_frac'])
Fit_data = Train_data.iloc[:_split_at]
Eval_data = Train_data.iloc[_split_at:]

In [None]:
# Data loading
# The evaluation split uses Test_Dataset, i.e. the pipeline without random augmentation.
Fit_dataset = Train_Dataset(Fit_data)
Eval_dataset = Test_Dataset(Eval_data)
All_dataset = Train_Dataset(Train_data)

# Only the loaders built on Train_Dataset shuffle their samples.
Fit_dataloader = DataLoader(Fit_dataset, batch_size=Train_Paramdict['batchsize'], shuffle=True)
Eval_dataloader = DataLoader(Eval_dataset, batch_size=Train_Paramdict['batchsize'])
All_dataloader = DataLoader(All_dataset, batch_size=Train_Paramdict['batchsize'], shuffle=True)

In [None]:
import paddle
from paddle import nn

import math
import numpy as np

def _cfg(url='', **kwargs):
    return {
        'url': url,
        'num_classes': 2, 'input_size': (3, 600, 600), 'pool_size': None,
        'crop_pct': .9, 'interpolation': 'bicubic',
        'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5),
        'first_conv': 'pixel_embed.proj', 'classifier': 'head',
        **kwargs
    }


# Per-variant configuration; both variants use the same mean/std and an empty url.
default_cfgs = {
    variant: _cfg(url='', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    for variant in ('tnt_s_patch16_224', 'tnt_b_patch16_224')
}


class Identity(nn.Layer):
    """Pass-through layer that returns its input unchanged.

    Any positional or keyword constructor arguments are accepted and ignored,
    so the layer can stand in for another layer without changing the call site.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()

    def forward(self, inputs):
        """Return ``inputs`` as is."""
        return inputs


def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Drop paths (stochastic depth) per sample, for the main path of residual blocks.

    Each sample in the batch is either kept (and rescaled by ``1 / keep_prob``
    so the expected value is unchanged) or zeroed out entirely.

    Args:
        x: input tensor whose first axis is the batch axis.
        drop_prob: probability of dropping a sample's path; ``None`` or ``0``
            disables dropping.
        training: dropping is only applied when this is true.

    Returns:
        ``x`` itself when dropping is disabled, otherwise a new tensor of the
        same shape.
    """
    if drop_prob is None or drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # One random draw per sample, broadcast over all remaining axes.
    shape = [x.shape[0]] + [1] * (x.ndim - 1)
    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
    # floor() returns a new tensor, so the result must be re-bound to binarize the mask.
    random_tensor = paddle.floor(random_tensor)
    output = x / keep_prob * random_tensor

    return output


class DropPath(nn.Layer):
    """Layer wrapper around ``drop_path`` (stochastic depth per sample).

    Args:
        drop_prob: drop probability forwarded to ``drop_path``.
    """

    def __init__(self, drop_prob=None):
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        """Apply ``drop_path`` using this layer's probability and training flag."""
        return drop_path(x, drop_prob=self.drop_prob, training=self.training)


class Attention(nn.Layer):
    """Multi-head self-attention.

    Queries and keys are produced by one joint projection of width
    ``2 * hidden_dim``; values and the output projection keep width ``dim``.

    Args:
        dim: feature width of the input and output.
        hidden_dim: total width of the query (and key) projection.
        num_heads: number of attention heads.
        qkv_bias: passed as ``bias_attr`` to the query/key and value projections.
        attn_drop: dropout rate applied to the attention weights.
        proj_drop: dropout rate applied after the output projection.
    """

    def __init__(self, dim, hidden_dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.head_dim = hidden_dim // num_heads
        # Scale the dot products by 1 / sqrt(head_dim).
        self.scale = self.head_dim ** -0.5

        self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias)
        self.v = nn.Linear(dim, dim, bias_attr=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, inputs):
        """Apply self-attention to ``inputs`` of shape (B, N, C) and return the same shape."""
        B, N, C = inputs.shape

        # (B, N, 2*hidden) -> (2, B, heads, N, head_dim)
        qk = self.qk(inputs)
        qk = qk.reshape((B, N, 2, self.num_heads, self.head_dim))
        qk = qk.transpose((2, 0, 3, 1, 4))
        q = qk[0]
        k = qk[1]

        # (B, N, dim) -> (B, heads, N, dim // heads)
        v = self.v(inputs)
        v = v.reshape((B, N, self.num_heads, -1))
        v = v.transpose((0, 2, 1, 3))

        scores = paddle.matmul(q, k.transpose((0, 1, 3, 2))) * self.scale
        weights = paddle.nn.functional.softmax(scores, axis=-1)
        weights = self.attn_drop(weights)

        # Merge the heads back into one feature axis.
        out = paddle.matmul(weights, v)
        out = out.transpose((0, 2, 1, 3))
        out = out.reshape((B, N, -1))
        out = self.proj(out)
        return self.proj_drop(out)


class Mlp(nn.Layer):
    """Two-layer feed-forward network: linear, activation, dropout, linear, dropout.

    Args:
        in_features: input width.
        hidden_features: hidden width; falls back to ``in_features`` when falsy.
        out_features: output width; falls back to ``in_features`` when falsy.
        act_layer: activation layer class, instantiated without arguments.
        drop: dropout rate used after both linear layers.
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        if not out_features:
            out_features = in_features
        if not hidden_features:
            hidden_features = in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """Run ``x`` through the feed-forward stack."""
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))

class Block(nn.Layer):
    """TNT block: an inner transformer over pixel embeddings and an outer one over patch embeddings.

    Args:
        dim: width of the patch (outer) embeddings.
        in_dim: width of the pixel (inner) embeddings.
        num_pixel: number of pixel embeddings per patch; sizes the projection
            from flattened pixel embeddings to a patch embedding.
        num_heads: attention heads of the outer transformer.
        in_num_head: attention heads of the inner transformer.
        mlp_ratio: hidden-width multiplier of the outer MLP.
        qkv_bias: forwarded to both attention layers.
        drop: dropout rate for attention output projections and MLPs.
        attn_drop: dropout rate for attention weights.
        drop_path: stochastic-depth rate; values not above 0 disable it.
        act_layer: activation layer class for the MLPs.
        norm_layer: normalization layer class.
    """
    def __init__(self, dim, in_dim, num_pixel, num_heads=12, in_num_head=4, mlp_ratio=4.,
            qkv_bias=False, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super(Block, self).__init__()
        # Inner transformer
        self.norm_in = norm_layer(in_dim)
        self.attn_in = Attention(
            in_dim, in_dim, num_heads=in_num_head, qkv_bias=qkv_bias,
            attn_drop=attn_drop, proj_drop=drop)
        
        self.norm_mlp_in = norm_layer(in_dim)
        # The inner MLP always uses a hidden width of 4 * in_dim (mlp_ratio is not applied here).
        self.mlp_in = Mlp(in_features=in_dim, hidden_features=int(in_dim * 4),
            out_features=in_dim, act_layer=act_layer, drop=drop)
        
        # Projects the flattened pixel embeddings of one patch to the patch width.
        self.norm1_proj = norm_layer(in_dim)
        self.proj = nn.Linear(in_dim * num_pixel, dim, bias_attr=True)
        # Outer transformer
        self.norm_out = norm_layer(dim)
        self.attn_out = Attention(
            dim, dim, num_heads=num_heads, qkv_bias=qkv_bias,
            attn_drop=attn_drop, proj_drop=drop)
        # The same drop-path layer is shared by the inner and outer residual branches.
        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
        
        self.norm_mlp = norm_layer(dim)
        self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio),
            out_features=dim, act_layer=act_layer, drop=drop)

    def forward(self, pixel_embed, patch_embed):
        """Update both embeddings and return ``(pixel_embed, patch_embed)``.

        Note that the slice assignment below writes into the ``patch_embed``
        tensor passed by the caller before it is re-bound.
        """
        # inner
        pixel_embed = pixel_embed + self.drop_path(self.attn_in(self.norm_in(pixel_embed)))
        pixel_embed = pixel_embed + self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed)))
        # outer
        B, N, C = patch_embed.shape
        # Add the projected pixel information to every token except index 0,
        # which is why the pixel embeddings are reshaped to N - 1 tokens.
        patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj(self.norm1_proj(pixel_embed).reshape((B, N - 1, -1)))
        patch_embed = patch_embed + self.drop_path(self.attn_out(self.norm_out(patch_embed)))
        patch_embed = patch_embed + self.drop_path(self.mlp(self.norm_mlp(patch_embed)))
        return pixel_embed, patch_embed


class PixelEmbed(nn.Layer):
    """Image to pixel embedding.

    A strided convolution produces a feature map, which is then cut into
    non-overlapping windows of side ``new_patch_size``; each window becomes
    the pixel embeddings of one patch.

    Args:
        img_size: expected height and width of the (square) input image.
        patch_size: side of one patch in input pixels.
        in_chans: number of input channels.
        in_dim: width of each pixel embedding.
        stride: stride of the embedding convolution.
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48, stride=4):
        super().__init__()
        self.img_size = img_size
        self.num_patches = (img_size // patch_size) ** 2
        self.in_dim = in_dim
        # Side of one patch after the strided convolution.
        self.new_patch_size = math.ceil(patch_size / stride)

        self.proj = nn.Conv2D(in_chans, self.in_dim, kernel_size=7, padding=3, stride=stride)

    def forward(self, x, pixel_pos):
        """Embed ``x`` of shape (B, C, H, W) and add ``pixel_pos``.

        Returns a tensor of shape (B * num_patches, new_patch_size ** 2, in_dim).
        """
        B, _, H, W = x.shape
        assert H == self.img_size and W == self.img_size, \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size}*{self.img_size})."

        side = self.new_patch_size
        total = B * self.num_patches

        feat = self.proj(x)
        feat = nn.functional.unfold(x=feat, kernel_sizes=side, strides=side)
        # One row per patch, laid out as (in_dim, side, side) so the position table can be added.
        feat = feat.transpose((0, 2, 1))
        feat = feat.reshape((total, self.in_dim, side, side))
        feat = feat + pixel_pos

        feat = feat.reshape((total, self.in_dim, -1))
        return feat.transpose((0, 2, 1))


class TNT(nn.Layer):
    """Transformer in Transformer - https://arxiv.org/abs/2103.00112

    Args:
        img_size: height and width of the (square) input image.
        patch_size: side of one patch in input pixels.
        in_chans: number of input channels.
        num_classes: size of the classification head; values not above 0
            replace the head with an identity layer.
        embed_dim: width of the patch embeddings.
        in_dim: width of the pixel embeddings.
        depth: number of TNT blocks.
        num_heads: attention heads of the outer transformer.
        in_num_head: attention heads of the inner transformer.
        mlp_ratio: hidden-width multiplier of the outer MLP.
        qkv_bias: forwarded to the attention layers.
        drop_rate: dropout rate for embeddings, projections and MLPs.
        attn_drop_rate: dropout rate for attention weights.
        drop_path_rate: stochastic-depth rate of the last block; earlier
            blocks get linearly smaller rates starting at 0.
        norm_layer: normalization layer class.
        first_stride: stride of the pixel-embedding convolution.
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, in_dim=48, depth=12,
                 num_heads=12, in_num_head=4, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0.,
                 drop_path_rate=0., norm_layer=nn.LayerNorm, first_stride=4):
        super(TNT, self).__init__()
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models

        self.pixel_embed = PixelEmbed(
            img_size=img_size, patch_size=patch_size, in_chans=in_chans, in_dim=in_dim, stride=first_stride)
        num_patches = self.pixel_embed.num_patches
        self.num_patches = num_patches
        new_patch_size = self.pixel_embed.new_patch_size
        num_pixel = new_patch_size ** 2

        self.norm1_proj = norm_layer(num_pixel * in_dim)
        self.proj = nn.Linear(num_pixel * in_dim, embed_dim)
        self.norm2_proj = norm_layer(embed_dim)

        # Learnable class token and position tables, drawn once from N(0, 0.02).
        self.cls_token = self._normal_parameter((1, 1, embed_dim))
        self.patch_pos = self._normal_parameter((1, num_patches + 1, embed_dim))
        self.pixel_pos = self._normal_parameter((1, in_dim, new_patch_size, new_patch_size))
        self.pos_drop = nn.Dropout(p=drop_rate)

        # Stochastic depth decay rule; plain floats so Block can compare them with 0.
        dpr = paddle.linspace(0, drop_path_rate, depth).numpy().tolist()
        blocks = []
        for i in range(depth):
            blocks.append(Block(
                dim=embed_dim, in_dim=in_dim, num_pixel=num_pixel, num_heads=num_heads, in_num_head=in_num_head,
                mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop=drop_rate, attn_drop=attn_drop_rate,
                drop_path=dpr[i], norm_layer=norm_layer))
        self.blocks = nn.LayerList(blocks)
        self.norm = norm_layer(embed_dim)

        self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else Identity()

        self.apply(self._init_weights)

    @staticmethod
    def _normal_parameter(shape, std=.02):
        """Create a float32 parameter of ``shape`` initialized from N(0, std)."""
        values = paddle.normal(mean=0.0, std=std, shape=list(shape))
        return paddle.create_parameter(list(shape), 'float32', attr=nn.initializer.Assign(values))

    def _init_weights(self, m):
        """Re-initialize Linear and LayerNorm sub-layers; other layers are left alone."""
        if isinstance(m, nn.Linear):
            # Zero-mean draw: passing the old weight as the mean would only add
            # noise on top of the layer's default initialization.
            m.weight = self._normal_parameter(m.weight.shape)
            if m.bias is not None:
                m.bias = paddle.create_parameter(m.bias.shape, 'float32', attr=nn.initializer.Constant(value=0.))
        elif isinstance(m, nn.LayerNorm):
            m.bias = paddle.create_parameter(m.bias.shape, 'float32', attr=nn.initializer.Constant(value=0.))
            m.weight = paddle.create_parameter(m.weight.shape, 'float32', attr=nn.initializer.Constant(value=1.))

    def no_weight_decay(self):
        """Return the names of the parameters meant to be excluded from weight decay."""
        return {'patch_pos', 'pixel_pos', 'cls_token'}

    def get_classifier(self):
        """Return the classification head."""
        return self.head

    def reset_classifier(self, num_classes, global_pool=''):
        """Replace the head with a fresh one for ``num_classes`` outputs (``global_pool`` is unused)."""
        self.num_classes = num_classes
        self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else Identity()

    def forward_features(self, x):
        """Return the normalized class-token feature for each image in ``x``."""
        B = x.shape[0]
        pixel_embed = self.pixel_embed(x, self.pixel_pos)

        # Flatten the pixel embeddings of each patch and project them to one patch embedding.
        patch_embed = self.norm2_proj(self.proj(self.norm1_proj(pixel_embed.reshape((B, self.num_patches, -1)))))
        # Prepend the class token (expanded over the batch) as token 0.
        cls_tokens = self.cls_token.expand([B, self.cls_token.shape[1], self.cls_token.shape[2]])
        patch_embed = paddle.concat((cls_tokens, patch_embed), axis=1)
        patch_embed = patch_embed + self.patch_pos
        patch_embed = self.pos_drop(patch_embed)

        for blk in self.blocks:
            pixel_embed, patch_embed = blk(pixel_embed, patch_embed)

        patch_embed = self.norm(patch_embed)
        return patch_embed[:, 0]

    def forward(self, x):
        """Return the head output computed from the class-token feature."""
        x = self.forward_features(x)
        x = self.head(x)
        return x


def tnt_s_patch16_224(pretrained=False, **kwargs):
    """Build the small TNT variant (embed_dim=384, in_dim=24, 12 blocks, 6 heads).

    Args:
        pretrained: when true, ``load_pretrained`` is called on the new model.
        **kwargs: forwarded to ``TNT``.

    Returns:
        The constructed model with ``default_cfg`` attached.
    """
    net = TNT(
        patch_size=16,
        embed_dim=384,
        in_dim=24,
        depth=12,
        num_heads=6,
        in_num_head=4,
        qkv_bias=False,
        **kwargs)
    net.default_cfg = default_cfgs['tnt_s_patch16_224']
    if not pretrained:
        return net
    # NOTE(review): load_pretrained is not defined in the visible source — confirm where it comes from.
    load_pretrained(
        net, num_classes=net.num_classes, in_chans=kwargs.get('in_chans', 3))
    return net


def tnt_b_patch16_224(pretrained=False, **kwargs):
    """Build the base TNT variant (embed_dim=640, in_dim=40, 12 blocks, 10 heads).

    Args:
        pretrained: when true, ``load_pretrained`` is called on the new model.
        **kwargs: forwarded to ``TNT``.

    Returns:
        The constructed model with ``default_cfg`` attached.
    """
    net = TNT(
        patch_size=16,
        embed_dim=640,
        in_dim=40,
        depth=12,
        num_heads=10,
        in_num_head=4,
        qkv_bias=False,
        **kwargs)
    net.default_cfg = default_cfgs['tnt_b_patch16_224']
    if not pretrained:
        return net
    # NOTE(review): load_pretrained is not defined in the visible source — confirm where it comes from.
    load_pretrained(
        net, num_classes=net.num_classes, in_chans=kwargs.get('in_chans', 3))
    return net

In [None]:
# Build the model
#import tnt_s_patch16_224, tnt_b_patch16_224
model = tnt_s_patch16_224(img_size=520, num_classes=2)
# Wrap the network in paddle.Model, whose prepare/fit/predict methods are used below.
model = paddle.Model(model)

# Linear warm-up from 0 to the configured learning rate over 2000 scheduler steps.
lr = optimizer.lr.LinearWarmup(
    learning_rate=Train_Paramdict['lr'],
    warmup_steps = 2000,
    start_lr = 0, 
    end_lr = Train_Paramdict['lr']
)

# O: Adam optimizer with L2 weight decay, L: cross-entropy loss, M: accuracy metric.
O = optimizer.Adam(lr, parameters=model.parameters(), weight_decay=regularizer.L2Decay(Train_Paramdict['l2']))
L = loss.CrossEntropyLoss()
M = metric.Accuracy()

model.prepare(O, L, M)

In [None]:
# Train on the fitting split and evaluate on the held-out split for the configured number of epochs.
model.fit(
    Fit_dataloader,
    Eval_dataloader,
    epochs=Train_Paramdict['epoches']
)

## 预测

In [None]:
# Data loading for prediction (no shuffling, so outputs follow the row order of Test_data)
Test_dataset = Test_Dataset(Test_data)
Test_dataloader = DataLoader(Test_dataset, batch_size=Train_Paramdict['batchsize'])

In [None]:
# Keep the nested list returned by predict (it is consumed below as
# results[0] -> per-batch arrays). Converting it with np.asarray would build a
# ragged array whenever the last batch is smaller than the others.
results = model.predict(Test_dataloader)

In [None]:
import os

import paddle.nn.functional as F

# Turn each batch of logits into probabilities and keep the class-1
# probability as the per-image score.
submit_result = []
for batch_logits in results[0]:
    batch_probs = F.softmax(paddle.to_tensor(batch_logits), axis=-1)
    submit_result += batch_probs[:, 1].numpy().tolist()
submit_result = np.asarray(submit_result)

# Store the scores in the placeholder label column of the test frame.
Test_data.iloc[:, 1] = submit_result

Submit_data = Test_data.copy()
Submit_data.columns = ['FileName', 'PM Risk']
# Keep only the file name; basename works for names of any length.
Submit_data['FileName'] = Submit_data['FileName'].map(os.path.basename)

# Alternative output location used for the PALM run:
# Submit_data.to_csv('PALM_PaddleX_10/Classification_Results.csv', index=False, float_format="%.1f")
os.makedirs('Yifu', exist_ok=True)  # to_csv does not create missing directories
# NOTE(review): "%.1f" rounds the risk scores to one decimal — confirm this is intended.
Submit_data.to_csv('Yifu/Classification_Results.csv', index=False, float_format="%.1f")
Submit_data.head()