<a href="https://colab.research.google.com/github/njucs/med/blob/master/PM/PALM_PaddleX_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## PaddleX配置

### paddlex安装

In [None]:
# Check the CUDA toolkit version reported by nvcc
!nvcc --version

In [None]:
# install PaddlePaddle-GPU
!python -m pip install paddlepaddle-gpu==2.2.2 -i https://mirror.baidu.com/pypi/simple

In [None]:
!pip install paddlex==1.3.11 -i https://mirror.baidu.com/pypi/simple

### GPU设置、包引入

In [None]:
# Use GPU card 0 (if there is no GPU, the model is still trained on the CPU
# after running this cell).
import matplotlib
# Select the non-interactive Agg backend before paddlex is imported.
matplotlib.use('Agg') 
import os
# Expose only device 0; set before the paddlex import below.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx

## 准备数据集

### 准备数据

In [3]:
# 授权 Colab 访问 Google Drive
from google.colab import drive
drive.mount('/content/drive')

%cd drive/MyDrive/'Colab Notebooks'/Ophthalmology/PathologicMyopia/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks/Ophthalmology/PathologicMyopia


In [8]:
!ls dataset/Train | wc -w
!ls dataset/Train/fundus_image/ | wc -w
!ls dataset/PALM-Testing400-Images/ | wc -w

2
800
400


In [None]:
# Download the dataset archive and save it as dataset.zip in the current directory.
# This has already been done once; running the cell again fetches the archive again.
# NOTE(review): no cell in this notebook extracts dataset.zip — presumably it was
# unpacked by hand into dataset/; confirm before relying on a fresh run.
!wget https://bj.bcebos.com/v1/dataset-bj/%E5%8C%BB%E7%96%97%E6%AF%94%E8%B5%9B/%E5%B8%B8%E8%A7%84%E8%B5%9B%EF%BC%9APALM%E7%9C%BC%E5%BA%95%E5%BD%A9%E7%85%A7%E8%A7%86%E7%9B%98%E6%8E%A2%E6%B5%8B%E4%B8%8E%E5%88%86%E5%89%B2.zip -O dataset.zip

### 划分训练集和验证集

In [4]:
# Split the labelled samples into a training list (90%) and an evaluation list (10%).
#
# Reads the label sheet, shuffles its rows with a fixed seed so the split is
# reproducible across kernel restarts, and writes the two parts as
# space-separated text files without header or index column.

import os
import pandas as pd
import random

SPLIT_SEED = 42        # fixed seed: the same rows land in train/eval on every run
TRAIN_FRACTION = 0.9   # share of the rows that goes into the training list
LIST_DIR = 'PALM_PaddleX_2'

train_excel_file = 'dataset/Train/Classification.xlsx'
pd_list = pd.read_excel(train_excel_file)

# Shuffle all rows (reproducibly), then cut at the 90% mark
pd_list = pd_list.sample(frac=1, random_state=SPLIT_SEED)
offset = int(len(pd_list) * TRAIN_FRACTION)
train_list = pd_list[:offset]
eval_list = pd_list[offset:]

# Create the output folder on a fresh checkout instead of failing in to_csv
os.makedirs(LIST_DIR, exist_ok=True)
train_list.to_csv(os.path.join(LIST_DIR, 'train_list.txt'), index=False, header=False, sep=' ')
eval_list.to_csv(os.path.join(LIST_DIR, 'eval_list.txt'), index=False, header=False, sep=' ')

### 数据增强配置

In [5]:
from paddlex.cls import transforms

# Training pipeline: random 1440 crop, random horizontal flip, then Normalize()
# with its default arguments.
train_steps = [
    transforms.RandomCrop(crop_size=1440),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(),
]
train_transforms = transforms.Compose(train_steps)

# Evaluation pipeline: ResizeByShort to 1444, centre crop to 1440, then
# Normalize() with its default arguments.
eval_steps = [
    transforms.ResizeByShort(short_size=1444),
    transforms.CenterCrop(crop_size=1440),
    transforms.Normalize(),
]
eval_transforms = transforms.Compose(eval_steps)

### 数据集配置

In [6]:
# labels.txt records which labels are possible;
# in this experiment it contains just two lines: 0 and 1.

# Both datasets share the same image folder and label file and differ only in
# their file list, transforms and shuffling.
image_dir = 'dataset/Train/fundus_image'
label_file = 'PALM_PaddleX_2/labels.txt'

train_dataset = pdx.datasets.ImageNet(
    data_dir=image_dir,
    file_list='PALM_PaddleX_2/train_list.txt',
    label_list=label_file,
    transforms=train_transforms,
    shuffle=True,
)

eval_dataset = pdx.datasets.ImageNet(
    data_dir=image_dir,
    file_list='PALM_PaddleX_2/eval_list.txt',
    label_list=label_file,
    transforms=eval_transforms,
)

2022-04-19 13:29:43 [INFO]	Starting to read file list from dataset...
2022-04-19 13:29:43 [INFO]	720 samples in file train_list.txt
2022-04-19 13:29:43 [INFO]	Starting to read file list from dataset...
2022-04-19 13:29:43 [INFO]	80 samples in file eval_list.txt


## 训练

In [None]:
# Build a MobileNetV3_small_ssld classifier with two output classes and train it.
model = pdx.cls.MobileNetV3_small_ssld(num_classes=2)

# All training settings in one place; a checkpoint is saved after every epoch
# under save_dir.
train_options = dict(
    num_epochs=64,
    train_dataset=train_dataset,
    train_batch_size=32,
    eval_dataset=eval_dataset,
    lr_decay_epochs=[4, 6, 8],
    save_interval_epochs=1,
    learning_rate=0.025,
    save_dir='PALM_PaddleX_2/output/mobilenetv3_small_ssld',
    use_vdl=True,
)
# To continue an interrupted run, add for example:
# train_options['resume_checkpoint'] = 'output/mobilenetv3_small_ssld/epoch_18'
# NOTE(review): the last learning-rate decay is at epoch 8 while training runs
# for 64 epochs — confirm this schedule is intended.
model.train(**train_options)

## 预测

### 环境配置

In [None]:
# Use GPU card 0 (if there is no GPU, the model still runs on the CPU after
# running this cell). Repeats the setup from the top of the notebook.
import matplotlib
# Select the non-interactive Agg backend before paddlex is imported.
matplotlib.use('Agg') 
import os
# Expose only device 0; set before the paddlex import below.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx

### 单张图片预测

In [None]:
# Single-image prediction check: load the epoch_9 checkpoint and print the
# top-2 prediction for the first two test images.
import paddlex as pdx
model = pdx.load_model('PALM_PaddleX_2/output/mobilenetv3_small_ssld/epoch_9')

sample_images = (
    'dataset/PALM-Testing400-Images/T0001.jpg',
    'dataset/PALM-Testing400-Images/T0002.jpg',
)
for image_name in sample_images:
    result = model.predict(image_name, topk=2)
    print("Predict Result:", result)

### 预测数据集生成

In [None]:
# Build the list of test images to predict.
#
# Writes the bare file names (T0001.jpg ... T0400.jpg, one per line) to
# PALM_PaddleX_2/val_list.txt and leaves `val_list` holding the corresponding
# image paths under the test-image folder.
import os

TEST_IMAGE_DIR = 'dataset/PALM-Testing400-Images'
VAL_LIST_FILE = 'PALM_PaddleX_2/val_list.txt'
NUM_TEST_IMAGES = 400   # lower this (e.g. 200) to predict only a subset

# File names are 'T' + a zero-padded 4-digit index, starting at 1
val_names = ['T' + str(i).zfill(4) + '.jpg' for i in range(1, NUM_TEST_IMAGES + 1)]

# Create the output folder if needed so this section also works on a fresh checkout
os.makedirs(os.path.dirname(VAL_LIST_FILE), exist_ok=True)
with open(VAL_LIST_FILE, 'w') as f:
    f.writelines(name + '\n' for name in val_names)

# Paths are derived directly from the names instead of re-reading the file just written
val_list = [TEST_IMAGE_DIR + '/' + name for name in val_names]
print(len(val_list))

### 批量预测

In [None]:
import paddlex as pdx

# Run the best_model checkpoint over every path in val_list, printing each
# top-2 prediction and collecting them in result_list (same order as val_list).
result_list = []
model = pdx.load_model('PALM_PaddleX_2/output/mobilenetv3_small_ssld/best_model')
for image_path in val_list:
    prediction = model.predict(image_path, topk=2)
    print("Predict Result:", prediction)
    result_list.append(prediction)

### 结果检查

In [None]:
# Inspect the first prediction: the raw result first, then the category id and
# score of each of its two entries.
first_result = result_list[0]
print(first_result)
for rank in range(2):
    print(first_result[rank]['category_id'], first_result[rank]['score'])

### 保存结果

In [None]:
import pandas as pd

# Result column: for each prediction take the score of category 1 — the first
# entry if it is category 1, otherwise the second entry.
# (Assumes the two entries cover both categories — presumably guaranteed by
# topk=2 with two classes; confirm.)
pd_B = [
    item[0]['score'] if item[0]['category_id'] == 1 else item[1]['score']
    for item in result_list
]

# File-name column: one name per line of val_list.txt, newline stripped
with open('PALM_PaddleX_2/val_list.txt', 'r') as f:
    pd_A = [line.rstrip('\n') for line in f]

# Assemble the two columns into a DataFrame
df = pd.DataFrame({'FileName': pd_A, 'PM Risk': pd_B})

# Save as the submission file
df.to_csv("PALM_PaddleX_2/Classification_Results.csv", index=None)