In [1]:

import os
import numpy as np
import cv2
# import matplotlib
# matplotlib.use('agg')
# import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import auc

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.vision.models import resnet50, resnet101

from torchvision import transforms as trans # fro torchvision
import albumentations as trans # for albumentations
import PIL

import warnings
warnings.filterwarnings('ignore')

In [2]:
from PIL import Image
def numpy_to_pil(array):
    return Image.fromarray(array)


In [3]:
import torchvision.transforms as trans

batchsize = 8 
image_size = 256
iters = 1000 
val_ratio = 0.2 # 80 / 20
trainset_root = "../data-PALM/Training/Images"
val_root = "../data-PALM/Training/Images"
num_workers = 4
init_lr = 1e-6
optimizer_type = "adam"


filelists = os.listdir(trainset_root)
train_filelists, val_filelists = train_test_split(filelists, test_size=val_ratio, random_state=42)
print("Total Nums: {}, train: {}, val: {}".format(len(filelists), len(train_filelists), len(val_filelists)))


class GOALS_sub2_dataset(paddle.io.Dataset):
    def __init__(self,
                img_transforms,
                dataset_root,
                label_file='',
                filelists=None,
                numclasses=2,
                mode='train'):
        self.dataset_root = dataset_root
        self.img_transforms = img_transforms
        self.mode = mode.lower()
        self.num_classes = numclasses

        if self.mode == 'train':
            label = {row['imgName']:row[1]
                    for _, row in pd.read_excel(label_file).iterrows()}            
            self.file_list = [[f, label[f]] for f in os.listdir(dataset_root)]


        elif self.mode == "test":
            self.file_list = [[f, None] for f in os.listdir(dataset_root)]
        
        if filelists is not None:
            self.file_list = [item for item in self.file_list if item[0] in filelists]
    
    def __getitem__(self, idx):

        real_index, label = self.file_list[idx]
        img_path = os.path.join(self.dataset_root, real_index)    
        img = cv2.imread(img_path)

        # Conversion: If img is a NumPy array, convert to PIL Image
        if isinstance(img, np.ndarray):
        img = Image.fromarray(img)

        if self.img_transforms is not None:
            img = self.img_transforms(img)

        # Convert PIL Image to Tensor
        img = transforms.ToTensor()(img) 
            
        if self.__getitem__ is not None:
            img = self.img_transforms(img)
            #print("Image transformed:", img)  # Add this line

        
        
 
        # normlize on GPU to save CPU Memory and IO consuming.
        # img = (img / 255.).astype("float32")

        img = img.transpose(PIL.Image.TRANSPOSE) # H, W, C -> C, H, W

        if self.mode == 'test':
            return img, real_index

        if self.mode == "train":            
            return img, label

    def __len__(self):
        return len(self.file_list)


img_train_transforms = trans.Compose([
    trans.RandomResizedCrop(
        image_size, scale=(0.90, 1.1), ratio=(0.90, 1.1)),
    trans.RandomHorizontalFlip(),
    trans.RandomVerticalFlip(),
    trans.RandomRotation(30)
])

img_val_transforms = trans.Compose([
    trans.CenterCrop(image_size),
    trans.Resize((image_size, image_size))
])



class Model(nn.Layer):
    def __init__(self):
        super(Model, self).__init__()
        self.feature = resnet50(pretrained=True, num_classes=2) # 移除最后一层全连接
        # self.feature = resnet101(pretrained=True, num_classes=2) # 移除最后一层全连接
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, 2)

    def forward(self, img):
        feature = self. feature(img)
        out1 = self.fc1(feature)
        logit = self.fc2(out1)

        return logit



def train(model, iters, train_dataloader, val_dataloader, optimizer, criterion, log_interval, eval_interval):
    iter = 0
    model.train()
    avg_loss_list = []
    avg_acc_list = []
    best_acc = 0.
    while iter < iters:
        for data in train_dataloader:
            iter += 1
            if iter > iters:
                break
            imgs = (data[0] / 255.).astype("float32")
            labels = data[1].astype('int64')
            # print(labels)
            labels_ = paddle.unsqueeze(labels, axis=1)
            logits = model(imgs) 
            m = paddle.nn.Softmax()
            pred = m(logits)
            # print(pred.numpy())
            # print(pred.numpy().argmax(1))            
            acc = paddle.metric.accuracy(input=pred, label=labels_)
            one_hot_labels = paddle.fluid.layers.one_hot(labels_, 2, allow_out_of_range=False)
            loss = criterion(pred, one_hot_labels)            
            # print(loss.numpy())
            loss.backward()
            optimizer.step()

            model.clear_gradients()
            avg_loss_list.append(loss.numpy()[0])
            avg_acc_list.append(acc.numpy())
            

            if iter % log_interval == 0:
                avg_loss = np.array(avg_loss_list).mean()
                avg_acc = np.array(avg_acc_list).mean()
                avg_loss_list = []
                avg_acc_list = []
                print("[TRAIN] iter={}/{} avg_loss={:.4f} avg_acc={:.4f}".format(iter, iters, avg_loss, avg_acc))

            if iter % eval_interval == 0:
                avg_loss, avg_acc = val(model, val_dataloader, criterion)
                print("[EVAL] iter={}/{} avg_loss={:.4f} acc={:.4f}".format(iter, iters, avg_loss, avg_acc))
                if avg_acc >= best_acc:
                    best_acc = avg_acc
                    paddle.save(model.state_dict(),
                            os.path.join(".../classification/best_model_{:.4f}".format(best_acc), 'model.pdparams'))
                model.train()

def val(model, val_dataloader, criterion):
    model.eval()
    avg_loss_list = []
    avg_acc_list = []
    cache = []
    with paddle.no_grad():
        for data in val_dataloader:
            imgs = (data[0] / 255.).astype("float32")
            labels = data[1].astype('int64')
            labels_ = paddle.unsqueeze(labels, axis=1)
            logits = model(imgs)
            m = paddle.nn.Softmax()
            pred = m(logits)            
            acc = paddle.metric.accuracy(input=pred, label=labels_)
            one_hot_labels = paddle.fluid.layers.one_hot(labels_, 2, allow_out_of_range=False)
            loss = criterion(pred, one_hot_labels) 
            avg_loss_list.append(loss.numpy()[0])
            avg_acc_list.append(acc.numpy())        

    avg_loss = np.array(avg_loss_list).mean()
    acc = np.array(avg_acc_list).mean()

    return avg_loss, acc


img_train_transforms = trans.Compose([
    trans.RandomResizedCrop(
        image_size, scale=(0.90, 1.1), ratio=(0.90, 1.1)),
    trans.RandomHorizontalFlip(),
    trans.RandomVerticalFlip(),
    trans.RandomRotation(30)
])


img_val_transforms = trans.Compose([
    trans.CenterCrop(image_size),
    trans.Resize((image_size, image_size))
])

train_dataset = GOALS_sub2_dataset(dataset_root=trainset_root, 
                        img_transforms=img_train_transforms,
                        filelists=train_filelists,
                        label_file='../data-PALM/Training/Classification Labels.xlsx')

val_dataset = GOALS_sub2_dataset(dataset_root=trainset_root, 
                        img_transforms=img_val_transforms,
                        filelists=val_filelists,
                        label_file='../data-PALM/Training/Classification Labels.xlsx')

train_loader = paddle.io.DataLoader(
    train_dataset,
    batch_sampler=paddle.io.DistributedBatchSampler(train_dataset, batch_size=batchsize, shuffle=True, drop_last=False),
    num_workers=num_workers,
    return_list=True,
    use_shared_memory=False
)

val_loader = paddle.io.DataLoader(
    val_dataset,
    batch_sampler=paddle.io.DistributedBatchSampler(val_dataset, batch_size=batchsize, shuffle=True, drop_last=False),
    num_workers=num_workers,
    return_list=True,
    use_shared_memory=False
)

model = Model()

if optimizer_type == "adam":
    optimizer = paddle.optimizer.Adam(init_lr, parameters=model.parameters())

criterion = nn.BCEWithLogitsLoss()
train(model, iters, train_loader, val_loader, optimizer, criterion, log_interval=10, eval_interval=100)



# 预测阶段
best_model_path = './classification/best_model_0.9875/model.pdparams'
model = Model()
para_state_dict = paddle.load(best_model_path)
model.set_state_dict(para_state_dict)
model.eval()

test_root = "../data-PALM/Validation/Images"
img_test_transforms = trans.Compose([
    trans.CropCenterSquare(),
    trans.Resize((image_size, image_size))
])

test_dataset = GOALS_sub2_dataset(dataset_root=test_root, 
                        img_transforms=img_test_transforms,
                        mode='test')
cache = []
for img, idx in test_dataset:
    img = img[np.newaxis, ...]
    img = paddle.to_tensor((img / 255.).astype("float32"))
    logits = model(img) 
    m = paddle.nn.Softmax()
    pred = m(logits)
    print(pred.numpy())
    cache.append([idx, pred.numpy()[0][1]])

submission_result = pd.DataFrame(cache, columns=['imgName', 'GC_Pred'])
submission_result[['imgName', 'GC_Pred']].to_csv("./results/submission_val.csv", index=False)

Total Nums: 400, train: 320, val: 80


Exception in thread Thread-4 (_thread_loop):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/nate/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 761, in run_closure
    _threading_Thread_run(self)
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/home/nate/.local/lib/python3.10/site-packages/paddle/io/dataloader/dataloader_iter.py", line 603, in _thread_loop
    batch = self._get_data()
  File "/home/nate/.local/lib/python3.10/site-packages/paddle/io/dataloader/dataloader_iter.py", line 751, in _get_data
    batch.reraise()
  File "/home/nate/.local/lib/python3.10/site-packages/paddle/io/dataloader/worker.py", line 187, in reraise
    raise self.exc_type(msg)
TypeError: DataLoader worker(1) caught TypeError with message:
Traceback (most recent call last):
  File "/home/nate/.local/lib/python3.10/site-packages/paddle

SystemError: (Fatal) Blocking queue is killed because the data reader raises an exception.
  [Hint: Expected killed_ != true, but received killed_:1 == true:1.] (at /paddle/paddle/fluid/operators/reader/blocking_queue.h:175)
