In [1]:
import io
import os.path

import h5py
import numpy as np
import pandas as pd
import pandas.api.types
from PIL import Image

import torch
import torch.nn as nn
from torchvision import models, transforms

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, auc


# Notebook-wide tensor display setting: print 5 decimal places and never switch
# to scientific notation when a tensor is shown.
torch.set_printoptions(precision=5, sci_mode=False)

In [2]:
class ParticipantVisibleError(Exception):
    """Error raised by ``score`` when the submission itself is malformed."""


def score(solution: pd.DataFrame, submission: pd.DataFrame, min_tpr: float = 0.80) -> float:
    """Partial ROC AUC restricted to the region where the true-positive rate is >= ``min_tpr``.

    ``roc_curve``/``auc`` can only be truncated along the FPR axis here, so both the
    labels and the scores are flipped first: labels become ``|y - 1|`` and scores are
    negated. On the flipped problem, "FPR <= 1 - min_tpr" selects the same operating
    points as "TPR >= min_tpr" on the original one.

    Args:
        solution: Ground-truth frame; ``solution.values`` is expected to hold 0/1 labels.
        submission: Prediction frame; ``submission.values`` must have a numeric dtype.
        min_tpr: Lower bound on the true-positive rate, in ``[0, 1)``. ``0`` yields the
            full (untruncated) AUC of the flipped curve.

    Returns:
        Area under the flipped ROC curve for FPR in ``[0, 1 - min_tpr]``
        (so at most ``1 - min_tpr``).

    Raises:
        ParticipantVisibleError: If the submission values are not numeric.
        ValueError: If ``min_tpr`` is outside ``[0, 1)``.
    """
    if not pandas.api.types.is_numeric_dtype(submission.values):
        raise ParticipantVisibleError('Submission target column must be numeric')

    # Validate min_tpr itself, and do it before the ROC computation. The previous check
    # ran on abs(1 - min_tpr), so e.g. min_tpr=1.5 was silently scored as if it were 0.5.
    if not (0 <= min_tpr < 1):
        raise ValueError("Expected min_tpr in range [0, 1), got: %r" % min_tpr)

    # Flip labels (0 <-> 1) and negate scores; see the docstring for why.
    v_gt = abs(np.asarray(solution.values) - 1)
    v_pred = -1.0 * np.asarray(submission.values)

    # After validation this is simply 1 - min_tpr, a value in (0, 1].
    max_fpr = abs(1 - min_tpr)

    # fpr: false-positive rate, tpr: true-positive rate (recall) of the flipped problem.
    fpr, tpr, _ = roc_curve(v_gt, v_pred, sample_weight=None)
    if max_fpr == 1:
        return auc(fpr, tpr)

    # fpr is sorted ascending: find the first index whose fpr exceeds max_fpr, then add
    # one linearly interpolated point exactly at max_fpr so the curve ends there.
    stop = np.searchsorted(fpr, max_fpr, "right")
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    fpr = np.append(fpr[:stop], max_fpr)
    partial_auc = auc(fpr, tpr)

    return partial_auc

In [3]:
class NetWork(nn.Module):
    """Two-branch classifier that fuses an image embedding with tabular metadata.

    * ``resnet``: a torchvision ResNet-34 whose final ``fc`` layer is replaced so that
      it emits a 512-dimensional image embedding.
    * ``layer``: an MLP (``in_features -> 256 -> 512 -> 512``) embedding the metadata.
    * ``merge_linear``: a linear head over the concatenated 1024-dimensional vector.

    The attribute names and the ``nn.Sequential`` layout are part of the state-dict
    keys, so they must not be renamed if existing checkpoints are to keep loading.

    Args:
        in_features: Number of metadata features per sample.
        num_classes: Number of output logits.
        pretrained_path: File with ResNet-34 backbone weights to load before the
            ``fc`` layer is replaced. Pass ``None`` to skip loading them, e.g. when a
            full checkpoint of this module is loaded right after construction.
    """

    def __init__(self, in_features, num_classes,
                 pretrained_path=r'./models/resnet34-b627a593.pth'):
        super().__init__()
        resnet = models.resnet34()
        if pretrained_path is not None:
            # NOTE(security): torch.load unpickles the file; only use trusted weights.
            # map_location='cpu' keeps construction independent of the saving device.
            resnet.load_state_dict(torch.load(pretrained_path, map_location='cpu'))
        # Swap the stock classification head for a 512-d embedding layer. This happens
        # after loading so the backbone weight file's keys and shapes still match.
        resnet.fc = nn.Linear(resnet.fc.in_features, 512)
        self.resnet = resnet
        self.layer = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
        )
        # 512 image features + 512 metadata features.
        self.merge_linear = nn.Linear(1024, num_classes)

    def forward(self, img_, meta_):
        """Return logits for a batch of images ``img_`` and metadata rows ``meta_``.

        Both branch outputs are concatenated along dim 1 before the final linear head,
        so the two inputs must share the same batch size.
        """
        img_output = self.resnet(img_)
        meta_output = self.layer(meta_)
        fused = torch.cat([img_output, meta_output], dim=1)
        return self.merge_linear(fused)

## 加载数据

In [4]:
# --- Paths and device -------------------------------------------------------------
data_path = r"./isic-2024-challenge"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Metadata columns: the 'isic_id' key followed by the numeric model features ----
use_cols = ['isic_id', 'tbp_lv_A', 'tbp_lv_Aext', 'tbp_lv_B', 'tbp_lv_Bext', 
            'tbp_lv_C', 'tbp_lv_Cext', 'tbp_lv_H', 'tbp_lv_Hext', 'tbp_lv_L',
            'tbp_lv_Lext', 'tbp_lv_areaMM2', 'tbp_lv_area_perim_ratio', 
            'tbp_lv_color_std_mean', 'tbp_lv_deltaA', 'tbp_lv_deltaB', 
            'tbp_lv_deltaL', 'tbp_lv_deltaLB', 'tbp_lv_deltaLBnorm',
            'tbp_lv_eccentricity', 'tbp_lv_nevi_confidence', 'tbp_lv_norm_border', 
            'tbp_lv_norm_color', 'tbp_lv_radial_color_std_max', 'tbp_lv_stdL', 
            'tbp_lv_stdLExt', 'tbp_lv_symm_2axis', 'tbp_lv_symm_2axis_angle']

# --- Model ------------------------------------------------------------------------
# One metadata input per column except the 'isic_id' key (27 for the list above).
net = NetWork(in_features=len(use_cols) - 1, num_classes=2)
auc_path = r'./models/best_auc_score.pkl'
# The checkpoint is mapped to CPU here; the model is moved to `device` later.
# load_state_dict is called with its default strictness, so a key mismatch raises
# instead of being reported through these two lists.
missing_keys, unexpected_keys = net.load_state_dict(
    torch.load(auc_path, map_location=torch.device('cpu'))['net_state_dict']
)

# --- Data -------------------------------------------------------------------------
# `usecols` only selects columns; the resulting order is the CSV's, not the list's.
# Re-index with `use_cols` so that the column order is exactly the one declared above,
# which is the order later code assumes when it labels values with `use_cols[1:]`.
train_metadata = pd.read_csv(os.path.join(data_path, 'train-metadata.csv'), usecols=use_cols)[use_cols]
test_metadata = pd.read_csv(os.path.join(data_path, 'test-metadata.csv'), usecols=use_cols)[use_cols]
# Kept open for the inference loop; it is closed after the submission is written.
test_img_h5 = h5py.File(os.path.join(data_path, 'test-image.hdf5'), 'r')


In [5]:
# Image preprocessing for inference: convert to a tensor first, then resize the
# tensor to 128x128 with antialiasing.
pred_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=(128, 128), antialias=True),
])

# Drop the key column without mutating in place; `errors='ignore'` keeps this cell
# re-runnable (an in-place drop raises KeyError the second time the cell is executed).
train_metadata = train_metadata.drop(columns='isic_id', errors='ignore')

# Fit the StandardScaler on the training metadata; it is applied to the test rows later.
standard = StandardScaler()
standard.fit(train_metadata)

# Keep the ids as an ordered Series and index the test features by id.
# Guarded so that re-running the cell after the column has moved to the index is a no-op.
if 'isic_id' in test_metadata.columns:
    test_isic_id = test_metadata['isic_id']
    test_metadata = test_metadata.set_index('isic_id')

In [6]:
# Batched inference over the test set. Collects one 1-D tensor of positive-class
# probabilities per batch in `preds`.
preds = []
pred_len = len(test_isic_id)
pred_batch = 128
net.eval().to(device=device)
# Ceiling division: number of batches needed to cover every sample.
batches = (pred_len + pred_batch - 1) // pred_batch
with torch.no_grad():
    for i in range(batches):
        si = i * pred_batch
        ei = min(si + pred_batch, pred_len)
        # Positional slice, so this does not depend on the Series having a default index.
        batch_ids = test_isic_id.iloc[si:ei]

        # Decode each encoded image stored under its id in the HDF5 file and preprocess it.
        pred_images = []
        for isic_id in batch_ids:
            pred_img = Image.open(io.BytesIO(test_img_h5[isic_id][...]))
            pred_images.append(pred_transform(np.array(pred_img)))
        batch_img = torch.stack(pred_images, dim=0).to(device=device, dtype=torch.float32)

        # Standardise the whole batch of metadata rows in one call instead of building a
        # one-row DataFrame per sample. Rows si:ei of `test_metadata` line up with
        # `batch_ids` because its index was created from `test_isic_id` in the same order.
        batch_meta = torch.tensor(
            standard.transform(test_metadata.iloc[si:ei]), dtype=torch.float32
        ).to(device=device)

        pred = net(batch_img, batch_meta)
        # Two-logit output: softmax over classes, keep the probability of class 1.
        pred_proba = torch.softmax(pred, dim=1)[:, 1]
        # Move off the device right away so results do not accumulate in GPU memory.
        preds.append(pred_proba.cpu())
        if (i + 1) % 20 == 0:
            print(f'{i + 1}/{batches}')

In [7]:
# Merge the per-batch predictions. Concatenation already handles a single batch, so
# only the empty case (no test rows, hence no batches) needs a guard.
preds = torch.concat(preds, dim=0) if len(preds) > 0 else torch.empty(0)

# Build the frame column-wise so `target` keeps a numeric dtype. Stacking the two lists
# as rows and transposing turns every column into `object`, and it pads with NaN
# instead of failing when the two lengths differ.
result = pd.DataFrame({
    'isic_id': test_isic_id.tolist(),
    'target': preds.tolist(),
})
print(result)
result.to_csv('submission.csv', index=False, header=True)
# Release the HDF5 handle opened when the data was loaded.
test_img_h5.close()

        isic_id    target
0  ISIC_0015657  0.001436
1  ISIC_0015729   0.00002
2  ISIC_0015740   0.00013
