In [1]:
from pathlib import Path

import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from isic_helper import get_folds
from isic_helper import compute_pauc, compute_auc

In [2]:
# Active ensemble, one row per member:
# (model name, version tag, directory holding its OOF predictions CSV, blend weight).
_members = [
    ("cb", "v1", "/kaggle/input/isic-scd-cb-train", 0.26),
    ("lgb", "v1", "/kaggle/input/isic-scd-lgb-v1-train", 0.26),
    ("lgb", "v2", "/kaggle/input/isic-scd-lgb-v2-train", 0.22),
    ("resnet18", "v1", "/kaggle/input/isic-scd-resnet18-train", 0.26),
]

# Name of the ground-truth column read from every OOF file.
target_column = "final_target"

# Unpack the rows into the parallel lists used by the cells below.
model_names, versions, paths, wts = (list(column) for column in zip(*_members))

# model_names = ["lgb", "lgb"]
# versions = ["v1", "v2"]
# paths = ["/kaggle/input/isic-scd-lgb-v1-train",
#          "/kaggle/input/isic-scd-lgb-v2-train"]
# target_column = "final_target"
# wts = [0.7, 0.3]

# model_names = ["cb", "lgb", "lgb"]
# versions = ["v1", "v1", "v2"]
# paths = ["/kaggle/input/isic-scd-cb-train",
#          "/kaggle/input/isic-scd-lgb-v1-train",
#          "/kaggle/input/isic-scd-lgb-v2-train"]
# target_column = "final_target"
# wts = [0.35, 0.35, 0.3]

# model_names = ["lgb", "resnet18"]
# versions = ["v1", "v1"]
# paths = ["/kaggle/input/isic-scd-lgb-train", "/kaggle/input/isic-scd-resnet18-train"]
# target_column = "final_target"
# wts = [0.65, 0.35]

In [3]:
# --- Validate the ensemble configuration -----------------------------------
# The four config lists are consumed in lockstep; a length mismatch or a
# repeated "<model>_<version>" key would silently pair weights with the wrong
# model (duplicate keys overwrite each other in `subs`).
if not (len(model_names) == len(versions) == len(paths) == len(wts)):
    raise ValueError("model_names, versions, paths and wts must all have the same length")
member_keys = [f"{model_name}_{version}" for model_name, version in zip(model_names, versions)]
if len(set(member_keys)) != len(member_keys):
    raise ValueError(f"Duplicate model/version pairs in ensemble config: {member_keys}")

# --- Load out-of-fold (OOF) predictions ------------------------------------
# subs[key]            -> OOF DataFrame read from "<path>/oof_preds_<key>.csv"
# cv_pauc_oof_avg[key] -> {"fold_<k>": score} for each member and for "ensemble"
subs = {}
cv_pauc_oof_avg = {}
for key, path in zip(member_keys, paths):
    subs[key] = pd.read_csv(f"{path}/oof_preds_{key}.csv")
    cv_pauc_oof_avg[key] = {}
cv_pauc_oof_avg["ensemble"] = {}

# --- Score every member and the weighted rank blend, fold by fold ----------
# Fold ids are taken from the first member's file.
folds = np.sort(subs[member_keys[0]]["fold"].unique())
for fold in folds:
    fold_target = None   # target of the first member; reference for alignment checks
    fold_ens_preds = []  # one weighted percentile-rank vector per member
    for weight, (key, oof_df) in zip(wts, subs.items()):
        fold_df = oof_df[oof_df["fold"] == fold]
        fold_model_preds = fold_df[f"oof_{key}"]
        # compute_pauc comes from isic_helper (presumably a partial-AUC metric — confirm there).
        cv_pauc_oof_avg[key][f"fold_{fold}"] = compute_pauc(fold_df[target_column], fold_model_preds)

        # The blend below is summed positionally (indexes are dropped), so every
        # member must list the fold's rows in the same order. Identical target
        # vectors are a necessary condition; fail loudly instead of scoring a
        # misaligned blend.
        if fold_target is None:
            fold_target = fold_df[target_column]
        elif not np.array_equal(fold_target.to_numpy(), fold_df[target_column].to_numpy()):
            raise ValueError(
                f"OOF rows of '{key}' are not aligned with '{member_keys[0]}' in fold {fold}"
            )

        # Rank-transform to percentiles within the fold so members with
        # different score scales can be blended.
        fold_ens_preds.append(weight * fold_model_preds.rank(pct=True).values)
    cv_pauc_oof_avg["ensemble"][f"fold_{fold}"] = compute_pauc(fold_target, np.sum(fold_ens_preds, axis=0))

# --- Report per-fold scores with mean and std across folds ------------------
for label, key in [*((member, member) for member in subs), ("Ensemble", "ensemble")]:
    fold_scores = cv_pauc_oof_avg[key]
    score_values = list(fold_scores.values())
    print(f"{label}:\nFold scores: {fold_scores}\n Avg score: {np.mean(score_values)}\n Std score: {np.std(score_values)}")

print(wts)

cb_v1:
Fold scores: {'fold_1': 0.1575768055646264, 'fold_2': 0.16761408614928966, 'fold_3': 0.16231764236063442, 'fold_4': 0.16628848385768302, 'fold_5': 0.16223697973913237}
 Avg score: 0.16320679953427314
 Std score: 0.00353125860250401
lgb_v1:
Fold scores: {'fold_1': 0.16189363377553598, 'fold_2': 0.16460742359112585, 'fold_3': 0.1566555858531654, 'fold_4': 0.17275461431905254, 'fold_5': 0.16268287305687626}
 Avg score: 0.16371882611915117
 Std score: 0.005229320467405825
lgb_v2:
Fold scores: {'fold_1': 0.1536264925535799, 'fold_2': 0.1538011011043681, 'fold_3': 0.15813215235052597, 'fold_4': 0.1689839138410852, 'fold_5': 0.16497719976436606}
 Avg score: 0.15990417192278505
 Std score: 0.006131461649982304
resnet18_v1:
Fold scores: {'fold_1': 0.15586849727633634, 'fold_2': 0.15323922173948376, 'fold_3': 0.14612993098800708, 'fold_4': 0.15783404674844936, 'fold_5': 0.15046869195347348}
 Avg score: 0.15270807774115
 Std score: 0.00411904812453264
Ensemble:
Fold scores: {'fold_1': 0.16