Imports

In [1]:
import argparse
import gc

import joblib
import numpy as np
import pandas as pd
import torch
import wandb
import yaml
import sklearn.metrics
from pytorch_lightning import seed_everything
from scipy.optimize import minimize
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
from tqdm import tqdm

config

In [2]:
with open("./configs/exp296.yaml", encoding="utf-8") as f:
    cfg1 = yaml.safe_load(f)

with open("./configs/exp302.yaml", encoding="utf-8") as f:
    cfg2 = yaml.safe_load(f)
    
with open("./configs/exp288.yaml", encoding="utf-8") as f:
    cfg3 = yaml.safe_load(f)
    
with open("./configs/exp305.yaml", encoding="utf-8") as f:
    cfg4 = yaml.safe_load(f)

Data Preparation

In [3]:
# read csv
train = pd.read_csv(f"../../../data/input/train.csv")
    
# for auc targets
aux_target = ["site_id", "laterality", "view", "implant", "biopsy", "invasive", "BIRADS", "density", "difficult_negative_case", "age"]
train.age.fillna(train.age.mean(), inplace=True)
train["age"] = pd.qcut(train.age, 10, labels=range(10), retbins=False).astype(int)
train[aux_target] = train[aux_target].apply(LabelEncoder().fit_transform)

train_X = train.drop("cancer", axis=1)
train_y = train["cancer"]

function

In [4]:
def pfbeta(labels, predictions, beta=1):
    y_true_count = 0
    ctp = 0
    cfp = 0

    for idx in range(len(labels)):
        prediction = min(max(predictions[idx], 0), 1)
        if labels[idx]:
            y_true_count += 1
            ctp += prediction
        else:
            cfp += prediction

    beta_squared = beta * beta
    c_precision = ctp / (ctp + cfp + 1e-7)
    c_recall = ctp / (y_true_count + 1e-7)
    if c_precision > 0 and c_recall > 0:
        result = (
            (1 + beta_squared)
            * (c_precision * c_recall)
            / (beta_squared * c_precision + c_recall + 1e-7)
        )
        return result
    else:
        return 0
    
def optimal_f1(labels, predictions):
    thres = np.arange(0, 1, 0.01)
    f1s = [pfbeta(labels, predictions > thr) for thr in thres]
    idx = np.argmax(f1s)
    return f1s[idx], thres[idx]

optimize weights only

In [11]:
cfg_list = [cfg1, cfg2, cfg3]

In [12]:
optimized_pfbeta_score_list = []
auc_score_list = []
recall_list = []
specificity_list = []
precision_list = []
threshold_list = []

fold_list = range(cfg1["general"]["n_splits"])
weights_list = []
sgkf = StratifiedGroupKFold(
    n_splits=cfg1["general"]["n_splits"],
    shuffle=True,
    random_state=cfg1["general"]["seed"],
)
for j, fold_n in enumerate(fold_list):
    preds_list = []
    _, valid_indices = list(
        sgkf.split(train_X, train_y, groups=train_X["patient_id"]) #train_X["stratify"].values
    )[fold_n]
    valid_X_cv, valid_y_cv = (
        train_X.iloc[valid_indices].reset_index(drop=True),
        train_y.iloc[valid_indices].reset_index(drop=True),
    )
    
    # retrieve anomaly data
    if not valid_X_cv[valid_X_cv["patient_id"]==27770].empty:
        drop_idx = valid_X_cv[valid_X_cv["patient_id"]==27770].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)
    if not valid_X_cv[valid_X_cv["image_id"]==1942326353].empty:
        drop_idx = valid_X_cv[valid_X_cv["image_id"]==1942326353].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)

    for i, cfg in enumerate(cfg_list):
        valid_preds = joblib.load(f"../../data/output/preds/valid_{cfg['general']['seed']}_{cfg['general']['save_name']}_{fold_n}.preds")
        preds_list.append(valid_preds)
        optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
        print(optimized_pfbeta_score)
        del valid_preds
        gc.collect()
        
    print(f"simple_mean")
    valid_preds = np.mean(preds_list, axis=0)
    print(f"[fold_{fold_n}]")
    pfbeta_score = pfbeta(valid_y_cv.values.flatten(), valid_preds.flatten())
    optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
    auc_score = roc_auc_score(valid_y_cv.values.flatten(), valid_preds.flatten())
    recall = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    specificity = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    print(f"[per image], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_1}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation max
    valid_X_cv["true_target"] = valid_y_cv
    valid_X_cv["preds_target"] = valid_preds
    valid_X_cv_1 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).max().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_2 = optimal_f1(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    specificity = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    print(f"[per patient laterality max], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_2}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean
    valid_X_cv_2 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_3 = optimal_f1(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    specificity = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    print(f"[per patient laterality mean], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_3}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean top3
    valid_X_cv_3 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(3).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_4 = optimal_f1(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    specificity = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    print(f"[per patient laterality mean top3], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_4}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    optimized_pfbeta_score_list.append(optimized_pfbeta_score)
    auc_score_list.append(auc_score)
    recall_list.append(recall)
    specificity_list.append(specificity)
    precision_list.append(precision)
    threshold_list.append(threshold_4)
    # aggregation mean top2
    valid_X_cv_4 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(2).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_5 = optimal_f1(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    specificity = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    print(f"[per patient laterality mean top2], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_5}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")

    """
    def f(x):
        pred = 0
        for i, p in enumerate(preds_list):
            pred += p * x[i]
        score, thr = optimal_f1(valid_y_cv.values.flatten(), pred.flatten())
        return score * -1.0

    init_state = np.ones((len(preds_list))) / len(preds_list)
    bounds = [(0.0, 1.0)] * len(preds_list)
    result = minimize(f, init_state, method="Nelder-Mead", bounds=bounds)
    print(f"optimized_:{-result['fun']}")

    weights = [0] * len(preds_list)
    for i in range(len(preds_list)):
        weights[i] = result["x"][i]
    weights_list.append(weights)
    print(f"weights:{weights}")
    """

"""
avg_weights = np.mean(weights_list, axis=0)
print(f"averaged_weights:{avg_weights}")
"""
print("optimized_pfbeta_score", np.mean(optimized_pfbeta_score_list, axis=0))
print("auc_score", np.mean(auc_score_list, axis=0))
print("recall", np.mean(recall_list, axis=0))
print("specificity", np.mean(specificity_list, axis=0))
print("precision", np.mean(precision_list, axis=0))
print("threshold", np.mean(threshold_list, axis=0))

0.3719007765692301
0.4089775103090206
0.379232456982717
simple_mean
[fold_0]
[per image], pfbeta:0.22741132488858545, AUC:0.8670627354523566
optimize: 0.4339622165094391, threshold:0.59
optimize recall 0.35384615384615387
optimize specificity 0.9946083570465778
optimize precision 0.5609756097560976
[per patient laterality max], pfbeta:0.21311724592276293, AUC:0.9116245540296173
optimize: 0.5029239308231632, threshold:0.79
optimize recall 0.38392857142857145
optimize specificity 0.9972630858706808
optimize precision 0.7288135593220338
[per patient laterality mean], pfbeta:0.23316902500192502, AUC:0.9148998399393968
optimize: 0.4951455809689936, threshold:0.42
optimize recall 0.45535714285714285
optimize specificity 0.9926445432774547
optimize precision 0.5425531914893617
[per patient laterality mean top3], pfbeta:0.23376071481176944, AUC:0.9154985399051854
optimize: 0.5024154087890083, threshold:0.42
optimize recall 0.4642857142857143
optimize specificity 0.9926445432774547
optimize pre

In [13]:
cfg_list = [cfg1]

optimized_pfbeta_score_list = []
auc_score_list = []
recall_list = []
specificity_list = []
precision_list = []
threshold_list = []

fold_list = range(cfg1["general"]["n_splits"])
weights_list = []
sgkf = StratifiedGroupKFold(
    n_splits=cfg1["general"]["n_splits"],
    shuffle=True,
    random_state=cfg1["general"]["seed"],
)
for j, fold_n in enumerate(fold_list):
    preds_list = []
    _, valid_indices = list(
        sgkf.split(train_X, train_y, groups=train_X["patient_id"]) #train_X["stratify"].values
    )[fold_n]
    valid_X_cv, valid_y_cv = (
        train_X.iloc[valid_indices].reset_index(drop=True),
        train_y.iloc[valid_indices].reset_index(drop=True),
    )
    
    # retrieve anomaly data
    if not valid_X_cv[valid_X_cv["patient_id"]==27770].empty:
        drop_idx = valid_X_cv[valid_X_cv["patient_id"]==27770].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)
    if not valid_X_cv[valid_X_cv["image_id"]==1942326353].empty:
        drop_idx = valid_X_cv[valid_X_cv["image_id"]==1942326353].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)

    for i, cfg in enumerate(cfg_list):
        valid_preds = joblib.load(f"../../data/output/preds/valid_{cfg['general']['seed']}_{cfg['general']['save_name']}_{fold_n}.preds")
        preds_list.append(valid_preds)
        optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
        print(optimized_pfbeta_score)
        del valid_preds
        gc.collect()
        
    print(f"simple_mean")
    valid_preds = np.mean(preds_list, axis=0)
    print(f"[fold_{fold_n}]")
    pfbeta_score = pfbeta(valid_y_cv.values.flatten(), valid_preds.flatten())
    optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
    auc_score = roc_auc_score(valid_y_cv.values.flatten(), valid_preds.flatten())
    recall = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    specificity = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    print(f"[per image], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_1}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation max
    valid_X_cv["true_target"] = valid_y_cv
    valid_X_cv["preds_target"] = valid_preds
    valid_X_cv_1 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).max().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_2 = optimal_f1(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    specificity = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    print(f"[per patient laterality max], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_2}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean
    valid_X_cv_2 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_3 = optimal_f1(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    specificity = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    print(f"[per patient laterality mean], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_3}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean top3
    valid_X_cv_3 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(3).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_4 = optimal_f1(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    specificity = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    print(f"[per patient laterality mean top3], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_4}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    optimized_pfbeta_score_list.append(optimized_pfbeta_score)
    auc_score_list.append(auc_score)
    recall_list.append(recall)
    specificity_list.append(specificity)
    precision_list.append(precision)
    threshold_list.append(threshold_4)
    # aggregation mean top2
    valid_X_cv_4 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(2).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_5 = optimal_f1(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    specificity = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    print(f"[per patient laterality mean top2], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_5}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")

    """
    def f(x):
        pred = 0
        for i, p in enumerate(preds_list):
            pred += p * x[i]
        score, thr = optimal_f1(valid_y_cv.values.flatten(), pred.flatten())
        return score * -1.0

    init_state = np.ones((len(preds_list))) / len(preds_list)
    bounds = [(0.0, 1.0)] * len(preds_list)
    result = minimize(f, init_state, method="Nelder-Mead", bounds=bounds)
    print(f"optimized_:{-result['fun']}")

    weights = [0] * len(preds_list)
    for i in range(len(preds_list)):
        weights[i] = result["x"][i]
    weights_list.append(weights)
    print(f"weights:{weights}")
    """

"""
avg_weights = np.mean(weights_list, axis=0)
print(f"averaged_weights:{avg_weights}")
"""
print("optimized_pfbeta_score", np.mean(optimized_pfbeta_score_list, axis=0))
print("auc_score", np.mean(auc_score_list, axis=0))
print("recall", np.mean(recall_list, axis=0))
print("specificity", np.mean(specificity_list, axis=0))
print("precision", np.mean(precision_list, axis=0))
print("threshold", np.mean(threshold_list, axis=0))

0.3719007765692301
simple_mean
[fold_0]
[per image], pfbeta:0.22885598488076966, AUC:0.8353213096623311
optimize: 0.3719007765692301, threshold:0.6900000000000001
optimize recall 0.34615384615384615
optimize specificity 0.989965553392242
optimize precision 0.4017857142857143
[per patient laterality max], pfbeta:0.20884187291051787, AUC:0.8936650823517912
optimize: 0.44067791912924625, threshold:0.92
optimize recall 0.3482142857142857
optimize specificity 0.9955525145398563
optimize precision 0.6
[per patient laterality mean], pfbeta:0.23504020653503166, AUC:0.8976749975563267
optimize: 0.47398839310368307, threshold:0.5700000000000001
optimize recall 0.36607142857142855
optimize specificity 0.996578857338351
optimize precision 0.6721311475409836
[per patient laterality mean top3], pfbeta:0.23502937313698785, AUC:0.8984875189384682
optimize: 0.471264321403096, threshold:0.5700000000000001
optimize recall 0.36607142857142855
optimize specificity 0.9964078002052685
optimize precision 0.66

In [14]:
cfg_list = [cfg2]

optimized_pfbeta_score_list = []
auc_score_list = []
recall_list = []
specificity_list = []
precision_list = []
threshold_list = []

fold_list = range(cfg1["general"]["n_splits"])
weights_list = []
sgkf = StratifiedGroupKFold(
    n_splits=cfg1["general"]["n_splits"],
    shuffle=True,
    random_state=cfg1["general"]["seed"],
)
for j, fold_n in enumerate(fold_list):
    preds_list = []
    _, valid_indices = list(
        sgkf.split(train_X, train_y, groups=train_X["patient_id"]) #train_X["stratify"].values
    )[fold_n]
    valid_X_cv, valid_y_cv = (
        train_X.iloc[valid_indices].reset_index(drop=True),
        train_y.iloc[valid_indices].reset_index(drop=True),
    )
    
    # retrieve anomaly data
    if not valid_X_cv[valid_X_cv["patient_id"]==27770].empty:
        drop_idx = valid_X_cv[valid_X_cv["patient_id"]==27770].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)
    if not valid_X_cv[valid_X_cv["image_id"]==1942326353].empty:
        drop_idx = valid_X_cv[valid_X_cv["image_id"]==1942326353].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)

    for i, cfg in enumerate(cfg_list):
        valid_preds = joblib.load(f"../../data/output/preds/valid_{cfg['general']['seed']}_{cfg['general']['save_name']}_{fold_n}.preds")
        preds_list.append(valid_preds)
        optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
        print(optimized_pfbeta_score)
        del valid_preds
        gc.collect()
        
    print(f"simple_mean")
    valid_preds = np.mean(preds_list, axis=0)
    print(f"[fold_{fold_n}]")
    pfbeta_score = pfbeta(valid_y_cv.values.flatten(), valid_preds.flatten())
    optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
    auc_score = roc_auc_score(valid_y_cv.values.flatten(), valid_preds.flatten())
    recall = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    specificity = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    print(f"[per image], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_1}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation max
    valid_X_cv["true_target"] = valid_y_cv
    valid_X_cv["preds_target"] = valid_preds
    valid_X_cv_1 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).max().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_2 = optimal_f1(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    specificity = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    print(f"[per patient laterality max], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_2}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean
    valid_X_cv_2 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_3 = optimal_f1(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    specificity = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    print(f"[per patient laterality mean], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_3}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean top3
    valid_X_cv_3 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(3).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_4 = optimal_f1(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    specificity = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    print(f"[per patient laterality mean top3], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_4}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    optimized_pfbeta_score_list.append(optimized_pfbeta_score)
    auc_score_list.append(auc_score)
    recall_list.append(recall)
    specificity_list.append(specificity)
    precision_list.append(precision)
    threshold_list.append(threshold_4)
    # aggregation mean top2
    valid_X_cv_4 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(2).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_5 = optimal_f1(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    specificity = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    print(f"[per patient laterality mean top2], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_5}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")

    """
    def f(x):
        pred = 0
        for i, p in enumerate(preds_list):
            pred += p * x[i]
        score, thr = optimal_f1(valid_y_cv.values.flatten(), pred.flatten())
        return score * -1.0

    init_state = np.ones((len(preds_list))) / len(preds_list)
    bounds = [(0.0, 1.0)] * len(preds_list)
    result = minimize(f, init_state, method="Nelder-Mead", bounds=bounds)
    print(f"optimized_:{-result['fun']}")

    weights = [0] * len(preds_list)
    for i in range(len(preds_list)):
        weights[i] = result["x"][i]
    weights_list.append(weights)
    print(f"weights:{weights}")
    """

"""
avg_weights = np.mean(weights_list, axis=0)
print(f"averaged_weights:{avg_weights}")
"""
print("optimized_pfbeta_score", np.mean(optimized_pfbeta_score_list, axis=0))
print("auc_score", np.mean(auc_score_list, axis=0))
print("recall", np.mean(recall_list, axis=0))
print("specificity", np.mean(specificity_list, axis=0))
print("precision", np.mean(precision_list, axis=0))
print("threshold", np.mean(threshold_list, axis=0))

0.4089775103090206
simple_mean
[fold_0]
[per image], pfbeta:0.2614931608244036, AUC:0.8390977638506469
optimize: 0.4089775103090206, threshold:0.88
optimize recall 0.3153846153846154
optimize specificity 0.9955818481353902
optimize precision 0.5815602836879432
[per patient laterality max], pfbeta:0.2425098332531516, AUC:0.8940278151116758
optimize: 0.4727272223140549, threshold:0.88
optimize recall 0.4642857142857143
optimize specificity 0.9904208005473828
optimize precision 0.48148148148148145
[per patient laterality mean], pfbeta:0.27159718924848797, AUC:0.8927173952885978
optimize: 0.49999994968850614, threshold:0.49
optimize recall 0.4732142857142857
optimize specificity 0.9919603147451249
optimize precision 0.53
[per patient laterality mean top3], pfbeta:0.2720428122000737, AUC:0.89348180685206
optimize: 0.49765253182570107, threshold:0.49
optimize recall 0.4732142857142857
optimize specificity 0.9917892576120424
optimize precision 0.5247524752475248
[per patient laterality mean t

In [15]:
cfg_list = [cfg3]

optimized_pfbeta_score_list = []
auc_score_list = []
recall_list = []
specificity_list = []
precision_list = []
threshold_list = []

fold_list = range(cfg1["general"]["n_splits"])
weights_list = []
sgkf = StratifiedGroupKFold(
    n_splits=cfg1["general"]["n_splits"],
    shuffle=True,
    random_state=cfg1["general"]["seed"],
)
for j, fold_n in enumerate(fold_list):
    preds_list = []
    _, valid_indices = list(
        sgkf.split(train_X, train_y, groups=train_X["patient_id"]) #train_X["stratify"].values
    )[fold_n]
    valid_X_cv, valid_y_cv = (
        train_X.iloc[valid_indices].reset_index(drop=True),
        train_y.iloc[valid_indices].reset_index(drop=True),
    )
    
    # retrieve anomaly data
    if not valid_X_cv[valid_X_cv["patient_id"]==27770].empty:
        drop_idx = valid_X_cv[valid_X_cv["patient_id"]==27770].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)
    if not valid_X_cv[valid_X_cv["image_id"]==1942326353].empty:
        drop_idx = valid_X_cv[valid_X_cv["image_id"]==1942326353].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)

    for i, cfg in enumerate(cfg_list):
        valid_preds = joblib.load(f"../../data/output/preds/valid_{cfg['general']['seed']}_{cfg['general']['save_name']}_{fold_n}.preds")
        preds_list.append(valid_preds)
        optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
        print(optimized_pfbeta_score)
        del valid_preds
        gc.collect()
        
    print(f"simple_mean")
    valid_preds = np.mean(preds_list, axis=0)
    print(f"[fold_{fold_n}]")
    pfbeta_score = pfbeta(valid_y_cv.values.flatten(), valid_preds.flatten())
    optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
    auc_score = roc_auc_score(valid_y_cv.values.flatten(), valid_preds.flatten())
    recall = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    specificity = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    print(f"[per image], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_1}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation max
    valid_X_cv["true_target"] = valid_y_cv
    valid_X_cv["preds_target"] = valid_preds
    valid_X_cv_1 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).max().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_2 = optimal_f1(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    specificity = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    print(f"[per patient laterality max], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_2}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean
    valid_X_cv_2 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_3 = optimal_f1(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    specificity = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    print(f"[per patient laterality mean], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_3}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean top3
    valid_X_cv_3 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(3).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_4 = optimal_f1(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    specificity = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    print(f"[per patient laterality mean top3], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_4}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    optimized_pfbeta_score_list.append(optimized_pfbeta_score)
    auc_score_list.append(auc_score)
    recall_list.append(recall)
    specificity_list.append(specificity)
    precision_list.append(precision)
    threshold_list.append(threshold_4)
    # aggregation mean top2
    valid_X_cv_4 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(2).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_5 = optimal_f1(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    specificity = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    print(f"[per patient laterality mean top2], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_5}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")

    """
    def f(x):
        pred = 0
        for i, p in enumerate(preds_list):
            pred += p * x[i]
        score, thr = optimal_f1(valid_y_cv.values.flatten(), pred.flatten())
        return score * -1.0

    init_state = np.ones((len(preds_list))) / len(preds_list)
    bounds = [(0.0, 1.0)] * len(preds_list)
    result = minimize(f, init_state, method="Nelder-Mead", bounds=bounds)
    print(f"optimized_:{-result['fun']}")

    weights = [0] * len(preds_list)
    for i in range(len(preds_list)):
        weights[i] = result["x"][i]
    weights_list.append(weights)
    print(f"weights:{weights}")
    """

"""
avg_weights = np.mean(weights_list, axis=0)
print(f"averaged_weights:{avg_weights}")
"""
print("optimized_pfbeta_score", np.mean(optimized_pfbeta_score_list, axis=0))
print("auc_score", np.mean(auc_score_list, axis=0))
print("recall", np.mean(recall_list, axis=0))
print("specificity", np.mean(specificity_list, axis=0))
print("precision", np.mean(precision_list, axis=0))
print("threshold", np.mean(threshold_list, axis=0))

0.379232456982717
simple_mean
[fold_0]
[per image], pfbeta:0.19833015567801507, AUC:0.8491673483024389
optimize: 0.379232456982717, threshold:0.7000000000000001
optimize recall 0.3230769230769231
optimize specificity 0.9925864909390445
optimize precision 0.45901639344262296
[per patient laterality max], pfbeta:0.18540966670198733, AUC:0.8881385623869802
optimize: 0.4390243464009561, threshold:0.89
optimize recall 0.32142857142857145
optimize specificity 0.9972630858706808
optimize precision 0.6923076923076923
[per patient laterality mean], pfbeta:0.20024016333282862, AUC:0.8931885660524901
optimize: 0.4485980805310563, threshold:0.44
optimize recall 0.42857142857142855
optimize specificity 0.9907629148135477
optimize precision 0.47058823529411764
[per patient laterality mean top3], pfbeta:0.20141135335057408, AUC:0.8934489699916915
optimize: 0.44651157757923765, threshold:0.44
optimize recall 0.42857142857142855
optimize specificity 0.9905918576804653
optimize precision 0.4660194174757

In [None]:
cfg_list = [cfg1, cfg2]

In [None]:
optimized_pfbeta_score_list = []
auc_score_list = []
recall_list = []
specificity_list = []
precision_list = []
threshold_list = []

fold_list = range(cfg1["general"]["n_splits"])
weights_list = []
sgkf = StratifiedGroupKFold(
    n_splits=cfg1["general"]["n_splits"],
    shuffle=True,
    random_state=cfg1["general"]["seed"],
)
for j, fold_n in enumerate(fold_list):
    preds_list = []
    _, valid_indices = list(
        sgkf.split(train_X, train_y, groups=train_X["patient_id"]) #train_X["stratify"].values
    )[fold_n]
    valid_X_cv, valid_y_cv = (
        train_X.iloc[valid_indices].reset_index(drop=True),
        train_y.iloc[valid_indices].reset_index(drop=True),
    )
    
    # retrieve anomaly data
    if not valid_X_cv[valid_X_cv["patient_id"]==27770].empty:
        drop_idx = valid_X_cv[valid_X_cv["patient_id"]==27770].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)
    if not valid_X_cv[valid_X_cv["image_id"]==1942326353].empty:
        drop_idx = valid_X_cv[valid_X_cv["image_id"]==1942326353].index
        valid_X_cv = valid_X_cv.drop(valid_X_cv.index[drop_idx]).reset_index(drop=True)
        valid_y_cv = valid_y_cv.drop(valid_y_cv.index[drop_idx]).reset_index(drop=True)

    for i, cfg in enumerate(cfg_list):
        valid_preds = joblib.load(f"../../data/output/preds/valid_{cfg['general']['seed']}_{cfg['general']['save_name']}_{fold_n}.preds")
        preds_list.append(valid_preds)
        optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
        print(optimized_pfbeta_score)
        del valid_preds
        gc.collect()
        
    print(f"simple_mean")
    valid_preds = np.mean(preds_list, axis=0)
    print(f"[fold_{fold_n}]")
    pfbeta_score = pfbeta(valid_y_cv.values.flatten(), valid_preds.flatten())
    optimized_pfbeta_score, threshold_1 = optimal_f1(valid_y_cv.values.flatten(), valid_preds.flatten())
    auc_score = roc_auc_score(valid_y_cv.values.flatten(), valid_preds.flatten())
    recall = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    specificity = sklearn.metrics.recall_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_y_cv.values.flatten(), valid_preds.flatten() > threshold_1)
    print(f"[per image], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_1}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation max
    valid_X_cv["true_target"] = valid_y_cv
    valid_X_cv["preds_target"] = valid_preds
    valid_X_cv_1 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).max().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_2 = optimal_f1(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    specificity = sklearn.metrics.recall_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_1["true_target"].values.flatten(), valid_X_cv_1["preds_target"].values.flatten() > threshold_2)
    print(f"[per patient laterality max], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_2}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean
    valid_X_cv_2 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_3 = optimal_f1(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    specificity = sklearn.metrics.recall_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_2["true_target"].values.flatten(), valid_X_cv_2["preds_target"].values.flatten() > threshold_3)
    print(f"[per patient laterality mean], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_3}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    # aggregation mean top3
    valid_X_cv_3 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(3).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_4 = optimal_f1(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    specificity = sklearn.metrics.recall_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_3["true_target"].values.flatten(), valid_X_cv_3["preds_target"].values.flatten() > threshold_4)
    print(f"[per patient laterality mean top3], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_4}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")
    optimized_pfbeta_score_list.append(optimized_pfbeta_score)
    auc_score_list.append(auc_score)
    recall_list.append(recall)
    specificity_list.append(specificity)
    precision_list.append(precision)
    threshold_list.append(threshold_4)
    # aggregation mean top2
    valid_X_cv_4 = valid_X_cv[["patient_id", "laterality", "true_target", "preds_target"]].groupby(["patient_id", "laterality"]).head(2).groupby(["patient_id", "laterality"]).mean().reset_index()
    pfbeta_score = pfbeta(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    optimized_pfbeta_score, threshold_5 = optimal_f1(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    auc_score = roc_auc_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten())
    recall = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    specificity = sklearn.metrics.recall_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5, pos_label=0)
    precision = sklearn.metrics.precision_score(valid_X_cv_4["true_target"].values.flatten(), valid_X_cv_4["preds_target"].values.flatten() > threshold_5)
    print(f"[per patient laterality mean top2], pfbeta:{pfbeta_score}, AUC:{auc_score}")
    print(f"optimize: {optimized_pfbeta_score}, threshold:{threshold_5}")
    print(f"optimize recall {recall}")
    print(f"optimize specificity {specificity}")
    print(f"optimize precision {precision}")

    """
    def f(x):
        pred = 0
        for i, p in enumerate(preds_list):
            pred += p * x[i]
        score, thr = optimal_f1(valid_y_cv.values.flatten(), pred.flatten())
        return score * -1.0

    init_state = np.ones((len(preds_list))) / len(preds_list)
    bounds = [(0.0, 1.0)] * len(preds_list)
    result = minimize(f, init_state, method="Nelder-Mead", bounds=bounds)
    print(f"optimized_:{-result['fun']}")

    weights = [0] * len(preds_list)
    for i in range(len(preds_list)):
        weights[i] = result["x"][i]
    weights_list.append(weights)
    print(f"weights:{weights}")
    """

"""
avg_weights = np.mean(weights_list, axis=0)
print(f"averaged_weights:{avg_weights}")
"""
print("optimized_pfbeta_score", np.mean(optimized_pfbeta_score_list, axis=0))
print("auc_score", np.mean(auc_score_list, axis=0))
print("recall", np.mean(recall_list, axis=0))
print("specificity", np.mean(specificity_list, axis=0))
print("precision", np.mean(precision_list, axis=0))
print("threshold", np.mean(threshold_list, axis=0))