In [1]:
import os
import pandas as pd
import numpy as np
import yaml
from tqdm import tqdm

#セットアップ
tqdm.pandas()

import sys
sys.path.append("/kaggle/src")
from kaggle_metrics.kaggle_kl_div import compute_each_score

In [2]:
model1_oof_path = "/kaggle/working/open_resnet34d/oof.csv"
model2_oof_path = "/kaggle/working/open_tf_efficientnet_b0_ns/oof.csv"
model3_oof_path = "/kaggle/working/open_tf_efficientnet_b1_ns/oof.csv"


model1_oof = pd.read_csv(model1_oof_path)
model2_oof = pd.read_csv(model2_oof_path)
model3_oof = pd.read_csv(model3_oof_path)

In [3]:
# *_voteの最大値の列名を取得
vote_cols = ["seizure_vote", "lpd_vote", "gpd_vote", "lrda_vote", "grda_vote", "other_vote"]
model1_oof["expert_consensus"] = model1_oof[vote_cols].idxmax(axis=1)
model2_oof["expert_consensus"] = model2_oof[vote_cols].idxmax(axis=1)
model3_oof["expert_consensus"] = model3_oof[vote_cols].idxmax(axis=1)

In [4]:
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

In [5]:
pred_cols = [col for col in model1_oof.columns if "pred" in col]
print(pred_cols)

['pred_seizure_vote', 'pred_lpd_vote', 'pred_gpd_vote', 'pred_lrda_vote', 'pred_grda_vote', 'pred_other_vote']


In [6]:
model1_oof[pred_cols] = model1_oof[pred_cols].progress_apply(softmax)
model1_oof[pred_cols] = model1_oof[pred_cols].progress_apply(lambda x: x / np.sum(x), axis=1)

model2_oof[pred_cols] = model2_oof[pred_cols].progress_apply(softmax)
model2_oof[pred_cols] = model2_oof[pred_cols].progress_apply(lambda x: x / np.sum(x), axis=1)

model3_oof[pred_cols] = model3_oof[pred_cols].progress_apply(softmax)
model3_oof[pred_cols] = model3_oof[pred_cols].progress_apply(lambda x: x / np.sum(x), axis=1)

100%|██████████| 6/6 [00:00<00:00, 3192.01it/s]
100%|██████████| 8873/8873 [00:00<00:00, 12386.03it/s]
100%|██████████| 6/6 [00:00<00:00, 3145.33it/s]
100%|██████████| 8873/8873 [00:00<00:00, 12848.01it/s]
100%|██████████| 6/6 [00:00<00:00, 3247.62it/s]
100%|██████████| 8873/8873 [00:00<00:00, 12954.49it/s]


In [7]:
model1_oof["score"] = model1_oof.progress_apply(compute_each_score, axis=1)
model2_oof["score"] = model2_oof.progress_apply(compute_each_score, axis=1)
model3_oof["score"] = model3_oof.progress_apply(compute_each_score, axis=1)

model1_dir = os.path.dirname(model1_oof_path)
model1_oof.to_csv(os.path.join(model1_dir, "oof_with_score.csv"), index=False)

model2_dir = os.path.dirname(model2_oof_path)
model2_oof.to_csv(os.path.join(model2_dir, "oof_with_score.csv"), index=False)

model3_dir = os.path.dirname(model3_oof_path)
model3_oof.to_csv(os.path.join(model3_dir, "oof_with_score.csv"), index=False)

100%|██████████| 8873/8873 [01:23<00:00, 106.46it/s]
100%|██████████| 8873/8873 [01:23<00:00, 105.87it/s]
100%|██████████| 8873/8873 [01:23<00:00, 105.85it/s]


# model1 resnet34d oof check

In [9]:
oof = model1_oof.copy()

score_mean = oof["score"].mean()
print(score_mean)
for fold in range(5):
    print("===")
    valid_ids_path = f"/kaggle/input/valid_spec_ids_fold{fold}.yaml"
    with open(valid_ids_path, 'r') as file:
        valid_ids = yaml.load(file, Loader=yaml.FullLoader)
    valid_oof = oof[oof.spectrogram_id.isin(valid_ids)]
    print(f"fold{fold}:", valid_oof["score"].mean())
    display(valid_oof.groupby("expert_consensus")["score"].mean())
    

0.8783644845160807
===
fold0: 0.8389188659224357


expert_consensus
gpd_vote        0.502438
grda_vote       0.653067
lpd_vote        0.779315
lrda_vote       0.906065
other_vote      0.893501
seizure_vote    0.999929
Name: score, dtype: float64

===
fold1: 0.9271046712356483


expert_consensus
gpd_vote        0.415668
grda_vote       0.939705
lpd_vote        0.569378
lrda_vote       0.640666
other_vote      1.064702
seizure_vote    1.147250
Name: score, dtype: float64

===
fold2: 0.8656428244225776


expert_consensus
gpd_vote        0.532319
grda_vote       1.065567
lpd_vote        0.657901
lrda_vote       0.730884
other_vote      0.877023
seizure_vote    1.010856
Name: score, dtype: float64

===
fold3: 0.9148641117054068


expert_consensus
gpd_vote        0.408033
grda_vote       0.645622
lpd_vote        0.954910
lrda_vote       0.670622
other_vote      0.839314
seizure_vote    1.490212
Name: score, dtype: float64

===
fold4: 0.8456555924099048


expert_consensus
gpd_vote        0.451707
grda_vote       0.650409
lpd_vote        0.619544
lrda_vote       0.666982
other_vote      0.868134
seizure_vote    1.273352
Name: score, dtype: float64

# model2 tf_efficientnet_b0_ns oof check

In [10]:
poof = model2_oof.copy()

score_mean = oof["score"].mean()
print(score_mean)
for fold in range(5):
    print("===")
    valid_ids_path = f"/kaggle/input/valid_spec_ids_fold{fold}.yaml"
    with open(valid_ids_path, 'r') as file:
        valid_ids = yaml.load(file, Loader=yaml.FullLoader)
    valid_oof = oof[oof.spectrogram_id.isin(valid_ids)]
    print(f"fold{fold}:", valid_oof["score"].mean())
    display(valid_oof.groupby("expert_consensus")["score"].mean())

0.8783644845160807
===
fold0: 0.8389188659224357


expert_consensus
gpd_vote        0.502438
grda_vote       0.653067
lpd_vote        0.779315
lrda_vote       0.906065
other_vote      0.893501
seizure_vote    0.999929
Name: score, dtype: float64

===
fold1: 0.9271046712356483


expert_consensus
gpd_vote        0.415668
grda_vote       0.939705
lpd_vote        0.569378
lrda_vote       0.640666
other_vote      1.064702
seizure_vote    1.147250
Name: score, dtype: float64

===
fold2: 0.8656428244225776


expert_consensus
gpd_vote        0.532319
grda_vote       1.065567
lpd_vote        0.657901
lrda_vote       0.730884
other_vote      0.877023
seizure_vote    1.010856
Name: score, dtype: float64

===
fold3: 0.9148641117054068


expert_consensus
gpd_vote        0.408033
grda_vote       0.645622
lpd_vote        0.954910
lrda_vote       0.670622
other_vote      0.839314
seizure_vote    1.490212
Name: score, dtype: float64

===
fold4: 0.8456555924099048


expert_consensus
gpd_vote        0.451707
grda_vote       0.650409
lpd_vote        0.619544
lrda_vote       0.666982
other_vote      0.868134
seizure_vote    1.273352
Name: score, dtype: float64

# model3 tf_efficientnet_b1_ns oof check

In [11]:
oof = model3_oof.copy()

score_mean = oof["score"].mean()
print(score_mean)
for fold in range(5):
    print("===")
    valid_ids_path = f"/kaggle/input/valid_spec_ids_fold{fold}.yaml"
    with open(valid_ids_path, 'r') as file:
        valid_ids = yaml.load(file, Loader=yaml.FullLoader)
    valid_oof = oof[oof.spectrogram_id.isin(valid_ids)]
    print(f"fold{fold}:", valid_oof["score"].mean())
    display(valid_oof.groupby("expert_consensus")["score"].mean())

17.596495920892515
===
fold0: 17.818742787260696


expert_consensus
gpd_vote         5.964445
grda_vote        3.107936
lpd_vote         5.788968
lrda_vote       12.573892
other_vote      28.547948
seizure_vote    17.817416
Name: score, dtype: float64

===
fold1: 17.87935934555252


expert_consensus
gpd_vote         4.843603
grda_vote        3.901545
lpd_vote         4.994613
lrda_vote       13.306241
other_vote      28.885665
seizure_vote    18.738896
Name: score, dtype: float64

===
fold2: 17.50673401373947


expert_consensus
gpd_vote         6.147580
grda_vote        2.973813
lpd_vote         5.178141
lrda_vote       12.860388
other_vote      27.894078
seizure_vote    18.538254
Name: score, dtype: float64

===
fold3: 17.71081056905641


expert_consensus
gpd_vote         4.841262
grda_vote        3.188274
lpd_vote         6.750874
lrda_vote       13.023358
other_vote      28.553765
seizure_vote    18.029043
Name: score, dtype: float64

===
fold4: 17.092968034835817


expert_consensus
gpd_vote         5.407816
grda_vote        3.780723
lpd_vote         3.931087
lrda_vote       14.102914
other_vote      29.046603
seizure_vote    17.945991
Name: score, dtype: float64