In [1]:
import pandas as pd
import yaml
import sys
import os
from glob import glob
from pathlib import Path
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt
import numpy as np

import warnings
warnings.filterwarnings('ignore')

sys.path.append('/kaggle/src')
from utils.xgb import fit_xgb
from utils.metric import compute_comptetition_metric
from utils.postprocess import post_process
from utils.set_seed import seed_base
from feature_engineering.ranker import generate_ranker_features

# Root of the project sources; config.yaml is read from here.
PACKAGE_DIR = Path("/kaggle/src")

# Load the experiment configuration. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open until garbage collection.
with open(PACKAGE_DIR / "config.yaml", "r") as config_file:
    CFG = yaml.safe_load(config_file)
print(CFG["ranker"]["execution"]["exp_id"])

# Directory holding this experiment's artefacts (submission.csv is read from it below).
CFG["output_dir"] = f"/kaggle/output/{CFG['ranker']['execution']['exp_id']}"
# Seed with the configured value (seed_base is a project helper; presumably it seeds
# the RNGs used by the experiment -- confirm in utils.set_seed).
seed_base(CFG["env"]["seed"])

2023-10-06 14:00:34.930876: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-06 14:00:34.997487: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-06 14:00:35.441542: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/lib/x86_64-linux-gnu:/usr/local/nvidia/lib:/u

exp_075


In [2]:
# Read the candidate events stored in this experiment's output directory
# and preview the first rows.
submission_path = os.path.join(CFG["output_dir"], "submission.csv")
sub = pd.read_csv(submission_path)
sub.head()

Unnamed: 0,score,key_step,step,event,level,series_id
0,0.039067,4331.5,4275,onset,13.0,038441c925bb
1,0.75357,4691.5,4635,onset,11.0,038441c925bb
2,2.486544,4811.5,4755,onset,9.0,038441c925bb
3,5.779183,4931.5,4875,onset,7.0,038441c925bb
4,7.027495,4979.5,4923,onset,5.0,038441c925bb


In [3]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee that
# `scipy.interpolate` is loaded, so attribute access could fail with AttributeError
# unless some other package happened to import it first.
import scipy.interpolate

# Knots shared by the two piecewise-linear mappings below: an integer score k (0..10)
# corresponds to a window half-width, in steps, of RANGE_KNOTS[k].
SCORE_KNOTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
RANGE_KNOTS = [0, 12, 36, 60, 90, 120, 150, 180, 240, 300, 360]

# score -> half-width. Scores in [-100, 0] map to 0 and scores in [10, 100] map to 360;
# anything outside [-100, 100] raises ValueError (interp1d's default bounds_error).
score2range = scipy.interpolate.interp1d([-100] + SCORE_KNOTS + [100], [0] + RANGE_KNOTS + [360])
# half-width -> score, the inverse mapping on [0, 360]; values outside raise ValueError.
range2score = scipy.interpolate.interp1d(RANGE_KNOTS, SCORE_KNOTS)
# Sanity check: halfway between score 1 (12 steps) and score 2 (36 steps) gives 24.
score2range(1.5)

array(24.)

In [4]:
from multiprocessing import Pool

def process_group(df, score_to_range=None, range_to_score=None):
    """Greedily re-rank candidate rows, damping scores close to already-picked steps.

    Repeatedly picks the not-yet-picked row with the highest damped score
    (``score / reduce_rate[step]``), records that damped score, and then raises
    the divisor ``reduce_rate`` for the steps around the picked one. The
    neighbourhood half-width is ``score_to_range(picked_score)`` and the divisor
    at distance ``r`` is ``range_to_score(half_width - r) + 1``, so it is largest
    next to the picked step and decays with distance. Divisors only ever grow
    (element-wise maximum with the previous value).

    Parameters
    ----------
    df : pandas.DataFrame
        Candidates with a numeric ``score`` column and an integer, non-negative
        ``step`` column. Scores are assumed to be finite.
    score_to_range : callable, optional
        Maps a damped score to a neighbourhood half-width in steps. Defaults to
        the notebook-level ``score2range`` interpolator.
    range_to_score : callable, optional
        Maps an array of remaining distances to score-like values. Defaults to
        the notebook-level ``range2score`` interpolator.

    Returns
    -------
    list of pandas.Series
        One row per input row, in the order they were picked. Each row carries
        the original columns plus ``reduced_score``. An empty frame yields ``[]``.
    """
    if score_to_range is None:
        score_to_range = score2range
    if range_to_score is None:
        range_to_score = range2score

    # An empty frame has no maximum step to size the divisor array with.
    if len(df) == 0:
        return []

    df = df.sort_values("score", ascending=False).reset_index(drop=True)
    scores = df["score"].to_numpy()
    steps = df["step"].to_numpy()
    picked = np.zeros(len(df), dtype=bool)
    # Divisor applied to a candidate's score, indexed by step; 1 means "not damped".
    # The +500 padding leaves room for the neighbourhood to the right of the last step.
    reduce_rate = np.ones(steps.max() + 500)

    order = []
    reduced_scores = []
    for _ in range(len(df)):
        # Damped score of every candidate; picked ones are masked out. argmax returns
        # the first maximum, i.e. ties go to the row that sorted first.
        candidates = np.where(picked, -np.inf, scores / reduce_rate[steps])
        best_idx = int(np.argmax(candidates))
        best_score = candidates[best_idx]
        best_step = steps[best_idx]
        picked[best_idx] = True
        order.append(best_idx)
        reduced_scores.append(best_score)

        range_ = score_to_range(best_score)
        offsets = np.arange(1, int(range_))
        if offsets.size == 0:
            continue
        penalties = range_to_score(range_ - offsets) + 1
        # Apply the same penalties on both sides of the picked step, skipping
        # positions that fall outside the divisor array.
        for neighbours in (best_step + offsets, best_step - offsets):
            valid = (neighbours >= 0) & (neighbours < reduce_rate.size)
            idx = neighbours[valid]
            reduce_rate[idx] = np.maximum(reduce_rate[idx], penalties[valid])

    ranked = df.iloc[order].copy()
    ranked["reduced_score"] = reduced_scores
    return [row for _, row in ranked.iterrows()]

In [5]:
# Split the submission into one frame per series so each can be re-ranked on its own.
groups = [series_df for _, series_df in sub.groupby("series_id")]

# Re-rank every series in a pool of 30 worker processes; imap yields the
# per-series results in the same order as `groups`.
with Pool(30) as pool:
    results = list(tqdm(pool.imap(process_group, groups), total=len(groups)))

# Flatten the per-series lists of rows into one table.
all_results = []
for series_rows in results:
    all_results.extend(series_rows)
df = pd.DataFrame(all_results)

100%|██████████| 269/269 [02:53<00:00,  1.55it/s]


In [6]:
# Work on a copy of the re-ranked rows in which the damped score replaces the raw
# score, so that whatever consumes `sub` next reads the damped value from "score".
sub = df.assign(score=df["reduced_score"])

In [7]:
# Compute the competition score.
# Ground-truth events; rows with any missing value are dropped.
labels_path = f"{CFG['dataset']['competition_dir']}/train_events.csv"
labels = pd.read_csv(labels_path).dropna()

# Overall score plus the table broken down by event and tolerance.
score, ap_table = compute_comptetition_metric(labels, sub)
print(f"score: {score:.4f}")
display(ap_table)

score: 0.7338


event   tolerance
onset   12           0.174365
        36           0.560188
        60           0.693139
        90           0.765581
        120          0.806428
        150          0.821884
        180          0.835242
        240          0.849685
        300          0.858759
        360          0.866141
wakeup  12           0.197140
        36           0.574481
        60           0.711930
        90           0.785798
        120          0.821630
        150          0.842537
        180          0.857021
        240          0.872056
        300          0.887292
        360          0.895328
dtype: float64