# exp101

blending

In [1]:
import os
import sys
import traceback
import gc
import re
import time
import random
import pickle
import pathlib
import subprocess
from dataclasses import dataclass
from collections import defaultdict

import pandas as pd
import numpy as np
import polars as pl
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.model_selection import GroupKFold
import lightgbm as lgb
from xgboost import XGBClassifier

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import itertools

import Levenshtein

import warnings
warnings.simplefilter('ignore')



In [2]:
@dataclass
class Cfg:
    mode = "local_cv" # "local_cv" or "kaggle_inf" 
    exp_name = "exp101-blending"
    input_dir = "/mnt/predict-student-performance-from-game-play/input/"
    output_dir = "/mnt/predict-student-performance-from-game-play/output/"
    prep_dir = "/mnt/predict-student-performance-from-game-play/prep/"
    seed = 42
    n_splits = 5
    best_threshold = 0.630 # local_cvの結果を入れる
    base_exp = None # 特徴量重要度を使う元のexp
    n_features = 500 # 特徴量削減の数
cfg = Cfg()

if cfg.mode == "local_cv":
    os.makedirs(os.path.join(cfg.output_dir, cfg.exp_name), exist_ok=True)
    os.makedirs(os.path.join(cfg.output_dir, cfg.exp_name, "cache"), exist_ok=True)
    import cudf

elif cfg.mode == "kaggle_inf":
    import jo_wilder_310

In [18]:
def calc_metrics(oof):
    logloss = log_loss(oof["correct"], oof["pred"])

    # find best th
    scores = []; thresholds = []
    best_score = 0; best_threshold = 0

    for threshold in np.arange(0.4,0.81,0.01):
        preds = (oof["pred"].values>threshold).astype(int)
        m = f1_score(oof["correct"].values, preds, average='macro')   
        scores.append(m)
        thresholds.append(threshold)
        if m>best_score:
            best_score = m
            best_threshold = threshold
    #print("logloss", format(logloss, ".6f"))
    #print("best_score", format(best_score, ".6f"))
    #print("best_threshold", format(best_threshold, ".3f"))

    # Q別スコア
    #print("---"*10)
    for q in range(18):
        q = q + 1
        preds = (oof[oof["question"]==q]["pred"].values>threshold).astype(int)
        m = f1_score(oof[oof["question"]==q]["correct"].values, preds, average='macro')
        #print(f"Q{q} : F1 = {format(m, '.6f')}")
    return best_score

In [19]:
exp = "exp101"
oof = pd.read_csv(cfg.output_dir + f"{exp}/oof.csv.gz")
oof = oof[["session_id", "question", "correct"]].copy()

In [20]:
exps = ["exp101", "exp101-xgb", "exp101-cat"]
for exp in exps:
    oof[exp] = pd.read_csv(cfg.output_dir + f"{exp}/oof.csv.gz")["pred"]

In [21]:
oof

Unnamed: 0,session_id,question,correct,exp101,exp101-xgb,exp101-cat
0,20090312433251036,1,0,0.752240,0.764636,0.747594
1,20090314121766812,1,1,0.704933,0.762277,0.691551
2,20090315085850788,1,1,0.866910,0.893824,0.866094
3,20090316190523732,1,1,0.950349,0.920370,0.899543
4,20100006432365430,1,0,0.706717,0.686448,0.680925
...,...,...,...,...,...,...
424111,22090619362224080,18,1,0.802060,0.759788,0.846154
424112,22100208551963804,18,1,0.978937,0.984232,0.976224
424113,22100211280762644,18,1,0.984846,0.986823,0.980628
424114,22100213133089136,18,1,0.987298,0.992067,0.990833


In [22]:
oof

Unnamed: 0,session_id,question,correct,exp101,exp101-xgb,exp101-cat
0,20090312433251036,1,0,0.752240,0.764636,0.747594
1,20090314121766812,1,1,0.704933,0.762277,0.691551
2,20090315085850788,1,1,0.866910,0.893824,0.866094
3,20090316190523732,1,1,0.950349,0.920370,0.899543
4,20100006432365430,1,0,0.706717,0.686448,0.680925
...,...,...,...,...,...,...
424111,22090619362224080,18,1,0.802060,0.759788,0.846154
424112,22100208551963804,18,1,0.978937,0.984232,0.976224
424113,22100211280762644,18,1,0.984846,0.986823,0.980628
424114,22100213133089136,18,1,0.987298,0.992067,0.990833


In [32]:
best_w = None
best_score = 0.0
ws = [0,1,2,3,4,5]
for w in itertools.product(ws, repeat=3):
    if np.sum(w) == 0:
        pass
    else:
        oof["pred"] = (oof[exps[0]]*w[0] + oof[exps[1]]*w[1] + oof[exps[2]]*w[2]) / (w[0]+w[1]+w[2])
        score = calc_metrics(oof)
        print(w, score)
        if score > best_score:
            best_w = w 
            best_score = score

(0, 0, 1) 0.6966856501692378
(0, 0, 2) 0.6966856501692378
(0, 0, 3) 0.6966856501692378
(0, 0, 4) 0.6966856501692378
(0, 0, 5) 0.6966856501692378
(0, 1, 0) 0.7004685351829975
(0, 1, 1) 0.6997714853873784
(0, 1, 2) 0.6988496997663075
(0, 1, 3) 0.6984916049780597
(0, 1, 4) 0.6981700023555084
(0, 1, 5) 0.6980358241398308
(0, 2, 0) 0.7004685351829975
(0, 2, 1) 0.7003551660092238
(0, 2, 2) 0.6997714853873784
(0, 2, 3) 0.6992198153779481
(0, 2, 4) 0.6988496997663075
(0, 2, 5) 0.6987772155822418
(0, 3, 0) 0.7004685351829975
(0, 3, 1) 0.7004693017254573
(0, 3, 2) 0.7002787109222404
(0, 3, 3) 0.6997714853873784
(0, 3, 4) 0.6993118021231755
(0, 3, 5) 0.6991195882195597
(0, 4, 0) 0.7004685351829975
(0, 4, 1) 0.7005249161461777
(0, 4, 2) 0.7003551660092238
(0, 4, 3) 0.7000799488114862
(0, 4, 4) 0.6997714853873784
(0, 4, 5) 0.6993865942522983
(0, 5, 0) 0.7004685351829975
(0, 5, 1) 0.7005323424227982
(0, 5, 2) 0.7003228722204717
(0, 5, 3) 0.7002066166688483
(0, 5, 4) 0.700021537308499
(0, 5, 5) 0.699

In [33]:
best_score

0.7021077454084718

In [34]:
best_w

(4, 5, 0)

In [35]:
def calc_metrics2(oof):
    logloss = log_loss(oof["correct"], oof["pred"])

    # find best th
    scores = []; thresholds = []
    best_score = 0; best_threshold = 0

    for threshold in np.arange(0.4,0.81,0.01):
        preds = (oof["pred"].values>threshold).astype(int)
        m = f1_score(oof["correct"].values, preds, average='macro')   
        scores.append(m)
        thresholds.append(threshold)
        if m>best_score:
            best_score = m
            best_threshold = threshold
    print("logloss", format(logloss, ".6f"))
    print("best_score", format(best_score, ".6f"))
    print("best_threshold", format(best_threshold, ".3f"))

    # Q別スコア
    print("---"*10)
    for q in range(18):
        q = q + 1
        preds = (oof[oof["question"]==q]["pred"].values>threshold).astype(int)
        m = f1_score(oof[oof["question"]==q]["correct"].values, preds, average='macro')
        print(f"Q{q} : F1 = {format(m, '.6f')}")
    return best_score

In [37]:
oof["pred"] = (oof[exps[0]]*4 + oof[exps[1]]*5 + oof[exps[2]]*0) / 9
score = calc_metrics2(oof)

logloss 0.475336
best_score 0.702108
best_threshold 0.630
------------------------------
Q1 : F1 = 0.637847
Q2 : F1 = 0.552315
Q3 : F1 = 0.589335
Q4 : F1 = 0.667745
Q5 : F1 = 0.422504
Q6 : F1 = 0.631641
Q7 : F1 = 0.587021
Q8 : F1 = 0.354834
Q9 : F1 = 0.590399
Q10 : F1 = 0.366386
Q11 : F1 = 0.421523
Q12 : F1 = 0.593596
Q13 : F1 = 0.423628
Q14 : F1 = 0.542339
Q15 : F1 = 0.376810
Q16 : F1 = 0.437121
Q17 : F1 = 0.379759
Q18 : F1 = 0.546471
