In [1]:
import os
import sys
import traceback
import gc
import time
import random
import pickle
import pathlib
import subprocess
from dataclasses import dataclass
from collections import defaultdict

import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.model_selection import GroupKFold
import lightgbm as lgb

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import itertools

import warnings
warnings.simplefilter('ignore')



In [2]:
@dataclass
class Cfg:
    """Experiment configuration for this notebook.

    Every attribute carries a type annotation so that ``@dataclass`` turns it
    into a real field: values can be overridden per instance
    (``Cfg(mode="kaggle_inf")``) and show up in ``repr(cfg)``. Without the
    annotations the decorator generates no fields at all.
    """
    mode: str = "local_cv"  # "local_cv" or "kaggle_inf"
    exp_name: str = "exp075"
    input_dir: str = "/mnt/predict-student-performance-from-game-play/input/"
    output_dir: str = "/mnt/predict-student-performance-from-game-play/output/"
    prep_dir: str = "/mnt/predict-student-performance-from-game-play/prep/"
    seed: int = 42
    n_splits: int = 5
    best_threshold: float = 0.630  # fill in with the result obtained from local_cv
    base_exp: str = "exp074"  # source experiment whose feature importances are used
    n_features: int = 500  # number of features for feature reduction
cfg = Cfg()

if cfg.mode == "local_cv":
    # Creating the nested "cache" directory with parents=True also creates the
    # experiment directory itself, so one call covers both folders.
    pathlib.Path(cfg.output_dir, cfg.exp_name, "cache").mkdir(parents=True, exist_ok=True)
elif cfg.mode == "kaggle_inf":
    # Imported only in this mode; the module is not needed for local CV.
    import jo_wilder_310

In [3]:
def calc_metrics(oof, verbose=False):
    """Search for the decision threshold that maximises macro F1.

    Parameters
    ----------
    oof : pandas.DataFrame
        Needs a "correct" column (labels) and a "pred" column (scores that are
        compared against each candidate threshold). A "question" column with
        values 1..18 is additionally read when ``verbose`` is true.
    verbose : bool, default False
        When true, print the log loss, the best score/threshold and the macro
        F1 of each question evaluated at the best threshold.

    Returns
    -------
    The best macro F1 found over ``np.arange(0.4, 0.81, 0.01)``; ``0`` when no
    candidate threshold scores above zero.
    """
    # Hoisted out of the loop: these arrays do not change between thresholds.
    y_true = oof["correct"].values
    y_score = oof["pred"].values

    best_score = 0
    best_threshold = 0
    for threshold in np.arange(0.4, 0.81, 0.01):
        score = f1_score(y_true, (y_score > threshold).astype(int), average='macro')
        if score > best_score:
            best_score = score
            best_threshold = threshold

    if verbose:
        logloss = log_loss(y_true, y_score)
        print("logloss", format(logloss, ".6f"))
        print("best_score", format(best_score, ".6f"))
        print("best_threshold", format(best_threshold, ".3f"))

        # Per-question scores
        print("---" * 10)
        for q in range(1, 19):
            in_question = (oof["question"] == q).values
            # Evaluate at the selected threshold, not at the last value the
            # search loop happened to leave in `threshold`.
            q_preds = (y_score[in_question] > best_threshold).astype(int)
            m = f1_score(y_true[in_question], q_preds, average='macro')
            print(f"Q{q} : F1 = {format(m, '.6f')}")
    return best_score

In [28]:
exp = "exp100"
# Build the path with os.path.join so it stays valid even if cfg.output_dir is
# configured without a trailing slash.
oof = pd.read_csv(os.path.join(cfg.output_dir, exp, "oof.csv.gz"))
# Replace the "pred" column by hard 0/1 labels.
# NOTE(review): 0.62 differs from cfg.best_threshold (0.630) — confirm this is
# the intended threshold for this experiment.
oof["pred"] = (oof["pred"] > 0.62).astype(int)

In [29]:
# Per-row confusion-matrix indicators: exactly the matching column is 1 when
# "correct" and "pred" are both in {0, 1}; rows with any other value get 0 everywhere.
oof["TP"] = ((oof["correct"] == 1) & (oof["pred"] == 1)).astype("int64")
oof["FP"] = ((oof["correct"] == 0) & (oof["pred"] == 1)).astype("int64")
oof["TN"] = ((oof["correct"] == 0) & (oof["pred"] == 0)).astype("int64")
oof["FN"] = ((oof["correct"] == 1) & (oof["pred"] == 0)).astype("int64")

In [30]:
# Sum the confusion-matrix indicators per session; as_index=False keeps
# "session_id" as a regular column instead of the index.
df = (
    oof.groupby("session_id", as_index=False)[["TP", "FP", "TN", "FN"]]
    .sum()
)

In [31]:
# Per-session precision / recall / F1 from the summed counts.
# A zero denominator (0 / 0) produces NaN here; it is not handled in this cell.
df["precision"] = df["TP"].div(df["TP"] + df["FP"])
df["recall"] = df["TP"].div(df["TP"] + df["FN"])
df["F1"] = (df["recall"] * df["precision"]).mul(2).div(df["recall"] + df["precision"])

In [32]:
# Undefined F1 values (NaN) are scored as 0.
df.loc[df["F1"].isna(), "F1"] = 0

In [33]:
# Lowest F1 first, so the worst-predicted sessions come at the top.
df = df.sort_values(by="F1", ascending=True)

In [35]:
# Write the sorted per-session table to the working directory; index=True (the
# default) stores the row index as the first, unnamed column.
# NOTE(review): "chceck.csv" looks like a typo for "check.csv" — left as is in
# case something else reads this file; confirm before renaming.
df.to_csv("chceck.csv", index=True)

In [36]:
# First 20 rows of the F1-sorted table, i.e. the sessions with the lowest F1.
df.iloc[:20]

Unnamed: 0,session_id,TP,FP,TN,FN,precision,recall,F1
7012,21020008201271976,0,3,14,1,0.0,0.0,0.0
22691,22080212225118936,0,4,11,3,0.0,0.0,0.0
15932,21080614332146020,0,5,12,1,0.0,0.0,0.0
20730,22030413193618136,0,4,11,3,0.0,0.0,0.0
8895,21020511403643896,0,2,12,4,0.0,0.0,0.0
20761,22030419543220600,0,5,10,3,0.0,0.0,0.0
19731,22010509130820644,0,4,12,2,0.0,0.0,0.0
2417,20110408102520604,0,4,11,3,0.0,0.0,0.0
7925,21020219325225296,0,2,12,4,0.0,0.0,0.0
23433,22090510463732496,0,4,10,4,0.0,0.0,0.0


In [37]:
# Last 5 rows of the F1-sorted table, i.e. the sessions with the highest F1.
df.iloc[-5:]

Unnamed: 0,session_id,TP,FP,TN,FN,precision,recall,F1
19275,22010108254903044,16,0,2,0,1.0,1.0,1.0
15968,21090007450375424,17,0,1,0,1.0,1.0,1.0
15965,21090007080991732,17,0,1,0,1.0,1.0,1.0
10977,21040008520402116,18,0,0,0,1.0,1.0,1.0
11299,21040113392235224,17,0,1,0,1.0,1.0,1.0
