In [2]:
import numpy as np
import polars as pl
import pandas as pd
from sklearn.base import clone
from copy import deepcopy
import optuna
from scipy.optimize import minimize
import os
import matplotlib.pyplot as plt
import seaborn as sns

import re
from colorama import Fore, Style

from tqdm import tqdm
from IPython.display import clear_output
from concurrent.futures import ThreadPoolExecutor

import warnings
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None

import lightgbm as lgb
from catboost import CatBoostRegressor, CatBoostClassifier
from xgboost import XGBRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import *
from sklearn.metrics import *

# Notebook-wide constants; the cells that consume them are outside this excerpt.
SEED = 920  # presumably the random seed for models / CV splitting — TODO confirm against later cells
n_splits = 5  # presumably the number of cross-validation folds — TODO confirm against later cells

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def process_file(filename, dirname):
    """Summarise a single time-series partition as a flat statistics vector.

    Reads ``<dirname>/<filename>/part-0.parquet``, discards the ``step``
    column and flattens the ``DataFrame.describe()`` table of what remains.

    Args:
        filename: Partition folder name containing an ``=``; the second
            ``=``-separated field is returned as the identifier.
        dirname: Directory that contains the partition folder.

    Returns:
        A tuple ``(stats, identifier)`` where ``stats`` is the 1-D array of
        the flattened ``describe()`` values.
    """
    parquet_path = os.path.join(dirname, filename, 'part-0.parquet')
    series = pd.read_parquet(parquet_path)
    # 'step' is excluded from the summary; a missing column raises KeyError.
    series = series.drop(columns='step')
    flat_stats = series.describe().values.reshape(-1)
    identifier = filename.split('=')[1]
    return flat_stats, identifier

def load_time_series(dirname) -> pd.DataFrame:
    """Build one row of summary statistics per partition folder in ``dirname``.

    Every entry of ``dirname`` is passed to ``process_file`` on a thread pool;
    the flattened statistics become columns ``Stat_0 .. Stat_{k-1}`` and the
    identifier returned by ``process_file`` is stored in the ``id`` column.

    Args:
        dirname: Directory whose entries are the per-id partition folders.

    Returns:
        A DataFrame with the ``Stat_*`` columns plus ``id``. An empty
        directory yields an empty DataFrame that has only the ``id`` column.
    """
    # os.listdir gives entries in arbitrary order; sorting makes the row
    # order of the result reproducible from run to run.
    ids = sorted(os.listdir(dirname))

    # With nothing to process, zip(*results) below could not be unpacked and
    # stats[0] would not exist, so return an empty frame instead of crashing.
    if not ids:
        return pd.DataFrame({'id': pd.Series(dtype='object')})

    with ThreadPoolExecutor() as executor:
        # executor.map yields results in the order of `ids`.
        results = list(tqdm(executor.map(lambda fname: process_file(fname, dirname), ids), total=len(ids)))

    stats, indexes = zip(*results)

    # The column count is taken from the first statistics vector.
    df = pd.DataFrame(stats, columns=[f"Stat_{i}" for i in range(len(stats[0]))])
    df['id'] = indexes

    return df

# Root folder of the competition data, defined once so that every path below
# is derived from a single place.
DATA_DIR = '../input/child-mind-institute-problematic-internet-use'

# Tabular data and the sample submission file.
train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
test = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))
sample = pd.read_csv(os.path.join(DATA_DIR, 'sample_submission.csv'))

# Per-id summary statistics of the time-series partitions.
train_ts = load_time_series(os.path.join(DATA_DIR, 'series_train.parquet'))
test_ts = load_time_series(os.path.join(DATA_DIR, 'series_test.parquet'))

100%|██████████| 996/996 [01:11<00:00, 13.90it/s]
100%|██████████| 2/2 [00:00<00:00, 12.10it/s]


In [8]:
# Rows whose "sii" value is not missing.
train_sii = train.loc[train["sii"].notna()]

In [10]:
# Number of rows for each distinct "sii" value.
train_sii.value_counts(subset="sii")

sii
0.0    1594
1.0     730
2.0     378
3.0      34
Name: count, dtype: int64

In [14]:
# Rows whose "PCIAT-PCIAT_Total" equals 0.
train_sii.loc[train_sii["PCIAT-PCIAT_Total"].eq(0)]

Unnamed: 0,id,Basic_Demos-Enroll_Season,Basic_Demos-Age,Basic_Demos-Sex,CGAS-Season,CGAS-CGAS_Score,Physical-Season,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,Fitness_Endurance-Season,Fitness_Endurance-Max_Stage,Fitness_Endurance-Time_Mins,Fitness_Endurance-Time_Sec,FGC-Season,FGC-FGC_CU,FGC-FGC_CU_Zone,FGC-FGC_GSND,FGC-FGC_GSND_Zone,FGC-FGC_GSD,FGC-FGC_GSD_Zone,FGC-FGC_PU,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_SRR,FGC-FGC_SRR_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-Season,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,PAQ_A-Season,PAQ_A-PAQ_A_Total,PAQ_C-Season,PAQ_C-PAQ_C_Total,PCIAT-Season,PCIAT-PCIAT_01,PCIAT-PCIAT_02,PCIAT-PCIAT_03,PCIAT-PCIAT_04,PCIAT-PCIAT_05,PCIAT-PCIAT_06,PCIAT-PCIAT_07,PCIAT-PCIAT_08,PCIAT-PCIAT_09,PCIAT-PCIAT_10,PCIAT-PCIAT_11,PCIAT-PCIAT_12,PCIAT-PCIAT_13,PCIAT-PCIAT_14,PCIAT-PCIAT_15,PCIAT-PCIAT_16,PCIAT-PCIAT_17,PCIAT-PCIAT_18,PCIAT-PCIAT_19,PCIAT-PCIAT_20,PCIAT-PCIAT_Total,SDS-Season,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-Season,PreInt_EduHx-computerinternet_hoursday,sii
1,000fd460,Summer,9,0,,,Fall,14.035590,48.00,46.0,22.0,75.0,70.0,122.0,,,,,Fall,3.0,0.0,,,,,5.0,0.0,11.0,1.0,11.0,1.0,3.0,0.0,Winter,2.0,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.211720,3.970850,1.0,21.0352,14.9740,39.4497,15.4107,27.0552,,,Fall,2.34,Fall,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fall,46.0,64.0,Summer,0.0,0.0
16,00d56d4b,Spring,5,1,Summer,80.0,Spring,17.284504,44.00,47.6,,61.0,76.0,109.0,Spring,,,,Spring,0.0,0.0,,,,,0.0,0.0,10.5,1.0,10.0,1.0,7.0,1.0,,,,,,,,,,,,,,,,,,,,,,Summer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Spring,37.0,53.0,Spring,0.0,0.0
26,012e3869,Summer,6,0,Winter,60.0,Summer,,,,,73.0,86.0,110.0,,,,,Summer,0.0,0.0,,,,,0.0,0.0,9.5,1.0,9.5,1.0,6.0,1.0,Fall,2.0,2.78600,25.2668,1027.230,1643.57,11.05050,51.6765,15.7693,9.497480,31.123500,2.0,26.7417,13.8843,48.8905,21.1339,37.7922,,,,,Summer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,Summer,0.0,0.0
36,02073150,Winter,7,0,Spring,65.0,Winter,17.603120,50.00,62.6,,54.0,92.0,90.0,Winter,5.0,5.0,21.0,Winter,4.0,1.0,,,,,8.0,1.0,7.5,1.0,8.0,1.0,6.0,1.0,Spring,5.0,5.91061,17.6050,1006.070,2414.57,10.36920,49.4226,13.8991,3.705900,13.177400,2.0,26.7413,12.3120,43.5120,23.8351,37.1105,,,,,Spring,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Winter,39.0,55.0,Winter,0.0,0.0
56,034f923b,Spring,14,1,Summer,51.0,Spring,23.173083,64.75,138.2,33.0,53.0,60.0,116.0,,,,,Spring,15.0,0.0,23.7,2.0,23.9,2.0,0.0,0.0,15.0,1.0,14.5,1.0,11.0,1.0,Spring,4.0,5.41636,23.1756,1435.880,2728.18,33.06170,95.2034,15.9653,7.210360,42.996600,2.0,37.9593,24.1824,89.7871,47.1436,71.0210,Spring,2.79,,,Spring,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Spring,52.0,72.0,Spring,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3925,fd0d9bca,Fall,8,1,Spring,80.0,Winter,12.635281,52.00,48.6,,59.0,85.0,118.0,Winter,7.0,10.0,59.0,Winter,13.0,1.0,,,,,7.0,1.0,10.0,1.0,11.0,1.0,6.5,1.0,Winter,3.0,8.78954,12.6367,999.121,1598.59,12.45690,48.6825,12.6581,-0.021456,-0.082519,1.0,22.1321,14.0936,39.8930,20.3503,34.5889,,,Winter,2.76,Winter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Winter,29.0,42.0,Fall,0.0,0.0
3928,fd5589f6,Spring,8,0,Summer,70.0,Summer,15.547182,50.50,56.4,,67.0,80.0,99.0,Summer,7.0,11.0,43.0,Summer,5.0,1.0,,,,,8.0,1.0,10.0,1.0,10.0,1.0,11.0,1.0,Summer,2.0,3.01028,15.5489,987.953,1580.73,9.15999,47.4930,13.0933,2.455560,8.906980,1.0,24.8707,13.4624,44.4827,20.8569,34.0306,,,,,Summer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Summer,26.0,38.0,Spring,2.0,0.0
3940,fe3cbf02,Fall,7,0,Spring,61.0,Winter,16.590800,50.00,59.0,,57.0,68.0,114.0,Winter,2.0,1.0,37.0,Winter,3.0,0.0,,,,,3.0,0.0,8.0,1.0,6.0,0.0,0.0,0.0,Winter,2.0,2.95213,16.5926,988.566,1581.71,9.21315,47.5583,13.3749,3.217760,11.441700,1.0,24.8441,13.5010,44.6062,20.4279,34.0573,,,,,Winter,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Winter,38.0,54.0,Fall,0.0,0.0
3948,fee07f32,Spring,6,0,Fall,55.0,Spring,15.996115,46.50,49.2,,52.0,86.0,109.0,Spring,2.0,1.0,40.0,Spring,0.0,0.0,,,,,0.0,0.0,10.0,1.0,9.0,1.0,8.0,1.0,,,,,,,,,,,,,,,,,,,,,,Fall,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fall,33.0,47.0,Spring,1.0,0.0
