In [1]:
import numpy as np
import pandas as pd
import os
import re
from sklearn.base import clone
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import StratifiedKFold, ShuffleSplit
from scipy.optimize import minimize
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import polars as pl
import polars.selectors as cs
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator, FormatStrFormatter, PercentFormatter
import seaborn as sns

from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

from colorama import Fore, Style
from IPython.display import clear_output
import warnings
import lightgbm as lgb
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.pipeline import Pipeline
#from pytorch_tabnet.tab_model import TabNetRegressor

import optuna
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_rank
from optuna.visualization import plot_slice
from optuna.visualization import plot_timeline

import shap

import gc
# Turn on automatic garbage collection; later cells also call gc.collect() explicitly after `del`.
gc.enable()

# Only emit Optuna log messages at WARNING level or above.
optuna.logging.set_verbosity(optuna.logging.WARNING)
# Suppresses every Python warning for the rest of the session (deprecation warnings included).
warnings.filterwarnings('ignore')
# NOTE(review): this `max_columns = None` is overridden two lines below by
# pd.set_option('display.max_columns', 500), so the effective limit is 500.
pd.options.display.max_columns = None
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', None)

# Polars display settings: a negative row/column count asks polars to print everything
# (see the polars Config documentation); string cells are shown up to 10000 characters.
pl.Config.set_tbl_rows(-1)
pl.Config.set_tbl_cols(-1)
pl.Config.set_fmt_str_lengths(10000)

polars.config.Config

# Preprocessing & Feature Engineering

In [2]:
#from IPython.display import display as ipydisplay, HTML
#ipydisplay(HTML("<style>.jp-CodeCell.jp-mod-outputsScrolled .jp-Cell-outputArea { max-height: 80em; }</style>"))

In [3]:
#SEED = 42
n_splits = 5

In [4]:
path = "I:/Kaggle/child-mind-institute-problematic-internet-use/"

In [5]:
train = pd.read_csv(path + 'train.csv', dtype={'id': str})
test = pd.read_csv(path + 'test.csv', dtype={'id': str})
#sample = pd.read_csv(path + 'sample_submission.csv', dtype={'id': str})

In [6]:
data_dict = pd.read_csv(path + 'data_dictionary.csv')

In [7]:
train_target = train[test.columns]

In [8]:
concat_df = pd.concat([train_target, test], ignore_index=True)

In [9]:
concat_df['total_num_nan'] = concat_df.isna().sum(axis=1)

In [10]:
del train_target
gc.collect()

0

In [12]:
# Add a 0/1 missing-value indicator column `<col>_isnan` for every column except 'id'.
# The loop iterates over the column index as it was when the loop started, so the
# indicator columns created here are not themselves processed.
# NOTE(review): 'total_num_nan' was added to concat_df in an earlier cell, so a
# 'total_num_nan_isnan' column is created here as well.
for col in concat_df.columns:
    if col == 'id':
        continue
    new_col_name = col + '_isnan'
    concat_df[new_col_name] = concat_df[col].isna().astype(int)

In [13]:
instruments = data_dict['Instrument'].unique().tolist()
instruments.remove('Identifier')

In [14]:
# For each instrument listed in the data dictionary, add `<instrument>_num_nan`: the number of
# that instrument's fields that are missing in each row of concat_df.
for instrument in instruments:
    # The PCIAT instrument is skipped.
    # presumably because its fields are not in concat_df (built from the test columns) — confirm
    if 'Parent-Child Internet Addiction Test' == instrument:
        continue
    instrument_df = data_dict[data_dict['Instrument'] == instrument]
    field_list = instrument_df['Field'].unique().tolist()
    concat_df[instrument + '_num_nan'] = concat_df[field_list].isna().sum(axis=1)

# Release the loop temporaries. NOTE(review): this raises NameError if the loop body never
# reached the assignments (e.g. `instruments` empty or containing only the skipped entry).
del instrument_df, field_list
gc.collect()

0

In [15]:
train_parquet_id_folders_list = os.listdir(path + 'series_train.parquet/')

In [16]:
train_parquet_id_list = [v.replace('id=', '') for v in train_parquet_id_folders_list]

In [17]:
test_parquet_id_folders_list = os.listdir(path + 'series_test.parquet/')

In [18]:
test_parquet_id_list = [v.replace('id=', '') for v in test_parquet_id_folders_list]

In [19]:
train_temp_df = concat_df.head(train.shape[0]).reset_index(drop=True)

In [20]:
test_temp_df = concat_df.tail(test.shape[0]).reset_index(drop=True)

In [21]:
del concat_df
gc.collect()

0

In [22]:
train_temp_df['has_parquet'] = train_temp_df['id'].isin(train_parquet_id_list).astype(int)

In [23]:
test_temp_df['has_parquet'] = test_temp_df['id'].isin(test_parquet_id_list).astype(int)

In [24]:
# https://www.kaggle.com/code/antoninadolgorukova/cmi-piu-actigraphy-data-eda
def entropy(x):
    """Return ``-sum(p * log(p + 1e-9))`` with ``p = x / x.sum()``.

    ``x`` is any array-like supporting element-wise division and ``.sum()``
    (numpy array, pandas Series). The ``1e-9`` offset keeps ``log`` finite
    when a proportion is exactly zero.
    """
    proportions = x / x.sum()
    return -(proportions * np.log(proportions + 1e-9)).sum()

In [25]:
def feat_engi(df, target_cols):
    """Flatten ``describe()`` statistics of ``target_cols`` into a single-row frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows.
    target_cols : list of str
        Columns of ``df`` to summarise.

    Returns
    -------
    pandas.DataFrame
        One row. For every target column and every statistic except ``count``
        (mean, std, min, the 1%/3%/5%..95%/97%/99% percentiles, max) there is a
        column named ``<target>_<statistic>``. A final ``count`` column holds
        the non-null count of the first target column.
    """
    percentiles = [0.01, 0.03] + [(i + 1) * 0.05 for i in range(19)] + [0.97, 0.99]
    # After the transpose, rows are the target columns and columns are the statistics.
    stats_df = df[target_cols].describe(percentiles=percentiles).T

    # The Series is labelled by target-column names, so an integer key such as ``[0]``
    # relies on the positional fallback that pandas deprecated; use ``.iloc`` explicitly.
    count = stats_df['count'].iloc[0]
    stats_df = stats_df.drop('count', axis=1)

    column_names_list = [
        str(idx) + '_' + str(col)
        for idx in stats_df.index
        for col in stats_df.columns
    ]

    # Row-major flattening matches the order in which the names were generated above.
    reshaped_df = pd.DataFrame(stats_df.values.reshape(-1)).T.reset_index(drop=True)
    reshaped_df.columns = column_names_list

    reshaped_df['count'] = count

    return reshaped_df

In [26]:
def groupby_feat_engi(df, col_to_group, target_col):
    """Summarise ``target_col`` within each ``col_to_group`` value as one wide row.

    For every group the ``describe()`` statistics (with the 1%/3%/5%..95%/97%/99%
    percentiles), the sum and the module-level ``entropy`` of ``target_col`` are
    computed. The group-by-statistic table is then flattened into a single-row
    frame whose columns are named
    ``<target_col>_<col_to_group>_<group value>_<statistic>``.
    """
    pct_grid = [0.01, 0.03] + [(i+1)*0.05 for i in range(19)] + [0.97, 0.99]
    grouped_target = df.groupby(col_to_group)[target_col]

    per_group = pd.DataFrame(grouped_target.describe(percentiles=pct_grid))
    per_group['sum'] = grouped_target.sum()
    per_group['entropy'] = grouped_target.apply(entropy)

    name_prefix = target_col + '_' + col_to_group + '_'
    flat_names = [
        name_prefix + str(group_key) + '_' + str(stat_name)
        for group_key in per_group.index
        for stat_name in per_group.columns
    ]

    # One row, group-major order, matching the order of flat_names.
    wide_row = pd.DataFrame(per_group.values.reshape(1, -1), columns=flat_names)

    return wide_row

In [27]:
def uncommon_groupby_feat_engi(df, col_to_group, target_col):
    """Summarise per-group statistics of ``target_col`` a second time, across groups.

    Step 1 computes, for every ``col_to_group`` value, the default ``describe()``
    statistics plus the sum and the module-level ``entropy`` of ``target_col``.
    Step 2 runs ``describe()`` over that per-group table, so each per-group
    statistic is itself summarised (mean, std, min, quartiles, max) across groups.

    Returns
    -------
    pandas.DataFrame
        One row. The first column, ``<target_col>_<col_to_group>_count``, is the
        number of groups with a non-null per-group count. The remaining columns
        are named ``<target_col>_<col_to_group>_<per-group stat>_<across-group stat>``.
    """
    # Group once and reuse the grouped object for all three aggregations.
    grouped = df.groupby(col_to_group)[target_col]

    stats_df = pd.DataFrame(grouped.describe())
    stats_df['sum'] = grouped.sum()
    stats_df['entropy'] = grouped.apply(entropy)

    stats_stats_df = stats_df.describe()
    # The rows of stats_stats_df are labelled 'count', 'mean', ...; an integer key such as
    # ``[0]`` relies on the positional fallback that pandas deprecated, so use ``.iloc``.
    stats_stats_count = stats_stats_df['count'].iloc[0]
    # Drop the 'count' row, then transpose: rows become per-group stats,
    # columns become across-group stats.
    stats_stats_df = stats_stats_df.drop('count', axis=0).T

    column_names_list = [
        target_col + '_' + col_to_group + '_' + str(idx) + '_' + str(col)
        for idx in stats_stats_df.index
        for col in stats_stats_df.columns
    ]

    reshaped_df = pd.DataFrame(stats_stats_df.values.reshape(-1)).T.reset_index(drop=True)
    reshaped_df.columns = column_names_list

    current_cols = reshaped_df.columns.tolist()
    count_col_name = target_col + '_' + col_to_group + '_count'
    reshaped_df[count_col_name] = stats_stats_count

    # Put the group count first.
    reshaped_df = reshaped_df[[count_col_name] + current_cols]

    return reshaped_df

In [28]:
def read_parquet(dataset='train'):
    """Compute one row of actigraphy features per participant folder.

    Every sub-folder of ``path + 'series_' + dataset + '.parquet/'`` is treated
    as one participant (folder name ``id=<participant id>``). All ``.parquet``
    files in the folder are concatenated, time columns are derived, rows with
    ``non-wear_flag != 0`` are discarded and three feature families are built
    from the remaining rows:

    * overall statistics (``feat_engi``),
    * statistics per weekday and per hour of day (``groupby_feat_engi``),
    * statistics of per-day / per-elapsed-hour statistics
      (``uncommon_groupby_feat_engi``).

    Parameters
    ----------
    dataset : str, default 'train'
        Name inserted into the series folder path (``series_<dataset>.parquet``).

    Returns
    -------
    list of pandas.DataFrame
        One single-row frame per participant folder, with ``id`` as the first
        column and four ``data_collected_in_quarter_<q>`` flags at the end.

    Raises
    ------
    ValueError
        Propagated from ``pd.concat`` when a participant folder contains no
        ``.parquet`` file.
    """
    series_dir = path + 'series_' + dataset + '.parquet/'
    parquet_id_folders_list = os.listdir(series_dir)

    ts_features_list = []
    for id_folder in tqdm(parquet_id_folders_list):
        iid = id_folder.replace('id=', '')

        parquet_files_path = series_dir + id_folder + '/'
        parquet_files_list = os.listdir(parquet_files_path)

        dfs_list = []
        for parquet_filename in parquet_files_list:
            if '.parquet' in parquet_filename:
                parquet_df = pd.read_parquet(parquet_files_path + parquet_filename)
                dfs_list.append(parquet_df)
        parquet_concat_df = pd.concat(dfs_list, ignore_index=True)
        current_cols = parquet_concat_df.columns.tolist()

        parquet_concat_df['id'] = iid

        parquet_concat_df = parquet_concat_df[['id'] + current_cols]

        # Days elapsed since the first recorded row.
        parquet_concat_df['day_since_wear'] = (parquet_concat_df['relative_date_PCIAT'] - parquet_concat_df['relative_date_PCIAT'].min()).astype(int)
        # presumably time_of_day is in nanoseconds since midnight, hence / 1e9 -> seconds — confirm
        parquet_concat_df['time_of_day_sec'] = parquet_concat_df['time_of_day'] / 1e9
        parquet_concat_df.drop('time_of_day', axis=1, inplace=True)
        parquet_concat_df['time_of_day_min'] = parquet_concat_df['time_of_day_sec'] / 60
        parquet_concat_df['time_of_day_hour'] = parquet_concat_df['time_of_day_min'] / 60
        parquet_concat_df['time_of_day_day'] = parquet_concat_df['time_of_day_hour'] / 24
        # Integer buckets of the time of day (hour, minute, quarter hour).
        parquet_concat_df['time_of_day_hour_window'] = parquet_concat_df['time_of_day_hour'].astype(int)
        parquet_concat_df['time_of_day_min_window'] = parquet_concat_df['time_of_day_min'].astype(int)
        parquet_concat_df['time_of_day_15_min_window'] = (parquet_concat_df['time_of_day_min'] / 15).astype(int)
        # Continuous timestamp in days since the first recorded day.
        parquet_concat_df['timestamp_day'] = parquet_concat_df['day_since_wear'] + parquet_concat_df['time_of_day_day']

        # Diagnostic only: flags participants whose rows do not all have distinct timestamps.
        if parquet_concat_df['timestamp_day'].nunique() != parquet_concat_df.shape[0]:
            print('yes')

        parquet_concat_df['timestamp_hour'] = parquet_concat_df['timestamp_day'] * 24
        parquet_concat_df['timestamp_min'] = parquet_concat_df['timestamp_day'] * 24 * 60
        parquet_concat_df['timestamp_sec'] = parquet_concat_df['timestamp_day'] * 24 * 60 * 60
        parquet_concat_df['timestamp_15_min'] = parquet_concat_df['timestamp_day'] * 24 * (60/15)

        parquet_concat_df['timestamp_hour_window'] = parquet_concat_df['timestamp_hour'].astype(int)
        parquet_concat_df['timestamp_min_window'] = parquet_concat_df['timestamp_min'].astype(int)
        parquet_concat_df['timestamp_15_min_window'] = parquet_concat_df['timestamp_15_min'].astype(int)

        # Voltage drop relative to the first row (positive when the voltage has fallen).
        battery_voltage_start = parquet_concat_df['battery_voltage'][0]
        parquet_concat_df['battery_use_since_wear'] = -(parquet_concat_df['battery_voltage'] - battery_voltage_start)

        # All features below are computed on rows where the device was worn.
        worn_df = parquet_concat_df[parquet_concat_df['non-wear_flag'] == 0].reset_index(drop=True)

        feature_cols = ['X', 'Y', 'Z', 'enmo', 'anglez', 'light', 'battery_use_since_wear']

        features_df = feat_engi(worn_df, feature_cols)

        groupby_cols = ['weekday', 'time_of_day_hour_window']#, 'time_of_day_15_min_window']

        groupby_df_list = []
        for groupby_col in groupby_cols:
            for feature_col in feature_cols:
                groupby_features_df = groupby_feat_engi(worn_df, groupby_col, feature_col)
                groupby_df_list.append(groupby_features_df)

        groupby_concat_df = pd.concat(groupby_df_list, axis=1)

        uncommon_groupby_cols = ['day_since_wear', 'timestamp_hour_window']#, 'timestamp_15_min_window']#, 'timestamp_min_window']

        uncommon_groupby_df_list = []
        for uncommon_groupby_col in uncommon_groupby_cols:
            for unc_feature_col in feature_cols:
                uncommon_groupby_df = uncommon_groupby_feat_engi(worn_df, uncommon_groupby_col, unc_feature_col)
                uncommon_groupby_df_list.append(uncommon_groupby_df)

        # Include every frame collected above. The previous code concatenated only the loop
        # variable (the last frame: battery_use_since_wear by timestamp_hour_window) and
        # silently dropped the other 13. Feature tables cached from that version must be
        # regenerated to contain the same columns.
        features_concat_df = pd.concat([features_df, groupby_concat_df] + uncommon_groupby_df_list, axis=1)
        features_current_cols = features_concat_df.columns.tolist()
        features_concat_df['id'] = iid
        features_concat_df = features_concat_df[['id'] + features_current_cols]

        # 0/1 flags: did any worn row fall in calendar quarter q?
        quarters_present = worn_df['quarter'].tolist()
        for q in [1,2,3,4]:
            if q in quarters_present:
                features_concat_df['data_collected_in_quarter_' + str(q)] = 1
            else:
                features_concat_df['data_collected_in_quarter_' + str(q)] = 0

        features_concat_df['id'] = features_concat_df['id'].astype(str)

        ts_features_list.append(features_concat_df)

        # Release the per-participant frames before loading the next folder.
        gc.collect()
    gc.collect()

    return ts_features_list

In [29]:
test_ts_features_list = read_parquet(dataset='test')

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:04<00:00,  2.30s/it]


In [30]:
train_ts_features_concat_df = pd.read_csv(path + 'train_ts_features_concat.csv', dtype={'id': str})
test_ts_features_concat_df = pd.concat(test_ts_features_list, ignore_index=True)

In [31]:
del test_ts_features_list
gc.collect()

0

In [32]:
train_merge_df = train_temp_df.merge(train_ts_features_concat_df, how='left', on='id')
test_merge_df = test_temp_df.merge(test_ts_features_concat_df, how='left', on='id')

In [33]:
del train_ts_features_concat_df, test_ts_features_concat_df
gc.collect()

0

In [34]:
# Every data-dictionary field (other than 'id') whose declared type is exactly 'str'
# or mentions 'categori' is treated as categorical.
cat_cols = [
    field_name
    for field_name, field_type in zip(data_dict['Field'], data_dict['Type'])
    if field_name != 'id' and (field_type == 'str' or 'categori' in field_type)
]

In [35]:
# Fields that are kept as ordered numeric codes instead of being one-hot encoded.
# ('FGC-FGC_CU_Zone' used to be listed twice; each name now appears once.)
ordinal_variables = ['FGC-FGC_CU_Zone', 'FGC-FGC_GSND_Zone', 'FGC-FGC_GSD_Zone', 'FGC-FGC_PU_Zone',
                     'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR_Zone', 'FGC-FGC_TL_Zone', 'BIA-BIA_Activity_Level_num', 'BIA-BIA_Frame_num',
                     'PreInt_EduHx-computerinternet_hoursday']

# Categorical PCIAT fields are ordinal too, except the PCIAT season column.
ordinal_variables += [col for col in cat_cols if 'PCIAT' in col and 'Season' not in col]

In [36]:
# Keep only the nominal categoricals: drop ordinal fields and anything PCIAT-related.
# The filter preserves data-dictionary order (duplicates removed, first occurrence kept).
# The earlier `list(set(...))` produced an order that can change between interpreter runs
# because string hashing is randomized, which made the one-hot column order non-reproducible.
excluded_cols = set(ordinal_variables)
cat_cols = [
    col
    for col in dict.fromkeys(cat_cols)
    if col not in excluded_cols and 'PCIAT' not in col
]

In [37]:
merge_concat_df = pd.concat([train_merge_df, test_merge_df], ignore_index=True)

In [38]:
del train_merge_df, test_merge_df
gc.collect()

0

In [39]:
def remove_pciat_cols(df):
    """Drop every column whose name contains 'PCIAT'.

    The frame is modified in place and the same object is returned.
    """
    pciat_columns = [name for name in df.columns if 'PCIAT' in name]
    df.drop(columns=pciat_columns, inplace=True)
    return df

In [40]:
merge_concat_df = remove_pciat_cols(merge_concat_df)

In [41]:
def one_hot_cat_cols(df):
    """One-hot encode the columns named in the module-level ``cat_cols``.

    Each categorical column is converted to strings with missing values
    labelled 'Missing'. One 0/1 column ``<col>_<value>`` is then added per
    distinct value, in sorted order. The passed frame is modified while the
    indicators are built; the returned frame is a copy without the original
    categorical columns.
    """
    for cat_col in tqdm(cat_cols):
        df[cat_col] = df[cat_col].fillna('Missing').apply(str)

        for level in sorted(df[cat_col].unique().tolist()):
            indicator_name = cat_col + '_' + level
            df[indicator_name] = (df[cat_col] == level).astype(int)

    return df.drop(cat_cols, axis=1)

In [42]:
merge_concat_df = one_hot_cat_cols(merge_concat_df)

100%|█████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 305.55it/s]


In [43]:
# https://www.kaggle.com/code/ichigoe/lb0-494-with-tabnet
def feature_engineering(df):
    """Append 15 product/ratio interaction columns to ``df`` and return it.

    Each entry of the table below is ``(new column, left operand, operator,
    right operand)``. The columns are added in table order and the frame is
    modified in place.
    """
    derived_features = [
        ('BMI_Age', 'Physical-BMI', '*', 'Basic_Demos-Age'),
        ('Internet_Hours_Age', 'PreInt_EduHx-computerinternet_hoursday', '*', 'Basic_Demos-Age'),
        ('BMI_Internet_Hours', 'Physical-BMI', '*', 'PreInt_EduHx-computerinternet_hoursday'),
        ('BFP_BMI', 'BIA-BIA_Fat', '/', 'BIA-BIA_BMI'),
        ('FFMI_BFP', 'BIA-BIA_FFMI', '/', 'BIA-BIA_Fat'),
        ('FMI_BFP', 'BIA-BIA_FMI', '/', 'BIA-BIA_Fat'),
        ('LST_TBW', 'BIA-BIA_LST', '/', 'BIA-BIA_TBW'),
        ('BFP_BMR', 'BIA-BIA_Fat', '*', 'BIA-BIA_BMR'),
        ('BFP_DEE', 'BIA-BIA_Fat', '*', 'BIA-BIA_DEE'),
        ('BMR_Weight', 'BIA-BIA_BMR', '/', 'Physical-Weight'),
        ('DEE_Weight', 'BIA-BIA_DEE', '/', 'Physical-Weight'),
        ('SMM_Height', 'BIA-BIA_SMM', '/', 'Physical-Height'),
        ('Muscle_to_Fat', 'BIA-BIA_SMM', '/', 'BIA-BIA_FMI'),
        ('Hydration_Status', 'BIA-BIA_TBW', '/', 'Physical-Weight'),
        ('ICW_TBW', 'BIA-BIA_ICW', '/', 'BIA-BIA_TBW'),
    ]

    for new_name, left, operator_symbol, right in derived_features:
        if operator_symbol == '*':
            df[new_name] = df[left] * df[right]
        else:
            df[new_name] = df[left] / df[right]

    return df

In [44]:
merge_concat_df = feature_engineering(merge_concat_df)

In [45]:
no_id_cols = [col for col in merge_concat_df.columns if col != 'id']

In [46]:
# Replace +/-inf in the non-id columns with NaN (and report that infinities were found).
# presumably the infinities come from the ratio features dividing by zero — confirm
if np.any(np.isinf(merge_concat_df[no_id_cols])):
    print('inf in data')
    merge_concat_df[no_id_cols] = merge_concat_df[no_id_cols].replace([np.inf, -np.inf], np.nan)

inf in data


In [47]:
# Fill every NaN in the non-id columns from the 5 nearest neighbouring rows.
# NOTE(review): the imputer is fitted on merge_concat_df, i.e. on train and test rows together.
# NOTE(review): assumes no column is entirely NaN; otherwise the transformed array may have
# fewer columns than `no_id_cols` and the assignment would fail — confirm.
imputer = KNNImputer(n_neighbors=5)
merge_concat_df[no_id_cols] = imputer.fit_transform(merge_concat_df[no_id_cols])

In [48]:
del imputer
gc.collect()

0

In [49]:
train_df = merge_concat_df.head(train.shape[0]).reset_index(drop=True)
test_df = merge_concat_df.tail(test.shape[0]).reset_index(drop=True)

In [50]:
train_df['sii'] = train['sii']

In [51]:
del train, test
gc.collect()

0

In [52]:
train_nonan_df = train_df[~train_df['sii'].isna()].reset_index(drop=True)

In [53]:
del train_df
gc.collect()

0

In [54]:
selected_cols_df = pd.read_csv(path + 'selected_cols.csv')

In [55]:
train_cols = selected_cols_df['cols'].tolist()

In [56]:
del selected_cols_df
gc.collect()

0

In [57]:
test_cols = train_cols.copy()
test_cols.remove('sii')

In [58]:
train_final_df = train_nonan_df[train_cols].copy()
test_final_df = test_df[test_cols].copy()

In [59]:
del train_nonan_df, test_df
gc.collect()

0

In [60]:
print(train_final_df.shape)
train_final_df.head()

(2736, 105)


Unnamed: 0,id,Basic_Demos-Age,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Systolic_BP,FGC-FGC_CU,FGC-FGC_GSND,FGC-FGC_GSD,FGC-FGC_PU,BIA-BIA_Activity_Level_num,BIA-BIA_Fat,SDS-SDS_Total_Raw,PreInt_EduHx-computerinternet_hoursday,Physical-Waist_Circumference_isnan,PAQ_C-Season_isnan,enmo_mean,anglez_max,X_weekday_4_entropy,X_weekday_5_entropy,Y_weekday_4_entropy,Z_weekday_1_min,Z_weekday_4_std,Z_weekday_5_std,enmo_weekday_1_15%,enmo_weekday_3_mean,anglez_weekday_3_entropy,light_weekday_1_std,light_weekday_3_max,light_weekday_4_max,light_weekday_4_entropy,battery_use_since_wear_weekday_3_mean,battery_use_since_wear_weekday_3_30%,battery_use_since_wear_weekday_5_max,X_time_of_day_hour_window_1_sum,X_time_of_day_hour_window_2_sum,X_time_of_day_hour_window_4_mean,X_time_of_day_hour_window_15_max,X_time_of_day_hour_window_19_entropy,X_time_of_day_hour_window_22_sum,X_time_of_day_hour_window_23_mean,Y_time_of_day_hour_window_0_sum,Y_time_of_day_hour_window_2_std,Y_time_of_day_hour_window_5_entropy,Y_time_of_day_hour_window_6_mean,Y_time_of_day_hour_window_8_sum,Y_time_of_day_hour_window_9_sum,Y_time_of_day_hour_window_9_entropy,Y_time_of_day_hour_window_11_mean,Y_time_of_day_hour_window_13_3%,Y_time_of_day_hour_window_14_min,Y_time_of_day_hour_window_15_min,Y_time_of_day_hour_window_20_sum,Y_time_of_day_hour_window_21_sum,Y_time_of_day_hour_window_21_entropy,Z_time_of_day_hour_window_0_min,Z_time_of_day_hour_window_1_mean,Z_time_of_day_hour_window_2_mean,Z_time_of_day_hour_window_5_mean,Z_time_of_day_hour_window_5_sum,Z_time_of_day_hour_window_9_sum,Z_time_of_day_hour_window_11_std,Z_time_of_day_hour_window_15_std,Z_time_of_day_hour_window_15_entropy,Z_time_of_day_hour_window_16_max,Z_time_of_day_hour_window_21_mean,Z_time_of_day_hour_window_22_min,Z_time_of_day_hour_window_23_min,enmo_time_of_day_hour_window_3_std,enmo_time_of_day_hour_window_4_std,enmo_time_of_day_hour_window_8_mean,enmo_time_of_day_hour_window_14_me
an,enmo_time_of_day_hour_window_15_10%,enmo_time_of_day_hour_window_16_max,enmo_time_of_day_hour_window_18_mean,enmo_time_of_day_hour_window_18_max,enmo_time_of_day_hour_window_19_10%,anglez_time_of_day_hour_window_8_entropy,anglez_time_of_day_hour_window_12_entropy,light_time_of_day_hour_window_7_std,light_time_of_day_hour_window_7_70%,light_time_of_day_hour_window_8_mean,light_time_of_day_hour_window_8_std,light_time_of_day_hour_window_10_max,light_time_of_day_hour_window_12_1%,light_time_of_day_hour_window_13_std,light_time_of_day_hour_window_13_max,light_time_of_day_hour_window_19_35%,light_time_of_day_hour_window_19_max,light_time_of_day_hour_window_20_std,light_time_of_day_hour_window_21_std,battery_use_since_wear_time_of_day_hour_window_8_mean,battery_use_since_wear_time_of_day_hour_window_14_1%,battery_use_since_wear_time_of_day_hour_window_16_min,battery_use_since_wear_time_of_day_hour_window_16_5%,battery_use_since_wear_time_of_day_hour_window_18_5%,battery_use_since_wear_timestamp_hour_window_mean_min,PAQ_C-Season_Winter,Basic_Demos-Sex_0,Basic_Demos-Sex_1,FMI_BFP,Muscle_to_Fat,sii
0,00008ff9,5.0,51.0,16.877316,46.0,50.8,25.0,130.2,0.0,20.5,21.18,0.0,2.0,9.21377,41.6,3.0,1.0,1.0,0.045088,89.296684,39.926039,34.137011,32.881972,-1.001339,0.545945,0.502684,0.002152,0.046002,26.211493,30.577977,1519.44003,929.05,9.275002,213.972368,136.85,696.71665,-211.553589,239.102159,-0.011758,1.14036,22.976666,-150.355033,-0.020638,-43.818953,0.304632,23.115072,-0.007757,-332.684457,-156.169044,52.527369,0.077756,-0.815943,-1.395185,-1.295111,-18.909606,228.457314,16.746607,-0.989829,-0.219695,-0.243437,-0.026439,232.212733,-752.21275,0.494078,0.513271,18.664069,1.46159,0.019698,-0.601909,-0.97388,0.022801,0.021542,0.044125,0.061529,0.001617,2.518643,0.052438,1.7423,0.001565,55.225293,17.738485,30.704474,38.370415,15.21469,25.508007,1343.000024,0.073666,39.113261,1403.999985,2.858217,86.159849,13.298962,24.520308,286.112347,6.2,9.6,30.2,33.334136,0.297317,0.0,1.0,0.0,0.332267,6.383063,2.0
1,000fd460,9.0,63.2,14.03559,48.0,46.0,22.0,122.0,3.0,23.08,24.94,5.0,2.0,3.97085,46.0,0.0,0.0,0.0,0.052679,89.456932,18.0037,18.304327,53.06119,-1.00069,0.529854,0.540959,0.003071,0.043606,22.763406,62.688214,1090.32002,1148.777557,8.891857,316.855106,284.86665,507.16665,-98.813717,383.965568,0.007261,1.058457,17.475854,561.547091,-0.103908,-306.613574,0.404812,25.279001,0.055939,151.477521,184.09317,59.785731,0.028424,-0.88572,-1.567546,-1.615314,-51.054434,223.56519,17.279529,-0.984249,-0.052453,-0.103421,0.090573,539.487087,-806.503931,0.577153,0.514021,18.904598,1.241939,-0.071049,-0.978247,-0.754005,0.017955,0.021796,0.058982,0.082104,0.002075,1.770745,0.057765,2.416968,0.00147,20.438158,12.446348,32.719406,13.489636,19.015883,49.706047,706.656366,0.806746,155.748372,1562.68092,3.737898,92.163605,10.445207,8.533079,251.225998,25.25,7.0,36.183301,34.116699,1.02361,0.0,1.0,0.0,0.305154,12.718037,0.0
2,00105258,10.0,71.0,16.648696,56.5,75.6,27.0,117.0,20.0,10.2,14.7,7.0,2.4,-24.811978,38.0,2.0,1.0,0.0,0.051544,89.528796,72.632825,64.814719,132.689714,-1.001535,0.58118,0.508,0.001253,0.046373,15.11326,82.795443,1914.673352,2174.52002,8.914334,280.658752,204.99334,475.76665,-406.720744,-27.948322,-0.15711,1.251344,20.592004,426.822872,-0.05508,-525.753946,0.353414,17.038335,-0.100803,-287.683429,48.40976,73.123344,0.018078,-0.865696,-2.338892,-1.792149,-189.59945,-69.776136,81.312502,-0.907346,0.00065,0.000123,-0.083917,-782.812247,-1515.544268,0.5184,0.454301,31.276516,1.612058,-0.107542,-0.997746,-0.993624,0.012943,0.013302,0.049712,0.075791,0.004364,2.637164,0.077506,3.443518,0.001415,45.355287,34.054314,65.634326,20.28821,22.907549,44.251033,2086.549777,0.118659,196.358112,1703.840015,4.245884,179.400383,10.215226,12.21795,285.961932,23.036171,9.0,49.449165,38.0,3.893122,0.0,0.0,1.0,0.24442,9.75587,0.0
3,00115b9f,9.0,71.0,18.292347,56.0,81.6,26.4,117.0,18.0,17.66,19.24,5.0,3.0,18.8243,31.0,0.0,1.0,0.0,0.047388,89.751656,11.136038,10.005599,12.51692,-1.013281,0.664023,0.581047,0.001123,0.047503,9.7659,20.560743,2509.25,2318.199951,6.922735,178.45845,95.0,359.0,170.965439,-12.88521,-0.110789,1.023125,8.228292,17.042252,-0.028194,38.194061,0.202894,22.678757,-0.112217,163.961899,712.567383,9.328824,0.16217,-0.899747,-1.449167,-1.696042,-162.726105,10.201592,113.857529,-1.007734,0.001678,0.481678,0.174127,88.456406,-556.077881,0.578686,0.545946,14.08558,0.999349,-0.258508,-0.994922,-1.004505,0.039797,0.022768,0.040718,0.045689,0.000264,3.57304,0.058005,1.27303,0.000296,8.397519,14.401575,54.115761,12.572705,17.601913,42.592899,2633.25,0.0,177.174103,2409.5,4.770065,577.5,5.821051,17.45595,145.146576,30.0,7.0,7.0,13.0,6.585714,1.0,1.0,0.0,0.224196,6.274343,1.0
4,001f3379,13.0,50.0,22.279952,59.5,112.2,32.0,102.0,12.0,16.5,17.9,6.0,2.0,67.9715,40.0,0.0,1.0,0.0,0.016461,89.476036,13.664883,28.633865,13.94132,-1.011558,0.595851,0.636366,0.0,0.010586,632.172974,19.922426,2179.0,2578.0,7.511941,139.089218,64.0,432.0,-1119.969482,-740.448975,-0.098163,1.006678,14.030548,1375.584717,0.253121,-80.10498,0.276741,14.871807,0.002315,150.408401,735.050537,15.849753,0.051869,-0.856981,-1.502655,-1.52269,100.479034,-963.386169,13.613046,-0.988334,-0.035043,-0.011308,-0.114499,-577.073242,447.497681,0.489062,0.562955,12.45776,1.126889,0.258789,-1.014619,-1.006673,0.009234,0.008684,0.020525,0.033071,0.001325,0.710641,0.01875,2.952888,0.000188,69.492538,13.413841,25.219738,10.659063,23.471333,38.886051,442.799988,0.0,274.752411,2576.600098,0.878989,1481.599976,6.074192,4.627489,279.66156,28.834326,32.0,35.0,37.666504,21.541666,0.0,0.0,1.0,0.198595,2.621003,1.0


In [61]:
print(test_final_df.shape)
test_final_df.head()

(20, 104)


Unnamed: 0,id,Basic_Demos-Age,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Systolic_BP,FGC-FGC_CU,FGC-FGC_GSND,FGC-FGC_GSD,FGC-FGC_PU,BIA-BIA_Activity_Level_num,BIA-BIA_Fat,SDS-SDS_Total_Raw,PreInt_EduHx-computerinternet_hoursday,Physical-Waist_Circumference_isnan,PAQ_C-Season_isnan,enmo_mean,anglez_max,X_weekday_4_entropy,X_weekday_5_entropy,Y_weekday_4_entropy,Z_weekday_1_min,Z_weekday_4_std,Z_weekday_5_std,enmo_weekday_1_15%,enmo_weekday_3_mean,anglez_weekday_3_entropy,light_weekday_1_std,light_weekday_3_max,light_weekday_4_max,light_weekday_4_entropy,battery_use_since_wear_weekday_3_mean,battery_use_since_wear_weekday_3_30%,battery_use_since_wear_weekday_5_max,X_time_of_day_hour_window_1_sum,X_time_of_day_hour_window_2_sum,X_time_of_day_hour_window_4_mean,X_time_of_day_hour_window_15_max,X_time_of_day_hour_window_19_entropy,X_time_of_day_hour_window_22_sum,X_time_of_day_hour_window_23_mean,Y_time_of_day_hour_window_0_sum,Y_time_of_day_hour_window_2_std,Y_time_of_day_hour_window_5_entropy,Y_time_of_day_hour_window_6_mean,Y_time_of_day_hour_window_8_sum,Y_time_of_day_hour_window_9_sum,Y_time_of_day_hour_window_9_entropy,Y_time_of_day_hour_window_11_mean,Y_time_of_day_hour_window_13_3%,Y_time_of_day_hour_window_14_min,Y_time_of_day_hour_window_15_min,Y_time_of_day_hour_window_20_sum,Y_time_of_day_hour_window_21_sum,Y_time_of_day_hour_window_21_entropy,Z_time_of_day_hour_window_0_min,Z_time_of_day_hour_window_1_mean,Z_time_of_day_hour_window_2_mean,Z_time_of_day_hour_window_5_mean,Z_time_of_day_hour_window_5_sum,Z_time_of_day_hour_window_9_sum,Z_time_of_day_hour_window_11_std,Z_time_of_day_hour_window_15_std,Z_time_of_day_hour_window_15_entropy,Z_time_of_day_hour_window_16_max,Z_time_of_day_hour_window_21_mean,Z_time_of_day_hour_window_22_min,Z_time_of_day_hour_window_23_min,enmo_time_of_day_hour_window_3_std,enmo_time_of_day_hour_window_4_std,enmo_time_of_day_hour_window_8_mean,enmo_time_of_day_hour_window_14_me
an,enmo_time_of_day_hour_window_15_10%,enmo_time_of_day_hour_window_16_max,enmo_time_of_day_hour_window_18_mean,enmo_time_of_day_hour_window_18_max,enmo_time_of_day_hour_window_19_10%,anglez_time_of_day_hour_window_8_entropy,anglez_time_of_day_hour_window_12_entropy,light_time_of_day_hour_window_7_std,light_time_of_day_hour_window_7_70%,light_time_of_day_hour_window_8_mean,light_time_of_day_hour_window_8_std,light_time_of_day_hour_window_10_max,light_time_of_day_hour_window_12_1%,light_time_of_day_hour_window_13_std,light_time_of_day_hour_window_13_max,light_time_of_day_hour_window_19_35%,light_time_of_day_hour_window_19_max,light_time_of_day_hour_window_20_std,light_time_of_day_hour_window_21_std,battery_use_since_wear_time_of_day_hour_window_8_mean,battery_use_since_wear_time_of_day_hour_window_14_1%,battery_use_since_wear_time_of_day_hour_window_16_min,battery_use_since_wear_time_of_day_hour_window_16_5%,battery_use_since_wear_time_of_day_hour_window_18_5%,battery_use_since_wear_timestamp_hour_window_mean_min,PAQ_C-Season_Winter,Basic_Demos-Sex_0,Basic_Demos-Sex_1,FMI_BFP,Muscle_to_Fat
0,00008ff9,5.0,51.0,16.877316,46.0,50.8,25.0,130.2,0.0,20.5,21.18,0.0,2.0,9.21377,41.6,3.0,1.0,1.0,0.045088,89.296684,39.926039,34.137011,32.881972,-1.001339,0.545945,0.502684,0.002152,0.046002,26.211493,30.577977,1519.44003,929.05,9.275002,213.972368,136.85,696.71665,-211.553589,239.102159,-0.011758,1.14036,22.976666,-150.355033,-0.020638,-43.818953,0.304632,23.115072,-0.007757,-332.684457,-156.169044,52.527369,0.077756,-0.815943,-1.395185,-1.295111,-18.909606,228.457314,16.746607,-0.989829,-0.219695,-0.243437,-0.026439,232.212733,-752.21275,0.494078,0.513271,18.664069,1.46159,0.019698,-0.601909,-0.97388,0.022801,0.021542,0.044125,0.061529,0.001617,2.518643,0.052438,1.7423,0.001565,55.225293,17.738485,30.704474,38.370415,15.21469,25.508007,1343.000024,0.073666,39.113261,1403.999985,2.858217,86.159849,13.298962,24.520308,286.112347,6.2,9.6,30.2,33.334136,0.297317,0.0,1.0,0.0,0.332267,6.383063
1,000fd460,9.0,63.2,14.03559,48.0,46.0,22.0,122.0,3.0,23.08,24.94,5.0,2.0,3.97085,46.0,0.0,0.0,0.0,0.052679,89.456932,18.0037,18.304327,53.06119,-1.00069,0.529854,0.540959,0.003071,0.043606,22.763406,62.688214,1090.32002,1148.777557,8.891857,316.855106,284.86665,507.16665,-98.813717,383.965568,0.007261,1.058457,17.475854,561.547091,-0.103908,-306.613574,0.404812,25.279001,0.055939,151.477521,184.09317,59.785731,0.028424,-0.88572,-1.567546,-1.615314,-51.054434,223.56519,17.279529,-0.984249,-0.052453,-0.103421,0.090573,539.487087,-806.503931,0.577153,0.514021,18.904598,1.241939,-0.071049,-0.978247,-0.754005,0.017955,0.021796,0.058982,0.082104,0.002075,1.770745,0.057765,2.416968,0.00147,20.438158,12.446348,32.719406,13.489636,19.015883,49.706047,706.656366,0.806746,155.748372,1562.68092,3.737898,92.163605,10.445207,8.533079,251.225998,25.25,7.0,36.183301,34.116699,1.02361,0.0,1.0,0.0,0.305154,12.718037
2,00105258,10.0,71.0,16.648696,56.5,75.6,27.0,117.0,20.0,10.2,14.7,7.0,2.4,-24.811978,38.0,2.0,1.0,0.0,0.051544,89.528796,72.632825,64.814719,132.689714,-1.001535,0.58118,0.508,0.001253,0.046373,15.11326,82.795443,1914.673352,2174.52002,8.914334,280.658752,204.99334,475.76665,-406.720744,-27.948322,-0.15711,1.251344,20.592004,426.822872,-0.05508,-525.753946,0.353414,17.038335,-0.100803,-287.683429,48.40976,73.123344,0.018078,-0.865696,-2.338892,-1.792149,-189.59945,-69.776136,81.312502,-0.907346,0.00065,0.000123,-0.083917,-782.812247,-1515.544268,0.5184,0.454301,31.276516,1.612058,-0.107542,-0.997746,-0.993624,0.012943,0.013302,0.049712,0.075791,0.004364,2.637164,0.077506,3.443518,0.001415,45.355287,34.054314,65.634326,20.28821,22.907549,44.251033,2086.549777,0.118659,196.358112,1703.840015,4.245884,179.400383,10.215226,12.21795,285.961932,23.036171,9.0,49.449165,38.0,3.893122,0.0,0.0,1.0,0.24442,9.75587
3,00115b9f,9.0,71.0,18.292347,56.0,81.6,26.4,117.0,18.0,17.66,19.24,5.0,3.0,18.8243,31.0,0.0,1.0,0.0,0.047388,89.751656,11.136038,10.005599,12.51692,-1.013281,0.664023,0.581047,0.001123,0.047503,9.7659,20.560743,2509.25,2318.199951,6.922735,178.45845,95.0,359.0,170.965439,-12.88521,-0.110789,1.023125,8.228292,17.042252,-0.028194,38.194061,0.202894,22.678757,-0.112217,163.961899,712.567383,9.328824,0.16217,-0.899747,-1.449167,-1.696042,-162.726105,10.201592,113.857529,-1.007734,0.001678,0.481678,0.174127,88.456406,-556.077881,0.578686,0.545946,14.08558,0.999349,-0.258508,-0.994922,-1.004505,0.039797,0.022768,0.040718,0.045689,0.000264,3.57304,0.058005,1.27303,0.000296,8.397519,14.401575,54.115761,12.572705,17.601913,42.592899,2633.25,0.0,177.174103,2409.5,4.770065,577.5,5.821051,17.45595,145.146576,30.0,7.0,7.0,13.0,6.585714,1.0,1.0,0.0,0.224196,6.274343
4,0016bb22,18.0,65.8,22.367377,66.878,20.0,31.7,113.6,5.8,28.28,27.96,4.2,3.0,26.554596,65.2,2.2,1.0,1.0,0.04671,88.557333,30.512318,34.729864,46.09555,-0.601322,0.513992,0.363256,0.006451,0.059423,23.805539,45.642145,2073.289981,1052.849274,8.676221,208.150089,166.18335,586.0,-458.982312,-346.688521,-0.027934,1.01073,40.286681,-925.913538,-0.036889,311.321217,0.327984,34.068309,0.035641,-430.698573,-310.559863,21.064805,0.094592,-0.87456,-1.485158,-1.314069,179.947,302.276663,23.480862,-0.986945,-0.156401,-0.091592,-0.194812,-199.920056,-714.911292,0.376802,0.491894,11.501916,1.059301,-0.320264,-1.001321,-0.951278,0.029679,0.018844,0.060554,0.057553,0.001492,1.551091,0.04317,1.558729,0.000776,17.597503,18.880747,54.11608,13.750061,33.682291,81.571663,1129.72998,0.143672,212.683105,2043.03999,2.632023,121.178068,13.463926,14.782193,232.641109,17.101328,15.4,38.6,41.91,5.256589,0.0,0.0,1.0,0.195376,10.738625


# Parameter tuning, training, prediction & submission

In [62]:
# https://www.kaggle.com/code/ichigoe/lb0-494-with-tabnet
def quadratic_weighted_kappa(y_true, y_pred):
    """Return Cohen's kappa between `y_true` and `y_pred` using quadratic weights.

    Thin wrapper around `sklearn.metrics.cohen_kappa_score` with
    `weights='quadratic'`; both arguments are forwarded unchanged.
    """
    qwk = cohen_kappa_score(y_true, y_pred, weights='quadratic')
    return qwk

def threshold_Rounder(oof_non_rounded, thresholds):
    """Bucket continuous predictions into the integer classes 0, 1, 2 and 3.

    Parameters
    ----------
    oof_non_rounded : array-like
        Continuous predictions.
    thresholds : sequence
        Cut points; only the first three entries are read.

    Returns
    -------
    numpy.ndarray
        0 where the prediction is below ``thresholds[0]``, otherwise 1 where it
        is below ``thresholds[1]``, otherwise 2 where it is below
        ``thresholds[2]``, otherwise 3. Comparisons are strict, so a value equal
        to a cut point falls into the higher class.
    """
    first_cut, second_cut, third_cut = thresholds[0], thresholds[1], thresholds[2]

    # Start from the lowest-priority rule and let each earlier cut point
    # override it, so the first matching cut point wins even if the cut
    # points are not in ascending order.
    classes = np.where(oof_non_rounded < third_cut, 2, 3)
    classes = np.where(oof_non_rounded < second_cut, 1, classes)
    classes = np.where(oof_non_rounded < first_cut, 0, classes)
    return classes

def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    """Objective for threshold search: the negated quadratic weighted kappa.

    The predictions are bucketed with `threshold_Rounder` using `thresholds`
    and scored against `y_true`; the score is negated so that a minimiser
    maximises the kappa.
    """
    bucketed = threshold_Rounder(oof_non_rounded, thresholds)
    kappa = quadratic_weighted_kappa(y_true, bucketed)
    return -kappa

In [63]:
def ensemble(train_data, test_data, xgb_params, lgb_params, cat_params):
    """Cross-validate XGBoost, LightGBM and CatBoost regressors on the same folds.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training frame; the 'id' and 'sii' columns are dropped to form the
        features and 'sii' is used as the target.
    test_data : pandas.DataFrame
        Test frame; the 'id' column is dropped before predicting.
    xgb_params, lgb_params, cat_params : dict
        Keyword arguments for `XGBRegressor`, `LGBMRegressor` and
        `CatBoostRegressor` respectively.

    Returns
    -------
    tuple of six numpy.ndarray
        Out-of-fold train predictions for xgb, lgb, cat, followed by the
        fold-averaged test predictions for xgb, lgb, cat.

    Notes
    -----
    Reads the notebook-level `n_splits` for the number of folds and uses a
    shuffled `StratifiedKFold` with `random_state=6`.
    """
    features = train_data.drop(['id', 'sii'], axis=1)
    target = train_data['sii']

    test_features = test_data.drop('id', axis=1)

    model_names = ('xgb', 'lgb', 'cat')

    # One out-of-fold vector and one (n_test, n_splits) matrix per model.
    oof_preds = {name: np.zeros(len(target), dtype=float) for name in model_names}
    fold_test_preds = {
        name: np.zeros((len(test_features), n_splits), dtype=float)
        for name in model_names
    }

    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=6)

    for fold_no, (fit_rows, val_rows) in enumerate(splitter.split(features, target)):
        X_fit, y_fit = features.iloc[fit_rows], target.iloc[fit_rows]
        X_hold, y_hold = features.iloc[val_rows], target.iloc[val_rows]

        # Fresh, unfitted models for every fold.
        models = {
            'xgb': XGBRegressor(**xgb_params),
            'lgb': LGBMRegressor(**lgb_params),
            'cat': CatBoostRegressor(**cat_params),
        }

        # Each library takes a slightly different fit signature, so the three
        # calls are spelled out rather than looped.
        models['xgb'].fit(X_fit, y_fit, eval_set=[(X_fit, y_fit), (X_hold, y_hold)], verbose=0)
        models['lgb'].fit(X_fit, y_fit, eval_set=[(X_fit, y_fit), (X_hold, y_hold)])
        models['cat'].fit(X_fit, y_fit, eval_set=(X_hold, y_hold), plot=True)

        for name in model_names:
            oof_preds[name][val_rows] = models[name].predict(X_hold)

        for name in model_names:
            fold_test_preds[name][:, fold_no] = models[name].predict(test_features)

    # Average the per-fold test predictions of each model.
    test_means = {name: fold_test_preds[name].mean(axis=1) for name in model_names}

    return (
        oof_preds['xgb'], oof_preds['lgb'], oof_preds['cat'],
        test_means['xgb'], test_means['lgb'], test_means['cat'],
    )

In [64]:
# Read the stored hyper-parameter tables for the three base models from `path`.
xgb_params_df, lgb_params_df, cat_params_df = (
    pd.read_csv(path + csv_name)
    for csv_name in ('xgb_params.csv', 'lgb_params.csv', 'cat_params.csv')
)

In [65]:
def create_params_dict(params_df):
    """Turn the first row of a parameter table into a plain keyword-argument dict.

    Parameters
    ----------
    params_df : pandas.DataFrame
        Table with one column per parameter; only the first row is used.

    Returns
    -------
    dict
        Maps each column name to the value in the first row. NumPy scalars
        (integers, floats, booleans, ...) are converted to the equivalent
        built-in Python objects; any other value is passed through unchanged.
        A frame without columns yields an empty dict.

    Raises
    ------
    IndexError
        If the frame has columns but no rows.
    """
    params_dict = {}
    for col in params_df.columns:
        # Positional access: the first row regardless of the index labels.
        value = params_df[col].iloc[0]
        # Unwrap every NumPy scalar type, not only np.int64, so downstream
        # consumers receive native int / float / bool values.
        if isinstance(value, np.generic):
            value = value.item()
        params_dict[col] = value

    return params_dict

In [66]:
# Convert each base-model parameter table into a keyword-argument dict.
xgb_params_dict, lgb_params_dict, cat_params_dict = map(
    create_params_dict,
    (xgb_params_df, lgb_params_df, cat_params_df),
)

In [67]:
# Set (or override) the CatBoost 'iterations' parameter loaded from cat_params.csv to 2000.
# This mutates cat_params_dict in place; re-running the cell is harmless because the
# assignment is idempotent.
cat_params_dict['iterations'] = 2000

In [75]:
# Fit the three base models with cross-validation. Per the return statement of `ensemble`,
# the first three arrays are out-of-fold train predictions and the last three are the
# fold-averaged test predictions (order: XGBoost, LightGBM, CatBoost).
# NOTE(review): this cell's execution count (75) is higher than that of the cells that
# follow it (69-74), so it appears to have been re-run after them; restart the kernel and
# run all cells to confirm the downstream outputs match this run.
xgb_train, lgb_train, cat_train, xgb_test, lgb_test, cat_test = ensemble(
    train_final_df, test_final_df, xgb_params_dict, lgb_params_dict, cat_params_dict
)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

In [69]:
# Read the stored hyper-parameter table for the second-stage (meta) model from `path`.
meta_params_df = pd.read_csv(path + 'meta_params.csv')

In [70]:
# Convert the first row of the meta-model parameter table into a keyword-argument dict.
meta_params_dict = create_params_dict(meta_params_df)

In [71]:
# Display the meta-model parameters for inspection.
# NOTE(review): the recorded output lists 'task_type': 'GPU', so this configuration
# presumably needs a GPU runtime — confirm before running on CPU-only hardware.
meta_params_dict

{'iterations': 1500,
 'verbose': 0,
 'task_type': 'GPU',
 'use_best_model': True,
 'depth': 3,
 'learning_rate': 0.0553304866969034,
 'l2_leaf_reg': 134,
 'early_stopping_rounds': 18,
 'bagging_temperature': 0.8803760252020252,
 'random_strength': 3.9277891225565433}

In [72]:
def _optimize_thresholds(y_true, preds, failure_message):
    """Tune the three rounding cut points for one (labels, predictions) pair.

    The search starts from the mean prediction of every label group except the
    first (groups are in sorted label order) and runs Nelder-Mead on
    `evaluate_predictions`, i.e. it minimises the negated quadratic weighted
    kappa.

    Returns the optimised cut points (the `x` attribute of the scipy result).
    Raises AssertionError carrying `failure_message` when the optimiser does
    not report success.
    """
    group_means = pd.DataFrame({'y': y_true, 'preds': preds}).groupby('y')['preds'].mean()
    initial_thresholds = group_means.iloc[1:].values.tolist()

    result = minimize(evaluate_predictions,
                      x0=initial_thresholds, args=(y_true, preds),
                      method='Nelder-Mead')
    assert result.success, failure_message
    return result.x


def _print_qwk(label, y_true, preds, thresholds):
    """Round `preds` with `thresholds`, print the resulting QWK under `label`, and return it."""
    kappa = quadratic_weighted_kappa(y_true, threshold_Rounder(preds, thresholds))
    print(f"----> || {label} QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {kappa:.3f}{Style.RESET_ALL}")
    return kappa


def meta_learner(y, xgb_train_meta, lgb_train_meta, cat_train_meta, xgb_test_meta, lgb_test_meta, cat_test_meta, meta_params, test_ids=None):
    """Stack the three base-model predictions with a CatBoost regressor and build the submission.

    Parameters
    ----------
    y : pandas.Series
        Training target (copied before use).
    xgb_train_meta, lgb_train_meta, cat_train_meta : array-like
        Base-model predictions for the training rows; they become the three
        meta-features 'xgb_meta', 'lgb_meta' and 'cat_meta'.
    xgb_test_meta, lgb_test_meta, cat_test_meta : array-like
        Base-model predictions for the test rows, in the same column layout.
    meta_params : dict
        Keyword arguments for `CatBoostRegressor`.
    test_ids : array-like, optional
        Identifiers for the test rows, written to the 'id' column of the
        submission. When omitted, the notebook-level ``test_final_df['id']``
        is used, which is what this function always did before the parameter
        existed.

    Returns
    -------
    pandas.DataFrame
        Columns 'id' and 'sii', where 'sii' is the fold-averaged test
        prediction rounded with the thresholds tuned on the out-of-fold
        predictions.

    Raises
    ------
    ValueError
        If `test_ids` and the test predictions differ in length.
    AssertionError
        If any of the Nelder-Mead threshold searches does not report success.

    Notes
    -----
    Reads the notebook-level `n_splits` and uses a shuffled `StratifiedKFold`
    with `random_state=6`. Per fold it prints four diagnostic QWK scores
    (thresholds tuned on train/val, each applied to train and val); only the
    thresholds tuned on the full out-of-fold predictions are used for the
    submission.
    """
    if test_ids is None:
        # Backward-compatible fallback to the notebook-level test frame.
        test_ids = test_final_df['id']
    if len(test_ids) != len(xgb_test_meta):
        # Fail before any training instead of when the submission frame is built.
        raise ValueError(
            f"test_ids has {len(test_ids)} entries but the test predictions have {len(xgb_test_meta)}."
        )

    y = y.copy()

    X = pd.DataFrame({
        'xgb_meta': xgb_train_meta,
        'lgb_meta': lgb_train_meta,
        'cat_meta': cat_train_meta,
    })

    test_data = pd.DataFrame({
        'xgb_meta': xgb_test_meta,
        'lgb_meta': lgb_test_meta,
        'cat_meta': cat_test_meta,
    })

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=6)

    oof_non_rounded = np.zeros(len(y), dtype=float)
    test_preds = np.zeros((len(xgb_test_meta), n_splits))

    for fold, (train_idx, val_idx) in enumerate(SKF.split(X, y)):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        model = CatBoostRegressor(
            **meta_params
        )

        model.fit(X_train, y_train, eval_set=(X_val, y_val), plot=True)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[val_idx] = y_val_pred

        # Diagnostics: thresholds tuned on each split, scored on that split ...
        train_thresholds = _optimize_thresholds(
            y_train, y_train_pred, "Train Optimization did not converge.")
        _print_qwk("Train Optimized", y_train, y_train_pred, train_thresholds)

        val_thresholds = _optimize_thresholds(
            y_val, y_val_pred, "Val Optimization did not converge.")
        _print_qwk("Val Optimized", y_val, y_val_pred, val_thresholds)

        # ... and on the opposite split, to show how well the thresholds transfer.
        _print_qwk("Train Optimized Val", y_val, y_val_pred, train_thresholds)
        _print_qwk("Val Optimized Train", y_train, y_train_pred, val_thresholds)

        test_preds[:, fold] = model.predict(test_data)

    # Final thresholds come from the complete out-of-fold prediction vector.
    oof_thresholds = _optimize_thresholds(
        y, oof_non_rounded, "Optimization did not converge.")
    _print_qwk("Optimized", y, oof_non_rounded, oof_thresholds)

    tpm = test_preds.mean(axis=1)
    tpTuned = threshold_Rounder(tpm, oof_thresholds)

    submission = pd.DataFrame({
        'id': test_ids,
        'sii': tpTuned
    })

    return submission

In [73]:
# Train the second-stage model on the base-model predictions and build the submission frame
# (columns 'id' and 'sii'). The per-fold QWK diagnostics are printed by `meta_learner`.
submission_df = meta_learner(
    train_final_df['sii'], xgb_train, lgb_train, cat_train, xgb_test, lgb_test, cat_test, meta_params_dict
)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

----> || Train Optimized QWK SCORE :: [36m[1m 0.528[0m
----> || Val Optimized QWK SCORE :: [36m[1m 0.475[0m
----> || Train Optimized Val QWK SCORE :: [36m[1m 0.512[0m
----> || Val Optimized Train QWK SCORE :: [36m[1m 0.478[0m


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

----> || Train Optimized QWK SCORE :: [36m[1m 0.536[0m
----> || Val Optimized QWK SCORE :: [36m[1m 0.525[0m
----> || Train Optimized Val QWK SCORE :: [36m[1m 0.499[0m
----> || Val Optimized Train QWK SCORE :: [36m[1m 0.518[0m


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

----> || Train Optimized QWK SCORE :: [36m[1m 0.519[0m
----> || Val Optimized QWK SCORE :: [36m[1m 0.564[0m
----> || Train Optimized Val QWK SCORE :: [36m[1m 0.533[0m
----> || Val Optimized Train QWK SCORE :: [36m[1m 0.515[0m


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

----> || Train Optimized QWK SCORE :: [36m[1m 0.523[0m
----> || Val Optimized QWK SCORE :: [36m[1m 0.561[0m
----> || Train Optimized Val QWK SCORE :: [36m[1m 0.583[0m
----> || Val Optimized Train QWK SCORE :: [36m[1m 0.507[0m


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

----> || Train Optimized QWK SCORE :: [36m[1m 0.549[0m
----> || Val Optimized QWK SCORE :: [36m[1m 0.468[0m
----> || Train Optimized Val QWK SCORE :: [36m[1m 0.458[0m
----> || Val Optimized Train QWK SCORE :: [36m[1m 0.543[0m
----> || Optimized QWK SCORE :: [36m[1m 0.525[0m


In [74]:
# Display the submission frame as a sanity check before writing it to disk.
submission_df

Unnamed: 0,id,sii
0,00008ff9,1
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,1
5,001f3379,1
6,0038ba98,1
7,0068a485,1
8,0069fbed,2
9,0083e397,2


In [None]:
# Write the submission without the DataFrame index column.
# NOTE(review): this cell has no execution count in the saved notebook, so it was
# apparently not run in this session — confirm submission.csv is produced on a full run.
submission_df.to_csv('submission.csv', index=False)