In [1]:
# import lib
import numpy as np
import pandas as pd
import os
import re

from colorama import Fore, Style
from sklearn.base import clone, BaseEstimator, RegressorMixin
from sklearn.metrics import cohen_kappa_score, accuracy_score, mean_squared_error
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.decomposition import PCA
from sklearn.datasets import make_classification
from scipy.optimize import minimize
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense
from keras.optimizers import Adam
import torch
import torch.nn as nn
import torch.optim as optim

from IPython.display import clear_output
import warnings
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.pipeline import Pipeline

import random


# 1. Initial setup and seed
setup params of LightGBM, XGBoost, CatBoost

In [2]:
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None
SEED = 42
n_splits = 5

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
seed_everything(2024)

#Basic_demos_sex, cgas-cgas_score, max_stage, endurance-time_mins, fgc-fgc_srl_zone

train_featuresCols = ['Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-CGAS_Score', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T',
                'PreInt_EduHx-computerinternet_hoursday', 'sii', 'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',
                'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE', 'BMR_Weight', 'DEE_Weight',
                'SMM_Height', 'Muscle_to_Fat', 'Hydration_Status', 'ICW_TBW', 'BMI_PHR']

test_featuresCols = ['Basic_Demos-Age', 'Basic_Demos-Sex',
                'CGAS-CGAS_Score', 'Physical-BMI',
                'Physical-Height', 'Physical-Weight', 'Physical-Waist_Circumference',
                'Physical-Diastolic_BP', 'Physical-HeartRate', 'Physical-Systolic_BP',
                'FGC-FGC_CU', 'FGC-FGC_CU_Zone', 'FGC-FGC_PU',
                'FGC-FGC_PU_Zone', 'FGC-FGC_SRL', 'FGC-FGC_SRL_Zone', 'FGC-FGC_SRR',
                'FGC-FGC_SRR_Zone', 'FGC-FGC_TL', 'FGC-FGC_TL_Zone',
                'BIA-BIA_Activity_Level_num', 'BIA-BIA_BMC', 'BIA-BIA_BMI',
                'BIA-BIA_BMR', 'BIA-BIA_DEE', 'BIA-BIA_ECW', 'BIA-BIA_FFM',
                'BIA-BIA_FFMI', 'BIA-BIA_FMI', 'BIA-BIA_Fat', 'BIA-BIA_Frame_num',
                'BIA-BIA_ICW', 'BIA-BIA_LDM', 'BIA-BIA_LST', 'BIA-BIA_SMM',
                'BIA-BIA_TBW', 'SDS-SDS_Total_Raw',
                'SDS-SDS_Total_T',
                'PreInt_EduHx-computerinternet_hoursday', 'BMI_Age','Internet_Hours_Age','BMI_Internet_Hours',
                'BFP_BMI', 'FFMI_BFP', 'FMI_BFP', 'LST_TBW', 'BFP_BMR', 'BFP_DEE', 'BMR_Weight', 'DEE_Weight',
                'SMM_Height', 'Muscle_to_Fat', 'Hydration_Status', 'ICW_TBW', 'BMI_PHR']


Params = {
    'learning_rate': 0.046,
    'max_depth': 12,
    'num_leaves': 478,
    'min_data_in_leaf': 13,
    'feature_fraction': 0.893,
    'bagging_fraction': 0.784,
    'bagging_freq': 4,
    'lambda_l1': 10,  # Increased from 6.59
    'lambda_l2': 0.01,  # Increased from 2.68e-06
    'device': 'cpu',
}


XGB_Params = {
    'learning_rate': 0.05,
    'max_depth': 6,
    'n_estimators': 200,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_alpha': 1,  # Increased from 0.1
    'reg_lambda': 5,  # Increased from 1
    'random_state': SEED,
    'tree_method': 'gpu_hist'
}


CatBoost_Params = {
    'learning_rate': 0.05,
    'depth': 6,
    'iterations': 200,
    'random_seed': SEED,
    'verbose': 0,
    'l2_leaf_reg': 10,  # Increase this value
    'task_type': 'GPU',
}


In [3]:
# functions

# thong ke parquet file
def pre_progress_parquet_data(file_name, parquet_dir):
    file_path = os.path.join(parquet_dir, file_name, 'part-0.parquet')
    df = pd.read_parquet(file_path)
    df.drop('step', axis=1, inplace=True)
    return df.describe().values.reshape(-1), file_name.split('=')[1]


# xu li tat ca parquet song song
# tra ve dfchua cac stat va id
def load_time_series(dirname) -> pd.DataFrame:
    ids = os.listdir(dirname)
    
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(lambda fname: pre_progress_parquet_data(fname, dirname), ids), total=len(ids)))
    
    stats, indexes = zip(*results)
    
    df = pd.DataFrame(stats, columns=[f"stat_{i}" for i in range(len(stats[0]))])
    df['id'] = indexes
    return df


**preprocess_feature**: tao cac feature moi dua tren cac feature hien co

**auto_encoder**: ma hoa va giam chieu du lieu

In [4]:
def preprocess_feature(df):
    season_cols = [col for col in df.columns if 'Season' in col]
    df = df.drop(season_cols, axis=1) 
    df['BMI_Age'] = df['Physical-BMI'] * df['Basic_Demos-Age']
    df['Internet_Hours_Age'] = df['PreInt_EduHx-computerinternet_hoursday'] * df['Basic_Demos-Age']
    df['BMI_Internet_Hours'] = df['Physical-BMI'] * df['PreInt_EduHx-computerinternet_hoursday']
    df['BFP_BMI'] = df['BIA-BIA_Fat'] / df['BIA-BIA_BMI']
    df['FFMI_BFP'] = df['BIA-BIA_FFMI'] / df['BIA-BIA_Fat']
    df['FMI_BFP'] = df['BIA-BIA_FMI'] / df['BIA-BIA_Fat']
    df['LST_TBW'] = df['BIA-BIA_LST'] / df['BIA-BIA_TBW']
    df['BFP_BMR'] = df['BIA-BIA_Fat'] * df['BIA-BIA_BMR']
    df['BFP_DEE'] = df['BIA-BIA_Fat'] * df['BIA-BIA_DEE']
    df['BMR_Weight'] = df['BIA-BIA_BMR'] / df['Physical-Weight']
    df['DEE_Weight'] = df['BIA-BIA_DEE'] / df['Physical-Weight']
    df['SMM_Height'] = df['BIA-BIA_SMM'] / df['Physical-Height']
    df['Muscle_to_Fat'] = df['BIA-BIA_SMM'] / df['BIA-BIA_FMI']
    df['Hydration_Status'] = df['BIA-BIA_TBW'] / df['Physical-Weight']
    df['ICW_TBW'] = df['BIA-BIA_ICW'] / df['BIA-BIA_TBW']
    df['BMI_PHR'] = df['Physical-BMI'] * df['Physical-HeartRate']
    
    return df

class AutoEncoder(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim*3),
            nn.LeakyReLU(),
            nn.Linear(encoding_dim*3, encoding_dim*2+10),
            nn.LeakyReLU(),
            nn.Linear(encoding_dim*2+10, encoding_dim),
            nn.LeakyReLU()
        )

        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, encoding_dim+15),
            nn.LeakyReLU(),
            nn.Linear(encoding_dim+15, encoding_dim*3),
            nn.LeakyReLU(),
            nn.Linear(encoding_dim*3, input_dim),
            nn.Sigmoid()
        )
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# encoder du lieu
def auto_encoder(df, encoding_dim=50, epochs=50, batch_size=32):
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df)
    
    data_tensor = torch.FloatTensor(df_scaled)
    
    input_dim = data_tensor.shape[1]
    autoencoder = AutoEncoder(input_dim, encoding_dim)
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(autoencoder.parameters())
    
    for epoch in range(epochs):
        for i in range(0, len(data_tensor), batch_size):
            batch = data_tensor[i : i + batch_size]
            optimizer.zero_grad()
            reconstructed = autoencoder(batch)
            loss = criterion(reconstructed, batch)
            loss.backward()
            optimizer.step()
            
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}]')
                 
    with torch.no_grad():
        encoded_data = autoencoder.encoder(data_tensor).numpy()
        
    df_encoded = pd.DataFrame(encoded_data, columns=[f'Enc_{i + 1}' for i in range(encoded_data.shape[1])])
    
    return df_encoded
        

In [5]:
# data loader
train = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
test = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')
#test = train.head(20)

import pandas as pd
import numpy as np

# Giả sử `df` là bảng dữ liệu
train_non_id = train.drop("id", axis=1)
train_non_id = preprocess_feature(train_non_id)

train_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet")
test_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet")
#test_ts = train_ts.head(20)
df_train = train_ts.drop('id', axis=1) 
df_test = test_ts.drop('id', axis=1) # bo cot id


encoded_df_train = auto_encoder(df_train)
encoded_df_test = auto_encoder(df_test)

# chuyen ve dang dataframe
train_ts_encoded = pd.DataFrame(encoded_df_train, columns=[f'Enc_{i + 1}' for i in range(encoded_df_train.shape[1])])
test_ts_encoded = pd.DataFrame(encoded_df_test, columns=[f'Enc_{i + 1}' for i in range(encoded_df_test.shape[1])])

time_series_cols = train_ts_encoded.columns.tolist()
train_ts_encoded["id"]=train_ts["id"]
test_ts_encoded['id']=test_ts["id"]

train = pd.merge(train, train_ts_encoded, how="left", on='id')
test = pd.merge(test, test_ts_encoded, how="left", on='id')

100%|██████████| 996/996 [01:11<00:00, 13.86it/s]
100%|██████████| 2/2 [00:00<00:00, 11.42it/s]


Epoch [10/50], Loss: 1.5837]
Epoch [20/50], Loss: 1.5409]
Epoch [30/50], Loss: 1.5265]
Epoch [40/50], Loss: 1.5112]
Epoch [50/50], Loss: 1.5038]
Epoch [10/50], Loss: 1.0764]
Epoch [20/50], Loss: 0.8212]
Epoch [30/50], Loss: 0.4822]
Epoch [40/50], Loss: 0.4271]
Epoch [50/50], Loss: 0.4271]


In [6]:
# correlation_matrix = train_non_id.corr()

# # Vẽ biểu đồ heatmap (tùy chọn)
# import seaborn as sns
# import matplotlib.pyplot as plt
# plt.figure(figsize=(40, 40))  # Thay đổi kích thước (20x20 là ví dụ)
# sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm', cbar=True)

# plt.title("Correlation Matrix", fontsize=16)
# plt.show()

In [7]:
# imputer de fill gia tri nan

train.replace([np.inf, -np.inf], np.nan, inplace=True)
test.replace([np.inf, -np.inf], np.nan, inplace=True)

imputer = KNNImputer(n_neighbors=6)

numeric_cols_train = train.select_dtypes(include=['int32', 'int64', 'float64']).columns
numeric_cols_test = test.select_dtypes(include=['int32', 'int64', 'float64']).columns

imputed_train_data = imputer.fit_transform(train[numeric_cols_train])
imputed_test_data = imputer.fit_transform(test[numeric_cols_test])

train_imputed = pd.DataFrame(imputed_train_data, columns=numeric_cols_train)
test_imputed = pd.DataFrame(imputed_test_data, columns=numeric_cols_test)

train_imputed['sii'] = train_imputed['sii'].round().astype(int)
for col in train.columns:
    if col not in numeric_cols_train:
        train_imputed[col] = train[col]

for col in test.columns:
    if col not in numeric_cols_test:
        test_imputed[col] = test[col]

train = train_imputed
test = test_imputed

train = preprocess_feature(train)
train = train.dropna(thresh=10, axis=0)
test = preprocess_feature(test)

In [8]:
train_featuresCols += time_series_cols

train = train[train_featuresCols]
# drop cac ban ghi co sii nan
train = train.dropna(subset='sii') 

test_featuresCols += time_series_cols
test = test[test_featuresCols]

# con cai nao to qua thi nan
if np.any(np.isinf(train)):
    train = train.replace([np.inf, -np.inf], np.nan)

if np.any(np.isinf(test)):
    test = test.replace([np.inf, -np.inf], np.nan)

In [9]:
def eval_models(thresholds, y, y_predict_non_rounded):
    y_predic = np.where(y_predict_non_rounded < thresholds[0], 0,
                    np.where(y_predict_non_rounded < thresholds[1], 1,
                             np.where(y_predict_non_rounded < thresholds[2], 2, 3)))
    return -cohen_kappa_score(y, y_predic, weights='quadratic')

def train_models(model_class, test_data):
    X = train.drop(['sii'], axis=1)
    y = train['sii']

    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    
    train_S = []
    test_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]

        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        y_train_pred_rounded = y_train_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = cohen_kappa_score(y_train, y_train_pred_rounded, weights='quadratic')        
        val_kappa = cohen_kappa_score(y_val, y_val_pred_rounded, weights='quadratic')

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        clear_output(wait=True)

    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    KappaOPtimizer = minimize(eval_models,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = np.where(oof_non_rounded < (KappaOPtimizer.x)[0], 0,
                    np.where(oof_non_rounded < (KappaOPtimizer.x)[1], 1,
                             np.where(oof_non_rounded < (KappaOPtimizer.x)[2], 2, 3)))
    
    tKappa = cohen_kappa_score(y, oof_tuned, weights='quadratic')

    print(f"----> || Optimized SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tpTuned = np.where(tpm < (KappaOPtimizer.x)[0], 0,
                    np.where(tpm < (KappaOPtimizer.x)[1], 1,
                             np.where(tpm < (KappaOPtimizer.x)[2], 2, 3)))
    
    submission = pd.DataFrame({
        'id': sample['id'],
        'sii': tpTuned
    })

    return submission

In [10]:
# loading model
Light = LGBMRegressor(**Params, random_state=SEED, verbose=-1, n_estimators=300)
XGB_Model = XGBRegressor(**XGB_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Params)

voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model),
],weights=[4.0,4.0,6.0])

In [11]:
train

Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,FGC-FGC_CU,FGC-FGC_CU_Zone,FGC-FGC_PU,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_SRR,FGC-FGC_SRR_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-computerinternet_hoursday,sii,BMI_Age,Internet_Hours_Age,BMI_Internet_Hours,BFP_BMI,FFMI_BFP,FMI_BFP,LST_TBW,BFP_BMR,BFP_DEE,BMR_Weight,DEE_Weight,SMM_Height,Muscle_to_Fat,Hydration_Status,ICW_TBW,BMI_PHR,Enc_1,Enc_2,Enc_3,Enc_4,Enc_5,Enc_6,Enc_7,Enc_8,Enc_9,Enc_10,Enc_11,Enc_12,Enc_13,Enc_14,Enc_15,Enc_16,Enc_17,Enc_18,Enc_19,Enc_20,Enc_21,Enc_22,Enc_23,Enc_24,Enc_25,Enc_26,Enc_27,Enc_28,Enc_29,Enc_30,Enc_31,Enc_32,Enc_33,Enc_34,Enc_35,Enc_36,Enc_37,Enc_38,Enc_39,Enc_40,Enc_41,Enc_42,Enc_43,Enc_44,Enc_45,Enc_46,Enc_47,Enc_48,Enc_49,Enc_50
0,5.0,0.0,51.000000,16.877316,46.000000,50.800000,23.000000,62.166667,84.500000,107.666667,0.0,0.000000,0.000000,0.000000,7.000000,0.000000,6.000000,0.000000,6.000000,1.000000,2.000000,2.668550,16.879200,932.498000,1492.000000,8.255980,41.586200,13.817700,3.061430,9.213770,1.000000,24.434900,8.895360,38.917700,19.541300,32.690900,46.166667,60.166667,3.0,2,84.386578,15.0,50.631947,0.545865,1.499679,0.332267,1.190475,8591.822097,13746.944840,18.356260,29.370079,0.424811,6.383063,0.643522,0.747453,1426.133176,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,9.0,0.0,65.000000,14.035590,48.000000,46.000000,22.000000,75.000000,70.000000,122.000000,3.0,0.000000,5.000000,0.000000,11.000000,1.000000,11.000000,1.000000,3.000000,0.000000,2.000000,2.579490,14.037100,936.656000,1498.650000,6.019930,42.029100,12.825400,1.211720,3.970850,1.000000,21.035200,14.974000,39.449700,15.410700,27.055200,46.000000,64.000000,0.0,0,126.320313,0.0,0.000000,0.282883,3.229888,0.305154,1.458119,3719.320478,5950.914352,20.362087,32.579348,0.321056,12.718037,0.588157,0.777492,982.491320,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,10.0,1.0,71.000000,16.648696,56.500000,75.600000,25.000000,65.000000,94.000000,117.000000,20.0,1.000000,7.000000,1.000000,10.000000,1.000000,10.000000,1.000000,5.000000,0.000000,2.666667,3.324367,18.689850,1080.290500,1843.476667,15.851497,57.328083,14.669483,4.020353,16.171908,2.666667,28.177517,13.299080,54.003700,26.326717,44.029000,38.000000,54.000000,2.0,0,166.486961,20.0,33.297392,0.865278,0.907097,0.248601,1.226548,17470.358939,29812.535668,14.289557,24.384612,0.465960,6.548359,0.582394,0.639976,1564.977430,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,9.0,0.0,71.000000,18.292347,56.000000,81.600000,25.666667,60.000000,97.000000,117.000000,18.0,1.000000,5.000000,0.000000,7.000000,0.000000,7.000000,0.000000,7.000000,1.000000,3.000000,3.841910,18.294300,1131.430000,1923.440000,15.592500,62.775700,14.074000,4.220330,18.824300,2.000000,30.404100,16.779000,58.933800,26.479800,45.996600,31.000000,45.000000,0.0,1,164.631122,0.0,0.000000,1.028971,0.747651,0.224196,1.281264,21298.377749,36207.411592,13.865564,23.571569,0.472854,6.274343,0.563684,0.661008,1774.357653,8.329063,1.990211,7.612260,7.601535,9.108830,4.181010,5.54880,-0.075382,8.268166,6.624942,10.878230,3.629679,5.432630,-0.050110,5.599047,8.823846,4.702280,4.612934,-0.042706,8.717288,0.480118,4.182630,5.313142,5.986210,0.156017,7.876768,3.438159,-0.011624,0.684995,1.596176,4.939295,6.175050,6.526182,1.529165,-0.038657,1.157266,1.088691,0.400643,-0.016490,3.483673,3.280748,-0.056910,5.718052,-0.064816,1.947562,0.318691,6.438976,0.321885,0.645534,2.004941
4,18.0,1.0,66.166667,25.736403,63.033333,123.566667,33.166667,71.333333,77.333333,121.166667,14.0,0.333333,1.333333,0.000000,8.416667,0.500000,7.916667,0.500000,9.833333,0.666667,2.500000,4.554575,25.469817,1398.953333,2165.610000,30.194433,91.270050,15.899133,9.570722,54.529950,2.333333,35.549250,25.526433,86.715600,46.760600,65.743667,42.833333,60.000000,2.5,1,463.255245,45.0,64.341006,2.140964,0.291567,0.175513,1.318995,76284.855319,118090.605020,11.321446,17.525843,0.741839,4.885797,0.532050,0.540725,1990.281795,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3955,13.0,0.0,60.000000,16.362460,59.500000,82.400000,25.166667,71.000000,70.000000,104.000000,16.0,0.000000,10.000000,1.000000,8.000000,1.000000,9.000000,1.000000,12.000000,1.000000,3.000000,4.522770,16.364200,1206.880000,2051.700000,19.461100,70.811700,14.062900,2.301380,11.588300,1.000000,33.370900,17.979700,66.288900,29.779000,52.832000,35.000000,50.000000,1.0,1,212.711984,13.0,16.362460,0.708149,1.213543,0.198595,1.254711,13985.687504,23775.715110,14.646602,24.899272,0.500487,12.939628,0.641165,0.631642,1145.372220,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3956,10.0,0.0,61.333333,18.764678,53.500000,76.400000,27.000000,60.000000,78.000000,118.000000,0.0,0.000000,4.000000,0.000000,0.000000,0.000000,0.000000,0.000000,12.000000,1.000000,3.166667,3.552855,18.750433,1107.977167,2046.053333,16.130380,60.277133,14.563650,4.186780,17.556195,2.500000,29.387450,14.759307,56.724267,27.810833,45.517817,37.333333,53.166667,0.0,0,187.646781,0.0,0.000000,0.936309,0.829545,0.238479,1.246199,19451.863194,35920.911300,14.502319,26.780803,0.519829,6.642535,0.595783,0.645625,1463.644895,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3957,11.0,0.0,68.000000,21.441500,60.000000,109.800000,29.166667,79.000000,99.000000,116.000000,15.0,1.000000,0.000000,0.000000,10.000000,1.000000,10.000000,1.000000,14.000000,1.000000,2.000000,4.413050,21.443800,1253.740000,2005.990000,20.482500,75.803300,14.804300,6.639520,33.996700,2.000000,33.980500,21.340300,71.390300,28.779200,54.463000,56.000000,77.000000,0.0,1,235.856500,0.0,0.000000,1.585386,0.435463,0.195299,1.310804,42623.022658,68197.040233,11.418397,18.269490,0.479653,4.334530,0.496020,0.623919,2122.708500,5.600250,0.488724,1.413275,-0.032146,0.005404,-0.021304,2.78255,-0.056478,4.626410,7.742833,-0.006812,-0.019294,5.491472,-0.036490,3.972045,1.718989,6.211131,-0.020963,-0.071319,3.889128,1.315141,-0.009577,6.280964,0.795929,12.009753,2.359829,6.574596,-0.035731,11.397275,3.757905,9.290610,8.080282,-0.014613,7.499831,-0.097724,2.565684,4.102528,3.731076,2.746559,1.959653,-0.046649,-0.050347,11.379966,2.285534,1.852803,7.524246,5.504507,7.604279,-0.006682,3.056946
3958,13.0,0.0,70.000000,12.235895,70.700000,87.000000,27.166667,59.000000,61.000000,113.000000,17.0,0.666667,6.833333,0.500000,9.666667,0.500000,10.416667,0.833333,11.666667,1.000000,4.000000,6.661680,12.237200,1414.340000,2970.120000,26.532300,92.909200,13.068400,-0.831170,-5.909170,2.000000,41.371500,25.005400,86.247500,45.434000,67.903800,33.000000,47.000000,1.0,0,159.066638,13.0,12.235895,-0.482886,-2.211546,0.140658,1.270142,-8357.575498,-17550.944000,16.256782,34.139310,0.642631,-54.662704,0.780503,0.609266,746.389610,4.699757,-0.056717,8.999930,6.690673,-0.022400,-0.021678,4.03145,3.224769,5.772193,0.377203,-0.028969,4.520741,-0.034374,-0.062175,-0.017686,-0.023115,1.327072,-0.003779,-0.036170,1.548599,-0.034750,3.686563,6.755351,-0.007463,17.407072,5.012505,-0.001587,-0.039810,8.690086,2.608233,0.997701,2.667798,0.329496,4.419621,-0.022733,7.114868,4.847923,5.884799,6.202555,2.775476,-0.029588,-0.103709,0.941461,-0.005393,6.290661,9.448783,5.204377,12.714284,3.779461,5.810829


In [12]:
test

Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,FGC-FGC_CU,FGC-FGC_CU_Zone,FGC-FGC_PU,FGC-FGC_PU_Zone,FGC-FGC_SRL,FGC-FGC_SRL_Zone,FGC-FGC_SRR,FGC-FGC_SRR_Zone,FGC-FGC_TL,FGC-FGC_TL_Zone,BIA-BIA_Activity_Level_num,BIA-BIA_BMC,BIA-BIA_BMI,BIA-BIA_BMR,BIA-BIA_DEE,BIA-BIA_ECW,BIA-BIA_FFM,BIA-BIA_FFMI,BIA-BIA_FMI,BIA-BIA_Fat,BIA-BIA_Frame_num,BIA-BIA_ICW,BIA-BIA_LDM,BIA-BIA_LST,BIA-BIA_SMM,BIA-BIA_TBW,SDS-SDS_Total_Raw,SDS-SDS_Total_T,PreInt_EduHx-computerinternet_hoursday,BMI_Age,Internet_Hours_Age,BMI_Internet_Hours,BFP_BMI,FFMI_BFP,FMI_BFP,LST_TBW,BFP_BMR,BFP_DEE,BMR_Weight,DEE_Weight,SMM_Height,Muscle_to_Fat,Hydration_Status,ICW_TBW,BMI_PHR,Enc_1,Enc_2,Enc_3,Enc_4,Enc_5,Enc_6,Enc_7,Enc_8,Enc_9,Enc_10,Enc_11,Enc_12,Enc_13,Enc_14,Enc_15,Enc_16,Enc_17,Enc_18,Enc_19,Enc_20,Enc_21,Enc_22,Enc_23,Enc_24,Enc_25,Enc_26,Enc_27,Enc_28,Enc_29,Enc_30,Enc_31,Enc_32,Enc_33,Enc_34,Enc_35,Enc_36,Enc_37,Enc_38,Enc_39,Enc_40,Enc_41,Enc_42,Enc_43,Enc_44,Enc_45,Enc_46,Enc_47,Enc_48,Enc_49,Enc_50
0,5.0,0.0,51.0,16.877316,46.0,50.8,25.4,66.5,80.5,107.5,0.0,0.0,0.0,0.0,7.0,0.0,6.0,0.0,6.0,1.0,2.0,2.66855,16.8792,932.498,1492.0,8.25598,41.5862,13.8177,3.06143,9.21377,1.0,24.4349,8.89536,38.9177,19.5413,32.6909,39.5,55.833333,3.0,84.386578,15.0,50.631947,0.545865,1.499679,0.332267,1.190475,8591.822097,13746.94484,18.35626,29.370079,0.424811,6.383063,0.643522,0.747453,1358.623913,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,9.0,0.0,63.166667,14.03559,48.0,46.0,22.0,75.0,70.0,122.0,3.0,0.0,5.0,0.0,11.0,1.0,11.0,1.0,3.0,0.0,2.0,2.57949,14.0371,936.656,1498.65,6.01993,42.0291,12.8254,1.21172,3.97085,1.0,21.0352,14.974,39.4497,15.4107,27.0552,46.0,64.0,0.0,126.320313,0.0,0.0,0.282883,3.229888,0.305154,1.458119,3719.320478,5950.914352,20.362087,32.579348,0.321056,12.718037,0.588157,0.777492,982.49132,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,10.0,1.0,71.0,16.648696,56.5,75.6,25.4,65.0,94.0,117.0,20.0,1.0,7.0,1.0,10.0,1.0,10.0,1.0,5.0,0.0,2.833333,3.496302,17.404817,1070.525667,1880.245,14.484752,56.288233,14.012633,3.392192,14.178437,1.5,27.347717,14.455743,52.79195,24.385267,41.8325,38.0,54.0,2.0,166.486961,20.0,33.297392,0.814627,0.988306,0.23925,1.261984,15178.380365,26658.93465,14.160392,24.870966,0.431598,7.188646,0.55334,0.653743,1564.97743,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,9.0,0.0,71.0,18.292347,56.0,81.6,25.4,60.0,97.0,117.0,18.0,1.0,5.0,0.0,7.0,0.0,7.0,0.0,7.0,1.0,3.0,3.84191,18.2943,1131.43,1923.44,15.5925,62.7757,14.074,4.22033,18.8243,2.0,30.4041,16.779,58.9338,26.4798,45.9966,31.0,45.0,0.0,164.631122,0.0,0.0,1.028971,0.747651,0.224196,1.281264,21298.377749,36207.411592,13.865564,23.571569,0.472854,6.274343,0.563684,0.661008,1774.357653,-0.010104,15.968156,0.961367,-0.02136,11.445395,-0.075853,7.942317,6.920491,11.948594,-0.104204,15.3601,1.863419,9.896029,8.360224,16.795908,-0.090223,-0.08806,-0.103157,-0.049872,0.15492,0.752912,-0.052186,-0.069788,-0.072998,-0.033057,15.43061,-0.080733,-0.011817,0.971661,7.25104,-0.061932,14.31653,-0.020055,-0.066035,6.30823,13.130306,10.19689,-0.11998,5.708042,15.951609,8.277955,-0.031738,5.621408,-0.037216,-0.090588,1.248585,6.013106,14.484802,8.777797,7.557142
4,18.0,1.0,61.5,18.717711,58.0,89.533333,25.4,73.166667,81.333333,125.166667,11.833333,0.333333,6.666667,0.333333,5.833333,0.5,6.583333,0.666667,8.416667,0.5,2.833333,3.94533,20.020883,1159.069333,2016.448333,18.341138,65.719217,14.592667,5.428227,25.680792,1.833333,29.973483,17.404567,61.7739,28.0763,48.31465,37.666667,53.333333,1.166667,336.918804,21.0,21.83733,1.2827,0.568233,0.211373,1.278575,29765.818077,51783.989555,12.945674,22.521761,0.484074,5.172278,0.539628,0.620381,1522.373853,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,13.0,1.0,50.0,22.279952,59.5,112.2,25.4,60.0,73.0,102.0,12.0,0.0,6.0,0.0,10.0,1.0,11.0,1.0,8.0,0.0,2.0,4.33036,30.1865,1330.97,1996.45,30.2124,84.0285,16.6877,13.4988,67.9715,2.0,32.9141,20.902,79.6982,35.3804,63.1265,40.0,56.0,0.0,289.639376,0.0,0.0,2.251718,0.24551,0.198595,1.262516,90468.027355,135701.701175,11.862478,17.793672,0.594629,2.621003,0.562625,0.521399,1626.436495,18.231829,-0.07791,0.563911,13.779099,11.305266,14.550802,16.029932,6.212932,-0.052937,-0.020996,15.982466,-0.039786,-0.104878,-0.020118,-0.029154,0.164837,-0.114766,10.904015,12.139852,-0.112333,15.040173,16.327595,-0.091133,12.181795,-0.140619,-0.095207,17.37582,18.757954,4.016899,-0.029892,14.365492,-0.057817,7.40507,15.172128,-0.136247,-0.060643,-0.049377,9.988453,-0.067383,-0.082022,-0.067903,3.691231,-0.098174,-0.089461,24.729626,-0.041609,-0.093773,-0.088775,-0.074994,-0.025052
6,10.0,0.0,64.833333,19.66076,55.0,84.6,25.4,123.0,83.0,163.0,9.0,1.0,2.0,0.0,11.0,1.0,11.0,1.0,11.0,1.0,2.0,3.78271,19.6629,1135.86,1817.38,16.3275,63.247,14.7,4.96291,21.353,2.0,30.8936,16.0259,59.4643,26.1957,47.2211,27.0,40.0,3.0,196.607603,30.0,58.982281,1.085954,0.688428,0.232422,1.259274,24254.01858,38806.51514,13.426241,21.482033,0.476285,5.278294,0.558169,0.654233,1631.843107,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,10.0,1.0,64.833333,16.861286,59.25,84.2,27.0,71.0,90.0,116.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,3.0,4.05726,16.8631,1180.04,1888.06,21.94,67.9527,13.6092,3.25395,16.2474,2.0,28.5367,17.476,63.8954,28.768,50.4767,35.0,50.166667,2.0,168.612865,20.0,33.722573,0.963488,0.837623,0.200275,1.265839,19172.581896,30676.066044,14.014727,22.423515,0.485536,8.840947,0.599486,0.565344,1517.515782,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,15.0,0.0,61.5,18.678979,58.0,89.366667,25.4,73.166667,81.666667,125.166667,12.5,0.5,7.0,0.333333,7.666667,0.666667,8.416667,0.833333,8.75,0.5,2.833333,3.94533,20.020883,1159.069333,2016.448333,18.341138,65.719217,14.592667,5.428227,25.680792,1.833333,29.973483,17.404567,61.7739,28.0763,48.31465,37.666667,53.333333,2.0,280.184678,30.0,37.357957,1.2827,0.568233,0.211373,1.278575,29765.818077,51783.989555,12.969817,22.563764,0.484074,5.172278,0.540634,0.620381,1525.449913,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,19.0,1.0,61.5,18.717711,58.0,89.533333,25.4,73.166667,81.333333,125.166667,11.833333,0.333333,6.666667,0.333333,5.833333,0.5,6.583333,0.666667,8.416667,0.5,2.833333,3.94533,20.020883,1159.069333,2016.448333,18.341138,65.719217,14.592667,5.428227,25.680792,1.833333,29.973483,17.404567,61.7739,28.0763,48.31465,37.666667,53.333333,1.166667,355.636515,22.166667,21.83733,1.2827,0.568233,0.211373,1.278575,29765.818077,51783.989555,12.945674,22.521761,0.484074,5.172278,0.539628,0.620381,1522.373853,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [13]:
# train
sample = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/sample_submission.csv')
Submission1 = train_models(voting_model, test)

# Save submission
Submission1.to_csv('submission.csv', index=False)

Training Folds: 100%|██████████| 5/5 [00:27<00:00,  5.57s/it]

Mean Train QWK --> 0.7701
Mean Validation QWK ---> 0.4857
----> || Optimized SCORE :: [36m[1m 0.540[0m





In [14]:
Submission1

Unnamed: 0,id,sii
0,00008ff9,1
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,1
5,001f3379,1
6,0038ba98,1
7,0068a485,0
8,0069fbed,1
9,0083e397,1


In [15]:
Submission1['sii'].value_counts()

sii
1    10
0    10
Name: count, dtype: int64