In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/child-mind-institute-problematic-internet-use/sample_submission.csv
/kaggle/input/child-mind-institute-problematic-internet-use/data_dictionary.csv
/kaggle/input/child-mind-institute-problematic-internet-use/train.csv
/kaggle/input/child-mind-institute-problematic-internet-use/test.csv
/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet/id=00115b9f/part-0.parquet
/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet/id=001f3379/part-0.parquet
/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet/id=0745c390/part-0.parquet
/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet/id=eaab7a96/part-0.parquet
/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet/id=8ec2cc63/part-0.parquet
/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet/id=b2987a65/part-0.parquet
/kaggle/input/child-mind-institute-problematic-intern

In [2]:
import numpy as np
import pandas as pd
from colorama import Fore, Style

from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor

from sklearn.impute import SimpleImputer
from sklearn.base import clone
from sklearn.preprocessing import StandardScaler
from sklearn.impute import KNNImputer
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import cohen_kappa_score
from scipy.optimize import minimize
from sklearn.ensemble import VotingRegressor

import torch
import os
import torch.nn as nn
from concurrent.futures import ThreadPoolExecutor
import torch.optim as optim
from keras.optimizers import Adam

from tqdm import tqdm

In [3]:
df = pd.read_csv("/kaggle/input/child-mind-institute-problematic-internet-use/train.csv")
df = df.dropna(subset=['sii']) # keeping labeled values only
test = pd.read_csv("/kaggle/input/child-mind-institute-problematic-internet-use/test.csv")

In [4]:
season_mapping = {
    'Winter': -1,
    'Spring': -0.5,
    'Summer': 0.5,
    'Fall': 1
}

In [5]:
# mapping non-string values
df = df.replace(season_mapping)
test = test.replace(season_mapping)

  df = df.replace(season_mapping)
  test = test.replace(season_mapping)


In [6]:
# # dropping questions not in test dataset
test_missing_columns = set(df.columns) - set(test.columns)
for col in test_missing_columns:
    if col != 'sii':  # Retain the target column for training
        df.drop(columns=col, inplace=True)
# for later use
train_ids = df['id']
test_ids = test['id']
train_labels = df['sii']
df = df.drop(columns=['id'])

In [7]:
# Dropping weakly correlated features
df = df.drop(columns=['sii'])

In [8]:
# KNN missing data imputation
imputer = KNNImputer(n_neighbors=4)  # k=4
imputed_data = imputer.fit_transform(df)

train = pd.DataFrame(imputed_data, columns=df.columns)

In [9]:
# Parquet file opening functions

def load_time_series(dirname) -> pd.DataFrame:
    # opening parquet files and returning dataframe.
    ids = os.listdir(dirname)
    
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(lambda fname: process_file(fname, dirname), ids), total=len(ids)))
    
    stats, indexes = zip(*results)
    
    df = pd.DataFrame(stats, columns=[f"stat_{i}" for i in range(len(stats[0]))])
    df['id'] = indexes
    return df

def process_file(filename, dirname):
    df = pd.read_parquet(os.path.join(dirname, filename, 'part-0.parquet'))
    df.drop('step', axis=1, inplace=True)
    return df.describe().values.reshape(-1), filename.split('=')[1]

In [10]:
def feature_engineering(df):
    season_cols = [col for col in df.columns if 'Season' in col]
    df = df.drop(season_cols, axis=1) 

    #Age
    df['BMI_Age'] = df['Physical-BMI'] * df['Basic_Demos-Age']
    df['Internet_Hours_Age'] = df['PreInt_EduHx-computerinternet_hoursday'] * df['Basic_Demos-Age']

    #BMI
    df['BMI_Internet_Hours'] = df['Physical-BMI'] * df['PreInt_EduHx-computerinternet_hoursday']
    df['BFP_BMI'] = df['BIA-BIA_Fat'] / df['BIA-BIA_BMI']
    df['FFMI_BFP'] = df['BIA-BIA_FFMI'] / df['BIA-BIA_Fat']
    df['FMI_BFP'] = df['BIA-BIA_FMI'] / df['BIA-BIA_Fat']
    df['LST_TBW'] = df['BIA-BIA_LST'] / df['BIA-BIA_TBW']
    df['BFP_BMR'] = df['BIA-BIA_Fat'] * df['BIA-BIA_BMR']
    df['BFP_DEE'] = df['BIA-BIA_Fat'] * df['BIA-BIA_DEE']
    df['BMR_Weight'] = df['BIA-BIA_BMR'] / df['Physical-Weight']
    df['DEE_Weight'] = df['BIA-BIA_DEE'] / df['Physical-Weight']
    df['SMM_Height'] = df['BIA-BIA_SMM'] / df['Physical-Height']
    df['Muscle_to_Fat'] = df['BIA-BIA_SMM'] / df['BIA-BIA_FMI']
    df['Hydration_Status'] = df['BIA-BIA_TBW'] / df['Physical-Weight']
    df['ICW_TBW'] = df['BIA-BIA_ICW'] / df['BIA-BIA_TBW']
    
    return df

In [11]:
train = feature_engineering(train)
train = train.dropna(thresh=10, axis=0)
test = feature_engineering(test)
train = train.replace([np.inf, -np.inf], 0)
test = test.replace([np.inf, -np.inf], 0)

Autoencoder for accelerometer data -Compressing accelerometer tabular data to N features and merging them on original dataset.

In [12]:
class AutoEncoder(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim*3),
            nn.ReLU(),
            nn.Linear(encoding_dim*3, encoding_dim*2),
            nn.ReLU(),
            nn.Linear(encoding_dim*2, encoding_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, input_dim*2),
            nn.ReLU(),
            nn.Linear(input_dim*2, input_dim*3),
            nn.ReLU(),
            nn.Linear(input_dim*3, input_dim),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
    
def perform_autoencoder(df, encoding_dim=50, epochs=50, batch_size=32):
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df)
    
    data_tensor = torch.FloatTensor(df_scaled)
    
    input_dim = data_tensor.shape[1]
    autoencoder = AutoEncoder(input_dim, encoding_dim)
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(autoencoder.parameters())
    
    for epoch in range(epochs):
        for i in range(0, len(data_tensor), batch_size):
            batch = data_tensor[i : i + batch_size]
            optimizer.zero_grad()
            reconstructed = autoencoder(batch)
            loss = criterion(reconstructed, batch)
            loss.backward()
            optimizer.step()
            
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}]')
                 
    with torch.no_grad():
        encoded_data = autoencoder.encoder(data_tensor).numpy()
        
    df_encoded = pd.DataFrame(encoded_data, columns=[f'Enc_{i + 1}' for i in range(encoded_data.shape[1])])
    
    return df_encoded

In [13]:
# qwk score
def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

In [14]:
# threshold rounder
def threshold_Rounder(oof_non_rounded, thresholds):
    return np.where(oof_non_rounded < thresholds[0], 0,
                    np.where(oof_non_rounded < thresholds[1], 1,
                             np.where(oof_non_rounded < thresholds[2], 2, 3)))

In [15]:
# prediction evaluation using qwk function
def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    rounded_p = threshold_Rounder(oof_non_rounded, thresholds)
    return -quadratic_weighted_kappa(y_true, rounded_p)

In [16]:
## accelerometer data
print('Loading train timeseries data...')
train_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet")
print('Loading test timeseries data...')
test_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet")
print(f'Shape of Train accelerometer data: {train_ts.shape}')
print(f'Shape of Test accelerometer data: {test_ts.shape}')

Loading train timeseries data...


100%|██████████| 996/996 [01:11<00:00, 13.94it/s]


Loading test timeseries data...


100%|██████████| 2/2 [00:00<00:00,  9.82it/s]

Shape of Train accelerometer data: (996, 97)
Shape of Test accelerometer data: (2, 97)





In [17]:
df_train_ts = train_ts.drop('id', axis=1)
df_test_ts = test_ts.drop('id', axis=1)

In [18]:
train_ts_encoded = perform_autoencoder(df_train_ts, encoding_dim=80, epochs=100, batch_size=32)
test_ts_encoded = perform_autoencoder(df_test_ts, encoding_dim=80, epochs=100, batch_size=32)

Epoch [10/100], Loss: 1.6774]
Epoch [20/100], Loss: 1.6272]
Epoch [30/100], Loss: 1.5621]
Epoch [40/100], Loss: 1.5216]
Epoch [50/100], Loss: 1.5154]
Epoch [60/100], Loss: 1.5185]
Epoch [70/100], Loss: 1.5122]
Epoch [80/100], Loss: 1.5147]
Epoch [90/100], Loss: 1.5092]
Epoch [100/100], Loss: 1.4903]
Epoch [10/100], Loss: 0.9727]
Epoch [20/100], Loss: 0.5474]
Epoch [30/100], Loss: 0.4271]
Epoch [40/100], Loss: 0.4271]
Epoch [50/100], Loss: 0.4271]
Epoch [60/100], Loss: 0.4271]
Epoch [70/100], Loss: 0.4271]
Epoch [80/100], Loss: 0.4271]
Epoch [90/100], Loss: 0.4271]
Epoch [100/100], Loss: 0.4271]


In [19]:
time_series_cols = train_ts_encoded.columns.tolist()
train_ts_encoded["id"]=train_ts["id"]
test_ts_encoded['id']=test_ts["id"]

In [20]:
# re-adding primary keys
train['id'] = train_ids
test['id'] = test_ids
train = pd.merge(train, train_ts_encoded, how="left", on='id')
test = pd.merge(test, test_ts_encoded, how="left", on='id')

train['id']

0       00008ff9
1       000fd460
2       00105258
3       00115b9f
4            NaN
          ...   
2731    ad8189bd
2732    ad8945e3
2733    ad9f821d
2734         NaN
2735    adaf7461
Name: id, Length: 2736, dtype: object

In [21]:
train[:5]


Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,...,Enc_71,Enc_72,Enc_73,Enc_74,Enc_75,Enc_76,Enc_77,Enc_78,Enc_79,Enc_80
0,5.0,0.0,51.0,16.877316,46.0,50.8,22.0,62.5,80.25,107.5,...,,,,,,,,,,
1,9.0,0.0,57.5,14.03559,48.0,46.0,22.0,75.0,70.0,122.0,...,,,,,,,,,,
2,10.0,1.0,71.0,16.648696,56.5,75.6,26.5,65.0,94.0,117.0,...,,,,,,,,,,
3,9.0,0.0,71.0,18.292347,56.0,81.6,26.5,60.0,97.0,117.0,...,0.0,3.905175,4.612041,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,13.0,1.0,50.0,22.279952,59.5,112.2,27.0,60.0,73.0,102.0,...,,,,,,,,,,


In [22]:
test[:5]


Unnamed: 0,id,Basic_Demos-Age,Basic_Demos-Sex,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,...,Enc_71,Enc_72,Enc_73,Enc_74,Enc_75,Enc_76,Enc_77,Enc_78,Enc_79,Enc_80
0,00008ff9,5,0,51.0,16.877316,46.0,50.8,,,,...,,,,,,,,,,
1,000fd460,9,0,,14.03559,48.0,46.0,22.0,75.0,70.0,...,,,,,,,,,,
2,00105258,10,1,71.0,16.648696,56.5,75.6,,65.0,94.0,...,,,,,,,,,,
3,00115b9f,9,0,71.0,18.292347,56.0,81.6,,60.0,97.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.065893,0.0
4,0016bb22,18,1,,,,,,,,...,,,,,,,,,,


## Training

In [23]:
# funciton that trains any regressor model using kfold cross validation, k hard coded = 5
def TrainML(model_class, test_data) -> list[int]:
    global train
    

    
    #train = train.drop(columns=['id'])
    test_data = test_data.drop(columns=['id'])
    train = train[test_data.columns]

    

    
    X = train # .drop(['sii'], axis=1)
    y = train_labels

    ##
    # imputer = SimpleImputer(strategy='mean')
    # X = pd.DataFrame(imputer.fit_transform(train), columns=train.columns, index=train.index)

    # # Ensure test_data also remains a DataFrame
    # test_data = pd.DataFrame(imputer.transform(test_data), columns=test_data.columns, index=test_data.index)
    #####
    print(test_data.head())
    n_splits=5
    random_state=42
    
    ################    
    scaler = StandardScaler()
    
    scaler.fit(X)


    X = pd.DataFrame(scaler.transform(X), columns=X.columns)
   
    #ids are stored in test_ids variable
    test_data = pd.DataFrame(scaler.transform(test_data), columns=test_data.columns)
    #############
    SKF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    
    train_S = []
    test_S = []
    
    oof_non_rounded = np.zeros(len(y), dtype=float) 
    oof_rounded = np.zeros(len(y), dtype=int) 
    test_preds = np.zeros((len(test_data), n_splits))

    for fold, (train_idx, test_idx) in enumerate(tqdm(SKF.split(X, y), desc="Training Folds", total=n_splits)):
        X_train, X_val = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[test_idx]
        model = clone(model_class)
        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)

        oof_non_rounded[test_idx] = y_val_pred
        y_val_pred_rounded = y_val_pred.round(0).astype(int)
        oof_rounded[test_idx] = y_val_pred_rounded

        train_kappa = quadratic_weighted_kappa(y_train, y_train_pred.round(0).astype(int))
        val_kappa = quadratic_weighted_kappa(y_val, y_val_pred_rounded)

        train_S.append(train_kappa)
        test_S.append(val_kappa)
        
        test_preds[:, fold] = model.predict(test_data)
        
        print(f"Fold {fold+1} - Train QWK: {train_kappa:.4f}, Validation QWK: {val_kappa:.4f}")
        
    print(f"Mean Train QWK --> {np.mean(train_S):.4f}")
    print(f"Mean Validation QWK ---> {np.mean(test_S):.4f}")

    KappaOPtimizer = minimize(evaluate_predictions,
                              x0=[0.5, 1.5, 2.5], args=(y, oof_non_rounded), 
                              method='Nelder-Mead')
    assert KappaOPtimizer.success, "Optimization did not converge."
    
    oof_tuned = threshold_Rounder(oof_non_rounded, KappaOPtimizer.x)
    tKappa = quadratic_weighted_kappa(y, oof_tuned)

    print(f"----> || Optimized QWK SCORE :: {Fore.CYAN}{Style.BRIGHT} {tKappa:.3f}{Style.RESET_ALL}")

    tpm = test_preds.mean(axis=1)
    tp_rounded = threshold_Rounder(tpm, KappaOPtimizer.x)
    return tp_rounded.tolist()

In [24]:
LGBM_params = {
    'n_estimators': 300,
    'learning_rate': 0.046,
    'max_depth': 12,
    'num_leaves': 478,
    'min_data_in_leaf': 13,
    'feature_fraction': 0.893,
    'bagging_fraction': 0.784,
    'bagging_freq': 4,
    'lambda_l1': 10,  
    'lambda_l2': 0.01
}

In [25]:
XGB_Params = {
    'learning_rate': 0.05,
    'max_depth': 6,
    'n_estimators': 200,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_alpha': 1,  
    'reg_lambda': 5,  
    'random_state': 42,
    'tree_method': 'exact'
}

In [26]:
CatBoost_Params = {
    'learning_rate': 0.05,
    'depth': 6,
    'iterations': 200,
    'random_seed': 42,
    'verbose': 0,
    'l2_leaf_reg': 10  
}

In [27]:
Light = LGBMRegressor(**LGBM_params, random_state=42, verbose=-1)
XGB_Model = XGBRegressor(**XGB_Params)
CatBoost_Model = CatBoostRegressor(**CatBoost_Params)

voting_model = VotingRegressor(estimators=[
    ('lightgbm', Light),
    ('xgboost', XGB_Model),
    ('catboost', CatBoost_Model),
],
     weights=[0.6, 0.6, 0.5] # 0.3 0.5 0.2
)

## XGboost model

In [28]:
xgb_preds = TrainML(model_class=XGB_Model, test_data=test)
sub = pd.DataFrame({
    
    'id'   : test_ids,
    
    'sii': xgb_preds
})
xgb_preds
sub


   Basic_Demos-Age  Basic_Demos-Sex  CGAS-CGAS_Score  Physical-BMI  \
0                5                0             51.0     16.877316   
1                9                0              NaN     14.035590   
2               10                1             71.0     16.648696   
3                9                0             71.0     18.292347   
4               18                1              NaN           NaN   

   Physical-Height  Physical-Weight  Physical-Waist_Circumference  \
0             46.0             50.8                           NaN   
1             48.0             46.0                          22.0   
2             56.5             75.6                           NaN   
3             56.0             81.6                           NaN   
4              NaN              NaN                           NaN   

   Physical-Diastolic_BP  Physical-HeartRate  Physical-Systolic_BP  ...  \
0                    NaN                 NaN                   NaN  ...   
1             

Training Folds:  20%|██        | 1/5 [00:02<00:08,  2.07s/it]

Fold 1 - Train QWK: 0.9261, Validation QWK: 0.3520


Training Folds:  40%|████      | 2/5 [00:04<00:06,  2.07s/it]

Fold 2 - Train QWK: 0.9232, Validation QWK: 0.4307


Training Folds:  60%|██████    | 3/5 [00:06<00:04,  2.13s/it]

Fold 3 - Train QWK: 0.9328, Validation QWK: 0.3703


Training Folds:  80%|████████  | 4/5 [00:08<00:02,  2.10s/it]

Fold 4 - Train QWK: 0.9286, Validation QWK: 0.3503


Training Folds: 100%|██████████| 5/5 [00:10<00:00,  2.09s/it]

Fold 5 - Train QWK: 0.9279, Validation QWK: 0.3575
Mean Train QWK --> 0.9277
Mean Validation QWK ---> 0.3722





----> || Optimized QWK SCORE :: [36m[1m 0.449[0m


Unnamed: 0,id,sii
0,00008ff9,1
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,0
5,001f3379,1
6,0038ba98,0
7,0068a485,0
8,0069fbed,0
9,0083e397,0


## Light Gradient Boosting Machine model

In [29]:
lgbm_preds = TrainML(model_class=Light, test_data=test)
sub = pd.DataFrame({
    
    'id'   : test_ids,
    
    'sii': lgbm_preds
})

sub

   Basic_Demos-Age  Basic_Demos-Sex  CGAS-CGAS_Score  Physical-BMI  \
0                5                0             51.0     16.877316   
1                9                0              NaN     14.035590   
2               10                1             71.0     16.648696   
3                9                0             71.0     18.292347   
4               18                1              NaN           NaN   

   Physical-Height  Physical-Weight  Physical-Waist_Circumference  \
0             46.0             50.8                           NaN   
1             48.0             46.0                          22.0   
2             56.5             75.6                           NaN   
3             56.0             81.6                           NaN   
4              NaN              NaN                           NaN   

   Physical-Diastolic_BP  Physical-HeartRate  Physical-Systolic_BP  ...  \
0                    NaN                 NaN                   NaN  ...   
1             

Training Folds:  20%|██        | 1/5 [00:01<00:04,  1.19s/it]

Fold 1 - Train QWK: 0.8134, Validation QWK: 0.3713


Training Folds:  40%|████      | 2/5 [00:02<00:03,  1.12s/it]

Fold 2 - Train QWK: 0.8035, Validation QWK: 0.4365


Training Folds:  60%|██████    | 3/5 [00:03<00:02,  1.10s/it]

Fold 3 - Train QWK: 0.8022, Validation QWK: 0.3913


Training Folds:  80%|████████  | 4/5 [00:04<00:01,  1.08s/it]

Fold 4 - Train QWK: 0.8088, Validation QWK: 0.3665


Training Folds: 100%|██████████| 5/5 [00:05<00:00,  1.09s/it]

Fold 5 - Train QWK: 0.8081, Validation QWK: 0.3309
Mean Train QWK --> 0.8072
Mean Validation QWK ---> 0.3793





----> || Optimized QWK SCORE :: [36m[1m 0.456[0m


Unnamed: 0,id,sii
0,00008ff9,0
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,1
5,001f3379,0
6,0038ba98,0
7,0068a485,0
8,0069fbed,2
9,0083e397,1


## Catboost model

In [30]:
cat_preds = TrainML(model_class=CatBoost_Model, test_data=test)
sub = pd.DataFrame({
    
    'id'   : test_ids,
    
    'sii': cat_preds
})

sub

   Basic_Demos-Age  Basic_Demos-Sex  CGAS-CGAS_Score  Physical-BMI  \
0                5                0             51.0     16.877316   
1                9                0              NaN     14.035590   
2               10                1             71.0     16.648696   
3                9                0             71.0     18.292347   
4               18                1              NaN           NaN   

   Physical-Height  Physical-Weight  Physical-Waist_Circumference  \
0             46.0             50.8                           NaN   
1             48.0             46.0                          22.0   
2             56.5             75.6                           NaN   
3             56.0             81.6                           NaN   
4              NaN              NaN                           NaN   

   Physical-Diastolic_BP  Physical-HeartRate  Physical-Systolic_BP  ...  \
0                    NaN                 NaN                   NaN  ...   
1             

Training Folds:  20%|██        | 1/5 [00:02<00:09,  2.36s/it]

Fold 1 - Train QWK: 0.5377, Validation QWK: 0.3441


Training Folds:  40%|████      | 2/5 [00:04<00:06,  2.30s/it]

Fold 2 - Train QWK: 0.5346, Validation QWK: 0.4283


Training Folds:  60%|██████    | 3/5 [00:06<00:04,  2.31s/it]

Fold 3 - Train QWK: 0.5327, Validation QWK: 0.3555


Training Folds:  80%|████████  | 4/5 [00:09<00:02,  2.33s/it]

Fold 4 - Train QWK: 0.5466, Validation QWK: 0.3464


Training Folds: 100%|██████████| 5/5 [00:12<00:00,  2.41s/it]

Fold 5 - Train QWK: 0.5397, Validation QWK: 0.3670
Mean Train QWK --> 0.5383
Mean Validation QWK ---> 0.3683





----> || Optimized QWK SCORE :: [36m[1m 0.465[0m


Unnamed: 0,id,sii
0,00008ff9,0
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,0
5,001f3379,0
6,0038ba98,1
7,0068a485,0
8,0069fbed,0
9,0083e397,0


## Voting regressor model - ensembler

In [31]:
vote_preds = TrainML(model_class=voting_model, test_data=test)
final_sub = pd.DataFrame({
    
    'id'   : test_ids,
    'sii': vote_preds
})


final_sub.to_csv('submission.csv', index=False)
final_sub

   Basic_Demos-Age  Basic_Demos-Sex  CGAS-CGAS_Score  Physical-BMI  \
0                5                0             51.0     16.877316   
1                9                0              NaN     14.035590   
2               10                1             71.0     16.648696   
3                9                0             71.0     18.292347   
4               18                1              NaN           NaN   

   Physical-Height  Physical-Weight  Physical-Waist_Circumference  \
0             46.0             50.8                           NaN   
1             48.0             46.0                          22.0   
2             56.5             75.6                           NaN   
3             56.0             81.6                           NaN   
4              NaN              NaN                           NaN   

   Physical-Diastolic_BP  Physical-HeartRate  Physical-Systolic_BP  ...  \
0                    NaN                 NaN                   NaN  ...   
1             

Training Folds:  20%|██        | 1/5 [00:05<00:23,  5.75s/it]

Fold 1 - Train QWK: 0.8098, Validation QWK: 0.3484


Training Folds:  40%|████      | 2/5 [00:11<00:17,  5.71s/it]

Fold 2 - Train QWK: 0.8112, Validation QWK: 0.4589


Training Folds:  60%|██████    | 3/5 [00:17<00:11,  5.78s/it]

Fold 3 - Train QWK: 0.8087, Validation QWK: 0.3784


Training Folds:  80%|████████  | 4/5 [00:22<00:05,  5.72s/it]

Fold 4 - Train QWK: 0.8088, Validation QWK: 0.3682


Training Folds: 100%|██████████| 5/5 [00:28<00:00,  5.72s/it]

Fold 5 - Train QWK: 0.8104, Validation QWK: 0.3663
Mean Train QWK --> 0.8098
Mean Validation QWK ---> 0.3840





----> || Optimized QWK SCORE :: [36m[1m 0.460[0m


Unnamed: 0,id,sii
0,00008ff9,0
1,000fd460,0
2,00105258,0
3,00115b9f,0
4,0016bb22,0
5,001f3379,1
6,0038ba98,0
7,0068a485,0
8,0069fbed,1
9,0083e397,0
