Continuing the previous DNN model optimisation, now incorporating lessons learned from working on the XGBoost model, e.g., that a loss based on the QWK score on sii can be the best loss function.

In [None]:
import numpy as np
import pandas as pd

import os

import polars as pl
from glob import glob
from tqdm.auto import tqdm

import tensorflow as tf
from tensorflow import keras
import random

np.random.seed(42)
tf.random.set_seed(42)
random.seed(42)

from sklearn.impute import KNNImputer

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor

from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.callbacks import LearningRateScheduler

from sklearn.svm import SVR

import lightgbm as lgb

from sklearn.model_selection import KFold

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

import matplotlib.pyplot as plt

from sklearn.metrics import cohen_kappa_score

In [None]:
# Load the tabular train and test tables from the competition input directory
train_data = pd.read_csv(
    '/kaggle/input/child-mind-institute-problematic-internet-use/train.csv'
)
test_data = pd.read_csv(
    '/kaggle/input/child-mind-institute-problematic-internet-use/test.csv'
)

In [None]:
# Root directory that holds the competition CSV and parquet inputs
INPUT_DIR = "/kaggle/input/child-mind-institute-problematic-internet-use/"

# Import aggregate fields from parquet files
# Modified code from rsakata: https://www.kaggle.com/code/rsakata/cmi-piu-16th-place-solution

# Every path matched directly under series_train.parquet/ is read as one parquet input
files_train = glob(INPUT_DIR + "series_train.parquet/*")
#if IS_SUBMIT:
#    files += glob(INPUT_DIR + "series_test.parquet/*")

# One per-hour summary frame is appended per input path; they are concatenated after the loop
list_df_train = []
for file in tqdm(files_train):
    df_series = (
        pl.read_parquet(file)
        .with_columns(
            # total_hours = (days since the first relative_date_PCIAT) * 24 + hour derived from time_of_day
            # NOTE(review): time_of_day // 1e9 / 3600 presumably converts nanoseconds to hours - confirm units
            (
                (pl.col("relative_date_PCIAT") - pl.col("relative_date_PCIAT").min())*24
                + (pl.col("time_of_day") // int(1e9)) / 3600
            ).floor().cast(int).alias("total_hours")
        )
        # Discard rows whose non-wear_flag equals 1
        .filter(pl.col("non-wear_flag") != 1)
        # Keep only hours that still contain exactly 12 * 60 = 720 rows after the filter above
        # (presumably one row every 5 seconds, i.e. a complete hour - TODO confirm sampling rate)
        .filter(pl.col("step").count().over("total_hours") == 12 * 60)
        # Within each retained hour, take the standard deviation of each signal
        .group_by("total_hours").agg(
            pl.col("enmo").std().alias("enmo_std"),
            pl.col("anglez").std().alias("anglez_std"),
            pl.col("light").std().alias("light_std")
        )
        .with_columns(
            # Hour of day (0-23) for this bucket; not used by the aggregation below
            (pl.col("total_hours") % 24).alias("hour"),
            # Last path component with its first three characters removed
            # (presumably strips an "id=" prefix - verify against the directory names)
            pl.lit(file.split("/")[-1][3:]).alias("id")
        )
    )
    list_df_train.append(df_series.to_pandas())

# Stack all per-file hourly summaries, then log-transform each std column
# (a small constant is added before the log so zero values stay finite)
df_series = pd.concat(list_df_train)
df_series["enmo_std"] = np.log(df_series["enmo_std"] + 0.01)
df_series["anglez_std"] = np.log(df_series["anglez_std"] + 1)
df_series["light_std"] = np.log(df_series["light_std"] + 0.01)

# Per id: mean and std of the hourly log-std values, flattened to columns such as "enmo_std_mean"
df_agg_train = df_series.groupby("id")[["enmo_std", "anglez_std", "light_std"]].agg(["mean", "std"]).reset_index()
# After reset_index the "id" column has an empty second level, so it keeps its plain name
df_agg_train.columns = [cols[0] + "_" + cols[1] if cols[1] != "" else cols[0] for cols in df_agg_train.columns]
df_agg_train

In [None]:
# Left join keeps every row of train_data; ids without series aggregates get NaN in the new columns
train_data2 = pd.merge(train_data, df_agg_train, on="id", how="left")
train_data2.head()

In [None]:
# Same per-hour aggregation as for the training series, applied to the paths under series_test.parquet/
files_test = glob(INPUT_DIR + "series_test.parquet/*")


# One per-hour summary frame is appended per input path; they are concatenated after the loop
list_df_test = []
for file in tqdm(files_test):
    df_series = (
        pl.read_parquet(file)
        .with_columns(
            # total_hours = (days since the first relative_date_PCIAT) * 24 + hour derived from time_of_day
            # NOTE(review): time_of_day // 1e9 / 3600 presumably converts nanoseconds to hours - confirm units
            (
                (pl.col("relative_date_PCIAT") - pl.col("relative_date_PCIAT").min())*24
                + (pl.col("time_of_day") // int(1e9)) / 3600
            ).floor().cast(int).alias("total_hours")
        )
        # Discard rows whose non-wear_flag equals 1
        .filter(pl.col("non-wear_flag") != 1)
        # Keep only hours that still contain exactly 12 * 60 = 720 rows after the filter above
        # (presumably one row every 5 seconds, i.e. a complete hour - TODO confirm sampling rate)
        .filter(pl.col("step").count().over("total_hours") == 12 * 60)
        # Within each retained hour, take the standard deviation of each signal
        .group_by("total_hours").agg(
            pl.col("enmo").std().alias("enmo_std"),
            pl.col("anglez").std().alias("anglez_std"),
            pl.col("light").std().alias("light_std")
        )
        .with_columns(
            # Hour of day (0-23) for this bucket; not used by the aggregation below
            (pl.col("total_hours") % 24).alias("hour"),
            # Last path component with its first three characters removed
            # (presumably strips an "id=" prefix - verify against the directory names)
            pl.lit(file.split("/")[-1][3:]).alias("id")
        )
    )
    list_df_test.append(df_series.to_pandas())

# Stack all per-file hourly summaries, then log-transform each std column
# (a small constant is added before the log so zero values stay finite)
df_series = pd.concat(list_df_test)
df_series["enmo_std"] = np.log(df_series["enmo_std"] + 0.01)
df_series["anglez_std"] = np.log(df_series["anglez_std"] + 1)
df_series["light_std"] = np.log(df_series["light_std"] + 0.01)

# Per id: mean and std of the hourly log-std values, flattened to columns such as "enmo_std_mean"
df_agg_test = df_series.groupby("id")[["enmo_std", "anglez_std", "light_std"]].agg(["mean", "std"]).reset_index()
# After reset_index the "id" column has an empty second level, so it keeps its plain name
df_agg_test.columns = [cols[0] + "_" + cols[1] if cols[1] != "" else cols[0] for cols in df_agg_test.columns]
df_agg_test

In [None]:
# Left join keeps every row of test_data; ids without series aggregates get NaN in the new columns
test_data2 = pd.merge(test_data, df_agg_test, on="id", how="left")
test_data2.head()

In [None]:
# Model input columns, listed once so that train and test are selected in exactly the same order
_feature_columns = [
    # Demographics and clinician score
    'Basic_Demos-Age',
    'Basic_Demos-Sex',
    'CGAS-CGAS_Score',
    # Physical measurements
    'Physical-BMI',
    'BIA-BIA_BMI',
    'Physical-Waist_Circumference',
    'Physical-Diastolic_BP',
    'Physical-HeartRate',
    'Physical-Systolic_BP',
    # Fitness endurance
    'Fitness_Endurance-Max_Stage',
    'Fitness_Endurance-Time_Mins',
    'Fitness_Endurance-Time_Sec',
    # FGC zones
    'FGC-FGC_CU_Zone',
    'FGC-FGC_GSND_Zone',
    'FGC-FGC_GSD_Zone',
    'FGC-FGC_PU_Zone',
    'FGC-FGC_SRL_Zone',
    'FGC-FGC_SRR_Zone',
    'FGC-FGC_TL_Zone',
    # BIA fields
    'BIA-BIA_Activity_Level_num',
    'BIA-BIA_BMC',
    'BIA-BIA_BMR',
    'BIA-BIA_DEE',
    'BIA-BIA_ECW',
    'BIA-BIA_FFM',
    'BIA-BIA_FFMI',
    'BIA-BIA_FMI',
    'BIA-BIA_Fat',
    'BIA-BIA_ICW',
    'BIA-BIA_LDM',
    'BIA-BIA_LST',
    'BIA-BIA_SMM',
    'BIA-BIA_TBW',
    # Questionnaire totals and reported internet use
    'PAQ_A-PAQ_A_Total',
    'PAQ_C-PAQ_C_Total',
    'SDS-SDS_Total_T',
    'PreInt_EduHx-computerinternet_hoursday',
    # Aggregates computed from the actigraphy series
    'enmo_std_mean',
    'enmo_std_std',
    'anglez_std_mean',
    'anglez_std_std',
    'light_std_mean',
    'light_std_std',
]

X_train = train_data2[_feature_columns]

# Regression target: the raw PCIAT total (NaN for rows without a label)
y_train = train_data2['PCIAT-PCIAT_Total']

X_test = test_data2[_feature_columns]

In [None]:
# Add this only if we are not interested in the actigraph data

# The six series-derived aggregate columns, removed from both feature tables
_actigraph_columns = [
    'enmo_std_mean',
    'enmo_std_std',
    'anglez_std_mean',
    'anglez_std_std',
    'light_std_mean',
    'light_std_std',
]

X_train = X_train.drop(columns=_actigraph_columns)
X_test = X_test.drop(columns=_actigraph_columns)

In [None]:
# Add calculated fields
# Vectorised column operations replace the previous row-by-row apply(): same values, no Python-level loop,
# and they also work when the frame has no rows.

# Measured BMI when present, otherwise the BIA-derived BMI
X_train['Physical-BMI_Calc'] = X_train['Physical-BMI'].fillna(X_train['BIA-BIA_BMI'])
# Endurance time expressed in seconds only (NaN if either part is missing)
X_train['Fitness_Endurance-Time_Sec_Calc'] = (
    X_train['Fitness_Endurance-Time_Sec'] + X_train['Fitness_Endurance-Time_Mins'] * 60
)
# PAQ_A total when present, otherwise the PAQ_C total
X_train['PAQ_Total'] = X_train['PAQ_A-PAQ_A_Total'].fillna(X_train['PAQ_C-PAQ_C_Total'])


# Drop fields no longer needed
X_train = X_train.drop(columns=['PAQ_A-PAQ_A_Total', 'PAQ_C-PAQ_C_Total',
                                'Physical-BMI', 'BIA-BIA_BMI',
                                'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec'])

# Remove outliers - values outside the bounds below are treated as missing and left to the imputer.
# Each entry is (lower, upper): a value is blanked when value <= lower or value >= upper.
# lower=None means only the upper bound is checked. Comparisons against NaN are False, so
# already-missing values are left untouched.
_outlier_bounds = {
    'CGAS-CGAS_Score': (None, 100.0),
    'Physical-Systolic_BP': (None, 180.0),
    'Physical-Diastolic_BP': (None, 120.0),
    'BIA-BIA_DEE': (None, 6000.0),
    'BIA-BIA_BMC': (0.0, 16.0),
    'BIA-BIA_BMR': (0.0, 2400.0),
    'BIA-BIA_ECW': (0.0, 60.0),
    'BIA-BIA_FFM': (0.0, 200.0),
    'BIA-BIA_FFMI': (0.0, 25.0),
    'BIA-BIA_FMI': (0.0, 25.0),
    'BIA-BIA_Fat': (8.0, 60.0),
    'BIA-BIA_ICW': (0.0, 80.0),
    'BIA-BIA_LDM': (0.0, 60.0),
    'BIA-BIA_LST': (0.0, 150.0),
    'BIA-BIA_SMM': (0.0, 100.0),
    'BIA-BIA_TBW': (0.0, 150.0),
}
for _column, (_lower, _upper) in _outlier_bounds.items():
    _values = X_train[_column]
    _invalid = _values >= _upper
    if _lower is not None:
        _invalid = _invalid | (_values <= _lower)
    X_train.loc[_invalid, _column] = np.nan

In [None]:
# Sample weights for labelled rows: the more features a row is missing, the less it should count.
# Number of missing feature values in each labelled row
_missing_per_row = X_train.loc[y_train.notna()].isnull().sum(axis=1)

# Scheme 1: linear in the fraction of missing features
features_missing_labelled = _missing_per_row / X_train.shape[1]
weights_labelled = 1 - features_missing_labelled

# Scheme 2: geometric decay of 0.95 per missing feature
features_missing_labelled2 = _missing_per_row
weights_labelled2 = 1 * (0.95 ** features_missing_labelled2)

# Scheme 3: exponential decay in the fraction of missing features
weights_labelled3 = np.exp(-2 * features_missing_labelled)

In [None]:
# MICE
# The experimental flag has to be imported before IterativeImputer itself
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

iter_imputer = IterativeImputer(max_iter=10, random_state=42)

# Fit on the full training feature table and rebuild a frame with the original column names
_imputed_train_values = iter_imputer.fit_transform(X_train)
X_train_fimpute = pd.DataFrame(_imputed_train_values, columns=X_train.columns)
X_train_fimpute.describe()

In [None]:
# Keep every imputed value inside the range observed in the un-imputed training column
for column in X_train_fimpute.columns:
    max_val = X_train[column].max()
    min_val = X_train[column].min()
    X_train_fimpute[column] = X_train_fimpute[column].clip(lower=min_val, upper=max_val)

X_train_fimpute.describe()

In [None]:
# Standardise all imputed training columns; the fitted scaler is reused on the test set later
scaler = StandardScaler()

_train_columns = X_train_fimpute.columns
X_train_fimpute[_train_columns] = scaler.fit_transform(X_train_fimpute[_train_columns])
X_train_fimpute.describe()

In [None]:
# Repeat the above for X_test

# Add calculated fields
# Vectorised column operations replace the previous row-by-row apply(): same values, no Python-level loop,
# and they also work when the frame has no rows.

# Measured BMI when present, otherwise the BIA-derived BMI
X_test['Physical-BMI_Calc'] = X_test['Physical-BMI'].fillna(X_test['BIA-BIA_BMI'])
# Endurance time expressed in seconds only (NaN if either part is missing)
X_test['Fitness_Endurance-Time_Sec_Calc'] = (
    X_test['Fitness_Endurance-Time_Sec'] + X_test['Fitness_Endurance-Time_Mins'] * 60
)
# PAQ_A total when present, otherwise the PAQ_C total
X_test['PAQ_Total'] = X_test['PAQ_A-PAQ_A_Total'].fillna(X_test['PAQ_C-PAQ_C_Total'])

# Drop fields no longer needed
X_test = X_test.drop(columns=['PAQ_A-PAQ_A_Total', 'PAQ_C-PAQ_C_Total',
                              'Physical-BMI', 'BIA-BIA_BMI',
                              'Fitness_Endurance-Time_Mins', 'Fitness_Endurance-Time_Sec'])

# Remove outliers - values outside the bounds below are treated as missing and left to the imputer.
# Each entry is (lower, upper): a value is blanked when value <= lower or value >= upper.
# lower=None means only the upper bound is checked. Comparisons against NaN are False, so
# already-missing values are left untouched.
_outlier_bounds = {
    'CGAS-CGAS_Score': (None, 100.0),
    'Physical-Systolic_BP': (None, 180.0),
    'Physical-Diastolic_BP': (None, 120.0),
    'BIA-BIA_DEE': (None, 6000.0),
    'BIA-BIA_BMC': (0.0, 16.0),
    'BIA-BIA_BMR': (0.0, 2400.0),
    'BIA-BIA_ECW': (0.0, 60.0),
    'BIA-BIA_FFM': (0.0, 200.0),
    'BIA-BIA_FFMI': (0.0, 25.0),
    'BIA-BIA_FMI': (0.0, 25.0),
    'BIA-BIA_Fat': (8.0, 60.0),
    'BIA-BIA_ICW': (0.0, 80.0),
    'BIA-BIA_LDM': (0.0, 60.0),
    'BIA-BIA_LST': (0.0, 150.0),
    'BIA-BIA_SMM': (0.0, 100.0),
    'BIA-BIA_TBW': (0.0, 150.0),
}
for _column, (_lower, _upper) in _outlier_bounds.items():
    _values = X_test[_column]
    _invalid = _values >= _upper
    if _lower is not None:
        _invalid = _invalid | (_values <= _lower)
    X_test.loc[_invalid, _column] = np.nan

In [None]:
# Imputation - reuse the imputer fitted on the training features
_imputed_test_values = iter_imputer.transform(X_test)
X_test_fimpute = pd.DataFrame(_imputed_test_values, columns=X_test.columns)

# Clipping - bounds come from the un-imputed TRAINING columns, not from the test data
for column in X_test_fimpute.columns:
    max_val = X_train[column].max()
    min_val = X_train[column].min()
    X_test_fimpute[column] = X_test_fimpute[column].clip(lower=min_val, upper=max_val)

# Scaling - reuse the scaler fitted on the training features
_test_columns = X_test_fimpute.columns
X_test_fimpute[_test_columns] = scaler.transform(X_test_fimpute[_test_columns])

X_test_fimpute.describe()

# Use labelled data only

In [None]:
# Rows that actually carry a PCIAT total
_has_label = y_train.notna()

X_train_labelled = X_train_fimpute.loc[_has_label]
y_train_labelled = y_train[_has_label]
print("Size of labelled train data set is: ", (X_train_labelled.shape, y_train_labelled.shape))

In [None]:
# The unlabelled rows are not used in this notebook, so the original row labels can be discarded.
# With a fresh 0..n-1 index, positional and label-based selection agree, which keeps the
# cross-validation splits (which select rows by index) free of loc/iloc mix-ups.
X_train_labelled, y_train_labelled = (
    data.reset_index(drop=True) for data in (X_train_labelled, y_train_labelled)
)

In [None]:
# Give the three weight series the same fresh 0..n-1 index as the labelled features and targets
weights_labelled, weights_labelled2, weights_labelled3 = (
    weights.reset_index(drop=True)
    for weights in (weights_labelled, weights_labelled2, weights_labelled3)
)

In [None]:
# Sanity check: features, target and all three weight series should have the same number of rows
tuple(
    data.shape
    for data in (X_train_labelled, y_train_labelled, weights_labelled, weights_labelled2, weights_labelled3)
)

# Set up useful functions and data
* Augmented data
* QWK metric
* QWK loss function
* Weights

In [None]:
# X datasets - X_train_labelled plus X_train_labelled_aug{1,2}{a,b,c}
# aug1/aug2 - one or two noisy copies appended to the original rows
# a/b/c - noise multiplier of 0.1 / 0.15 / 0.2

# Augmented data

# Per-column standard deviation of X and standard deviation of y; the noise is scaled by these
std_X = np.std(X_train_labelled, axis=0)
std_y = np.std(y_train_labelled, axis=0)


def _noisy_copy(x_scale, y_scale):
    # Returns (X, y) copies of the labelled data with zero-mean Gaussian noise added.
    # X noise is drawn before y noise so the global NumPy random stream is consumed in a fixed order.
    noisy_X = X_train_labelled + (x_scale * np.random.normal(0, std_X, X_train_labelled.shape))
    noisy_y = y_train_labelled + (y_scale * np.random.normal(0, std_y, y_train_labelled.shape))
    return noisy_X, noisy_y


def _stacked(base, extra):
    # Appends the rows of `extra` below `base` and renumbers the index from zero
    return pd.concat([base, extra], ignore_index=True)


# Create augmented datasets
# Each dataset is still written out one by one rather than in a loop: there are never more than
# two augmentations, it is easier to see which dataset is which, and each step could be given
# its own noise level.

# --- noise multiplier 0.1 ("a") ---
X_noise_multiplier = 0.1
y_noise_multiplier = 0.1

X_train_labelled_noisy, y_train_labelled_noisy = _noisy_copy(X_noise_multiplier, y_noise_multiplier)
X_train_labelled_aug1a = _stacked(X_train_labelled, X_train_labelled_noisy)
y_train_labelled_aug1a = _stacked(y_train_labelled, y_train_labelled_noisy)
print(X_train_labelled_aug1a.shape, y_train_labelled_aug1a.shape)

# second noisy copy on top of aug1a
X_train_labelled_noisy, y_train_labelled_noisy = _noisy_copy(X_noise_multiplier, y_noise_multiplier)
X_train_labelled_aug2a = _stacked(X_train_labelled_aug1a, X_train_labelled_noisy)
y_train_labelled_aug2a = _stacked(y_train_labelled_aug1a, y_train_labelled_noisy)
print(X_train_labelled_aug2a.shape, y_train_labelled_aug2a.shape)

# --- noise multiplier 0.15 ("b") ---
X_noise_multiplier = 0.15
y_noise_multiplier = 0.15

X_train_labelled_noisy, y_train_labelled_noisy = _noisy_copy(X_noise_multiplier, y_noise_multiplier)
X_train_labelled_aug1b = _stacked(X_train_labelled, X_train_labelled_noisy)
y_train_labelled_aug1b = _stacked(y_train_labelled, y_train_labelled_noisy)
print(X_train_labelled_aug1b.shape, y_train_labelled_aug1b.shape)

# second noisy copy on top of aug1b
X_train_labelled_noisy, y_train_labelled_noisy = _noisy_copy(X_noise_multiplier, y_noise_multiplier)
X_train_labelled_aug2b = _stacked(X_train_labelled_aug1b, X_train_labelled_noisy)
y_train_labelled_aug2b = _stacked(y_train_labelled_aug1b, y_train_labelled_noisy)
print(X_train_labelled_aug2b.shape, y_train_labelled_aug2b.shape)

# --- noise multiplier 0.2 ("c") ---
X_noise_multiplier = 0.2
y_noise_multiplier = 0.2

X_train_labelled_noisy, y_train_labelled_noisy = _noisy_copy(X_noise_multiplier, y_noise_multiplier)
X_train_labelled_aug1c = _stacked(X_train_labelled, X_train_labelled_noisy)
y_train_labelled_aug1c = _stacked(y_train_labelled, y_train_labelled_noisy)
print(X_train_labelled_aug1c.shape, y_train_labelled_aug1c.shape)

# second noisy copy on top of aug1c
X_train_labelled_noisy, y_train_labelled_noisy = _noisy_copy(X_noise_multiplier, y_noise_multiplier)
X_train_labelled_aug2c = _stacked(X_train_labelled_aug1c, X_train_labelled_noisy)
y_train_labelled_aug2c = _stacked(y_train_labelled_aug1c, y_train_labelled_noisy)
print(X_train_labelled_aug2c.shape, y_train_labelled_aug2c.shape)


In [None]:
# y data: y_train_labelled, y_train_labelled_bin, y_train_labelled_sii
# For simplicity, the bin and sii targets are augmented by plain repetition (no added noise)


def _sii_from_total(total):
    # Four-level index: <=30 -> 0, <50 -> 1, <80 -> 2, otherwise 3
    if total <= 30:
        return 0
    if total < 50:
        return 1
    if total < 80:
        return 2
    return 3


def _bin_from_total(total):
    # Eight-level binning: each of the four sii ranges above is split in two
    if total <= 15:
        return 0
    if total <= 30:
        return 1
    if total <= 40:
        return 2
    if total < 50:
        return 3
    if total <= 65:
        return 4
    if total < 80:
        return 5
    if total <= 90:
        return 6
    return 7


y_train_labelled_sii = y_train_labelled.apply(_sii_from_total).rename('sii')

# Original labels followed by one / two repeats, matching the row layout of the aug1 / aug2 feature sets
y_train_labelled_sii_aug1 = pd.concat([y_train_labelled_sii] * 2, ignore_index=True)
y_train_labelled_sii_aug2 = pd.concat([y_train_labelled_sii] * 3, ignore_index=True)

y_train_labelled_bin = y_train_labelled.apply(_bin_from_total).rename('PCIAT_bin')

y_train_labelled_bin_aug1 = pd.concat([y_train_labelled_bin] * 2, ignore_index=True)
y_train_labelled_bin_aug2 = pd.concat([y_train_labelled_bin] * 3, ignore_index=True)

In [None]:
# Peek at the first five derived sii labels
y_train_labelled_sii.head(n=5)

In [None]:
# Weights. weights_labelled, weights_labelled2 and weights_labelled3 already exist;
# they only need repeating so they line up with the augmented datasets.


def _tiled(weights, copies):
    # `copies` back-to-back repetitions of the weight series with a fresh 0..n-1 index
    return pd.concat([weights] * copies, ignore_index=True)


weights_labelled_aug1 = _tiled(weights_labelled, 2)
weights_labelled_aug2 = _tiled(weights_labelled, 3)
print(weights_labelled_aug1.shape, weights_labelled_aug2.shape)

weights_labelled2_aug1 = _tiled(weights_labelled2, 2)
weights_labelled2_aug2 = _tiled(weights_labelled2, 3)
print(weights_labelled2_aug1.shape, weights_labelled2_aug2.shape)

weights_labelled3_aug1 = _tiled(weights_labelled3, 2)
weights_labelled3_aug2 = _tiled(weights_labelled3, 3)
print(weights_labelled3_aug1.shape, weights_labelled3_aug2.shape)

In [None]:
# Custom qwk metric and loss function for pciat_sii
# Modified from https://medium.com/@nlztrk/quadratic-weighted-kappa-qwk-metric-and-how-to-optimize-it-062cc9121baa

# Small hand-written label vector used to pick the constants below
# NOTE(review): c and d are fitted to this toy vector, not to the training labels - confirm that is intended
y = np.array([0,1,2,1,1,2,3,3,2,3,2,1,2,1,0,1,0])

# Parameters of the parabola (x - c)**2 + d used as a stand-in for g
c = 1.47
d = 0.95

# g[level] = mean squared distance between the labels and a constant prediction of `level`
g = np.array([((y - level) ** 2).mean() for level in range(4)])

print(g)
# The fitted parabola evaluated at the same four levels, for comparison with g
h = [(level - c) ** 2 + d for level in range(4)]
print(h)

# Overlay the empirical curve g and the fitted parabola to judge the choice of c and d by eye
_levels = [0, 1, 2, 3]
_fitted = [(x - c) ** 2 + d for x in _levels]

plt.plot(_levels, g, marker=".", label="actual")
plt.plot(_levels, _fitted, label="fitting")
plt.legend()
plt.show()

In [None]:
# Weight matrix for QWK calculation: W[i][j] = (i - j)**2 / (4 - 1)**2 for the four classes,
# i.e. 0 on the diagonal and 1 for the largest possible disagreement
_class_ids = np.arange(4)
W = (_class_ids[:, None] - _class_ids[None, :]) ** 2 / (4 - 1) ** 2

print(W)

In [None]:
# Given a confusion matrix, compute the expected matrix from the outer product of the row and column frequencies
def compute_expected_matrix(conf_matrix):
    """Return the chance-agreement matrix implied by the marginals of `conf_matrix`.

    Entry (i, j) is row_total[i] * column_total[j] / grand_total, so the result
    sums to the same total as `conf_matrix`.
    """
    grand_total = tf.reduce_sum(conf_matrix)

    # keepdims gives an (n, 1) column and a (1, n) row, so their matrix product is the outer product
    row_totals = tf.reduce_sum(conf_matrix, axis=1, keepdims=True)
    col_totals = tf.reduce_sum(conf_matrix, axis=0, keepdims=True)

    return tf.matmul(row_totals, col_totals) / grand_total

In [None]:
# Disabled scratch code, kept as a string literal so it is not executed: a step-by-step
# walk-through of the QWK calculation (confusion matrix, expected matrix, weighted ratio)
# on two small constant tensors.
'''
t1 = tf.constant([0,1,2,3])
t2 = tf.constant([0,1,2,3])
#t1 = tf.cast(t1,tf.float32) + c
#t2 = tf.cast(t2,tf.float32) + c
#t1 = t1 + c
#t2 = t2 + c

t2 = tf.clip_by_value(t2, 0, 3)
    
# Round predictions to nearest integer (discrete values)
t1 = tf.round(t1)
t2 = tf.round(t2)

print(t1,t2)


confusion_matrix = tf.math.confusion_matrix(tf.cast(t1, tf.int32), tf.cast(t2, tf.int32), num_classes=4)
#weight_matrix = tf.cast(
#    tf.abs(tf.subtract(tf.reshape(tf.range(4), (-1, 1)), tf.reshape(tf.range(4), (1, -1)))), tf.float32
#)
weight_matrix = tf.cast(tf.constant(W),tf.float32)
#weight_matrix = tf.constant(W)/(tf.reduce_sum(tf.constant(W)))
#weighted_kappa = tf.reduce_sum(weight_matrix * tf.cast(confusion_matrix, tf.float32)) / tf.cast(tf.reduce_sum(confusion_matrix),tf.float32)
#weighted_kappa
expected_matrix = compute_expected_matrix(confusion_matrix)
confusion_matrix = confusion_matrix / tf.reduce_sum(confusion_matrix)

print(confusion_matrix)
print(weight_matrix)
#print(weighted_kappa)
print(expected_matrix)
print(tf.reduce_sum(weight_matrix))

print("Confusion Matrix shape:", confusion_matrix.shape)
print("Weight Matrix shape:", weight_matrix.shape)
print("Expected Matrix shape:", expected_matrix.shape)

weighted_kappa = 1 - (tf.reduce_sum((weight_matrix * tf.cast(confusion_matrix,tf.float32)))) / (tf.reduce_sum((weight_matrix * tf.cast(expected_matrix,tf.float32)))) 

#weighted_kappa = tf.reduce_sum(weight_matrix * tf.cast(confusion_matrix, tf.float32))
print(weighted_kappa)
'''

In [None]:
# Metric for tracking model as it is trained
# Writing own QWK score function. Gradients not necessary as this is not the loss function
def qwk_sii(y_true, y_pred):
    """Quadratic weighted kappa between sii labels and rounded predictions.

    Args:
        y_true: true sii values (0..3), any shape that flattens to one value per sample.
        y_pred: raw model outputs with the same number of elements; they are clipped
            to [0, 3] and rounded to the nearest class before scoring.

    Returns:
        Scalar float32 tensor: 1 - sum(W * observed) / sum(W * expected), using the
        module-level 4x4 weight matrix W. When the denominator is zero (every label
        and every prediction falls in one and the same class) the ratio is taken as 0,
        so the score is 1.0 instead of NaN and cannot poison the averaged epoch metric.
    """
    # reshape(-1) instead of squeeze: squeeze turns a single-sample batch into a scalar,
    # which tf.math.confusion_matrix does not accept
    true_classes = tf.cast(tf.round(tf.reshape(y_true, [-1])), tf.int32)
    pred_classes = tf.cast(tf.round(tf.reshape(tf.clip_by_value(y_pred, 0, 3), [-1])), tf.int32)

    observed = tf.math.confusion_matrix(true_classes, pred_classes, num_classes=4)
    expected = compute_expected_matrix(observed)

    # Turn both matrices into frequencies so they are on the same scale
    observed = tf.cast(observed / tf.reduce_sum(observed), tf.float32)
    expected = tf.cast(expected / tf.reduce_sum(expected), tf.float32)

    weight_matrix = tf.cast(tf.constant(W), tf.float32)  # W defined for 4 classes
    numerator = tf.reduce_sum(weight_matrix * observed)
    denominator = tf.reduce_sum(weight_matrix * expected)

    # A zero denominator implies a zero numerator as well; divide_no_nan maps that 0/0 to 0
    return 1 - tf.math.divide_no_nan(numerator, denominator)
    

In [None]:
@tf.custom_gradient
def custom_confusion_matrix_loss(y_true, y_pred):
    """QWK-based loss (1 - kappa) on sii with a hand-written surrogate gradient.

    The forward value is computed from the rounded confusion matrix, which has no
    useful gradient, so the backward pass uses the smooth approximation from
    https://medium.com/@nlztrk/quadratic-weighted-kappa-qwk-metric-and-how-to-optimize-it-062cc9121baa :
    loss ~ f / g with f = 1/2 * sum((pred - label)**2) and
    g = 1/2 * sum((pred - c)**2 + d), where c and d are the module-level constants.

    Args:
        y_true: true sii values (0..3), same shape as y_pred.
        y_pred: raw model outputs; clipped to [0, 3] for both the forward value and the gradient.

    Returns:
        (loss, grad_fn) as required by tf.custom_gradient, where loss is the scalar
        1 - kappa and grad_fn maps the upstream gradient to gradients for (y_true, y_pred).
    """
    # Predictions outside the valid class range are treated as the nearest valid class
    y_pred = tf.clip_by_value(y_pred, 0, 3)

    # reshape(-1) instead of squeeze: squeeze turns a single-sample batch into a scalar,
    # which tf.math.confusion_matrix does not accept
    true_classes = tf.cast(tf.round(tf.reshape(y_true, [-1])), tf.int32)
    pred_classes = tf.cast(tf.round(tf.reshape(y_pred, [-1])), tf.int32)

    # Observed matrix O and chance-agreement matrix E, both as frequencies
    confusion_matrix = tf.math.confusion_matrix(true_classes, pred_classes, num_classes=4)
    expected_matrix = compute_expected_matrix(confusion_matrix)
    confusion_matrix = tf.cast(confusion_matrix / tf.reduce_sum(confusion_matrix), tf.float32)
    expected_matrix = tf.cast(expected_matrix / tf.reduce_sum(expected_matrix), tf.float32)

    # Same explicit float32 weight matrix as the qwk_sii metric uses
    weight_matrix = tf.cast(tf.constant(W), tf.float32)
    numerator = tf.reduce_sum(weight_matrix * confusion_matrix)
    denominator = tf.reduce_sum(weight_matrix * expected_matrix)

    # A zero denominator (everything in one class) implies a zero numerator; treat 0/0 as 0
    # so the loss is 0 rather than NaN
    weighted_kappa = 1 - tf.math.divide_no_nan(numerator, denominator)

    def grad(dy):
        # Labels and predictions are used on their native 0..3 scale. The reference
        # implementation adds its offset only because its model is trained on shifted
        # targets; here the targets are unshifted, and adding c pushed labels up to 4.47
        # while predictions were clipped at 3, so every prediction above 1.53 received
        # the same gradient.
        preds = y_pred  # already clipped to [0, 3] above
        labels = tf.cast(y_true, preds.dtype)

        f = 1 / 2 * tf.reduce_sum((preds - labels) ** 2)
        g = 1 / 2 * tf.reduce_sum((preds - c) ** 2 + d)  # strictly positive because d > 0

        df = preds - labels
        dg = preds - c
        # Quotient rule for f / g, scaled by the number of samples
        grad = (df / g - f * dg / g ** 2) * tf.cast(tf.size(labels), preds.dtype)

        grad_loss = dy * grad  # Multiply by dy for backpropagation

        # Exploding gradients were a problem, so the gradient is rescaled so that its
        # largest absolute entry is 1. divide_no_nan keeps an all-zero gradient at zero
        # instead of producing 0/0 = NaN.
        clipping_factor = tf.reduce_max(tf.abs(grad_loss))
        grad_loss = tf.math.divide_no_nan(grad_loss, clipping_factor)

        # One gradient per input is required; the y_true entry is only a placeholder,
        # since the labels are not trainable
        return grad_loss, grad_loss

    return (1 - weighted_kappa), grad  # (1 - weighted_kappa) to treat as a loss function

In [None]:
def quadratic_weighted_kappa_sii(y_true, y_pred):
    """Plain-function entry point for the QWK loss; forwards to custom_confusion_matrix_loss."""
    loss_value = custom_confusion_matrix_loss(y_true, y_pred)
    return loss_value

class QWKLoss(tf.keras.losses.Loss):
    """Keras Loss object that evaluates quadratic_weighted_kappa_sii on each batch."""

    def __init__(self, name="QWKLoss"):
        super(QWKLoss, self).__init__(name=name)

    def call(self, y_true, y_pred):
        # Delegates to the function form of the loss
        batch_loss = quadratic_weighted_kappa_sii(y_true, y_pred)
        return batch_loss
        

In [None]:
# Quick check of the loss on a tiny hand-made example
# Alternative inputs: [1,3,0,2,3,1,1,0,2] vs [3,0,3,0,0,3,3,3,0]
t1, t2 = (tf.constant(values) for values in ([0, 1, 2, 3], [3, 3, 0, 0]))

_smoke_loss = quadratic_weighted_kappa_sii(t1, t2)
print(_smoke_loss)

In [None]:
# PCA
# Fitted on the un-augmented labelled features only; every other dataset is projected with that fit

pca = PCA(n_components=25)
X_train_labelled_pca = pd.DataFrame(pca.fit_transform(X_train_labelled))


def _projected(features):
    # Applies the already-fitted PCA and wraps the result in a DataFrame
    return pd.DataFrame(pca.transform(features))


X_test_pca = _projected(X_test_fimpute)
X_train_labelled_pca_aug1b = _projected(X_train_labelled_aug1b)
X_train_labelled_pca_aug2b = _projected(X_train_labelled_aug2b)

print(*(
    data.shape
    for data in (X_train_labelled_pca, X_test_pca, X_train_labelled_pca_aug1b, X_train_labelled_pca_aug2b)
))

# Models

In [None]:
# Cross-validation setup: k folds, rows shuffled before splitting, fixed seed for repeatable splits
k = 3
kf = KFold(n_splits=k, random_state=42, shuffle=True)

In [None]:
def create_model_v7():
    """Build and compile the v7 network trained directly on the QWK loss.

    Architecture: two 20-unit SELU layers (he_normal initialisation), each
    followed by 20% dropout, and a single linear output unit. The input width
    is read from the module-level ``X_train_labelled``.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    n_features = X_train_labelled.shape[1]
    layer_stack = [
        keras.layers.Dense(20, input_shape=(n_features,), activation="selu", kernel_initializer="he_normal"),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(20, activation="selu", kernel_initializer="he_normal"),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(1, activation="linear"),
    ]
    model = keras.models.Sequential(layer_stack)

    # Earlier, now-disabled configurations used learning_rate=0.005 with either
    # 'mean_squared_error', QWKLoss() or soft_quadratic_weighted_kappa as loss.
    # run_eagerly=True is necessary in order for the tf.numpy() function in qwk to work
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss=QWKLoss(),
        metrics=['mae', 'mse', qwk_sii],
        run_eagerly=True,
    )
    return model

In [None]:
# Build one v7 instance and print its layer/parameter summary for inspection.
model = create_model_v7()
model.summary()

In [None]:
# Disabled experiment: the whole cell is wrapped in a string literal so none of
# it executes. It holds a K-fold training loop for create_model_v7 on
# X_train_labelled / y_train_labelled_sii with per-sample weights and early
# stopping on val_qwk_sii.
# NOTE(review): kf.split yields positional indices but rows are selected with
# .loc; if this is re-enabled, confirm the frames have a default RangeIndex or
# switch to .iloc.
'''
# Prepare to store results
validation_losses = []
validation_qwk = []
models = []

# Loop over each fold
for train_index, val_index in kf.split(X_train_labelled):
    # Split the data into training and validation sets for the current fold
    X_train_t, X_train_v = X_train_labelled.loc[train_index], X_train_labelled.loc[val_index]
    y_train_t, y_train_v = y_train_labelled_sii.loc[train_index], y_train_labelled_sii.loc[val_index]
    weights_labelled_t, weights_labelled_v = weights_labelled3.loc[train_index], weights_labelled3.loc[val_index]

    # Build a new model for each fold
    model = create_model_v7()
    
    # Define early stopping to avoid overfitting
    #early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    early_stopping = EarlyStopping(monitor='val_qwk_sii', mode='max', patience=20, restore_best_weights=True)
    
    # Train the model on the training set and evaluate on the validation set
    history = model.fit(
        X_train_t, y_train_t,
        validation_data=(X_train_v, y_train_v),
        epochs=200,
        batch_size=64,
        callbacks=[early_stopping],
        sample_weight=weights_labelled_t,
        verbose=2
    )
    
    # Evaluate the model on the validation set
    val_loss, val_mae, val_mse, val_qwk = model.evaluate(X_train_v, y_train_v, verbose=2)
    validation_losses.append(val_loss)
    validation_qwk.append(val_qwk)
    models.append(model)

# Calculate the average validation loss across all folds
avg_val_loss = np.mean(validation_losses)
avg_val_qwk = np.mean(validation_qwk)
print(f"Average Validation Loss: {avg_val_loss}, Average Validation QWK: {avg_val_qwk}")
'''

In [None]:
def create_model_v8():
    """Build and compile the v8 network trained directly on the QWK loss.

    The layers and compile settings are the same as in ``create_model_v7``:
    two 20-unit SELU layers (he_normal initialisation) with 20% dropout after
    each, then one linear output unit. The input width is read from the
    module-level ``X_train_labelled``.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    n_features = X_train_labelled.shape[1]
    layer_stack = [
        keras.layers.Dense(20, input_shape=(n_features,), activation="selu", kernel_initializer="he_normal"),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(20, activation="selu", kernel_initializer="he_normal"),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(1, activation="linear"),
    ]
    model = keras.models.Sequential(layer_stack)

    # Earlier, now-disabled configurations used learning_rate=0.005 with either
    # 'mean_squared_error', QWKLoss() or soft_quadratic_weighted_kappa as loss.
    # run_eagerly=True is necessary in order for the tf.numpy() function in qwk to work
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss=QWKLoss(),
        metrics=['mae', 'mse', qwk_sii],
        run_eagerly=True,
    )
    return model

In [None]:
# Build one v8 instance and print its layer/parameter summary for inspection.
model = create_model_v8()
model.summary()

In [None]:
# Disabled experiment: the whole cell is wrapped in a string literal so none of
# it executes. It holds a K-fold training loop for create_model_v8 on the
# "aug2" augmented data, without sample weights, with early stopping on
# val_qwk_sii.
# NOTE(review): the fold indices come from kf.split(X_train_labelled) but are
# applied (via .loc) to X_train_labelled_aug2b / y_train_labelled_sii_aug2;
# if this is re-enabled, confirm those frames have the same length and index.
'''
# Prepare to store results
validation_losses = []
validation_qwk = []
models = []

# Loop over each fold
for train_index, val_index in kf.split(X_train_labelled):
    # Split the data into training and validation sets for the current fold
    X_train_t, X_train_v = X_train_labelled_aug2b.loc[train_index], X_train_labelled_aug2b.loc[val_index]
    y_train_t, y_train_v = y_train_labelled_sii_aug2.loc[train_index], y_train_labelled_sii_aug2.loc[val_index]
    weights_labelled_t, weights_labelled_v = weights_labelled_aug2.loc[train_index], weights_labelled_aug2.loc[val_index]

    # Build a new model for each fold
    model = create_model_v8()
    
    # Define early stopping to avoid overfitting
    #early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    early_stopping = EarlyStopping(monitor='val_qwk_sii', mode='max', patience=10, restore_best_weights=True)
    
    # Train the model on the training set and evaluate on the validation set
    history = model.fit(
        X_train_t, y_train_t,
        validation_data=(X_train_v, y_train_v),
        epochs=100,
        batch_size=64,
        callbacks=[early_stopping],
        #sample_weight=weights_labelled_t,
        verbose=2
    )
    
    # Evaluate the model on the validation set
    val_loss, val_mae, val_mse, val_qwk = model.evaluate(X_train_v, y_train_v, verbose=2)
    validation_losses.append(val_loss)
    validation_qwk.append(val_qwk)
    models.append(model)

# Calculate the average validation loss across all folds
avg_val_loss = np.mean(validation_losses)
avg_val_qwk = np.mean(validation_qwk)
print(f"Average Validation Loss: {avg_val_loss}, Average Validation QWK: {avg_val_qwk}")
'''

In [None]:
def create_model_v9():
    """Build and compile the v9 network: QWK loss on PCA-projected features.

    Architecture: a 24-unit and a 20-unit SELU layer (he_normal
    initialisation), each followed by 20% dropout, and a single linear output
    unit. The input width is read from the module-level
    ``X_train_labelled_pca``.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    n_components = X_train_labelled_pca.shape[1]
    layer_stack = [
        keras.layers.Dense(24, input_shape=(n_components,), activation="selu", kernel_initializer="he_normal"),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(20, activation="selu", kernel_initializer="he_normal"),
        keras.layers.Dropout(rate=0.2),
        # An extra 16-unit SELU layer with dropout is currently disabled:
        #keras.layers.Dense(16, activation="selu", kernel_initializer="he_normal"),
        #keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(1, activation="linear"),
    ]
    model = keras.models.Sequential(layer_stack)

    # Earlier, now-disabled configurations used learning_rate=0.005 with either
    # 'mean_squared_error', QWKLoss() or soft_quadratic_weighted_kappa as loss.
    # run_eagerly=True is necessary in order for the tf.numpy() function in qwk to work
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss=QWKLoss(),
        metrics=['mae', 'mse', qwk_sii],
        run_eagerly=True,
    )
    return model

In [None]:
# Build one v9 instance and print its layer/parameter summary for inspection.
model = create_model_v9()
model.summary()

In [None]:
# Disabled experiment: the whole cell is wrapped in a string literal so none of
# it executes. It holds a K-fold training loop for create_model_v9 on the
# PCA-projected "aug1" data, without sample weights, with early stopping on
# val_qwk_sii.
# NOTE(review): the fold indices come from kf.split(X_train_labelled) but are
# applied (via .loc) to X_train_labelled_pca_aug1b / y_train_labelled_sii_aug1;
# if this is re-enabled, confirm those frames have the same length and index.
'''
# Prepare to store results
validation_losses = []
validation_qwk = []
models = []

# Loop over each fold
for train_index, val_index in kf.split(X_train_labelled):
    # Split the data into training and validation sets for the current fold
    X_train_t, X_train_v = X_train_labelled_pca_aug1b.loc[train_index], X_train_labelled_pca_aug1b.loc[val_index]
    y_train_t, y_train_v = y_train_labelled_sii_aug1.loc[train_index], y_train_labelled_sii_aug1.loc[val_index]
    #weights_labelled_t, weights_labelled_v = weights_labelled_aug2.loc[train_index], weights_labelled_aug2.loc[val_index]

    # Build a new model for each fold
    model = create_model_v9()
    
    # Define early stopping to avoid overfitting
    #early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    early_stopping = EarlyStopping(monitor='val_qwk_sii', mode='max', patience=10, restore_best_weights=True)
    
    # Train the model on the training set and evaluate on the validation set
    history = model.fit(
        X_train_t, y_train_t,
        validation_data=(X_train_v, y_train_v),
        epochs=100,
        batch_size=64,
        callbacks=[early_stopping],
        #sample_weight=weights_labelled_t,
        verbose=2
    )
    
    # Evaluate the model on the validation set
    val_loss, val_mae, val_mse, val_qwk = model.evaluate(X_train_v, y_train_v, verbose=2)
    validation_losses.append(val_loss)
    validation_qwk.append(val_qwk)
    models.append(model)

# Calculate the average validation loss across all folds
avg_val_loss = np.mean(validation_losses)
avg_val_qwk = np.mean(validation_qwk)
print(f"Average Validation Loss: {avg_val_loss}, Average Validation QWK: {avg_val_qwk}")
'''

In [None]:
# V10: go back to an MSE loss ('mean_squared_error') instead of the custom QWK loss

def create_model_v10():
    """Build and compile the v10 network: MSE regression on PCA features.

    Architecture: 24-, 20- and 10-unit SELU layers (he_normal initialisation),
    each followed by 20% dropout, and a single linear output unit. The input
    width is read from the module-level ``X_train_labelled_pca``.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    n_components = X_train_labelled_pca.shape[1]
    layer_stack = [
        keras.layers.Dense(24, input_shape=(n_components,), activation="selu", kernel_initializer="he_normal"),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(20, activation="selu", kernel_initializer="he_normal"),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(10, activation="selu", kernel_initializer="he_normal"),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(1, activation="linear"),
    ]
    model = keras.models.Sequential(layer_stack)

    # Disabled alternatives compiled with QWKLoss() (learning_rate 0.005 or
    # 0.0001, run_eagerly=True) or soft_quadratic_weighted_kappa as the loss.
    # This version uses no custom QWK loss or metric, so run_eagerly is not
    # passed and stays at its default.
    model.compile(
        optimizer=Adam(learning_rate=0.005),
        loss='mean_squared_error',
        metrics=['mae', 'mse'],
    )
    return model

In [None]:
# Build one v10 instance and print its layer/parameter summary for inspection.
model = create_model_v10()
model.summary()

In [None]:

# K-fold training of create_model_v10 on the PCA-projected "aug2b" data.
# One model is trained per fold and kept in `models` for later ensembling.
# NOTE(review): folds are drawn over the augmented rows; if augmented copies
# of one original sample can land in both the training and validation part of
# a fold, the reported validation loss will be optimistic. Confirm against how
# the aug2b data was built.

# Prepare to store results
validation_losses = []
#validation_qwk = []
# NOTE: this list rebinds the name `models`, which the imports at the top of
# the notebook also bind to tensorflow.keras.models.
models = []

# Loop over each fold
for train_index, val_index in kf.split(X_train_labelled_pca_aug2b):
    # Split the data into training and validation sets for the current fold.
    # KFold.split yields positional row numbers, so rows are selected with
    # .iloc; label-based .loc only matches when the index is a default
    # RangeIndex and otherwise picks the wrong rows or raises KeyError.
    X_train_t, X_train_v = X_train_labelled_pca_aug2b.iloc[train_index], X_train_labelled_pca_aug2b.iloc[val_index]
    y_train_t, y_train_v = y_train_labelled_aug2b.iloc[train_index], y_train_labelled_aug2b.iloc[val_index]
    #weights_labelled_t, weights_labelled_v = weights_labelled3_aug2.loc[train_index], weights_labelled3_aug2.loc[val_index]

    # Build a new model for each fold
    model = create_model_v10()

    # Define early stopping to avoid overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    #early_stopping = EarlyStopping(monitor='val_qwk_sii', mode='max', patience=10, restore_best_weights=True)

    # Train the model on the training set and evaluate on the validation set
    history = model.fit(
        X_train_t, y_train_t,
        validation_data=(X_train_v, y_train_v),
        epochs=300,
        batch_size=64,
        callbacks=[early_stopping],
        #sample_weight=weights_labelled_t,
        verbose=2
    )

    # Evaluate the model on the validation set
    val_loss, val_mae, val_mse = model.evaluate(X_train_v, y_train_v, verbose=2)
    validation_losses.append(val_loss)
    #validation_qwk.append(val_qwk)
    models.append(model)

# Calculate the average validation loss across all folds
avg_val_loss = np.mean(validation_losses)
#avg_val_qwk = np.mean(validation_qwk)
print(f"Average Validation Loss: {avg_val_loss}")


In [None]:
# Plot the training and validation loss curves stored in `history`, which
# holds the result of the most recent model.fit call.
# Each curve gets a label so that plt.legend() has entries to show; without
# labels the legend is empty and matplotlib emits a warning.
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Re-evaluate the current `model` on the current validation split.
#model1.evaluate(X_train_v, y_train_v)
val_loss, val_mae, val_mse = model.evaluate(X_train_v, y_train_v)
print(f"Validation loss: {val_loss:.4f}, Validation MAE: {val_mae:.4f}")

In [None]:
#plt.plot(history.history['qwk_sii'])
#plt.plot(history.history['val_qwk_sii'])
#plt.title('Model QWK')
#plt.xlabel('Epoch')
#plt.ylabel('QWK')
#plt.legend()
#plt.show()

#model1.evaluate(X_train_v, y_train_v)
#val_loss, val_mae, val_mse, val_qwk = model.evaluate(X_train_v, y_train_v)
#print(f"Validation qwk: {val_qwk:.4f}")

In [None]:
#tf.experimental.numpy.experimental_enable_numpy_behavior()

# Final prediction with model

In [None]:
# Start the output frame from the test ids. The explicit .copy() makes y_test
# an independent DataFrame, so adding prediction columns to it later does not
# trigger pandas' SettingWithCopyWarning.
y_test = test_data[['id']].copy()

In [None]:
# Disabled alternatives that rounded/clipped the averaged prediction to 0..3:
#y_test['PCIAT-PCIAT_Total_dnn'] = np.round(np.clip(np.mean([model.predict(X_test_fimpute) for model in models], axis=0),0,3))
#y_test['PCIAT-PCIAT_Total_dnn'] = np.round(np.clip(np.mean([model.predict(X_test_pca) for model in models], axis=0),0,3))

# Ensemble: average the predictions of all fold models. ravel() flattens the
# averaged array so a plain 1-D vector is stored in the column.
fold_predictions = [model.predict(X_test_pca) for model in models]
y_test['PCIAT-PCIAT_Total_dnn'] = np.mean(fold_predictions, axis=0).ravel()

#y_test['sii'] = y_test['PCIAT-PCIAT_Total_dnn'].astype('int64')
# Map the predicted total score to an sii class in one vectorised step.
# np.select takes the first matching condition, so the cut-offs are:
#   score <= 30 -> 0, score < 50 -> 1, score < 80 -> 2, otherwise 3.
total_score = y_test['PCIAT-PCIAT_Total_dnn']
y_test['sii'] = np.select(
    [total_score <= 30, total_score < 50, total_score < 80],
    [0, 1, 2],
    default=3,
)

y_test.head()



In [None]:
# Keep only the id and predicted sii columns and write them, without the
# DataFrame index, to submission.csv.
solution = y_test.loc[:, ['id', 'sii']]
solution.to_csv("submission.csv", index=False)