In [21]:
import xgboost as xgb
from sklearn.metrics import f1_score, recall_score, roc_auc_score, roc_curve, auc, accuracy_score
from custom_functions import processing
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
import statistics

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [22]:
# import data
# Raw string literal: the Windows path contains backslashes (`\D`, `\E`, `\d`)
# that are not valid escape sequences. A plain literal only works by accident
# and triggers a DeprecationWarning/SyntaxWarning on recent Python versions.
file_path = r'P:\DATA_OCT_22\Expert_Eye\Dataset\Data\data_v10.xlsx'

# Read the spreadsheet and drop the 'Foldername' column before any further use.
dataset = pd.read_excel(file_path).drop(columns=['Foldername'])
dataset.head()

Unnamed: 0,Gender,VINCQ32DDN,VINICODEX003,FROPCOM0001,FROPCOM0005,FROPCOM0006_S1_,FROPCOM0006_S2_,FROPCOM0006_S3_,FROPCOM0006_S4_,FROPCOM0006_S5_,...,HADS_D_Score,grip,walk_time_4m,Item_1,Item_2,Item_3,Item_4,Item_5,Fried_Score,Fried_State
0,0,76.0,0.0,0.0,3.0,,,,,1.0,...,9,,8.45,0.0,0,1.0,1.0,,2,0
1,0,75.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,...,9,21.5,6.11,0.0,0,1.0,1.0,1.0,3,1
2,0,67.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,...,10,23.2,20.0,0.0,0,0.0,1.0,1.0,2,0
3,0,72.0,1.0,0.0,1.0,,,,,,...,15,17.7,4.87,0.0,1,0.0,0.0,1.0,2,0
4,1,69.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10,40.0,3.48,0.0,0,0.0,0.0,1.0,1,0


In [23]:
# Features and Targets
# Drop the excluded columns once; both the feature-name list and the feature
# matrix are derived from the same reduced frame so they cannot drift apart.
_features_frame = dataset.drop(
    columns=[
        'Fried_State', 'Fried_Score', 'Frailty_State', 'Frailty_Score',
        'Item_1', 'Item_2', 'Item_3', 'Item_4', 'Item_5',
        'Weight_Diff', 'HADS_D_Score', 'walk_time_4m', 'EXAMCLIN02', 'grip',
    ]
)

# feature names (same order as the columns of X)
feature_names = list(_features_frame.columns)

# feature matrix as a plain array
X = _features_frame.values

# The two tasks are Fried_State and Frailty_State
y_fried, y_frailty = dataset['Fried_State'].values, dataset['Frailty_State'].values

In [24]:
# Stack the two task label vectors side by side: one row per sample,
# column 0 = y_fried, column 1 = y_frailty.
y = np.stack([y_fried, y_frailty], axis=1)

In [25]:
# Custom Objective Functions for XGBoost `Multi-Task` Feature Learning (MTFL)
# Same X for both tasks
# Task 1: Fried_State -> y1
# Task 2: Frailty_State -> y2

# XGBoost MTFL
def xgb_mtfl_objective(y_true, y_pred):
    """Gradient and hessian of a two-task binary logistic loss for XGBoost.

    The function accepts both calling conventions used by XGBoost:

    * scikit-learn style ``objective(y_true, y_pred)``;
    * native ``xgb.train(..., obj=...)`` style ``obj(predictions, dmatrix)``,
      detected by the second argument exposing ``get_label()``. In that case
      the first argument holds the predictions and the labels are read from
      the DMatrix.

    Parameters
    ----------
    y_true : array-like or predictions (native call)
        Labels, either one column per task (``(n, 2)`` or the same data
        flattened row-major), or a single vector of length ``n`` that is then
        shared by both tasks. Any value ``> 0`` is the positive class, so both
        0/1 and -1/+1 encodings are handled.
    y_pred : array-like or DMatrix (native call)
        Raw (pre-sigmoid) scores, ``(n, 2)`` or flattened row-major.

    Returns
    -------
    (grad, hess) : tuple of 1-D ``numpy.ndarray``
        First and second derivatives of the logistic loss with respect to the
        raw scores, flattened row-major (sample-major), i.e. in the same
        layout that is used to reshape the predictions.

    Raises
    ------
    ValueError
        If the predictions cannot be arranged as two columns or the labels do
        not match the predictions.
    """
    if hasattr(y_pred, "get_label"):
        # Native API: arguments arrive as (predictions, DMatrix).
        labels, scores = y_pred.get_label(), y_true
    else:
        labels, scores = y_true, y_pred

    scores = np.asarray(scores, dtype=float).reshape(-1, 2)
    labels = np.asarray(labels, dtype=float)
    if labels.size == scores.shape[0]:
        # A single label per sample: use it for both task columns.
        labels = np.repeat(labels.reshape(-1, 1), 2, axis=1)
    else:
        # One label per (sample, task); each task is scored against its own column.
        labels = labels.reshape(scores.shape)

    positive = (labels > 0).astype(float)

    # Sigmoid written with tanh so that large |score| cannot overflow exp().
    prob = 0.5 * (1.0 + np.tanh(0.5 * scores))

    # Logistic loss derivatives w.r.t. the raw score: p - y and p * (1 - p).
    grad = prob - positive
    # Keep the hessian strictly positive so saturated scores do not yield a zero curvature.
    hess = np.maximum(prob * (1.0 - prob), 1e-16)

    # Row-major flatten, consistent with the row-major reshape of the predictions.
    return grad.flatten(), hess.flatten()


In [None]:
# Train a single booster on both tasks with the custom objective and report
# in-sample metrics.

# Create a DMatrix. Labels are passed as an (n_samples, 2) matrix, one column
# per task, in the column order of `y`.
dtrain = xgb.DMatrix(X, label=y)

# Set the parameters for XGBoost
param = {'max_depth': 2, 'eta': 1}

# Train the model
num_round = 10
bst = xgb.train(param, dtrain, num_boost_round=num_round, obj=xgb_mtfl_objective)

# NOTE: this DMatrix contains the same rows that were used for training, so
# the metrics below are in-sample and will overestimate real performance.
dtest = xgb.DMatrix(X)

# With a custom objective XGBoost does not know the link function, so the
# booster outputs raw scores. Request the margin explicitly, align it with
# the (n_samples, 2) label matrix and apply the sigmoid ourselves.
y_margin = bst.predict(dtest, output_margin=True).reshape(y.shape)
y_pred = 0.5 * (1.0 + np.tanh(0.5 * y_margin))  # overflow-safe sigmoid -> probabilities
y_pred_label = (y_pred >= 0.5).astype(int)

# Print the accuracy (for two-column labels this is subset accuracy:
# a sample counts as correct only if both tasks are predicted correctly)
print('Accuracy: ', accuracy_score(y, y_pred_label))

# Print the roc_auc_score (computed from probabilities, not hard labels)
print('ROC_AUC: ', roc_auc_score(y, y_pred))

# Print the recall_score
print('Recall: ', recall_score(y, y_pred_label, average='weighted'))
