In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import model_selection
import tensorflow.keras.backend as K
import warnings
warnings.filterwarnings("ignore")
import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for root_dir, _, names_in_dir in os.walk('/kaggle/input'):
    for file_name in names_in_dir:
        print(os.path.join(root_dir, file_name))

/kaggle/input/amp-parkinsons-disease-progression-prediction/train_proteins.csv
/kaggle/input/amp-parkinsons-disease-progression-prediction/train_clinical_data.csv
/kaggle/input/amp-parkinsons-disease-progression-prediction/public_timeseries_testing_util.py
/kaggle/input/amp-parkinsons-disease-progression-prediction/supplemental_clinical_data.csv
/kaggle/input/amp-parkinsons-disease-progression-prediction/train_peptides.csv
/kaggle/input/amp-parkinsons-disease-progression-prediction/amp_pd_peptide/competition.cpython-37m-x86_64-linux-gnu.so
/kaggle/input/amp-parkinsons-disease-progression-prediction/amp_pd_peptide/__init__.py
/kaggle/input/amp-parkinsons-disease-progression-prediction/amp_pd_peptide_310/competition.cpython-310-x86_64-linux-gnu.so
/kaggle/input/amp-parkinsons-disease-progression-prediction/amp_pd_peptide_310/__init__.py
/kaggle/input/amp-parkinsons-disease-progression-prediction/example_test_files/sample_submission.csv
/kaggle/input/amp-parkinsons-disease-progression-pre

In [2]:
def average_dict_val(values_dict):
    """Print and return the arithmetic mean of the values in ``values_dict``.

    Args:
        values_dict: Mapping whose values are numeric (the keys are ignored).

    Returns:
        The mean of the values, or ``nan`` when the mapping is empty.
    """
    if values_dict:
        average = sum(values_dict.values()) / len(values_dict)
    else:
        # An empty mapping has no mean; report nan instead of dividing by zero.
        average = float("nan")

    print("Average value: {:.2f}".format(average))
    return average

# 1. Data Analysis

In [3]:
# Load the three training tables: clinical scores, peptide abundances, protein expression.
clinical_csv = "/kaggle/input/amp-parkinsons-disease-progression-prediction/train_clinical_data.csv"
peptides_csv = "/kaggle/input/amp-parkinsons-disease-progression-prediction/train_peptides.csv"
proteins_csv = "/kaggle/input/amp-parkinsons-disease-progression-prediction/train_proteins.csv"

df_train_cli = pd.read_csv(clinical_csv)
df_train_pep = pd.read_csv(peptides_csv)
df_train_pro = pd.read_csv(proteins_csv)

## 1.1 missing value

In [4]:
# Report how many distinct patients the clinical table holds, then the number of
# missing values in each of its columns.
n_patients = df_train_cli["patient_id"].nunique()
print(f'Unique Clinical Data patient #: {n_patients}')
print("----------------------------------------------------------")
print(f'Null Values Found in Clinical Data:')
for col, n_missing in df_train_cli.isna().sum().items():
    print(f'Null values found in {col}: {n_missing}')
print('')

Unique Clinical Data patient #: 248
----------------------------------------------------------
Null Values Found in Clinical Data:
Null values found in visit_id: 0
Null values found in patient_id: 0
Null values found in visit_month: 0
Null values found in updrs_1: 1
Null values found in updrs_2: 2
Null values found in updrs_3: 25
Null values found in updrs_4: 1038
Null values found in upd23b_clinical_state_on_medication: 1327



In [5]:
# Fill missing updrs_3 scores by linear interpolation *within each patient*.
# Interpolating down the whole table (as before) lets a gap at the start or end of
# one patient's rows be filled from a different patient's scores.
# limit_direction='both' also fills gaps at either end of a patient's series from
# that patient's nearest observed value; a patient with no updrs_3 at all stays NaN.
# NOTE(review): assumes rows are ordered by visit_month within each patient - confirm.
df_train_cli['updrs_3'] = (
    df_train_cli.groupby('patient_id')['updrs_3']
    .transform(lambda scores: scores.interpolate(method='linear', limit_direction='both'))
)

## 1.2 Target values preparation

In [6]:
# Build, per patient, the 16 target columns updrs_{1..4}_plus_{0,6,12,24}_months.
# The result is a dict mapping patient_id -> that patient's filtered clinical rows.
patients = {}
# Create every target column up front, initialised to 0.
for e in range(1,5):
    for m in [0,6,12,24]:
        df_train_cli[f'updrs_{e}_plus_{m}_months'] = 0

for patient in df_train_cli.patient_id.unique():
    # Rows of the current patient only.
    temp = df_train_cli[df_train_cli.patient_id == patient]
    month_list = []
    month_windows = [0,6,12,24]
    # For every visit, the four months whose scores form its targets:
    # the visit itself and 6, 12 and 24 months later.
    for month in temp.visit_month.values:
        month_list.append([month, month + 6, month + 12, month + 24])
    # NOTE: from here on `month` is an index into month_list, not a visit month.
    for month in range(len(month_list)):
        for x in range(1,5):
            # Scores of updrs_x at the window months that still exist in temp.
            # Missing scores are replaced by 0 for every part except updrs_3,
            # whose NaNs are passed through unchanged.
            if x == 3:
                arr = temp[temp.visit_month.isin(month_list[month])][f'updrs_{x}'].to_list()
            else:
                arr = temp[temp.visit_month.isin(month_list[month])][f'updrs_{x}'].fillna(0).to_list()
            if len(arr) == 4:
                # All four window visits are present: write them onto the row of the
                # window's first month.
                # NOTE(review): arr follows temp's row order; this presumably relies on
                # rows being sorted by visit_month so arr[e] matches month_windows[e] - confirm.
                for e, i in enumerate(arr):
                    m = month_list[month][0]
                    temp.loc[temp.visit_month == m,[f'updrs_{x}_plus_{month_windows[e]}_months']] = i
            else:
                # Incomplete window: remove every remaining row whose visit_month is in
                # this window - not only the window's first visit. Windows processed
                # later therefore see the reduced temp.
                temp = temp[~temp.visit_month.isin(month_list[month])]
    patients[patient] = temp

In [7]:
# Stack the per-patient frames, index them by visit_id, and keep the columns from
# position 6 onward (the medication-state column followed by the target columns).
formatted_clin = (
    pd.concat(patients.values(), ignore_index=True)
    .set_index('visit_id')
    .iloc[:, 6:]
)
# Visits without a recorded medication state get the explicit label 'unknown'.
medication_state = formatted_clin['upd23b_clinical_state_on_medication'].fillna('unknown')
formatted_clin['upd23b_clinical_state_on_medication'] = medication_state
formatted_clin.head()

Unnamed: 0_level_0,upd23b_clinical_state_on_medication,updrs_1_plus_0_months,updrs_1_plus_6_months,updrs_1_plus_12_months,updrs_1_plus_24_months,updrs_2_plus_0_months,updrs_2_plus_6_months,updrs_2_plus_12_months,updrs_2_plus_24_months,updrs_3_plus_0_months,updrs_3_plus_6_months,updrs_3_plus_12_months,updrs_3_plus_24_months,updrs_4_plus_0_months,updrs_4_plus_6_months,updrs_4_plus_12_months,updrs_4_plus_24_months
visit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
55_0,unknown,10,8,10,16,6,10,10,9,15.0,34.0,41.0,49.0,0,0,0,0
55_6,unknown,8,10,7,14,10,10,13,13,34.0,41.0,38.0,49.0,0,0,0,0
55_12,On,10,7,16,17,10,13,9,18,41.0,38.0,49.0,51.0,0,0,0,0
55_18,On,7,16,14,12,13,9,13,20,38.0,49.0,49.0,41.0,0,0,0,0
55_24,On,16,14,17,17,9,13,18,16,49.0,49.0,51.0,52.0,0,0,0,0


# 2. Feature Preparation
## 2.1 Protein preparation

The following variables are defined in this section:
* **protein_list** : all the **unique proteins**

* **df**: data merged

In [8]:
# Wide protein table: one row per visit, one column per UniProt id; absent NPX values become 0.
prot_features = df_train_pro.pivot(index='visit_id', columns='UniProt', values='NPX').fillna(0)

# Right join onto the clinical targets so every target row is kept, with or without proteins.
df = prot_features.merge(formatted_clin, how='right', left_index=True, right_index=True)
na_share = df[prot_features.columns].isna().sum().sum() / (len(df) * len(prot_features.columns))
print(f'\nNA values: {na_share:.2%}')

# The visit month is the integer after the underscore in visit_id ("<patient>_<month>").
visit_months = df.reset_index().visit_id.str.split('_').apply(lambda parts: int(parts[1]))
df['visit_month'] = visit_months.values

visit_month_list = visit_months.unique().tolist()
protein_list = prot_features.columns.to_list()
print(df.dropna().shape)


NA values: 49.69%
(480, 245)


## 2.2 Peptide preparation

In [9]:
# create a new variables
df_train_pep['UniProt_Peptide'] = df_train_pep['UniProt'] + '_' + df_train_pep['Peptide']
pep_features = df_train_pep.pivot(index='visit_id', columns='UniProt_Peptide', values='PeptideAbundance').fillna(0)


df = pep_features.merge(df, left_index=True,right_index=True,how='right')
print(f'\nNA values: {df[pep_features.columns].isna().sum().sum()/(len(df)*len(pep_features.columns)):.2%}')
df['visit_month'] = df.reset_index().visit_id.str.split('_').apply(lambda x: int(x[1])).values

visit_month_list = df.reset_index().visit_id.str.split('_').apply(lambda x: int(x[1])).unique().tolist()
peptide_list = pep_features.columns.to_list()


NA values: 49.69%


In [10]:
# Keep only the rows that have no missing value in any column, then display the frame.
df = df.dropna()
df

Unnamed: 0_level_0,O00391_NEQEQPLGQWHLS,O00533_GNPEPTFSWTK,O00533_IEIPSSVQQVPTIIK,O00533_KPQSAVYSTGSNGILLC(UniMod_4)EAEGEPQPTIK,O00533_SMEQNGPGLEYR,O00533_TLKIENVSYQDKGNYR,O00533_VIAVNEVGR,O00533_VMTPAVYAPYDVK,O00533_VNGSPVDNHPFAGDVVFPR,O00584_ELDLNSVLLK,...,updrs_2_plus_24_months,updrs_3_plus_0_months,updrs_3_plus_6_months,updrs_3_plus_12_months,updrs_3_plus_24_months,updrs_4_plus_0_months,updrs_4_plus_6_months,updrs_4_plus_12_months,updrs_4_plus_24_months,visit_month
visit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
55_0,11254.3,102060.0,174185.0,27278.9,30838.70,23216.5,170878.0,148771.0,55202.1,27229.3,...,9,15.0,34.0,41.0,49.0,0,0,0,0,0
55_6,13163.6,90643.1,147434.0,24320.6,25532.90,21884.6,152910.0,118982.0,48758.2,23305.4,...,13,34.0,41.0,38.0,49.0,0,0,0,0,6
55_12,15257.6,114433.0,194848.0,34090.4,30140.50,29528.6,184855.0,172592.0,54596.9,28367.0,...,18,41.0,38.0,49.0,51.0,0,0,0,0,12
55_36,13530.8,120904.0,161969.0,25936.8,37848.40,27058.9,164386.0,166703.0,49026.2,28379.8,...,21,51.0,41.0,52.0,56.0,0,0,0,0,36
942_6,11218.7,44187.4,106295.0,17013.7,15552.10,15239.1,77440.0,94832.4,28957.8,14231.4,...,4,21.0,25.0,18.0,19.0,0,0,0,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64674_12,0.0,35020.4,83801.0,11850.5,9556.72,0.0,28454.5,0.0,20291.1,14039.7,...,18,18.0,29.0,26.0,44.0,0,0,0,1,12
64674_24,0.0,45481.4,94692.8,17076.5,11470.90,12929.6,50800.0,35483.7,25952.7,19829.0,...,17,26.0,48.0,44.0,46.0,0,0,1,1,24
65043_0,13472.4,158777.0,0.0,51184.3,55708.40,53159.7,301939.0,224293.0,82892.2,22158.6,...,8,16.0,19.0,14.0,8.5,0,0,0,0,0
65043_12,14134.9,126575.0,296011.0,44624.9,44012.30,37695.2,186724.0,180328.0,68679.6,28990.8,...,7,14.0,13.0,8.5,5.0,0,0,0,0,12


In [11]:
# Name of the column to look for.
column_name = 'upd23b_clinical_state_on_medication'

# Report whether that column is present among the DataFrame's columns.
print(
    f"{column_name} is in the DataFrame."
    if column_name in df.columns
    else f"{column_name} is not in the DataFrame."
)

upd23b_clinical_state_on_medication is in the DataFrame.


In [12]:
# Distinct medication-state labels present in the merged training frame.
df['upd23b_clinical_state_on_medication'].unique()

array(['unknown', 'On', 'Off'], dtype=object)

# 3 Model Predict
## 3.1 SMAPE Estimation

* 1. 我们姑且认为 train 中的蛋白质和多肽包含了某一时刻病人被测得的全部蛋白质和多肽，即不存在未被测到的蛋白质；在这种情况下，我们直接用 train 训练模型，并对 test 中新出现的变量进行填充后再进行预测；
* 2. 我们姑且认为 train 中的蛋白质和多肽没有包含某一时刻病人被测得的全部蛋白质和多肽，即存在未被测到的蛋白质。在这种情况下，我们需先找出 test 和 train 共有的蛋白质，然后再对 train 进行模型训练，这种方法消耗的算力很大；

In [13]:
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
import datetime
!nvidia-smi

Mon May 15 05:29:53 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [14]:
def smape_loss(y_true, y_pred):
    """Element-wise, smoothed SMAPE-style loss for Keras/TensorFlow tensors.

    Computes ``|y_pred - y_true| / (denom / 2)`` where
    ``denom = max(|y_true| + |y_pred| + 0.1, 0.6)``, and replaces any NaN in the
    result with 0.

    Args:
        y_true: Tensor of ground-truth values.
        y_pred: Tensor of predictions.

    Returns:
        Tensor of per-element loss values (no reduction is applied here).
    """
    # Imported here because the notebook only imports `tensorflow.keras.backend as K`;
    # without this the `tf.` calls below raise NameError.
    import tensorflow as tf

    epsilon = 0.1
    numer = K.abs(y_pred - y_true)
    # The floor of 0.5 + epsilon keeps the denominator away from zero.
    denom = K.maximum(K.abs(y_true) + K.abs(y_pred) + epsilon, 0.5 + epsilon)
    smape = numer / (denom / 2)
    return tf.where(tf.math.is_nan(smape), tf.zeros_like(smape), smape)


def smape_obj(preds, y_true):
    """Approximate SMAPE gradient and hessian for use as a custom boosting objective.

    With ``s = |y_true| + |preds|`` the gradient is ``-200 / s`` where
    ``y_true >= preds`` and ``+200 / s`` otherwise, and the hessian is ``400 / s**2``.
    Where ``s == 0`` (target and prediction are both exactly 0) both are 0, so the
    result is always finite.

    Args:
        preds: Array-like of predictions.
        y_true: Array-like of targets, same shape as ``preds``.

    Returns:
        Tuple ``(grad, hess)`` of float arrays with the shape of the inputs.
    """
    actuals = np.asarray(y_true, dtype=float)
    preds = np.asarray(preds, dtype=float)

    summ = np.abs(actuals) + np.abs(preds)
    nonzero = summ != 0

    # Divide only where the denominator is non-zero; the remaining entries stay 0.
    grad_magnitude = np.divide(200.0, summ, out=np.zeros_like(summ), where=nonzero)
    grad = np.where(actuals >= preds, -grad_magnitude, grad_magnitude)
    hess = np.divide(400.0, summ * summ, out=np.zeros_like(summ), where=nonzero)
    return grad, hess

def calculate_smape(y_true, y_pred):
    """Return the SMAPE (in percent) between two sequences, computed on rounded terms.

    The absolute error and the sum of absolute values are each rounded to whole
    numbers before dividing; a 0/0 ratio counts as 0.

    Args:
        y_true: Sequence of ground-truth values.
        y_pred: Sequence of predictions, same length as ``y_true``.

    Returns:
        The mean ratio multiplied by 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    abs_error = np.round(np.abs(predicted - actual), 0)
    half_scale = np.round(np.abs(actual) + np.abs(predicted), 0) / 2
    # nan_to_num turns the NaN produced by 0/0 into 0.
    ratios = np.nan_to_num(abs_error / half_scale)

    return 1 / len(actual) * np.sum(ratios) * 100

In [15]:
# Targets are every clinical column except the medication-state flag, which stays a feature.
target = [
    col_name
    for col_name in formatted_clin.columns
    if col_name != 'upd23b_clinical_state_on_medication'
]
# Features: everything that is not a target. Labels: the target columns.
X = df.drop(columns=target)
y = df.loc[:, target]

In [16]:
# parametres setting
params_1 = {
    'objective': 'reg:squarederror',
    'n_estimators': 800, 
    'learning_rate': 0.05, 'max_depth': 4,
    'tree_method': 'gpu_hist', 'gpu_id': 0
}

params_2 = {
    'objective': 'reg:squarederror', 
    'n_estimators': 800, 
    'learning_rate': 0.1, 'max_depth': 4,
    'tree_method': 'gpu_hist', 'gpu_id': 0
}

params_3 = {
    'objective': 'reg:squarederror',
    'n_estimators': 900, 
    'learning_rate': 0.05, 'max_depth': 4,
    'tree_method': 'gpu_hist', 'gpu_id': 0
}

params_4 = {
    'objective':'reg:squarederror',
    'n_estimators': 1000, 
    'learning_rate': 0.08, 'max_depth': 4,
    'tree_method': 'gpu_hist', 'gpu_id': 0
}

params_dic = dict(zip(target,
                      [params_1, params_2, params_3, params_4,
                       params_1, params_2, params_3, params_4,
                       params_1, params_2, params_3, params_4,
                       params_1, params_2, params_3, params_4]))

In [17]:
import optuna
import xgboost as xgb
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


def objective(trial, X, y, u):
    """Optuna objective: train one XGBoost model for target ``u`` and return its MSE.

    Args:
        trial: Optuna trial used to sample n_estimators, learning_rate and max_depth.
        X: Feature frame.
        y: Frame of target columns.
        u: Name of the target column to fit.

    Returns:
        The MSE reported by ``model_train_operation`` for the sampled parameters.
    """
    sampled_params = dict(
        objective='reg:squarederror',
        n_estimators=trial.suggest_int("n_estimators", 500, 1000),
        learning_rate=trial.suggest_float("learning_rate", 0.01, 0.2),
        max_depth=trial.suggest_int("max_depth", 1, 10),
        tree_method='gpu_hist',
        gpu_id=0,
    )

    # Only the MSE is needed; the other returned values are ignored.
    _model, _smape, holdout_mse, _features, _params = model_train_operation(X, y, u, sampled_params)
    return holdout_mse


def model_train_operation(X, y, u, params=None, feature_number=0):
    """Train an XGBoost regressor for one target and score it on a held-out split.

    Args:
        X: Feature frame.
        y: Frame of target columns.
        u: Name of the target column in ``y`` to fit.
        params: XGBRegressor keyword arguments. When ``None``, they are searched with
            Optuna (50 trials minimising the MSE returned by ``objective``).
        feature_number: Number of features to keep with ``SelectKBest(f_regression)``.
            ``0`` keeps every column. Values above the number of columns are capped
            to it. ``visit_month`` is always kept.

    Returns:
        Tuple ``(model, smape, mse, selected_X, params)``: the fitted model, its SMAPE
        and MSE on the held-out rows, the list of feature names used, and the
        parameters used.
    """
    selected_X = X.columns.to_list()

    if feature_number != 0:
        # SelectKBest raises if k exceeds the number of columns, so cap it.
        k_best = min(feature_number, X.shape[1])
        selector = SelectKBest(score_func=f_regression, k=k_best)
        selector.fit(X, y[u])
        selected_X = X.columns[selector.get_support(indices=True)].tolist()

        if 'visit_month' not in selected_X:
            selected_X.append('visit_month')

        X = X[selected_X]

    if params is None:
        study = optuna.create_study(direction="minimize")
        study.optimize(lambda trial: objective(trial, X, y, u), n_trials=50)
        params = study.best_params
        # best_params holds only the sampled values; add back the fixed settings.
        params["objective"] = "reg:squarederror"
        params["tree_method"] = "gpu_hist"
        params["gpu_id"] = 0

    model = xgb.XGBRegressor(random_state=4, **params)

    # NOTE(review): test_size=0.001 holds out 0.1% of the rows; on a table of a few
    # hundred rows that is presumably a single sample, which makes the SMAPE/MSE
    # below very noisy - confirm this is intended.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.001, random_state=42)

    model.fit(X_train, y_train[u])

    y_pred = model.predict(X_test)
    smape = calculate_smape(y_test[u], y_pred)
    mse = mean_squared_error(y_test[u], y_pred)
    return model, smape, mse, selected_X, params

## 3.2 Data Test transform

In [18]:
# Load the example test tables shipped with the competition data.
test_clinical_csv = "/kaggle/input/amp-parkinsons-disease-progression-prediction/example_test_files/test.csv"
test_peptides_csv = "/kaggle/input/amp-parkinsons-disease-progression-prediction/example_test_files/test_peptides.csv"
test_proteins_csv = "/kaggle/input/amp-parkinsons-disease-progression-prediction/example_test_files/test_proteins.csv"
sample_submission_csv = "/kaggle/input/amp-parkinsons-disease-progression-prediction/example_test_files/sample_submission.csv"

df_test_cli = pd.read_csv(test_clinical_csv)
df_test_pep = pd.read_csv(test_peptides_csv)
df_test_pro = pd.read_csv(test_proteins_csv)
df_test_sub = pd.read_csv(sample_submission_csv)

In [19]:
def data_test_transform(df_clin_test, df_test_pep, df_test_pro):
    """Turn the long test tables into one wide feature row per visit.

    Args:
        df_clin_test: Clinical test rows with at least ``visit_id`` and ``visit_month``;
            may also carry ``upd23b_clinical_state_on_medication``.
        df_test_pep: Long peptide table (``visit_id``, ``visit_month``, ``UniProt``,
            ``Peptide``, ``PeptideAbundance``). A ``UniProt_Peptide`` column is added
            to this frame in place.
        df_test_pro: Long protein table (``visit_id``, ``visit_month``, ``UniProt``, ``NPX``).

    Returns:
        DataFrame with ``visit_id``, ``visit_month``, one column per protein, one per
        ``<UniProt>_<Peptide>`` and ``upd23b_clinical_state_on_medication``
        (``'unknown'`` where no state is available). Only visits present in both the
        protein and the peptide table are returned.
    """
    key_cols = ['visit_id', 'visit_month']
    med_col = 'upd23b_clinical_state_on_medication'

    df_test_pep['UniProt_Peptide'] = df_test_pep['UniProt'] + '_' + df_test_pep['Peptide']

    # 1. Peptides: long -> wide, missing abundances become 0.
    pep_test = (
        df_test_pep.pivot_table(index=key_cols,
                                columns='UniProt_Peptide',
                                values='PeptideAbundance')
        .reset_index()
        .fillna(0)
    )

    # 2. Proteins: long -> wide, missing NPX become 0.
    pro_test = (
        df_test_pro.pivot_table(index=key_cols,
                                columns='UniProt', values='NPX', fill_value=0)
        .reset_index()
        .fillna(0)
    )
    df_test = pd.merge(pro_test, pep_test, on=key_cols)

    if med_col in df_clin_test.columns:
        # Select with a list of columns (a tuple key raises KeyError) and include the
        # medication column itself. The clinical table can hold several rows per
        # visit, so keep one per visit to avoid duplicating feature rows in the merge.
        med_state = df_clin_test[key_cols + [med_col]].drop_duplicates(subset=key_cols)
        df_test = pd.merge(df_test, med_state, on=key_cols, how='left')
        df_test[med_col] = df_test[med_col].fillna('unknown')
    else:
        df_test[med_col] = 'unknown'

    return df_test

def data_columns_remain(test, train):
    """Give ``test`` exactly the columns of ``train``, in the same order.

    Columns of ``test`` that ``train`` lacks are dropped; columns of ``train`` that
    ``test`` lacks are added and filled with 0. Neither input is modified.

    Args:
        test: Frame to align.
        train: Frame whose columns define the layout.

    Returns:
        Tuple ``(aligned_test, train_copy)`` where ``train_copy`` is a deep copy of ``train``.
    """
    train_copy = train.copy(deep=True)
    # A single reindex drops, adds and orders the columns in one step, instead of
    # inserting the missing columns one at a time into a slice of `test`.
    test = test.reindex(columns=train_copy.columns, fill_value=0)
    return test, train_copy

In [20]:
# Build the wide test frame, index it by visit, then align its columns to the training features.
df_test = data_test_transform(df_test_cli, df_test_pep, df_test_pro).set_index('visit_id')
df_test, df_train = data_columns_remain(df_test, X)

In [21]:
# Name of the column to look for.
column_name = 'upd23b_clinical_state_on_medication'

# Report whether that column is present among the test frame's columns.
print(
    f"{column_name} is in the DataFrame."
    if column_name in df_test.columns
    else f"{column_name} is not in the DataFrame."
)

upd23b_clinical_state_on_medication is in the DataFrame.


In [22]:
# Prediction of updrs_4
from scipy.optimize import minimize
def smape_plus_1(y_true, y_pred):
    """Return SMAPE (in percent) computed after adding 1 to both inputs.

    Positions where both shifted values are 0 score 0. Positions whose score is NaN
    (for example a NaN target) are left out of the mean.

    Args:
        y_true: Array of ground-truth values.
        y_pred: Array of predictions, same length as ``y_true``.

    Returns:
        100 times the NaN-ignoring mean of ``|t - p| / ((|t| + |p|) / 2)`` on the
        shifted values.
    """
    shifted_true = y_true + 1
    shifted_pred = y_pred + 1

    abs_error = np.abs(shifted_true - shifted_pred)
    half_scale = (np.abs(shifted_true) + np.abs(shifted_pred)) / 2

    # Only divide where at least one shifted value is non-zero.
    both_zero = (shifted_true == 0) & (shifted_pred == 0)
    usable = ~both_zero

    scores = np.zeros(len(shifted_true))
    scores[usable] = abs_error[usable] / half_scale[usable]

    return 100 * np.nanmean(scores)


In [23]:
# Rebuild a clinical table from the raw CSV with one column per protein (NPX summed per visit).
train_clinical_all = pd.read_csv('/kaggle/input/amp-parkinsons-disease-progression-prediction/train_clinical_data.csv')
proteins = pd.read_csv('/kaggle/input/amp-parkinsons-disease-progression-prediction/train_proteins.csv')
proteins_features = pd.pivot_table(proteins, values='NPX', index='visit_id', columns='UniProt', aggfunc='sum')

train_clinical_all = train_clinical_all.merge(
    proteins_features,
    left_on='visit_id',
    right_index=True,
    how='left'
)

# Visits without a protein measurement inherit the same patient's previous values.
# GroupBy.ffill() replaces the deprecated groupby(...).fillna(method='ffill').
train_clinical_all[proteins_features.columns] = (
    train_clinical_all.groupby('patient_id')[proteins_features.columns].ffill()
)
train_clinical_all['pred_month'] = train_clinical_all['visit_month']

# For each horizon, attach the scores recorded `plus_month` months after the visit:
# shifting a copy's visit_month back by the horizon makes the merge keys line up.
for plus_month in [6, 12, 24]:
    train_shift = train_clinical_all[['patient_id', 'visit_month', 'pred_month', 'updrs_1', 'updrs_2', 'updrs_3', 'updrs_4']].copy()
    train_shift['visit_month'] -= plus_month
    train_shift.rename(columns={f'updrs_{i}': f'updrs_{i}_plus_{plus_month}' for i in range(1, 5)}, inplace=True)
    train_shift.rename(columns={'pred_month': f'pred_month_plus_{plus_month}'}, inplace=True)
    train_clinical_all = train_clinical_all.merge(train_shift, how='left', on=['patient_id', 'visit_month'])

# The unshifted scores and month become the "plus_0" horizon.
train_clinical_all.rename(columns={f'updrs_{i}': f'updrs_{i}_plus_0' for i in range(1, 5)}, inplace=True)
train_clinical_all.rename(columns={'pred_month': f'pred_month_plus_0'}, inplace=True)
train_clinical_all

Unnamed: 0,visit_id,patient_id,visit_month,updrs_1_plus_0,updrs_2_plus_0,updrs_3_plus_0,updrs_4_plus_0,upd23b_clinical_state_on_medication,O00391,O00533,...,pred_month_plus_12,updrs_1_plus_12,updrs_2_plus_12,updrs_3_plus_12,updrs_4_plus_12,pred_month_plus_24,updrs_1_plus_24,updrs_2_plus_24,updrs_3_plus_24,updrs_4_plus_24
0,55_0,55,0,10.0,6.0,15.0,,,11254.3,732430.0,...,12.0,10.0,10.0,41.0,0.0,24.0,16.0,9.0,49.0,0.0
1,55_3,55,3,10.0,7.0,25.0,,,11254.3,732430.0,...,,,,,,,,,,
2,55_6,55,6,8.0,10.0,34.0,,,13163.6,630465.0,...,18.0,7.0,13.0,38.0,0.0,30.0,14.0,13.0,49.0,0.0
3,55_9,55,9,8.0,9.0,30.0,0.0,On,13163.6,630465.0,...,,,,,,,,,,
4,55_12,55,12,10.0,10.0,41.0,0.0,On,15257.6,815083.0,...,24.0,16.0,9.0,49.0,0.0,36.0,17.0,18.0,51.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2610,65043_48,65043,48,7.0,6.0,13.0,0.0,Off,10589.6,902434.0,...,60.0,6.0,6.0,16.0,1.0,72.0,3.0,9.0,14.0,1.0
2611,65043_54,65043,54,4.0,8.0,11.0,1.0,Off,10589.6,902434.0,...,,,,,,,,,,
2612,65043_60,65043,60,6.0,6.0,16.0,1.0,Off,10589.6,902434.0,...,72.0,3.0,9.0,14.0,1.0,84.0,7.0,9.0,20.0,3.0
2613,65043_72,65043,72,3.0,9.0,14.0,1.0,Off,10589.6,902434.0,...,84.0,7.0,9.0,20.0,3.0,,,,,


In [24]:
def calculate_month_trend_predicitons(pred_month, trend, target):
    """Evaluate the linear month trend ``trend[0] + pred_month * trend[1]``.

    For ``target == 'updrs_4'`` the months are first raised to at least 54.

    Args:
        pred_month: Array-like with a ``clip`` method (e.g. numpy array or Series) of months.
        trend: Sequence whose first two items are the intercept and the slope.
        target: Target name; only ``'updrs_4'`` triggers the lower bound.

    Returns:
        The trend value for every month in ``pred_month``.
    """
    intercept, slope = trend[0], trend[1]
    months = pred_month.clip(54, None) if target == 'updrs_4' else pred_month
    return intercept + months * slope


# Linear month-trend coefficients per target, stored as [intercept, slope]:
# calculate_month_trend_predicitons evaluates trend[0] + pred_month * trend[1].
# NOTE(review): the values are hard-coded; presumably fitted beforehand - confirm their source.
target_to_trend = {
    'updrs_1': [5.394793062665313, 0.027091086167821344],
    'updrs_2': [5.469498130092747, 0.02824188329658148],
    'updrs_3': [21.182145576879183, 0.08897763331790556],
    'updrs_4': [-4.434453480103724, 0.07531448585334258]
}

def calculate_predicitons_protein(protein, pred_month, protein_shift,target,target_to_trend):
    """Return the rounded month-trend prediction shifted by a constant.

    Args:
        protein: Not used by this function.
        pred_month: Months to predict for.
        protein_shift: Constant added to the trend before rounding.
        target: Target name, used to look up the trend and passed on to the trend function.
        target_to_trend: Mapping from target name to its trend coefficients.

    Returns:
        ``np.round(trend prediction + protein_shift)``.
    """
    baseline = calculate_month_trend_predicitons(
        pred_month=pred_month,
        trend=target_to_trend[target],
        target=target,
    )
    return np.round(baseline + protein_shift)


def function_to_minimize(x, y_true_array, pred_month_array, protein_array, target,target_to_trend):
    """Objective for the shift search: ``smape_plus_1`` of the shifted trend predictions.

    Args:
        x: Sequence whose first element is the candidate shift.
        y_true_array: Observed scores.
        pred_month_array: Month of each observed score.
        protein_array: Protein values, passed through to the prediction function.
        target: Target name.
        target_to_trend: Mapping from target name to its trend coefficients.

    Returns:
        The ``smape_plus_1`` value for the candidate shift.
    """
    shifted_predictions = calculate_predicitons_protein(
        protein=protein_array,
        pred_month=pred_month_array,
        protein_shift=x[0],
        target=target,
        target_to_trend=target_to_trend,
    )
    return smape_plus_1(y_true=y_true_array, y_pred=shifted_predictions)

def find_best_const(train_clinical_all_filtered, target, target_to_trend, feature_name=None):
    """Find the constant shift of the month trend that minimises ``smape_plus_1``.

    Args:
        train_clinical_all_filtered: Frame holding ``{target}_plus_{h}`` and
            ``pred_month_plus_{h}`` columns for h in 0, 6, 12, 24, plus the protein column.
        target: Target name, e.g. ``'updrs_1'``.
        target_to_trend: Mapping from target name to its trend coefficients.
        feature_name: Name of the protein column to read. Defaults to the module-level
            ``feature`` variable, which was the only option before.

    Returns:
        The shift found by a Powell search started at 0.
    """
    if feature_name is None:
        # Backward-compatible fallback to the notebook-level `feature`.
        feature_name = feature

    horizons = [0, 6, 12, 24]
    columns_with_target = [f'{target}_plus_{plus_month}' for plus_month in horizons]
    columns_with_pred_month = [f'pred_month_plus_{plus_month}' for plus_month in horizons]

    # ravel() flattens row by row: the four horizons of row 0, then those of row 1, ...
    y_true_array = train_clinical_all_filtered[columns_with_target].values.ravel()
    pred_month_array = train_clinical_all_filtered[columns_with_pred_month].values.ravel()
    # Repeat each row's protein value once per horizon so it lines up with the
    # row-major arrays above (tiling the whole column four times would not).
    protein_array = np.repeat(train_clinical_all_filtered[feature_name].values, len(horizons))

    result = minimize(
        fun=lambda x: function_to_minimize(x, y_true_array, pred_month_array, protein_array, target, target_to_trend),
        x0=[0.0],
        method='Powell'
    ).x[0]
    return result



## Plot shifts

In [25]:
from tqdm.auto import tqdm
import plotly.express as px
from collections import defaultdict

# Protein whose NPX level is used to group the rows, and the quantile edges of the groups.
feature = 'O15240'
quantiles = [0, 0.05, 0.95, 1.0]

# One record per quantile band: its bounds plus the best shift for every UPDRS part.
band_records = []
for quantile_low, quantile_high in tqdm(zip(quantiles[:-1], quantiles[1:])):
    quantile_low_value = train_clinical_all[feature].quantile(quantile_low)
    quantile_high_value = train_clinical_all[feature].quantile(quantile_high)
    item = {
        'quantile_low': quantile_low,
        'quantile_high': quantile_high,
        'quantile_middle': (quantile_low + quantile_high) / 2,
        'quantile_low_value': quantile_low_value,
        'quantile_high_value': quantile_high_value,
    }

    # The upper bound is exclusive, so nudge it up for the last band to include the maximum.
    if quantile_high == 1:
        quantile_high_value += 0.00001

    at_or_above_low = train_clinical_all[feature] >= quantile_low_value
    below_high = train_clinical_all[feature] < quantile_high_value
    train_clinical_all_filtered = train_clinical_all[at_or_above_low & below_high]

    for target_key in ['updrs_1', 'updrs_2', 'updrs_3', 'updrs_4']:
        item[f'{target_key}_shift'] = find_best_const(train_clinical_all_filtered, target_key, target_to_trend)
    band_records.append(item)

df_plot = pd.DataFrame(band_records)

0it [00:00, ?it/s]

In [26]:
# One line chart per UPDRS part: best shift against the middle of each quantile band.
for target_key in ['updrs_1', 'updrs_2', 'updrs_3', 'updrs_4']:
    px.line(
        df_plot,
        x='quantile_middle',
        y=f'{target_key}_shift',
        title=feature + ' ' + target_key,
    ).show()

## Find shifts

In [27]:
# Quantile bands of the protein level; the outermost bands are open-ended.
npx_groups = [
    dict(quantile_low=0.0, quantile_high=0.05),
    dict(quantile_low=0.05, quantile_high=0.95),
    dict(quantile_low=0.95, quantile_high=1.0),
]
target_to_npx_groups_shift = defaultdict(list)

# For updrs_1..3, store the best shift of every band (updrs_4 is not included here).
for target_key in ['updrs_1', 'updrs_2', 'updrs_3']:
    for npx_group in npx_groups:
        item = npx_group.copy()
        item['feature'] = feature

        # Lowest band has no lower bound; highest band has no upper bound.
        item['quantile_low_value'] = (
            -np.inf
            if item['quantile_low'] == 0
            else train_clinical_all[feature].quantile(item['quantile_low'])
        )
        item['quantile_high_value'] = (
            np.inf
            if item['quantile_high'] == 1
            else train_clinical_all[feature].quantile(item['quantile_high'])
        )

        in_band = (
            (train_clinical_all[feature] >= item['quantile_low_value'])
            & (train_clinical_all[feature] < item['quantile_high_value'])
        )
        train_clinical_all_filtered = train_clinical_all[in_band]

        item['shift'] = find_best_const(train_clinical_all_filtered, target_key, target_to_trend)
        target_to_npx_groups_shift[target_key].append(item)

In [28]:
# Empty containers for fitted models and their scores.
models_dict = {}
mse_scores = {}
# Position of every target column in `target`.
feature_dict = {target_name: position for position, target_name in enumerate(target)}

# Number of features SelectKBest keeps for each target, listed in `target` order
# (four horizons per UPDRS part).
_features_to_keep = [
    600, 531, 700, 650,
    600, 850, 700, 470,
    1198, 600, 380, 460,
    350, 150, 100, 150,
]
feature_number_dict = dict(zip(target, _features_to_keep))

In [29]:
# Display the per-target number of features to keep.
feature_number_dict

{'updrs_1_plus_0_months': 600,
 'updrs_1_plus_6_months': 531,
 'updrs_1_plus_12_months': 700,
 'updrs_1_plus_24_months': 650,
 'updrs_2_plus_0_months': 600,
 'updrs_2_plus_6_months': 850,
 'updrs_2_plus_12_months': 700,
 'updrs_2_plus_24_months': 470,
 'updrs_3_plus_0_months': 1198,
 'updrs_3_plus_6_months': 600,
 'updrs_3_plus_12_months': 380,
 'updrs_3_plus_24_months': 460,
 'updrs_4_plus_0_months': 350,
 'updrs_4_plus_6_months': 150,
 'updrs_4_plus_12_months': 100,
 'updrs_4_plus_24_months': 150}

# 4 Accuracy evaluation & Parameter tuning

In [30]:
# Parameters found beforehand with Optuna, passed in here instead of re-running the search.
# Each target maps to (n_estimators, learning_rate, max_depth); the objective and the
# GPU settings are the same for every target.
_tuned_values = {
    'updrs_1_plus_0_months': (712, 0.1825861856111673, 4),
    'updrs_1_plus_6_months': (951, 0.12581629993622995, 5),
    'updrs_1_plus_12_months': (939, 0.05787928028077631, 5),
    'updrs_1_plus_24_months': (837, 0.08854302794494315, 4),
    'updrs_2_plus_0_months': (610, 0.0894439390658949, 4),
    'updrs_2_plus_6_months': (663, 0.07115935468546157, 4),
    'updrs_2_plus_12_months': (570, 0.15974053275158284, 6),
    'updrs_2_plus_24_months': (899, 0.1137330871057121, 6),
    'updrs_3_plus_0_months': (955, 0.1618812729769501, 3),
    'updrs_3_plus_6_months': (711, 0.07387160457380607, 3),
    'updrs_3_plus_12_months': (835, 0.04652113781828601, 2),
    'updrs_3_plus_24_months': (895, 0.07446481127214379, 2),
    'updrs_4_plus_0_months': (810, 0.1806091603954042, 4),
    'updrs_4_plus_6_months': (876, 0.15296781926342792, 9),
    'updrs_4_plus_12_months': (910, 0.14071882739394206, 3),
    'updrs_4_plus_24_months': (806, 0.06290597910272058, 3),
}

optuna_params = {
    target_name: {
        'n_estimators': n_trees,
        'learning_rate': rate,
        'max_depth': depth,
        'objective': 'reg:squarederror',
        'tree_method': 'gpu_hist',
        'gpu_id': 0,
    }
    for target_name, (n_trees, rate, depth) in _tuned_values.items()
}



In [31]:
# def optunate_feature_number(X, y, u, optuna_params,feature_number):
#     # ダミー化を行う
#     X = pd.get_dummies(X, columns=X.select_dtypes(include='object').columns)  

#     params = optuna_params[u]
#     model, smape_test, mse_test, select_X, _ = model_train_operation(X, y, u, params=params,
#                                                                                feature_number=feature_number)
# #         smape_metrics_dict[u] = smape_test
# #         mae_metrics_dict[u] = mse_test
#     return smape_test, mse_test, feature_number

# tg_target = 'updrs_1_plus_6_months'
# optu_num_smape={}
# optu_num_mse={}
# for i in range(500,600, 1):
    
#     smape_test, mse_test, feature_number = optunate_feature_number(X=X, y=y, u=tg_target, optuna_params = optuna_params,feature_number = i)
    
#     optu_num_smape[feature_number] = smape_test
#     optu_num_mse[feature_number] = mse_test

# # valueが最小のkeyを取得
# min_smape_value_key = min(optu_num_smape, key=optu_num_smape.get)
# print('== SMAPE ===')
# print('feature number : ',min_smape_value_key)
# print('smape score : ',optu_num_smape[min_smape_value_key])

# # valueが最小のkeyを取得
# min_mse_value_key = min(optu_num_mse, key=optu_num_mse.get)
# print('== MSE ===')
# print('feature number : ',min_mse_value_key)
# print('mse score : ',optu_num_mse[min_mse_value_key])

In [32]:
# tg_target = 'updrs_3_plus_0_months'
# optu_num_smape={}
# optu_num_mse={}
# for i in range(1180,1200, 1):
    
#     smape_test, mse_test, feature_number = optunate_feature_number(X=X, y=y, u=tg_target, optuna_params = optuna_params,feature_number = i)
    
#     optu_num_smape[feature_number] = smape_test
#     optu_num_mse[feature_number] = mse_test

# # valueが最小のkeyを取得
# min_smape_value_key = min(optu_num_smape, key=optu_num_smape.get)
# print('== SMAPE ===')
# print('feature number : ',min_smape_value_key)
# print('smape score : ',optu_num_smape[min_smape_value_key])

# # valueが最小のkeyを取得
# min_mse_value_key = min(optu_num_mse, key=optu_num_mse.get)
# print('== MSE ===')
# print('feature number : ',min_mse_value_key)
# print('mse score : ',optu_num_mse[min_mse_value_key])

In [33]:
# tg_target = 'updrs_3_plus_6_months'
# optu_num_smape={}
# optu_num_mse={}
# for i in range(580,620, 1):
    
#     smape_test, mse_test, feature_number = optunate_feature_number(X=X, y=y, u=tg_target, optuna_params = optuna_params,feature_number = i)
    
#     optu_num_smape[feature_number] = smape_test
#     optu_num_mse[feature_number] = mse_test

# # valueが最小のkeyを取得
# min_smape_value_key = min(optu_num_smape, key=optu_num_smape.get)
# print('== SMAPE ===')
# print('feature number : ',min_smape_value_key)
# print('smape score : ',optu_num_smape[min_smape_value_key])

# # valueが最小のkeyを取得
# min_mse_value_key = min(optu_num_mse, key=optu_num_mse.get)
# print('== MSE ===')
# print('feature number : ',min_mse_value_key)
# print('mse score : ',optu_num_mse[min_mse_value_key])

In [34]:
# tg_target = 'updrs_3_plus_12_months'
# optu_num_smape={}
# optu_num_mse={}
# for i in range(360,400, 1):
    
#     smape_test, mse_test, feature_number = optunate_feature_number(X=X, y=y, u=tg_target, optuna_params = optuna_params,feature_number = i)
    
#     optu_num_smape[feature_number] = smape_test
#     optu_num_mse[feature_number] = mse_test

# # valueが最小のkeyを取得
# min_smape_value_key = min(optu_num_smape, key=optu_num_smape.get)
# print('== SMAPE ===')
# print('feature number : ',min_smape_value_key)
# print('smape score : ',optu_num_smape[min_smape_value_key])

# # valueが最小のkeyを取得
# min_mse_value_key = min(optu_num_mse, key=optu_num_mse.get)
# print('== MSE ===')
# print('feature number : ',min_mse_value_key)
# print('mse score : ',optu_num_mse[min_mse_value_key])

In [35]:
# Train one model per target with its tuned parameters and feature count, and
# collect the hold-out metrics returned by model_train_operation.
smape_metrics_dict = {}
mae_metrics_dict = {}  # NOTE: despite the name, this stores the MSE values.

# One-hot encode the object-typed columns once; the result is the same for every
# target, so this does not need to be repeated inside the loop.
X = pd.get_dummies(X, columns=X.select_dtypes(include='object').columns)

for u in target:
    feature_number = feature_number_dict[u]
    params = optuna_params[u]

    # NOTE(review): the fitted model and the selected feature names are not stored
    # anywhere in this cell - confirm whether models_dict was meant to receive them.
    model, smape_test, mse_test, select_X, _ = model_train_operation(X, y, u, params=params,
                                                                     feature_number=feature_number)
    smape_metrics_dict[u] = smape_test
    mae_metrics_dict[u] = mse_test

In [36]:
# Show the per-target SMAPE collected during training, then their mean.
print(smape_metrics_dict)
average_dict_val(smape_metrics_dict)

{'updrs_1_plus_0_months': 22.22222222222222, 'updrs_1_plus_6_months': 100.0, 'updrs_1_plus_12_months': 40.0, 'updrs_1_plus_24_months': 142.85714285714286, 'updrs_2_plus_0_months': 133.33333333333331, 'updrs_2_plus_6_months': 133.33333333333331, 'updrs_2_plus_12_months': 0.0, 'updrs_2_plus_24_months': 100.0, 'updrs_3_plus_0_months': 8.0, 'updrs_3_plus_6_months': 8.0, 'updrs_3_plus_12_months': 8.695652173913043, 'updrs_3_plus_24_months': 53.333333333333336, 'updrs_4_plus_0_months': 200.0, 'updrs_4_plus_6_months': 0.0, 'updrs_4_plus_12_months': 200.0, 'updrs_4_plus_24_months': 0.0}
Average value: 71.86


In [37]:
# Show the second per-target metric, then its mean.
# NOTE: the training loop fills mae_metrics_dict with mse_test, so these are MSE values, not MAE.
print(mae_metrics_dict)
average_dict_val(mae_metrics_dict)

{'updrs_1_plus_0_months': 1.9749971457204083, 'updrs_1_plus_6_months': 18.76347898887707, 'updrs_1_plus_12_months': 1.3530495414763664, 'updrs_1_plus_24_months': 21.706378003366126, 'updrs_2_plus_0_months': 14.082963753389777, 'updrs_2_plus_6_months': 17.33846740978356, 'updrs_2_plus_12_months': 0.13189896310956328, 'updrs_2_plus_24_months': 3.14988817947102, 'updrs_3_plus_0_months': 0.29574890506864904, 'updrs_3_plus_6_months': 0.885455844050739, 'updrs_3_plus_12_months': 0.5080240581819453, 'updrs_3_plus_24_months': 66.91552545153536, 'updrs_4_plus_0_months': 0.28491412992184806, 'updrs_4_plus_6_months': 0.004876867873395918, 'updrs_4_plus_12_months': 1.8728649945542344, 'updrs_4_plus_24_months': 0.019391880919157112}
Average value: 9.33


In [38]:
# print(len(X.columns))
# print(X.columns)

# 5 Submission Prediction

In [39]:
from sklearn.preprocessing import MinMaxScaler,  RobustScaler, StandardScaler
from sklearn.model_selection import GroupKFold, GroupShuffleSplit
from sklearn.svm import SVR, LinearSVR
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression,PoissonRegressor
from sklearn.model_selection import cross_validate,cross_val_predict
from sklearn.metrics import make_scorer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaggingRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.ensemble import RandomForestRegressor
import lightgbm as lgb
import amp_pd_peptide

# ---------------------------------------------------------------------------
# Submission loop.
#
# For every batch served by the competition API this cell:
#   1. parses `prediction_id` into patient / visit / target / horizon columns,
#   2. builds a baseline rating from the per-target month trend plus the
#      protein (NPX) range shifts,
#   3. retrains the per-target models and blends their predictions into the
#      baseline for the targets listed in `test_target`,
#   4. submits the batch through `env.predict`.
# ---------------------------------------------------------------------------

# Blend weights applied to the final rating (they sum to 1.0). Models with a
# weight of 0.00 are still fitted and evaluated, but add nothing to the final rating.
BLEND_WEIGHT_TREND = 0.7     # trend + NPX-shift baseline
BLEND_WEIGHT_MAIN = 0.24     # model returned by model_train_operation
BLEND_WEIGHT_LSVR = 0.03     # model_1
BLEND_WEIGHT_POISSON = 0.00  # model_2
BLEND_WEIGHT_SVR = 0.03      # model_3
BLEND_WEIGHT_LR = 0.00       # model_4
BLEND_WEIGHT_RF = 0.00       # model_5

# Reset the `__called__` flag stored on make_env before creating the
# environment (presumably so this cell can be re-run in the same kernel).
amp_pd_peptide.make_env.func_dict['__called__'] = False
env = amp_pd_peptide.make_env()   # initialize the environment
iter_test = env.iter_test()

# Metrics are re-collected here, so these dicts only hold the retrained targets.
smape_metrics_dict = {}
mae_metrics_dict = {}
# Protein features accumulated over all batches seen so far.
proteins_features_all = pd.DataFrame()

for iteration, (df_test_cli, df_test_pep, df_test_pro, sample_submission) in enumerate(iter_test):
    print(f"■イテレーション{iteration}回目")

    # prediction_id is split on '_' : fields 0, 1, 3 and 5 carry the patient,
    # the visit month, the UPDRS part number and the forecast horizon.
    sample_submission['patient_id'] = sample_submission['prediction_id'].map(lambda x: int(x.split('_')[0]))
    sample_submission['visit_month'] = sample_submission['prediction_id'].map(lambda x: int(x.split('_')[1]))
    sample_submission['target_name'] = sample_submission['prediction_id'].map(lambda x: 'updrs_' + x.split('_')[3])
    sample_submission['plus_month'] = sample_submission['prediction_id'].map(lambda x: int(x.split('_')[5]))
    sample_submission['pred_month'] = sample_submission['visit_month'] + sample_submission['plus_month']
    sample_submission['visit_id'] = sample_submission['patient_id'].astype(str) + '_' + sample_submission['visit_month'].astype(str)

    # One row per visit, one column per protein (NPX summed per UniProt id).
    proteins_features = pd.pivot_table(df_test_pro, values='NPX', index='visit_id', columns='UniProt', aggfunc='sum')
    proteins_features['visit_id'] = proteins_features.index
    proteins_features_all = pd.concat([proteins_features_all, proteins_features])
    proteins_features_all['patient_id'] = proteins_features_all.index.map(lambda x: int(x.split('_')[0]))

    # Forward-fill within each patient, then keep the last row per patient.
    # `.ffill()` replaces the deprecated `fillna(method='ffill')`.
    feature_cols = proteins_features.columns
    proteins_features_all[feature_cols] = (
        proteins_features_all.groupby('patient_id')[feature_cols].ffill()
    )
    proteins_features = proteins_features_all.groupby('patient_id', as_index=False).last()

    # Both frames carry a `visit_id` column, so the merge suffixes them
    # (`visit_id_x` / `visit_id_y`); neither is read again below.
    sample_submission = sample_submission.merge(
        proteins_features,
        on='patient_id',
        how='left'
    )

    # Baseline rating: month trend per UPDRS part, shifted by NPX value ranges.
    for i in range(1, 5):
        target_i = f'updrs_{i}'
        mask_target = sample_submission['target_name'] == target_i
        sample_submission.loc[mask_target, 'rating'] = calculate_month_trend_predicitons(
            pred_month=sample_submission.loc[mask_target, 'pred_month'],
            trend=target_to_trend[target_i],
            target=target_i
        )

        for item in target_to_npx_groups_shift[target_i]:
            feature = item['feature']
            # Half-open interval: low <= value < high.
            mask_feature_range = mask_target & (
                (sample_submission[feature] >= item['quantile_low_value'])
                & (sample_submission[feature] < item['quantile_high_value'])
            )
            sample_submission.loc[mask_feature_range, 'rating'] += item['shift']

        sample_submission.loc[mask_target, 'rating'] = np.round(sample_submission.loc[mask_target, 'rating'])

    df_test = data_test_transform(df_test_cli, df_test_pep, df_test_pro)
    df_test = df_test.set_index('visit_id')
    df_test, df_train = data_columns_remain(df_test, X)
    # Only these targets get the model blend; all others keep the baseline.
    test_target = target[1:2] + target[8:11]

    # One-hot encode the categorical variables.
    # Temporarily concatenate train and test so both get the same dummy columns.
    df_train['is_train'] = 1
    df_test['is_train'] = 0
    combined = pd.concat([df_train, df_test], axis=0)
    # Encode every object-dtype column.
    combined_dummies = pd.get_dummies(combined, columns=combined.select_dtypes(include='object').columns)
    # Split back into the train and test sets.
    df_train = combined_dummies[combined_dummies['is_train'] == 1].drop('is_train', axis=1)
    df_test = combined_dummies[combined_dummies['is_train'] == 0].drop('is_train', axis=1)

    for u in test_target:

        params = optuna_params[u]
        feature_number = feature_number_dict[u]

        model, smape_test, mse_test, select_X, _ = model_train_operation(df_train, y, u, params,
                                                                         feature_number=feature_number)

        smape_metrics_dict[u] = smape_test
        # NOTE(review): stored under a "mae" name but sourced from `mse_test` -
        # confirm which metric model_train_operation actually returns here.
        mae_metrics_dict[u] = mse_test

        model_1 = Pipeline([
            ('scaler', RobustScaler()),
            ('lsvr', LinearSVR(random_state=2023))
        ])
        model_2 = Pipeline([('scaler', MinMaxScaler()),
                            ('poisson', PoissonRegressor())])
        model_3 = Pipeline([('scaler', RobustScaler()),
                            ('svr_rbf', SVR())])
        model_4 = Pipeline([('scaler', StandardScaler()),
                            ('lr', LinearRegression())])
        model_5 = RandomForestRegressor(n_estimators=500, max_depth=3, random_state=2023)

        # Fit the auxiliary models on the features selected for this target.
        train_features = df_train[select_X]
        for aux_model in (model_1, model_2, model_3, model_4, model_5):
            aux_model.fit(train_features, y[u])

        # (weight, estimator) pairs, in the order they are added to the blend.
        blend_members = [
            (BLEND_WEIGHT_MAIN, model),
            (BLEND_WEIGHT_LSVR, model_1),
            (BLEND_WEIGHT_POISSON, model_2),
            (BLEND_WEIGHT_SVR, model_3),
            (BLEND_WEIGHT_LR, model_4),
            (BLEND_WEIGHT_RF, model_5),
        ]

        for visit_id in df_test.index:

            prediction_id = visit_id + '_' + u
            # Build the feature slice and the target-row mask once per visit.
            visit_features = df_test.loc[df_test.index == visit_id, select_X]
            row_mask = sample_submission['prediction_id'] == prediction_id

            # Accumulate left to right so the floating-point summation order
            # matches the written-out weighted sum this replaces.
            blended = BLEND_WEIGHT_TREND * sample_submission.loc[row_mask, 'rating']
            for weight, estimator in blend_members:
                # Use the first predicted value, clipped at zero.
                rating = max(estimator.predict(visit_features)[0], 0)
                blended = blended + weight * float(rating)

            # Single .loc assignment: chained indexing
            # (`df['rating'][mask] = ...`) writes through an intermediate
            # object and is a silent no-op under pandas Copy-on-Write.
            sample_submission.loc[row_mask, 'rating'] = blended

    env.predict(sample_submission[['prediction_id', 'rating']])

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
■イテレーション0回目
■イテレーション1回目


In [40]:
# SMAPE per target as re-recorded by the submission cell. The dict was reset
# there, so it only holds the targets listed in `test_target`; the mean is
# printed by average_dict_val.
print(smape_metrics_dict)
average_dict_val(smape_metrics_dict)

{'updrs_1_plus_6_months': 100.0, 'updrs_3_plus_0_months': 8.0, 'updrs_3_plus_6_months': 8.0, 'updrs_3_plus_12_months': 8.695652173913043}
Average value: 31.17


In [41]:
# Second metric per target as re-recorded by the submission cell (only the
# targets listed in `test_target`), followed by its mean.
# NOTE(review): named "mae" but filled from `mse_test` - confirm the metric.
print(mae_metrics_dict)
average_dict_val(mae_metrics_dict)

{'updrs_1_plus_6_months': 18.76347898887707, 'updrs_3_plus_0_months': 0.29574890506864904, 'updrs_3_plus_6_months': 0.885455844050739, 'updrs_3_plus_12_months': 0.5080240581819453}
Average value: 5.11


In [42]:
# Targets that were retrained and blended in the submission cell
# (built there as target[1:2] + target[8:11]).
test_target

['updrs_1_plus_6_months',
 'updrs_3_plus_0_months',
 'updrs_3_plus_6_months',
 'updrs_3_plus_12_months']