Important Note: This code will not replicate the lower-dimensional representation used in 'Discovery of Parkinson’s disease states using a machine learning approach'. If you would like to use the specific model from the paper, please reach out to the authors.

In [None]:
import pickle
import sys

import numpy as np
import pandas as pd

# scikit-learn 0.23 removed the vendored ``sklearn.externals.joblib``. Prefer the
# standalone ``joblib`` package and fall back to the vendored copy on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

### place path to contrastive-LVM repo here
sys.path.append('path-to-repo')

from clvm_tfp import clvm
from apply_clvm_tfp import apply_clvm

In [None]:
# Load the two processed tables; both CSVs are generated by
# 'Full_Data_Processing.ipynb'. Paths are relative to the working directory.
pd_data, hc_data = (pd.read_csv(csv_name) for csv_name in ('pd_on.csv', 'hc.csv'))

In [None]:
# Read the pickled train/test split into `pids`.
# NOTE: unpickling can execute arbitrary code; only load split files you trust.
with open('train_test_split.pkl', mode='rb') as split_file:
    pids = pickle.load(split_file)

In [None]:
# PATNO's for training
ti = pids['train_ids']

# Set up training data:
#   target     - rows of pd_data whose PATNO is in the training split
#   background - every row of hc_data (no split is applied to it here)
target = pd_data[pd_data['PATNO'].isin(ti)]
background = hc_data

In [None]:
# Columns removed from the model inputs. Kept in ONE de-duplicated list so the
# target and background frames always drop exactly the same features. The list
# used to be copy-pasted per frame, with 'ON_OFF_DOSE' and 'PD_MED_USE' listed
# twice; the set of dropped columns is unchanged.
excluded_columns = [
    'NHY', 'ON_OFF_DOSE', 'DYSKPRES', 'DYSKIRAT', 'PD_MED_USE', 'Unnamed: 0_y',
    'GDS_Depressed', 'UPSIT_TOT', 'ESS_Sleepy', 'PAG_NAME', 'CMEDTM',
    'EXAMTM', 'ANNUAL_TIME_BTW_DOSE_NUPDRS',
    'NP4WDYSK', 'NP4DYSKI', 'NP4OFF', 'NP4FLCTI', 'NP4FLCTX', 'NP4DYSTN',
]

# Take the positional column window 5..105, then remove the excluded columns.
# `drop` raises KeyError if any excluded column is missing from that window.
# NOTE(review): the positional slice assumes both CSVs share the same column
# order - confirm against 'Full_Data_Processing.ipynb'.
t_pre = target.iloc[:, 5:106].drop(columns=excluded_columns)
b_pre = background.iloc[:, 5:106].drop(columns=excluded_columns)

In [None]:
# Raw feature matrices as NumPy arrays.
t_train, b_train = t_pre.values, b_pre.values

# z-score, ignoring NaNs:
#   - each group is centred on its OWN per-column mean
#   - both groups are scaled by the per-column std of the pooled (target + background) rows
# t_mean and t_std are kept so held-out target data can reuse them.
# NOTE(review): a column with zero pooled std yields inf/NaN after the division.
t_mean = np.nanmean(t_train, axis=0)
t_std = np.nanstd(np.vstack((t_train, b_train)), axis=0)

t_train = (t_train - t_mean) / t_std
b_train = (b_train - np.nanmean(b_train, axis=0)) / t_std

In [None]:
# learn model parameters
# Build the contrastive latent variable model from the standardized target and
# background matrices.
# NOTE(review): the two positional 40s are presumably the shared and
# target-specific latent dimensionalities - confirm against clvm_tfp.
# NOTE(review): target_missing/background_missing=True presumably tell the model
# that NaN entries are missing values - confirm against clvm_tfp.
model = clvm(t_train, b_train, 40, 40, robust_flag=True, sharedARD=True, targetARD=True, 
             target_missing=True, background_missing=True)
# Fit by variational inference for 15000 epochs with seed 9.
# NOTE(review): `fn` and `fp` look like the output file-name stem and output
# path - confirm how clvm_tfp combines them.
model.variational_inference(num_epochs=15000, seed=9, fn='cLVM_model', fp='./', saveGraph=True)

In [None]:
# PATNO's for the held-out test split
ti_test = pids['test_ids']

# Rows of pd_data whose PATNO is in the test split
test = pd_data[pd_data['PATNO'].isin(ti_test)]

In [None]:
# Columns removed from the model inputs, de-duplicated: 'ON_OFF_DOSE' and
# 'PD_MED_USE' used to be listed twice. The set of dropped columns is unchanged.
test_excluded_columns = [
    'NHY', 'ON_OFF_DOSE', 'DYSKPRES', 'DYSKIRAT', 'PD_MED_USE', 'Unnamed: 0_y',
    'GDS_Depressed', 'UPSIT_TOT', 'ESS_Sleepy', 'PAG_NAME', 'CMEDTM',
    'EXAMTM', 'ANNUAL_TIME_BTW_DOSE_NUPDRS',
    'NP4WDYSK', 'NP4DYSKI', 'NP4OFF', 'NP4FLCTI', 'NP4FLCTX', 'NP4DYSTN',
]

# Take the positional column window 5..105, then remove the excluded columns.
# `drop` raises KeyError if any excluded column is missing from that window.
t_pre_test = test.iloc[:, 5:106].drop(columns=test_excluded_columns)

In [None]:
# Standardize the held-out target rows with the statistics already computed on
# the training data (t_mean, t_std); nothing is re-estimated from the test set.
t_test = (t_pre_test.values - t_mean) / t_std

# No background data is prepared for the test step.
b_test = None

In [None]:
# load trained model, this was automatically saved when training the model above
# The path is relative to the current working directory.
# NOTE(review): the file name looks like it is built from fn='cLVM_model', seed 9
# and the final iteration index 14999 - confirm against clvm_tfp, and update it
# if fn, seed or num_epochs change in the training call.
trained_clvm_model = joblib.load('resultscLVM_model9iter14999.pkl')

In [None]:
# apply model to test data
# Wrap the trained model with the standardized held-out target matrix.
# Note that this rebinds `model`, so the training-time object is no longer
# reachable under that name after this cell runs.
# NOTE(review): target_missing=True presumably marks NaN entries as missing
# values - confirm against apply_clvm_tfp.
model = apply_clvm(trained_clvm_model, t_test, target_missing=True)
# Run variational inference for 15000 epochs with seed 9; saveGraph is disabled here.
model.variational_inference(num_epochs=15000, seed=9, fn='cLVM_model_test', fp='./', saveGraph=False)