# ClinicalUPDRS

### Pdkit uses the clinical data to calculates classifiers implementing the k-nearest neighbors vote.

In [1]:
import pdkit
import pandas as pd



  from pandas.core import datetools


In [2]:
# Read the clinicians score (labels for the supervised learning)
classification_clinical = pd.read_csv('./data/classification_clinical.csv').sort_values(by=['id'])
classification_clinical

Unnamed: 0,id,MDS_UPDRSIII,OT-RH,OT-LH,TT-RH,TT-LH,PS-RH,PS-LH,LA-RL,LA-LL,PTOTH-RH,PTOTH-LH,KTOH-RH,KTOH-LH,RTA-RH,RTA-LH,RTA-RL,RTA-LL
0,2458,OFF,3,3,3,3,3,3,3,3,1,1,1,1,2,1,0,0
1,2459,ON,1,2,1,2,2,2,1,1,0,1,1,0,1,1,0,0
2,2504,OFF,1,0,1,0,1,0,0,0,4,2,3,1,4,1,1,1
3,2505,ON,1,0,1,0,1,0,0,0,4,0,2,1,4,1,0,0
4,2656,OFF,4,4,4,4,4,3,2,2,4,3,3,2,0,0,0,0
5,2675,OFF,3,3,3,3,2,2,1,1,3,1,2,1,3,2,2,2
6,2678,OFF,2,4,2,4,2,4,2,3,1,2,1,3,0,2,0,0
7,2679,ON,1,1,1,1,0,1,1,1,0,1,0,1,0,1,0,0
9,2688,ON,2,1,2,1,1,1,0,1,0,0,0,0,0,0,0,0
8,2698,OFF,3,2,3,2,2,2,2,2,2,2,0,0,2,3,0,0


In [3]:
# Instance to the ClinicalUPDRS Class 
# labels_file_path is the path to the labels file
# data_frame_file_path is the testResultSetFile, we can also provide the data_frame of the testResultSet
clinical_UPDRS = pdkit.Clinical_UPDRS(labels_file_path='./data/classification_clinical.csv', data_frame_file_path='./tests/data/cussp_dataframe.csv')

# single KNeighborsClassifier per observation
#clinical_UPDRS.knns

In [4]:
# Lets read some random measurement from the data to predict the UPDRS score
dataframe = pd.read_csv('./data/cussp_dataframe.csv')
dataframe = dataframe.fillna(dataframe.mean())
dataframe = dataframe.sort_values(by=['id'])
measurement = dataframe.loc[[2]]
measurement

Unnamed: 0,id,LA-LL-amplitude_by_fft,LA-LL-frequency_by_fft,LA-LL-amplitude_by_welch,LA-LL-frequency_by_welch,LA-LL-bradykinesia_amplitude_by_fft,LA-LL-bradykinesia_frequency_by_fft,LA-LL-bradykinesia_amplitude_by_welch,LA-LL-bradykinesia_frequency_by_welch,LA-LL-magnitude_approximate_entropy,...,OT-LH-kinesia_scores,OT-LH-akinesia_times,OT-LH-dysmetria_score,TT-RH-frequency,TT-RH-mean_moving_time,TT-RH-incoordination_score,TT-RH-mean_alnt_target_distance,TT-RH-kinesia_scores,TT-RH-akinesia_times,TT-RH-dysmetria_score
2,2459,1.452961,2.734375,1.866645,2.34375,1.452961,2.734375,1.868093,2.34375,0.363611,...,249,0.118494,34.185506,2.18441,341.613636,0.324676,648.415327,133,0.118744,65.916961


In [5]:
# Do a prediction using ClinicalUPDRS
pred = clinical_UPDRS.predict(measurement)
pred

array([1, 0, 3, 4, 2, 2, 1, 1, 4, 0, 2, 1, 0, 0, 0, 0])

In [6]:
# Just for the sake of it, let's see how good is this prediction
a = classification_clinical.loc[[1]]
a

Unnamed: 0,id,MDS_UPDRSIII,OT-RH,OT-LH,TT-RH,TT-LH,PS-RH,PS-LH,LA-RL,LA-LL,PTOTH-RH,PTOTH-LH,KTOH-RH,KTOH-LH,RTA-RH,RTA-LH,RTA-RL,RTA-LL
1,2459,ON,1,2,1,2,2,2,1,1,0,1,1,0,1,1,0,0


In [7]:
# Estimate the percentage of similarity between measurement and prediction
a = a.drop(['id','MDS_UPDRSIII'], axis=1).values
b=pred
print('Perc of similarity: ',1 - (a != b).sum()/float(a.size),'%')

Perc of similarity:  0.4375 %
