In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import os
from math import sqrt
from math import ceil

In [None]:
# Directories (relative to the working directory) whose files are listed in the
# next cell and later parsed one by one by generate_features().
control_data_path=os.path.join('data', 'control') #Path of control data folder
parkinson_data_path=os.path.join('data', 'parkinson') #Path of parkinson data folder

In [None]:
# os.listdir() returns entries in arbitrary order. Sorting keeps the row order of
# the feature matrix (and therefore oversampling and CV splits downstream)
# identical across machines and runs.
control_file_list=[os.path.join(control_data_path, x) for x in sorted(os.listdir(control_data_path))] #Path of control data files
parkinson_file_list=[os.path.join(parkinson_data_path, x) for x in sorted(os.listdir(parkinson_data_path))] #Path of parkinson data files

In [None]:
# Column names assigned to the header-less, ';'-separated recording files when
# they are read with pd.read_csv in generate_features() / sample_ploting().
header_row=["X", "Y", "Z", "Pressure" , "GripAngle" , "Timestamp" , "Test_ID"]
# Column names of the feature table built in get_features(). The order must
# match the order in which generate_features() appends values: stroke counts,
# speeds, 45 kinematic statistics for the static test (_ST), 45 for the dynamic
# test (_DY), NCV and NCA statistics, contact/in-air sample counts, and finally
# the class label 'TARGET' (118 names in total).
features_headers=['NO_STROKES_ST', 'NO_STROKES_DY', 'SPEED_ST', 'SPEED_DY', 
                  'VEL_MEAN_ST', 'VEL_MEDIAN_ST', 'VEL_STD_ST', 'VEL_PERCENTILE_1_ST', 'VEL_PERCENTILE_99_ST', 
                  'HOR_VEL_MEAN_ST', 'HOR_VEL_MEDIAN_ST', 'HOR_VEL_STD_ST', 'HOR_VEL_PERCENTILE_1_ST', 'HOR_VEL_PERCENTILE_99_ST', 
                  'VERT_VEL_MEAN_ST', 'VERT_VEL_MEDIAN_ST', 'VERT_VEL_STD_ST', 'VERT_VEL_PERCENTILE_1_ST', 'VERT_VEL_PERCENTILE_99_ST', 
                  'ACCL_MEAN_ST', 'ACCL_MEDIAN_ST', 'ACCL_STD_ST', 'ACCL_PERCENTILE_1_ST', 'ACCL_PERCENTILE_99_ST',
                  'HOR_ACCL_MEAN_ST', 'HOR_ACCL_MEDIAN_ST', 'HOR_ACCL_STD_ST', 'HOR_ACCL_PERCENTILE_1_ST', 'HOR_ACCL_PERCENTILE_99_ST', 
                  'VERT_ACCL_MEAN_ST', 'VERT_ACCL_MEDIAN_ST', 'VERT_ACCL_STD_ST', 'VERT_ACCL_PERCENTILE_1_ST', 'VERT_ACCL_PERCENTILE_99_ST', 
                  'JERK_MEAN_ST', 'JERK_MEDIAN_ST', 'JERK_STD_ST', 'JERK_PERCENTILE_1_ST', 'JERK_PERCENTILE_99_ST', 
                  'HOR_JERK_MEAN_ST', 'HOR_JERK_MEDIAN_ST', 'HOR_JERK_STD_ST', 'HOR_JERK_PERCENTILE_1_ST', 'HOR_JERK_PERCENTILE_99_ST', 
                  'VERT_JERK_MEAN_ST', 'VERT_JERK_MEDIAN_ST', 'VERT_JERK_STD_ST', 'VERT_JERK_PERCENTILE_1_ST', 'VERT_JERK_PERCENTILE_99_ST', 
                  'VEL_MEAN_DY', 'VEL_MEDIAN_DY', 'VEL_STD_DY', 'VEL_PERCENTILE_1_DY', 'VEL_PERCENTILE_99_DY', 
                  'HOR_VEL_MEAN_DY', 'HOR_VEL_MEDIAN_DY', 'HOR_VEL_STD_DY', 'HOR_VEL_PERCENTILE_1_DY', 'HOR_VEL_PERCENTILE_99_DY', 
                  'VERT_VEL_MEAN_DY', 'VERT_VEL_MEDIAN_DY', 'VERT_VEL_STD_DY', 'VERT_VEL_PERCENTILE_1_DY', 'VERT_VEL_PERCENTILE_99_DY', 
                  'ACCL_MEAN_DY', 'ACCL_MEDIAN_DY', 'ACCL_STD_DY', 'ACCL_PERCENTILE_1_DY', 'ACCL_PERCENTILE_99_DY', 
                  'HOR_ACCL_MEAN_DY', 'HOR_ACCL_MEDIAN_DY', 'HOR_ACCL_STD_DY', 'HOR_ACCL_PERCENTILE_1_DY', 'HOR_ACCL_PERCENTILE_99_DY', 
                  'VERT_ACCL_MEAN_DY', 'VERT_ACCL_MEDIAN_DY', 'VERT_ACCL_STD_DY', 'VERT_ACCL_PERCENTILE_1_DY', 'VERT_ACCL_PERCENTILE_99_DY', 
                  'JERK_MEAN_DY', 'JERK_MEDIAN_DY', 'JERK_STD_DY', 'JERK_PERCENTILE_1_DY', 'JERK_PERCENTILE_99_DY', 
                  'HOR_JERK_MEAN_DY', 'HOR_JERK_MEDIAN_DY', 'HOR_JERK_STD_DY', 'HOR_JERK_PERCENTILE_1_DY', 'HOR_JERK_PERCENTILE_99_DY', 
                  'VERT_JERK_MEAN_DY', 'VERT_JERK_MEDIAN_DY', 'VERT_JERK_STD_DY', 'VERT_JERK_PERCENTILE_1_DY', 'VERT_JERK_PERCENTILE_99_DY',
                  'NCV_MEAN_ST', 'NCV_MEDIAN_ST', 'NCV_STD_ST', 'NCV_PERCENTILE_1_ST', 'NCV_PERCENTILE_99_ST', 
                  'NCV_MEAN_DY', 'NCV_MEDIAN_DY', 'NCV_STD_DY', 'NCV_PERCENTILE_1_DY', 'NCV_PERCENTILE_99_DY', 
                  'NCA_MEAN_ST', 'NCA_MEDIAN_ST', 'NCA_STD_ST', 'NCA_PERCENTILE_1_ST', 'NCA_PERCENTILE_99_ST', 
                  'NCA_MEAN_DY', 'NCA_MEDIAN_DY', 'NCA_STD_DY', 'NCA_PERCENTILE_1_DY', 'NCA_PERCENTILE_99_DY',
                  'IN_AIR_STCP', 'ON_SURFACE_ST',  'ON_SURFACE_DY',
                  'TARGET']

In [None]:
def get_stat_data(values):
    """Summarise a sequence of numbers with five descriptive statistics.

    Args:
        values: array-like of numbers.

    Returns:
        list: ``[mean, median, std, 1st percentile, 99th percentile]`` as
        computed by NumPy (``np.std`` with its default ``ddof=0``).
    """
    reducers = (
        np.mean,
        np.median,
        np.std,
        lambda v: np.percentile(v, 1),
        lambda v: np.percentile(v, 99),
    )
    return [reduce_(values) for reduce_ in reducers]

In [None]:
class KineticsData:
    """Container for one kinematic quantity (velocity, acceleration or jerk).

    Every attribute starts as its own empty list and is filled sample by
    sample by the kinematics functions in this notebook.
    """

    def __init__(self):
        # (horizontal, vertical) pairs and the two components on their own
        self.val, self.hor_val, self.ver_val = [], [], []
        # Euclidean magnitude of the pair and absolute value of each component
        self.magnitude, self.hor_magnitude, self.ver_magnitude = [], [], []

In [None]:
def get_no_strokes(df):
    """Count changes of the pen-on-surface state in a recording.

    A sample counts as "on surface" when its ``Pressure`` is above 600. Each
    sample is compared with the previous one, and the first sample is compared
    with the last one because ``np.roll`` wraps around.

    Args:
        df: DataFrame with a ``Pressure`` column.

    Returns:
        Number of samples whose on-surface flag differs from its predecessor.
    """
    touching = (df['Pressure'].values > 600).astype(int)
    previous = np.roll(touching, 1)  # circular shift: index 0 sees the last sample
    return np.sum(previous != touching)

In [None]:
def get_speed(df):
    """Return accumulated 10-sample displacement divided by the last timestamp.

    For every row ``i >= 10`` the Euclidean distance between the (X, Y, Z)
    position at ``i`` and at ``i - 10`` is added up (the windows overlap), and
    the total is divided by the final ``Timestamp`` value.

    Args:
        df: non-empty DataFrame with ``X``, ``Y``, ``Z`` and ``Timestamp``.

    Returns:
        The ratio described above. NOTE(review): treating the last timestamp as
        a duration presumes timestamps start at zero - confirm with the caller.
    """
    elapsed = df['Timestamp'].values[-1]
    points = df[['X', 'Y', 'Z']].values
    travelled = sum(
        np.linalg.norm(points[i, :] - points[i - 10, :])
        for i in range(10, df.shape[0])
    )
    return travelled / elapsed

In [None]:
def get_in_air_time(data):
    """Count the samples whose ``Pressure`` is strictly below 600.

    Args:
        data: DataFrame with a ``Pressure`` column.

    Returns:
        Number of matching rows (a sample count, not a duration in time units).
    """
    pressure = data['Pressure'].values
    return np.sum(pressure < 600)

In [None]:
def get_on_surface_time(data):
    """Count the samples whose ``Pressure`` is strictly above 600.

    Args:
        data: DataFrame with a ``Pressure`` column.

    Returns:
        Number of matching rows (a sample count, not a duration in time units).
    """
    pressure = data['Pressure'].values
    return np.sum(pressure > 600)

In [None]:
def find_kinametics_value(data_pat):
    """Compute summary statistics of velocity, acceleration and jerk.

    Velocity is a finite difference of X/Y over non-overlapping 10-sample
    windows (rows 0->10, 10->20, ...). Acceleration and jerk are finite
    differences of consecutive velocity / acceleration values, each divided by
    the duration of the velocity window with the same index.

    Args:
        data_pat: DataFrame with ``X``, ``Y`` and ``Timestamp`` columns.

    Returns:
        list: 45 numbers - ``get_stat_data`` applied to the magnitude, the
        absolute horizontal component and the absolute vertical component of
        velocity, then acceleration, then jerk.
    """
    xs = data_pat['X'].values
    ys = data_pat['Y'].values
    ts = data_pat['Timestamp'].values

    timestamp_diff = []
    vel = KineticsData()
    for start in range(0, len(data_pat) - 10, 10):
        stop = start + 10
        dt = ts[stop] - ts[start]
        timestamp_diff.append(dt)

        hor = (xs[stop] - xs[start]) / dt
        ver = (ys[stop] - ys[start]) / dt
        vel.val.append((hor, ver))
        vel.hor_val.append(hor)
        vel.ver_val.append(ver)
        vel.magnitude.append(sqrt(hor ** 2 + ver ** 2))
        vel.hor_magnitude.append(abs(hor))
        vel.ver_magnitude.append(abs(ver))

    def _derive(previous):
        # Finite difference of consecutive (hor, ver) pairs, scaled by the
        # duration of the velocity window that shares the index.
        derived = KineticsData()
        for i in range(len(previous.val) - 1):
            dt = timestamp_diff[i]
            hor = (previous.val[i + 1][0] - previous.val[i][0]) / dt
            ver = (previous.val[i + 1][1] - previous.val[i][1]) / dt
            derived.val.append((hor, ver))
            derived.hor_val.append(hor)
            derived.ver_val.append(ver)
            derived.hor_magnitude.append(abs(hor))
            derived.ver_magnitude.append(abs(ver))
            derived.magnitude.append(sqrt(hor ** 2 + ver ** 2))
        return derived

    accl = _derive(vel)
    jerk = _derive(accl)

    data = []
    for quantity in (vel, accl, jerk):
        for samples in (quantity.magnitude, quantity.hor_magnitude, quantity.ver_magnitude):
            data.extend(get_stat_data(samples))
    return data

In [None]:
def NCV_per_halfcircle(f):
    """Summarise per-segment counts of samples with non-zero velocity (NCV).

    The recording is walked sample by sample. Whenever the X coordinate equals
    the X of the first sample, the running count is stored and reset; otherwise
    the sample-to-sample velocity is computed and the count is incremented when
    it is not exactly (0, 0). Segments with a count of zero are discarded
    before the statistics are taken.

    Fix: the original ``ncv[ncv != 0]`` was applied to a Python list, where it
    evaluates to ``ncv[True]`` (i.e. ``ncv[1]``). That reduced the whole list to
    one scalar, or raised IndexError when only one count existed.

    Args:
        f: DataFrame with ``X``, ``Y`` and ``Timestamp`` columns.

    Returns:
        list: ``get_stat_data`` of the non-zero counts, or five zeros when there
        are none (the same placeholder ``generate_features`` uses for a
        missing test).
    """
    xs = f['X'].values
    ys = f['Y'].values
    ts = f['Timestamp'].values

    no_data = [0, 0, 0, 0, 0]
    if len(xs) == 0:
        return no_data

    basex = xs[0]
    ncv = []
    temp_ncv = 0
    for i in range(len(xs) - 2):
        if xs[i] == basex:
            # Back at the starting X: close the current segment.
            ncv.append(temp_ncv)
            temp_ncv = 0
            continue

        dt = ts[i + 1] - ts[i]
        velocity = ((xs[i + 1] - xs[i]) / dt, (ys[i + 1] - ys[i]) / dt)
        if velocity != (0, 0):
            temp_ncv += 1
    ncv.append(temp_ncv)

    ncv = [count for count in ncv if count != 0]
    if not ncv:
        return no_data
    return get_stat_data(ncv)
   

In [None]:
def NCA_per_halfcircle(data_pat):
    """Summarise per-segment counts of samples with non-zero acceleration (NCA).

    Velocities are taken over non-overlapping 10-sample windows. The velocity
    list is then walked: when the X coordinate at the loop index equals the X
    of the first sample, the running count is stored and reset; otherwise the
    acceleration between consecutive velocities is computed and counted when it
    is not exactly (0, 0). Counts equal to 2 or 0 are discarded before the
    statistics are taken.

    Fix: the original ``nca[nca != 0]`` was applied to a Python list, where it
    evaluates to ``nca[True]`` (i.e. ``nca[1]``). That reduced the whole list to
    one scalar, or raised IndexError when only one count existed.

    Args:
        data_pat: DataFrame with ``X``, ``Y`` and ``Timestamp`` columns.

    Returns:
        list: ``get_stat_data`` of the remaining counts, or five zeros when
        there are none (the same placeholder ``generate_features`` uses for a
        missing test).
    """
    xs = data_pat['X'].values
    ys = data_pat['Y'].values
    ts = data_pat['Timestamp'].values

    no_data = [0, 0, 0, 0, 0]
    if len(xs) == 0:
        return no_data

    timestamp_diff = []
    Vel = []
    for t in range(0, len(xs) - 10, 10):
        dt = ts[t + 10] - ts[t]
        timestamp_diff.append(dt)
        Vel.append(((xs[t + 10] - xs[t]) / dt, (ys[t + 10] - ys[t]) / dt))

    nca = []
    temp_nca = 0
    basex = xs[0]
    for i in range(len(Vel) - 2):
        # NOTE(review): `i` indexes the 10-sample velocity windows but is used
        # here to index the raw X samples - looks like it may have been meant
        # to be xs[i * 10]; behaviour kept as in the original, confirm intent.
        if xs[i] == basex:
            nca.append(temp_nca)
            temp_nca = 0
            continue

        dt = timestamp_diff[i]
        acceleration = ((Vel[i + 1][0] - Vel[i][0]) / dt, (Vel[i + 1][1] - Vel[i][1]) / dt)
        if acceleration != (0, 0):
            temp_nca += 1
    nca.append(temp_nca)

    # The original also dropped counts equal to 2 (reason not visible here).
    nca = [count for count in nca if count != 2 and count != 0]
    if not nca:
        return no_data
    return get_stat_data(nca)

In [None]:
def generate_features(f, parkinson_target):
    """Build one feature row from a single recording file.

    The file is read as a header-less, ';'-separated table with the columns in
    ``header_row`` and split by ``Test_ID`` into static (0), dynamic (1) and
    STCP (2) parts. A part with no rows contributes zeros.

    Fix: timestamps are now offset *before* the file is split. Boolean-mask
    selection returns copies, so the original offset (applied afterwards) never
    reached the sub-frames and ``get_speed`` divided by raw timestamps.

    Args:
        f: path of the recording file.
        parkinson_target: class label appended as the last value
            (1 for parkinson, 0 for control).

    Returns:
        list: values in the order of ``features_headers``.
    """
    df = pd.read_csv(f, sep=';', header=None, names=header_row)

    if not df.empty:
        # Make timestamps relative to the first sample of the file.
        df['Timestamp'] = df['Timestamp'] - df['Timestamp'].iloc[0]

    df_static = df[df["Test_ID"] == 0]    # static test
    df_dynamic = df[df["Test_ID"] == 1]   # dynamic test
    df_stcp = df[df["Test_ID"] == 2]      # STCP (stability test on certain point)

    has_static = df_static.shape[0] > 0
    has_dynamic = df_dynamic.shape[0] > 0
    has_stcp = df_stcp.shape[0] > 0

    no_kinematics = [0] * 45   # 9 series x 5 statistics
    no_counts = [0] * 5        # 5 statistics

    data_point = []
    data_point.append(get_no_strokes(df_static) if has_static else 0)    # no. of strokes for static test
    data_point.append(get_no_strokes(df_dynamic) if has_dynamic else 0)  # no. of strokes for dynamic test
    data_point.append(get_speed(df_static) if has_static else 0)         # speed for static test
    data_point.append(get_speed(df_dynamic) if has_dynamic else 0)       # speed for dynamic test

    data_point.extend(find_kinametics_value(df_static) if has_static else no_kinematics)
    data_point.extend(find_kinametics_value(df_dynamic) if has_dynamic else no_kinematics)

    data_point.extend(NCV_per_halfcircle(df_static) if has_static else no_counts)    # NCV for static test
    data_point.extend(NCV_per_halfcircle(df_dynamic) if has_dynamic else no_counts)  # NCV for dynamic test

    data_point.extend(NCA_per_halfcircle(df_static) if has_static else no_counts)    # NCA for static test
    data_point.extend(NCA_per_halfcircle(df_dynamic) if has_dynamic else no_counts)  # NCA for dynamic test

    data_point.append(get_in_air_time(df_stcp) if has_stcp else 0)            # in air time for STCP
    data_point.append(get_on_surface_time(df_static) if has_static else 0)    # on surface time for static test
    data_point.append(get_on_surface_time(df_dynamic) if has_dynamic else 0)  # on surface time for dynamic test

    data_point.append(parkinson_target)    # target. 1 for parkinson. 0 for control.

    return data_point

In [None]:
def get_features():
    """Assemble the feature table for every parkinson and control file.

    Parkinson files (label 1) come first, then control files (label 0). NaN
    values in the resulting table are replaced with 0.

    Returns:
        tuple: ``(x, y)`` where ``x`` is the DataFrame of features without the
        ``TARGET`` column and ``y`` is the ``TARGET`` column.
    """
    labelled_files = [(path, 1) for path in parkinson_file_list]
    labelled_files += [(path, 0) for path in control_file_list]

    rows = np.array([generate_features(path, label) for path, label in labelled_files])
    table = pd.DataFrame(rows, columns=features_headers).fillna(0)

    y = table['TARGET']
    x = table.drop(['TARGET'], axis=1)
    return x, y

In [None]:
from statistics import mean, stdev
class ClassifierScore:
    """Summary of a list of per-fold accuracy scores.

    ``max``, ``min`` and ``mean`` are stored as percentages (value * 100);
    ``stdev`` is the sample standard deviation of the raw scores and is left
    unscaled.
    """

    def __init__(self, values):
        self.values = values
        self.max, self.min, self.mean = (
            summarise(values) * 100 for summarise in (max, min, mean)
        )
        self.stdev = stdev(values)

    def print(self):
        """Print the raw scores followed by the summary figures."""
        print('List of possible accuracy:', self.values)
        percentages = (
            ('Maximum Accuracy:', self.max),
            ('Minimum Accuracy:', self.min),
            ('Overall Accuracy:', self.mean),
        )
        for label, percent in percentages:
            print(label, percent, '%')
        print('Standard Deviation is:', self.stdev)

In [None]:
from sklearn import preprocessing
def preprocess_with_minmaxscaler(x):
    """Fit a ``MinMaxScaler`` with default settings on ``x`` and return the scaled data."""
    return preprocessing.MinMaxScaler().fit_transform(x)

In [None]:
#Oversample because there are fewer control samples than parkinson samples
#Class - SMOTE, BorderlineSMOTE, SVMSMOTE, ADASYN
#ref - https://machinelearningmastery.com/smote-oversampling-for-imbalanced-classification/

from imblearn.over_sampling import ADASYN
def oversampling_with_smote_adasyn(x,y):
    """Resample ``(x, y)`` with ADASYN (``random_state=3``) and return the new pair."""
    sampler = ADASYN(random_state=3)
    resampled_x, resampled_y = sampler.fit_resample(x, y)
    return resampled_x, resampled_y

In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif
import matplotlib.pyplot as plt

def select_using_selectkbest(x,y,feature_count):
    """Keep the ``feature_count`` best features ranked by mutual information.

    Prints the indices of the selected columns.

    Returns:
        tuple: ``(reduced_x, selected_column_indices)``.
    """
    selector = SelectKBest(score_func=mutual_info_classif, k=feature_count)
    reduced = selector.fit_transform(x, y)
    kept = selector.get_support(indices=True)
    print(kept)
    return reduced, kept

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
def select_using_selectfrommodel(x, y):
    """Select features with ``SelectFromModel`` around a fitted extra-trees model.

    An ``ExtraTreesClassifier`` with 50 estimators (no fixed ``random_state``)
    is fitted on ``(x, y)`` and passed to ``SelectFromModel`` as a prefit
    estimator. The reduced matrix is printed and returned.
    """
    forest = ExtraTreesClassifier(n_estimators=50).fit(x, y)
    X_new = SelectFromModel(forest, prefit=True).transform(x)
    print(X_new)
    return X_new

In [None]:
def plot_feature_score(x,y,feature_count):
    """Bar-plot the mutual-information scores of the ``feature_count`` best features.

    Fix: the original read ``fs`` and ``cols``, which only exist as locals of
    ``select_using_selectkbest``, so calling it raised NameError. The selector
    is now fitted here.

    Args:
        x: feature matrix.
        y: class labels.
        feature_count: number of top features to show (``k`` of SelectKBest).
    """
    fs = SelectKBest(score_func=mutual_info_classif, k=feature_count).fit(x, y)
    scores = fs.scores_
    cols = fs.get_support(indices=True)  # column indices of the selected features

    barY = [scores[col] for col in cols]
    print(barY)
    barX = [str(col) for col in cols]
    plt.bar(barX, barY)
    plt.show()

In [None]:
# Build the feature table, rescale it with MinMaxScaler, then oversample with
# ADASYN. `y` is rebound to the oversampled labels.
# NOTE(review): scaling and oversampling are fitted on the full data set before
# run_test() splits it into folds, so test folds influence the scaler and may
# contain synthetic neighbours of training samples - reported accuracy is
# likely optimistic.
raw_x, y = get_features()
scaled_x = preprocess_with_minmaxscaler(raw_x)
oversampled_x, y = oversampling_with_smote_adasyn(scaled_x,y)

In [None]:
# Keep the 15 best features; `x` and `y` are the globals consumed by run_test().
# NOTE(review): selection also uses all rows, i.e. it happens before the CV split.
x, selected_features_idx = select_using_selectkbest(oversampled_x,y,15)

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score

def run_test(classifier):
    """Evaluate ``classifier`` with shuffled, stratified 10-fold cross-validation.

    Reads the module-level ``x`` (feature matrix) and ``y`` (labels). For every
    fold the classifier is fitted on the training part and scored on the test
    part. Afterwards the accuracy summary is printed, the pooled confusion
    matrix is plotted and the derived scores are printed.

    Args:
        classifier: estimator exposing ``fit`` and ``predict``.
    """
    fold = 10
    print(fold)
    # n_splits = number of groups for cross validation
    splitter = StratifiedKFold(n_splits=fold, shuffle=True, random_state=1)

    predicted_targets = np.array([])
    actual_targets = np.array([])
    accuracy_list = []

    for train_idx, test_idx in splitter.split(x, y):
        fold_train_x, fold_train_y = x[train_idx], y[train_idx]
        fold_test_x, fold_test_y = x[test_idx], y[test_idx]

        classifier.fit(fold_train_x, fold_train_y)
        fold_predictions = classifier.predict(fold_test_x)
        accuracy_list.append(accuracy_score(fold_test_y, fold_predictions))

        # Pool predictions and ground truth across folds.
        predicted_targets = np.append(predicted_targets, fold_predictions)
        actual_targets = np.append(actual_targets, fold_test_y)

    ClassifierScore(accuracy_list).print()
    plot_confusion_matrix(predicted_targets, actual_targets)
    calculate_scores(actual_targets, predicted_targets)

In [None]:
def plot_confusion_matrix(predicted_labels_list, y_test_list):
    """Compute and show the non-normalized confusion matrix.

    Args:
        predicted_labels_list: predicted labels.
        y_test_list: true labels.

    A normalized plot can be produced by calling ``generate_confusion_matrix``
    with ``normalize=True``.
    """
    matrix = confusion_matrix(y_test_list, predicted_labels_list)
    np.set_printoptions(precision=2)

    plt.figure()
    generate_confusion_matrix(
        matrix,
        classes=['control', 'parkinson'],
        title='Confusion matrix, without normalization',
    )
    plt.show()

In [None]:
import itertools

def generate_confusion_matrix(cnf_matrix, classes, normalize=False, title='Confusion matrix'):
    """Draw a confusion matrix as an annotated image on the current figure.

    Args:
        cnf_matrix: 2-D array of counts (rows = true label, columns = predicted).
        classes: tick labels, one per class.
        normalize: when True, each row is divided by its sum before plotting.
        title: plot title.

    Returns:
        The matrix that was drawn (normalized if requested).
    """
    if not normalize:
        print('Confusion matrix, without normalization')
    else:
        row_totals = cnf_matrix.sum(axis=1)[:, np.newaxis]
        cnf_matrix = cnf_matrix.astype('float') / row_totals
        print("Normalized confusion matrix")

    plt.imshow(cnf_matrix, interpolation='nearest', cmap=plt.get_cmap('Blues'))
    plt.title(title)
    plt.colorbar()

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    cell_format = '.2f' if normalize else 'd'
    half_max = cnf_matrix.max() / 2.

    # Annotate every cell; white text on dark cells, black on light ones.
    for row in range(cnf_matrix.shape[0]):
        for col in range(cnf_matrix.shape[1]):
            cell = cnf_matrix[row, col]
            text_colour = "white" if cell > half_max else "black"
            plt.text(col, row, format(cell, cell_format),
                     horizontalalignment="center", color=text_colour)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    return cnf_matrix

In [None]:
def calculate_scores(y_test_list, predicted_labels_list):
    """Print sensitivity, specificity, FNR, precision and F1, one per line.

    The values are derived from the 2x2 confusion matrix of the given true and
    predicted labels and printed in that order without labels.
    """
    TN, FP, FN, TP = confusion_matrix(y_test_list, predicted_labels_list).ravel()

    sensitivity = TP / float(FN + TP)  # recall
    specificity = TN / float(TN + FP)
    FNR = FN / float(FN + TP)
    precision = TP / float(TP + FP)
    F1 = 2 * (precision * sensitivity) / (precision + sensitivity)

    for score in (sensitivity, specificity, FNR, precision, F1):
        print(score)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# np.random.seed() returns None, so it must not be used as the random_state
# argument; pass the integer seed to the estimator directly.
run_test(RandomForestClassifier(n_estimators=100,criterion="entropy",random_state=15))

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Evaluate k-nearest neighbours with scikit-learn's default hyper-parameters.
run_test(KNeighborsClassifier())

In [None]:
from sklearn.tree import DecisionTreeClassifier
# np.random.seed() returns None, so it must not be used as the random_state
# argument; pass the integer seed to the estimator directly.
run_test(DecisionTreeClassifier(criterion="entropy",random_state=3))

In [None]:
from sklearn.ensemble import AdaBoostClassifier
# Evaluate AdaBoost (100 estimators, learning rate 1.0) with a fixed random_state.
run_test(AdaBoostClassifier(n_estimators=100, learning_rate=1.0, random_state=0))

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
# np.random.seed() returns None, so it must not be used as the random_state
# argument; pass the integer seed to the estimator directly.
run_test(ExtraTreesClassifier(n_estimators=100,criterion="entropy",random_state=15))

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Evaluate gradient boosting with depth-1 trees (100 estimators, learning rate
# 1.0) and a fixed random_state.
run_test(GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0))

In [None]:
def sample_ploting(f):
    """Plot the X/Y trace of the static and the dynamic test of one recording.

    Args:
        f: path of a header-less, ';'-separated recording file.
    """
    recording = pd.read_csv(f, sep=';', header=None, names=header_row)
    for test_id in (0, 1):  # 0 = static test, 1 = dynamic test
        trace = recording[recording["Test_ID"] == test_id]
        plt.plot(trace['X'], trace['Y'])
    plt.xlabel("X")
    plt.ylabel("Y")
    #plt.savefig("parkinson_data", facecolor='w', bbox_inches="tight", pad_inches=0.3, transparent=True)
    
def get_kinametics_value(data_pat):
    """Compute the velocity, acceleration and jerk series of a recording.

    Velocity is a finite difference of X/Y over non-overlapping 10-sample
    windows (rows 0->10, 10->20, ...). Acceleration and jerk are finite
    differences of consecutive velocity / acceleration values, each divided by
    the duration of the velocity window with the same index.

    Args:
        data_pat: DataFrame with ``X``, ``Y`` and ``Timestamp`` columns.

    Returns:
        tuple: ``(vel, accl, jerk)`` as ``KineticsData`` instances.
    """
    xs = data_pat['X'].values
    ys = data_pat['Y'].values
    ts = data_pat['Timestamp'].values

    timestamp_diff = []
    vel = KineticsData()
    for start in range(0, len(data_pat) - 10, 10):
        stop = start + 10
        dt = ts[stop] - ts[start]
        timestamp_diff.append(dt)

        hor = (xs[stop] - xs[start]) / dt
        ver = (ys[stop] - ys[start]) / dt
        vel.val.append((hor, ver))
        vel.hor_val.append(hor)
        vel.ver_val.append(ver)
        vel.magnitude.append(sqrt(hor ** 2 + ver ** 2))
        vel.hor_magnitude.append(abs(hor))
        vel.ver_magnitude.append(abs(ver))

    def _derive(previous):
        # Finite difference of consecutive (hor, ver) pairs, scaled by the
        # duration of the velocity window that shares the index.
        derived = KineticsData()
        for i in range(len(previous.val) - 1):
            dt = timestamp_diff[i]
            hor = (previous.val[i + 1][0] - previous.val[i][0]) / dt
            ver = (previous.val[i + 1][1] - previous.val[i][1]) / dt
            derived.val.append((hor, ver))
            derived.hor_val.append(hor)
            derived.ver_val.append(ver)
            derived.hor_magnitude.append(abs(hor))
            derived.ver_magnitude.append(abs(ver))
            derived.magnitude.append(sqrt(hor ** 2 + ver ** 2))
        return derived

    accl = _derive(vel)
    jerk = _derive(accl)
    return vel, accl, jerk

def feature_ploting(f, test_id):
    """Plot the horizontal jerk series of one test of one recording.

    Fix: the original ignored both arguments and always plotted the static
    test of ``parkinson_file_list[1]``.

    Args:
        f: path of a header-less, ';'-separated recording file.
        test_id: value of the ``Test_ID`` column to plot
            (0 = static test, 1 = dynamic test).
    """
    df = pd.read_csv(f, sep=';', header=None, names=header_row)
    df_test = df[df["Test_ID"] == test_id]
    _, _, jerk = get_kinametics_value(df_test)
    plt.plot(jerk.hor_val)
    plt.xlabel("Time")
    plt.ylabel("Jerk")
    #plt.savefig("parkinson_jerk_data_st", facecolor='w', bbox_inches="tight", pad_inches=0.3, transparent=True)

def best_feature_ploting():
    """Plot mutual-information scores for all features and for five chosen ones.

    Reads the module-level ``oversampled_x``, ``y`` and ``features_headers``.
    The left panel shows the score of every feature; the right panel shows the
    features at indices 70, 75, 78, 85 and 90 by name.

    Fix: the original called a bare ``subplot(...)``, which is not defined
    (NameError); it is now ``plt.subplot``. The unused transformed matrix is no
    longer computed.
    """
    selector = SelectKBest(mutual_info_classif, k='all').fit(oversampled_x,y)
    scores = selector.scores_

    plt.subplot(1,2,1)
    plt.bar(range(0, len(scores)), scores)

    # Hand-picked feature indices highlighted in the second panel.
    highlighted = (70, 75, 78, 85, 90)
    sc = [scores[idx] for idx in highlighted]
    n = [features_headers[idx] for idx in highlighted]

    plt.subplot(1,2,2)
    plt.barh(n, sc)
    #plt.savefig("features", facecolor='w', bbox_inches="tight", pad_inches=0.3, transparent=True)

#sample_ploting(parkinson_file_list[1])
#sample_ploting(control_file_list[1])

#feature_ploting(parkinson_file_list[1], 0)
#feature_ploting(parkinson_file_list[1], 1)
#feature_ploting(control_file_list[1], 0)
#feature_ploting(control_file_list[1], 1)
#best_feature_ploting()