In [2]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import roc_auc_score

from mlxtend.feature_selection import SequentialFeatureSelector as SFS

In [3]:
data = pd.read_csv('data.csv')
data.head(2)

Unnamed: 0.1,Unnamed: 0,no_strokes_st,no_strokes_dy,speed_st,speed_dy,magnitude_vel_st,magnitude_horz_vel_st,magnitude_vert_vel_st,magnitude_vel_dy,magnitude_horz_vel_dy,...,magnitude_horz_jerk_dy,magnitude_vert_jerk_dy,ncv_st,ncv_dy,nca_st,nca_dy,in_air_stcp,on_surface_st,on_surface_dy,target
0,0,12.0,2.0,0.000293,0.000431,0.061342,0.038319,0.03905,0.084891,0.053885,...,6e-06,6e-06,185.25,412.857143,61.833333,470.0,0.0,3678.0,4852.0,1.0
1,1,4.0,6.0,0.000286,0.000281,0.119159,0.077012,0.074216,0.160497,0.10136,...,1e-05,9e-06,192.777778,173.875,102.5,54.0,0.0,1688.0,1587.0,1.0


In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(labels=['target', 'Unnamed: 0'], axis=1),
    data['target'],
    test_size=0.3,
    random_state=0)

X_train.shape, X_test.shape

((53, 29), (24, 29))

In [5]:
sfs1 = SFS(RandomForestClassifier(n_jobs=4), 
           k_features=10, 
           forward=True, 
           floating=False, 
           verbose=2,
           scoring='roc_auc',
           cv=3)

sfs1 = sfs1.fit(np.array(X_train.fillna(0)), y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:   14.8s finished

[2021-09-30 20:32:50] Features: 1/10 -- score: 0.9355158730158731[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:   13.5s finished

[2021-09-30 20:33:03] Features: 2/10 -- score: 0.9642857142857143[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:   12.2s finished

[2021-09-30 20:33:15] Features: 3/10 -- score: 0.9940476190476191[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  

In [6]:
selected_feat= X_train.columns[list(sfs1.k_feature_idx_)]
selected_feat

Index(['no_strokes_st', 'no_strokes_dy', 'magnitude_horz_vel_dy',
       'magnitude_acc_dy', 'magnitude_vert_acc_dy', 'magnitude_horz_jerk_dy',
       'ncv_dy', 'nca_st', 'nca_dy', 'in_air_stcp'],
      dtype='object')

In [7]:
sfs1.k_score_

0.9821428571428571

In [8]:
pd.DataFrame.from_dict(sfs1.get_metric_dict()).T

Unnamed: 0,feature_idx,cv_scores,avg_score,feature_names,ci_bound,std_dev,std_err
1,"(15,)","[0.9375, 0.9642857142857143, 0.9047619047619049]",0.935516,"(15,)",0.0547752,0.024341,0.0172117
2,"(15, 26)","[0.9642857142857143, 0.9285714285714286, 1.0]",0.964286,"(15, 26)",0.0656209,0.0291606,0.0206197
3,"(15, 20, 26)","[1.0, 0.9821428571428572, 1.0]",0.994048,"(15, 20, 26)",0.0189431,0.00841794,0.00595238
4,"(15, 20, 24, 26)","[1.0, 1.0, 0.9761904761904763]",0.992063,"(15, 20, 24, 26)",0.0252575,0.0112239,0.00793651
5,"(15, 20, 23, 24, 26)","[1.0, 0.9821428571428572, 1.0]",0.994048,"(15, 20, 23, 24, 26)",0.0189431,0.00841794,0.00595238
6,"(1, 15, 20, 23, 24, 26)","[1.0, 0.9642857142857143, 1.0]",0.988095,"(1, 15, 20, 23, 24, 26)",0.0378863,0.0168359,0.0119048
7,"(1, 13, 15, 20, 23, 24, 26)","[1.0, 0.9642857142857143, 1.0]",0.988095,"(1, 13, 15, 20, 23, 24, 26)",0.0378863,0.0168359,0.0119048
8,"(0, 1, 13, 15, 20, 23, 24, 26)","[1.0, 0.9553571428571428, 1.0]",0.985119,"(0, 1, 13, 15, 20, 23, 24, 26)",0.0473578,0.0210448,0.014881
9,"(0, 1, 8, 13, 15, 20, 23, 24, 26)","[1.0, 0.9285714285714286, 0.9761904761904763]",0.968254,"(0, 1, 8, 13, 15, 20, 23, 24, 26)",0.0668251,0.0296957,0.020998
10,"(0, 1, 8, 13, 15, 20, 23, 24, 25, 26)","[1.0, 0.9464285714285714, 1.0]",0.982143,"(0, 1, 8, 13, 15, 20, 23, 24, 25, 26)",0.0568294,0.0252538,0.0178571


In [9]:
sfs1 = SFS(RandomForestClassifier(n_jobs=4), 
           k_features=(1,26), 
           forward=True, 
           floating=False, 
           verbose=2,
           scoring='roc_auc',
           cv=3)

sfs1 = sfs1.fit(np.array(X_train.fillna(0)), y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:   17.4s finished

[2021-09-30 20:35:06] Features: 1/26 -- score: 0.9017857142857144[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:   17.9s finished

[2021-09-30 20:35:24] Features: 2/26 -- score: 0.9642857142857143[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:   14.3s finished

[2021-09-30 20:35:39] Features: 3/26 -- score: 0.9821428571428572[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  

In [10]:
sfs1.k_score_

1.0

In [11]:
selected_feat= X_train.columns[list(sfs1.k_feature_idx_)]
selected_feat

Index(['no_strokes_dy', 'speed_st', 'speed_dy', 'magnitude_vert_acc_dy',
       'magnitude_horz_jerk_st', 'magnitude_jerk_dy', 'in_air_stcp',
       'on_surface_dy'],
      dtype='object')

In [23]:
features=data[['magnitude_vert_acc_dy','magnitude_jerk_dy','in_air_stcp',]]
features.head(70)

Unnamed: 0,magnitude_vert_acc_dy,magnitude_jerk_dy,in_air_stcp
0,0.000282,0.000010,0.0
1,0.000409,0.000015,0.0
2,0.001782,0.000051,0.0
3,0.000289,0.000011,0.0
4,0.000298,0.000013,0.0
...,...,...,...
65,0.000198,0.000007,0.0
66,0.000197,0.000007,0.0
67,0.000216,0.000007,0.0
68,0.000215,0.000007,0.0


In [13]:
y=data[['target']]


In [14]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler((-1,1))
x=scaler.fit_transform(features)

In [15]:
x_train,x_test,y_train,y_test=train_test_split(x, y, test_size=0.2, random_state=5)

In [16]:
from sklearn.metrics import accuracy_score,mean_squared_error
model=RandomForestClassifier()
model.fit(x_train,y_train)

RandomForestClassifier()

In [17]:
y_predtr=model.predict(x_train)
print(accuracy_score(y_train,y_predtr)*100)

100.0


In [18]:
y_pred=model.predict(x_test)
print(accuracy_score(y_test, y_pred)*100)

93.75


In [24]:
input_data = (0.000148,0.000006,0.0)
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)
std_data = scaler.transform(input_data_reshaped)
prediction = model.predict(std_data)
print(prediction)
if (prediction[0] == 0):
  print("The Person does not have Parkinsons Disease")

else:
  print("The Person has Parkinsons")


[0.]
The Person does not have Parkinsons Disease
