In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as pl
import seaborn as sns
%matplotlib inline

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import roc_auc_score

from mlxtend.feature_selection import SequentialFeatureSelector as SFS

In [2]:
data = pd.read_csv('data.csv')
data.head(2)

Unnamed: 0.1,Unnamed: 0,no_strokes_st,no_strokes_dy,speed_st,speed_dy,magnitude_vel_st,magnitude_horz_vel_st,magnitude_vert_vel_st,magnitude_vel_dy,magnitude_horz_vel_dy,...,magnitude_horz_jerk_dy,magnitude_vert_jerk_dy,ncv_st,ncv_dy,nca_st,nca_dy,in_air_stcp,on_surface_st,on_surface_dy,target
0,0,12.0,2.0,0.000293,0.000431,0.061342,0.038319,0.03905,0.084891,0.053885,...,6e-06,6e-06,185.25,412.857143,61.833333,470.0,0.0,3678.0,4852.0,1.0
1,1,4.0,6.0,0.000286,0.000281,0.119159,0.077012,0.074216,0.160497,0.10136,...,1e-05,9e-06,192.777778,173.875,102.5,54.0,0.0,1688.0,1587.0,1.0


In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(labels=['target', 'Unnamed: 0'], axis=1),
    data['target'],
    test_size=0.2,
    random_state=0)

X_train.shape, X_test.shape

((61, 29), (16, 29))

In [4]:
sfs1 = SFS(RandomForestClassifier(n_jobs=4), 
           k_features=15, 
           forward=True, 
           floating=False, 
           verbose=2,
           scoring='roc_auc',
           cv=3)

sfs1 = sfs1.fit(np.array(X_train.fillna(0)), y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:   15.0s finished

[2022-01-23 00:00:14] Features: 1/15 -- score: 0.9114583333333334[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:   10.7s finished

[2022-01-23 00:00:25] Features: 2/15 -- score: 0.9638480392156863[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:   10.0s finished

[2022-01-23 00:00:35] Features: 3/15 -- score: 0.9791666666666666[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  

In [5]:
selected_feat= X_train.columns[list(sfs1.k_feature_idx_)]
selected_feat

Index(['no_strokes_st', 'no_strokes_dy', 'speed_st', 'speed_dy',
       'magnitude_vert_vel_st', 'magnitude_vel_dy', 'magnitude_horz_vel_dy',
       'magnitude_vert_vel_dy', 'magnitude_vert_acc_dy',
       'magnitude_vert_jerk_dy', 'ncv_st', 'ncv_dy', 'nca_dy', 'in_air_stcp',
       'on_surface_dy'],
      dtype='object')

In [6]:
sfs1.k_score_

0.9583333333333334

In [7]:
pd.DataFrame.from_dict(sfs1.get_metric_dict()).T

Unnamed: 0,feature_idx,cv_scores,avg_score,feature_names,ci_bound,std_dev,std_err
1,"(21,)","[1.0, 0.921875, 0.8125]",0.911458,"(21,)",0.173051,0.0769001,0.0543766
2,"(21, 26)","[0.9852941176470589, 0.96875, 0.9375]",0.963848,"(21, 26)",0.0445956,0.0198174,0.014013
3,"(21, 25, 26)","[1.0, 0.9375, 1.0]",0.979167,"(21, 25, 26)",0.066301,0.0294628,0.0208333
4,"(1, 21, 25, 26)","[1.0, 0.90625, 1.0]",0.96875,"(1, 21, 25, 26)",0.0994514,0.0441942,0.03125
5,"(1, 21, 22, 25, 26)","[1.0, 0.9375, 1.0]",0.979167,"(1, 21, 22, 25, 26)",0.066301,0.0294628,0.0208333
6,"(1, 15, 21, 22, 25, 26)","[0.9705882352941176, 0.953125, 1.0]",0.974571,"(1, 15, 21, 22, 25, 26)",0.0435276,0.0193428,0.0136774
7,"(1, 15, 21, 22, 23, 25, 26)","[1.0, 0.9375, 1.0]",0.979167,"(1, 15, 21, 22, 23, 25, 26)",0.066301,0.0294628,0.0208333
8,"(1, 7, 15, 21, 22, 23, 25, 26)","[0.9926470588235294, 0.9296875, 1.0]",0.974112,"(1, 7, 15, 21, 22, 23, 25, 26)",0.0710106,0.0315556,0.0223132
9,"(1, 6, 7, 15, 21, 22, 23, 25, 26)","[0.9705882352941176, 0.9375, 1.0]",0.969363,"(1, 6, 7, 15, 21, 22, 23, 25, 26)",0.0574514,0.0255302,0.0180526
10,"(1, 6, 7, 15, 21, 22, 23, 25, 26, 28)","[0.9852941176470589, 0.875, 1.0]",0.953431,"(1, 6, 7, 15, 21, 22, 23, 25, 26, 28)",0.125531,0.0557834,0.0394448


In [8]:
selected_feat= X_train.columns[list(sfs1.k_feature_idx_)]
selected_feat

Index(['no_strokes_st', 'no_strokes_dy', 'speed_st', 'speed_dy',
       'magnitude_vert_vel_st', 'magnitude_vel_dy', 'magnitude_horz_vel_dy',
       'magnitude_vert_vel_dy', 'magnitude_vert_acc_dy',
       'magnitude_vert_jerk_dy', 'ncv_st', 'ncv_dy', 'nca_dy', 'in_air_stcp',
       'on_surface_dy'],
      dtype='object')

In [9]:
features=data[['no_strokes_st', 'speed_st', 'magnitude_vel_st', 'magnitude_vel_dy',
       'magnitude_horz_vel_dy', 'magnitude_vert_vel_dy',
       'magnitude_vert_acc_st', 'magnitude_acc_dy', 'magnitude_vert_jerk_st',
       'magnitude_vert_jerk_dy', 'ncv_st', 'ncv_dy', 'nca_dy', 'in_air_stcp',
       'on_surface_dy']]

features.head(2)

Unnamed: 0,no_strokes_st,speed_st,magnitude_vel_st,magnitude_vel_dy,magnitude_horz_vel_dy,magnitude_vert_vel_dy,magnitude_vert_acc_st,magnitude_acc_dy,magnitude_vert_jerk_st,magnitude_vert_jerk_dy,ncv_st,ncv_dy,nca_dy,in_air_stcp,on_surface_dy
0,12.0,0.000293,0.061342,0.084891,0.053885,0.052829,0.000212,0.000466,5e-06,6e-06,185.25,412.857143,470.0,0.0,4852.0
1,4.0,0.000286,0.119159,0.160497,0.10136,0.099211,0.000327,0.000728,7e-06,9e-06,192.777778,173.875,54.0,0.0,1587.0


In [10]:
list(features)

['no_strokes_st',
 'speed_st',
 'magnitude_vel_st',
 'magnitude_vel_dy',
 'magnitude_horz_vel_dy',
 'magnitude_vert_vel_dy',
 'magnitude_vert_acc_st',
 'magnitude_acc_dy',
 'magnitude_vert_jerk_st',
 'magnitude_vert_jerk_dy',
 'ncv_st',
 'ncv_dy',
 'nca_dy',
 'in_air_stcp',
 'on_surface_dy']

In [11]:
y=data[['target']]


In [12]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler((-1,1))
x=scaler.fit_transform(features)

In [13]:
from sklearn.linear_model import LogisticRegression
from xgboost import XGBRFClassifier,XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier,ExtraTreesClassifier,GradientBoostingClassifier,RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [14]:
#model_params = {
#     'svm': {
#         'model': svm.SVC(gamma='auto'),
#         'params' : {
#             'C': [1,10,20],
#             'kernel': ['poly','linear','rbf']
#         }  
#     },
#     'xgb': {
#         'model': GradientBoostingClassifier(),
#         'params' : {
#             'learning_rate': [0.01, 0.1, 0.2, 1],
#             'min_samples_split': np.linspace(0.1, 0.5, 10),
#             'max_depth':[3,5,8],
#             'subsample':[0.5, 0.9, 1.0],
#             'n_estimators':[100,1000]
#         }  
  #  },
#     'xgb':{
#         'model':XGBClassifier(),
#             'params':{
#                 'booster':['gblinear'],
#                 'learning_rate': [0.01, 0.1, 0.2, 1],
#                 "max_depth":[3,5,8],
#                 'min_samples_split': np.linspace(0.1, 0.5, 10),
#             }
        
        
#     },
}   
#     'decision_tree': {
#         'model': DecisionTreeClassifier(),
#         'params' : {
            
#              'max_depth':[3,5,8],
#              'criterion':['gini','entropy'],
#              'min_samples_split': np.linspace(0.1, 0.5, 10),
#              'splitter':['best','random'],
#               'random_state':[2,3,4,5,6,7],
#         }
#     },
#     'random_forest': {
#         'model': RandomForestClassifier(),
#         'params' : {
#             'n_estimators': [1,5,10],
#             'max_depth':[3,5,8],
#             'criterion':['gini','entropy'],
#             'min_samples_split': np.linspace(0.1, 0.5, 10),
#             'max_features':['auto','sqrt','log2'],
#             'random_state':[2,3,4,5,6,7],
#         }
            
        
#     },
#      'extra_tree': {
#         'model': ExtraTreesClassifier(),
#         'params' : {
#             'n_estimators': [1,5,10],
#             'max_depth':[3,5,8],
#             'criterion':['gini','entropy'],
#             'min_samples_split': np.linspace(0.1, 0.5, 10),
#             'max_features':['auto','sqrt','log2'],
#             'random_state':[2,3,4,5,6,7],
            
#         }
            
        
#     },
#     'logistic_regression' : {
#         'model': LogisticRegression(solver='liblinear',multi_class='auto'),
#         'params': {
#             'C': [1,5,10]
#         }
#     }
# }

SyntaxError: unmatched '}' (<ipython-input-14-fe0066d6b041>, line 30)

In [None]:
# scores = []

# for model_name, mp in model_params.items():
#     clf =  GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
#     clf.fit(x,y)
#     scores.append({
#         'model': model_name,
#         'best_score': clf.best_score_,
#         'best_params': clf.best_params_
#     })
    
# df = pd.DataFrame(scores,columns=['model','best_params','best_score'])
# df

In [None]:
#  df.to_csv('stepParam.csv')

In [None]:
x_train,x_test,y_train,y_test=train_test_split(x, y, test_size=0.2, random_state=5)

In [None]:
xgb=XGBClassifier()
xgb.fit(x_train,y_train)


In [None]:
y_predtr=xgb.predict(x_train)
print(accuracy_score(y_train,y_predtr)*100)

In [None]:
xgb_pred=xgb.predict(x_test)
print(accuracy_score(y_test,xgb_pred)*100)

In [None]:
xgb_conf=confusion_matrix(y_test,xgb_pred)
xgb_conf

In [None]:
sns.heatmap(xgb_conf, annot=True, annot_kws={"size": 60}) # font size

pl.show()
pl.savefig('xgb_confusion.png')

In [None]:
print(classification_report(y_test,xgb_pred))

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

In [None]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_pred)

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

In [None]:
etc=ExtraTreesClassifier(n_estimators=10,criterion='entropy',min_samples_split=.144,
                             max_depth=8,max_features='auto',random_state=5)
etc.fit(x_train,y_train)

In [None]:
y_predtr=etc.predict(x_train)
print(accuracy_score(y_train,y_predtr)*100)

In [None]:
etc_pred=etc.predict(x_test)
print(accuracy_score(y_test,etc_pred)*100)

In [None]:
etc_conf=confusion_matrix(y_test,etc_pred)
etc_conf

In [None]:
sns.heatmap(etc_conf, annot=True, annot_kws={"size": 60}) # font size

pl.show()
pl.savefig('etc_confusion.png')

In [None]:
print(classification_report(y_test,etc_pred))

In [None]:
from sklearn.metrics import accuracy_score,mean_squared_error
model=RandomForestClassifier()
model.fit(x_train,y_train)

In [None]:
y_predtr=model.predict(x_train)
print(accuracy_score(y_train,y_predtr)*100)

In [None]:
y_pred=model.predict(x_test)
print(accuracy_score(y_test, y_pred)*100)

In [None]:
from sklearn.ensemble import RandomForestClassifier
rfc=RandomForestClassifier(n_estimators=10,criterion='gini',min_samples_split=.1,
                             max_depth=5,max_features='auto',random_state=2)
rfc.fit(x_train,y_train)


In [None]:
y_predtr=rfc.predict(x_train)
print(accuracy_score(y_train,y_predtr)*100)

In [None]:
rfc_pred=rfc.predict(x_test)
print(accuracy_score(y_test,rfc_pred)*100)

In [None]:
rfc_conf=confusion_matrix(y_test,rfc_pred)
rfc_conf

In [None]:
sns.heatmap(rfc_conf, annot=True, annot_kws={"size": 60}) # font size

pl.show()
pl.savefig('step_confusion.png')

In [None]:
print(classification_report(y_test,rfc_pred))

In [None]:
svm=SVC(kernel='poly')
svm.fit(x_train,y_train)

In [None]:
y_predtr=svm.predict(x_train)
print(accuracy_score(y_train,y_predtr)*100)

In [None]:
svm_pred=svm.predict(x_test)
print(accuracy_score(y_test, svm_pred)*100)

In [None]:
svm_conf=confusion_matrix(y_test,svm_pred)
svm_conf

In [None]:
sns.heatmap(svm_conf, annot=True, annot_kws={"size": 60}) # font size

pl.show()

In [None]:
print(classification_report(y_test,svm_pred))

In [None]:
# input_data = (0.000148,0.000006,0.0)
# input_data_as_numpy_array = np.asarray(input_data)
# input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)
# std_data = scaler.transform(input_data_reshaped)
# prediction = model.predict(std_data)
# print(prediction)
# if (prediction[0] == 0):
#   print("The Person does not have Parkinsons Disease")

# else:
#   print("The Person has Parkinsons")


In [None]:
from sklearn.linear_model import LogisticRegression

model=LogisticRegression()
model.fit(x_train,y_train)

In [None]:
y_predtr=model.predict(x_train)
print(accuracy_score(y_train,y_predtr)*100)

In [None]:
y_pred=model.predict(x_test)
print(accuracy_score(y_test, y_pred)*100)

In [None]:
from sklearn.tree import DecisionTreeClassifier

model=DecisionTreeClassifier()
model.fit(x_train,y_train)

In [None]:
y_predtr=model.predict(x_train)
print(accuracy_score(y_train,y_predtr)*100)

In [None]:
y_pred=model.predict(x_test)
print(accuracy_score(y_test, y_pred)*100)

In [None]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_pred)

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))