<a href="https://colab.research.google.com/github/klaxonlet/CE888-February-2021/blob/main/ParkinsonsDisease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

In [None]:
# Read the dataset from a CSV file located relative to the working directory.

dataset_path = 'parkinsonsdata_new.csv'
data = pd.read_csv(dataset_path)

In [None]:
# Pull the 'status' column out of the frame to serve as the target for the
# train/test split, then drop the 'name' column so neither remains in `data`.

data_target = data.pop('status')
data = data.drop(columns='name')

In [None]:
# Standardise every remaining column with StandardScaler.
# The scaled array is wrapped back into a DataFrame that keeps the original
# column labels and row index, so features stay identifiable by name and the
# rows stay aligned with `data_target`.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# done in a later cell, so test rows contribute to the scaling statistics.
# Fitting on the training split only would avoid that leakage.

scaler = StandardScaler()
data = pd.DataFrame(
    scaler.fit_transform(data),
    columns=data.columns,
    index=data.index,
)

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition repeatable between runs.

data_train, data_test, target_train, target_test = train_test_split(
    data,
    data_target,
    random_state=42,
    test_size=0.20,
)

In [None]:
# Baseline XGBoost classifier with default hyperparameters, scored by
# accuracy on the held-out test split.

model_xg_n = XGBClassifier()
model_xg_n.fit(data_train, target_train)
prediction_xg_n = model_xg_n.predict(data_test)
accuracy_xg_n = accuracy_score(y_true=target_test, y_pred=prediction_xg_n)
accuracy_xg_n

0.9487179487179487

In [None]:
# Exhaustive grid search over XGBoost hyperparameters: 16200 candidate
# combinations, each cross-validated. The original run took about 25 minutes.
# `best_score_` is the mean cross-validated accuracy measured on the training
# split, so it is not directly comparable with the single held-out test
# accuracy reported for the untuned model.

parameters = {
    'max_depth': [3, 6, 10, 20],
    'min_child_weight': list(range(1, 10)),
    'gamma': list(range(0, 10)),
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.1, 0.2, 0.3],
    # Spelled out rather than np.arange(0.5, 1.0, 0.1): a non-integer step
    # can produce inexact grid values.
    'subsample': [0.5, 0.6, 0.7, 0.8, 0.9],
}

model_xg = XGBClassifier()
# verbose=1 keeps a progress summary without flooding the cell output.
model_xg_tuned = GridSearchCV(model_xg, parameters, n_jobs=5,
                              scoring='accuracy', verbose=1)

model_xg_tuned.fit(data_train, target_train)
print(model_xg_tuned.best_score_)
print(model_xg_tuned.best_params_)

Fitting 5 folds for each of 16200 candidates, totalling 81000 fits


[Parallel(n_jobs=5)]: Using backend LokyBackend with 5 concurrent workers.
[Parallel(n_jobs=5)]: Done  74 tasks      | elapsed:    1.9s
[Parallel(n_jobs=5)]: Done 558 tasks      | elapsed:   10.4s
[Parallel(n_jobs=5)]: Done 1370 tasks      | elapsed:   24.9s
[Parallel(n_jobs=5)]: Done 2502 tasks      | elapsed:   45.7s
[Parallel(n_jobs=5)]: Done 3962 tasks      | elapsed:  1.2min
[Parallel(n_jobs=5)]: Done 5742 tasks      | elapsed:  1.7min
[Parallel(n_jobs=5)]: Done 7850 tasks      | elapsed:  2.2min
[Parallel(n_jobs=5)]: Done 10278 tasks      | elapsed:  3.0min
[Parallel(n_jobs=5)]: Done 13034 tasks      | elapsed:  3.8min
[Parallel(n_jobs=5)]: Done 16110 tasks      | elapsed:  4.6min
[Parallel(n_jobs=5)]: Done 19514 tasks      | elapsed:  5.7min
[Parallel(n_jobs=5)]: Done 23238 tasks      | elapsed:  6.8min
[Parallel(n_jobs=5)]: Done 27290 tasks      | elapsed:  8.0min
[Parallel(n_jobs=5)]: Done 31662 tasks      | elapsed:  9.4min
[Parallel(n_jobs=5)]: Done 36362 tasks      | elapse

0.9419354838709678
{'gamma': 0, 'learning_rate': 0.3, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 100, 'subsample': 0.6}


In [None]:
# Print the importance of each feature from the untuned XGBoost model.
# Each value is labelled with the matching column of the training frame,
# which is the positional index when the columns carry no names.

for feature_label, importance in zip(data_train.columns, model_xg_n.feature_importances_):
  print(feature_label, importance)

0 0.13686451
1 0.056720246
2 0.019343492
3 0.01056292
4 0.0062322696
5 0.03922324
6 0.005513336
7 0.0
8 0.04941801
9 0.0
10 0.022173584
11 0.0
12 0.013496209
13 0.0
14 0.017452193
15 0.015343522
16 0.008028957
17 0.0353341
18 0.007658815
19 0.022729656
20 0.020579726
21 0.03938291
22 0.26374558
23 0.21019669


In [None]:
# k-nearest-neighbours classifier with default settings, scored by accuracy
# on the held-out test split.

model_knn = KNeighborsClassifier().fit(data_train, target_train)
prediction_knn = model_knn.predict(data_test)
accuracy_knn = accuracy_score(y_true=target_test, y_pred=prediction_knn)
accuracy_knn

0.9487179487179487

In [None]:
# Multi-layer perceptron classifier, seeded for repeatability and capped at
# 300 training iterations, scored by accuracy on the held-out test split.

model_mlp = MLPClassifier(max_iter=300, random_state=1).fit(data_train, target_train)
prediction_mlp = model_mlp.predict(data_test)
accuracy_mlp = accuracy_score(y_true=target_test, y_pred=prediction_mlp)
accuracy_mlp

In [None]:
# Weighted combination of the three models' test-set predictions
# (XGBoost 0.3, KNN 0.3, MLP 0.4), rounded and then scored by accuracy.
# NOTE(review): the inputs are hard labels from predict(), not probabilities;
# presumably the labels are 0/1, in which case these weights amount to a
# majority vote -- confirm whether averaging predict_proba() was intended.

weighted_avg = 0.3 * prediction_xg_n + 0.3 * prediction_knn + 0.4 * prediction_mlp
ensemble_labels = weighted_avg.round()
accuracy_ensemble = accuracy_score(target_test, ensemble_labels)
accuracy_ensemble


0.9487179487179487