# Parkinson's Disease Detection

## 1.Read the data

In [8]:
import numpy as np 
import pandas as pd
import os
df = pd.read_csv('parkinsons.data')
df.tail

# describe data
df.describe()

# info of the data
df.info()

# shape of the dataset
df.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              195 non-null    object 
 1   MDVP:Fo(Hz)       195 non-null    float64
 2   MDVP:Fhi(Hz)      195 non-null    float64
 3   MDVP:Flo(Hz)      195 non-null    float64
 4   MDVP:Jitter(%)    195 non-null    float64
 5   MDVP:Jitter(Abs)  195 non-null    float64
 6   MDVP:RAP          195 non-null    float64
 7   MDVP:PPQ          195 non-null    float64
 8   Jitter:DDP        195 non-null    float64
 9   MDVP:Shimmer      195 non-null    float64
 10  MDVP:Shimmer(dB)  195 non-null    float64
 11  Shimmer:APQ3      195 non-null    float64
 12  Shimmer:APQ5      195 non-null    float64
 13  MDVP:APQ          195 non-null    float64
 14  Shimmer:DDA       195 non-null    float64
 15  NHR               195 non-null    float64
 16  HNR               195 non-null    float64
 1

(195, 24)

## 2. Preprocessing the data

In [10]:
# get the all columns except "status"
features = df.loc[:, df.columns != 'status'].values[:, 1:]

# get status values in array format
labels = df.loc[:, 'status'].values

# count values equal to 1 and 0
df['status'].value_counts()

1    147
0     48
Name: status, dtype: int64

### 2.1 Scale the data

In [11]:
from sklearn.preprocessing import MinMaxScaler

#  Initialize MinMax Scaler classs for -1 to 1
scaler = MinMaxScaler((-1, 1))

# fit_transform() method fits to the data and then transforms it.
X = scaler.fit_transform(features)
y = labels

#  Show X and y  here
print(X, y)



[[-0.63138346 -0.77481654 -0.89037042 ...  0.17153026 -0.21867743
  -0.0053808 ]
 [-0.6033463  -0.81013911 -0.4433544  ...  0.48267409 -0.05370956
   0.34265204]
 [-0.66992292 -0.88174367 -0.46942324 ...  0.37274182 -0.18236124
   0.19336492]
 ...
 [ 0.00546073 -0.43717403 -0.89854572 ... -0.31484696  0.11793486
  -0.63884033]
 [ 0.28578581  0.20361309 -0.89144127 ... -0.09423055 -0.36355605
  -0.67372646]
 [ 0.46654868 -0.35441175 -0.85610326 ... -0.16981039  0.00734563
  -0.5690805 ]] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1
 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0
 0 0 0 0 0 0 0 0 0 0]


### 2.2 Split the data to train and test sets

In [32]:
from sklearn.model_selection import train_test_split

# split the dataset into training and testing sets
x_train, x_test, y_train, y_test=train_test_split(X, y, test_size=0.15)

## 3. Build the Classifier models

### 3.1 Xgboost classifier

### 3.1.1 Train the model

In [22]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

model_xgboost = XGBClassifier()
model_xgboost.fit(x_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, ...)

### 3.1.2 Model prediction

In [23]:
y_prediction_xgboost = model_xgboost.predict(x_test)

print("Accuracy Score is", accuracy_score(y_test, y_prediction_xgboost) * 100)

Accuracy Score is 96.66666666666667


### 3.1.3 Confusion Matrix

In [25]:
from sklearn.metrics import confusion_matrix

pd.DataFrame(

    confusion_matrix(y_test, y_prediction_xgboost),

    columns=['Predicted Healthy', 'Predicted Parkinsons'],

    index=['True Healthy', 'True Parkinsons']

)

Unnamed: 0,Predicted Healthy,Predicted Parkinsons
True Healthy,8,1
True Parkinsons,0,21


### 3.2 SVM classifier

### 3.2.1 Train the model

In [33]:
from sklearn.svm import SVC
model_SVM = SVC()
model_SVM.fit(x_train,y_train)

SVC()

### 3.2.2 Model prediction

In [34]:
y_prediction_SVM = model_SVM.predict(x_test)

print("Accuracy Score is", accuracy_score(y_test, y_prediction_SVM) * 100)

Accuracy Score is 86.66666666666667


### 3.2.3 Confusion Matrix

In [35]:
pd.DataFrame(

    confusion_matrix(y_test, y_prediction_SVM),

    columns=['Predicted Healthy', 'Predicted Parkinsons'],

    index=['True Healthy', 'True Parkinsons']

)

Unnamed: 0,Predicted Healthy,Predicted Parkinsons
True Healthy,7,4
True Parkinsons,0,19


### 3.3 K-Nearest Neighbours classifier

### 3.3.1 Train the model

In [36]:
from sklearn.neighbors import KNeighborsClassifier

model_KNN = KNeighborsClassifier(n_neighbors=2)
model_KNN.fit(x_train,y_train)


#from sklearn.decomposition import PCA
#pca = PCA(n_components = 2)
#x_train = pca.fit_transform(x_train)
#x_test = pca.transform(x_test)
#variance = pca.explained_variance_ratio_
#classifi = KNeighborsClassifier(n_neighbors = 8,p=2,metric ='minkowski')


KNeighborsClassifier(n_neighbors=2)

### 3.3.2 Model prediction

In [37]:
y_prediction_KNN = model_KNN.predict(x_test)

print("Accuracy Score is", accuracy_score(y_test, y_prediction_KNN) * 100)

Accuracy Score is 96.66666666666667
