# Parkinsons Disease Detection

### Import all Dependencies

In [68]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

### Data Collection and Analysis

In [26]:
# load dataset
parkinsons_data = pd.read_csv("Data/parkinsons.csv")

In [154]:
# show dataset
parkinsons_data.head()

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


In [28]:
# dimention of dataset
parkinsons_data.shape

(195, 24)

In [29]:
# dataset information
parkinsons_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              195 non-null    object 
 1   MDVP:Fo(Hz)       195 non-null    float64
 2   MDVP:Fhi(Hz)      195 non-null    float64
 3   MDVP:Flo(Hz)      195 non-null    float64
 4   MDVP:Jitter(%)    195 non-null    float64
 5   MDVP:Jitter(Abs)  195 non-null    float64
 6   MDVP:RAP          195 non-null    float64
 7   MDVP:PPQ          195 non-null    float64
 8   Jitter:DDP        195 non-null    float64
 9   MDVP:Shimmer      195 non-null    float64
 10  MDVP:Shimmer(dB)  195 non-null    float64
 11  Shimmer:APQ3      195 non-null    float64
 12  Shimmer:APQ5      195 non-null    float64
 13  MDVP:APQ          195 non-null    float64
 14  Shimmer:DDA       195 non-null    float64
 15  NHR               195 non-null    float64
 16  HNR               195 non-null    float64
 1

In [30]:
# checking null values
parkinsons_data.isnull().sum()

name                0
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64

In [31]:
# statistical measures of dataset
parkinsons_data.describe()

Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
count,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,...,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0
mean,154.228641,197.104918,116.324631,0.00622,4.4e-05,0.003306,0.003446,0.00992,0.029709,0.282251,...,0.046993,0.024847,21.885974,0.753846,0.498536,0.718099,-5.684397,0.22651,2.381826,0.206552
std,41.390065,91.491548,43.521413,0.004848,3.5e-05,0.002968,0.002759,0.008903,0.018857,0.194877,...,0.030459,0.040418,4.425764,0.431878,0.103942,0.055336,1.090208,0.083406,0.382799,0.090119
min,88.333,102.145,65.476,0.00168,7e-06,0.00068,0.00092,0.00204,0.00954,0.085,...,0.01364,0.00065,8.441,0.0,0.25657,0.574282,-7.964984,0.006274,1.423287,0.044539
25%,117.572,134.8625,84.291,0.00346,2e-05,0.00166,0.00186,0.004985,0.016505,0.1485,...,0.024735,0.005925,19.198,1.0,0.421306,0.674758,-6.450096,0.174351,2.099125,0.137451
50%,148.79,175.829,104.315,0.00494,3e-05,0.0025,0.00269,0.00749,0.02297,0.221,...,0.03836,0.01166,22.085,1.0,0.495954,0.722254,-5.720868,0.218885,2.361532,0.194052
75%,182.769,224.2055,140.0185,0.007365,6e-05,0.003835,0.003955,0.011505,0.037885,0.35,...,0.060795,0.02564,25.0755,1.0,0.587562,0.761881,-5.046192,0.279234,2.636456,0.25298
max,260.105,592.03,239.17,0.03316,0.00026,0.02144,0.01958,0.06433,0.11908,1.302,...,0.16942,0.31482,33.047,1.0,0.685151,0.825288,-2.434031,0.450493,3.671155,0.527367


In [32]:
# check target distribution
parkinsons_data["status"].value_counts()

status
1    147
0     48
Name: count, dtype: int64

1 = Parkinsons / 0 = Healthy

In [46]:
# target grouping excluding the string values
parkinsons_data.drop(columns = ["name"]).groupby("status").mean()

Unnamed: 0_level_0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,MDVP:APQ,Shimmer:DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE
status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,181.937771,223.63675,145.207292,0.003866,2.3e-05,0.001925,0.002056,0.005776,0.017615,0.162958,...,0.013305,0.028511,0.011483,24.67875,0.442552,0.695716,-6.759264,0.160292,2.154491,0.123017
1,145.180762,188.441463,106.893558,0.006989,5.1e-05,0.003757,0.0039,0.011273,0.033658,0.321204,...,0.0276,0.053027,0.029211,20.974048,0.516816,0.725408,-5.33342,0.248133,2.456058,0.233828


### Separate Features and Target

In [48]:
X = parkinsons_data.drop(columns = ["name", "status"], axis = 1)
Y = parkinsons_data["status"]

In [49]:
print(X)
print(Y)

     MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0        119.992       157.302        74.997         0.00784   
1        122.400       148.650       113.819         0.00968   
2        116.682       131.111       111.555         0.01050   
3        116.676       137.871       111.366         0.00997   
4        116.014       141.781       110.655         0.01284   
..           ...           ...           ...             ...   
190      174.188       230.978        94.261         0.00459   
191      209.516       253.017        89.488         0.00564   
192      174.688       240.005        74.287         0.01360   
193      198.764       396.961        74.904         0.00740   
194      214.289       260.277        77.973         0.00567   

     MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  \
0             0.00007   0.00370   0.00554     0.01109       0.04374   
1             0.00008   0.00465   0.00696     0.01394       0.06134   
2             0.00

### Split Train and Test data

In [51]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 2)

In [52]:
print("Total data: ", X.shape)
print("Train data: ", X_train.shape)
print("Test data: ", X_test.shape)

Total data:  (195, 22)
Train data:  (156, 22)
Test data:  (39, 22)


### Data Standardization

In [53]:
scaler = StandardScaler()

In [55]:
# understand nature of data analysing mean and std
scaler.fit(X_train)

In [58]:
# convert all values in same range
scaler.transform(X_train)

array([[ 0.63239631, -0.02731081, -0.87985049, ..., -0.97586547,
        -0.55160318,  0.07769494],
       [-1.05512719, -0.83337041, -0.9284778 , ...,  0.3981808 ,
        -0.61014073,  0.39291782],
       [ 0.02996187, -0.29531068, -1.12211107, ..., -0.43937044,
        -0.62849605, -0.50948408],
       ...,
       [-0.9096785 , -0.6637302 , -0.160638  , ...,  1.22001022,
        -0.47404629, -0.2159482 ],
       [-0.35977689,  0.19731822, -0.79063679, ..., -0.17896029,
        -0.47272835,  0.28181221],
       [ 1.01957066,  0.19922317, -0.61914972, ..., -0.716232  ,
         1.23632066, -0.05829386]])

In [59]:
scaler.transform(X_test)

array([[-1.70008583e+00, -9.67968410e-01, -7.70130215e-01,
        -2.75000683e-01,  4.16156683e-01, -2.92615113e-01,
        -9.70869783e-02, -2.91621655e-01, -4.94706656e-01,
        -4.90058396e-01, -5.32488171e-01, -4.26848854e-01,
        -3.60251422e-01, -5.32484688e-01, -3.57189713e-01,
        -1.08840337e-01,  1.06963705e+00,  1.05628304e+00,
         3.72180199e-01,  1.94886208e+00,  3.66935071e-02,
         4.44314482e-01],
       [-1.39044095e+00, -9.29681132e-01, -7.37045677e-01,
         7.42068829e-01,  1.50451280e+00,  8.54349819e-01,
         7.33639862e-01,  8.53234751e-01, -3.12538562e-03,
         3.01660094e-01,  1.16511011e-01, -7.67595149e-02,
        -2.23967413e-01,  1.16829276e-01, -1.19644974e-01,
        -5.22790834e-01,  9.12650090e-01,  1.31721995e+00,
         6.70118138e-01,  4.74318608e-01,  1.42454868e-02,
         7.46859799e-01],
       [-1.35302065e+00, -6.29175292e-01, -7.29027225e-01,
         4.92094897e-01,  1.23242377e+00,  4.52288742e-01,
    

### Model Training

In [73]:
# load model
model = SVC(kernel = "linear")

In [74]:
# training model with the svm model
model.fit(X_train, Y_train)

### Model Evaluation

In [75]:
# accuracy on training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)
print("Accuracy on training data: ", training_data_accuracy)

Accuracy on training data:  0.8717948717948718


In [76]:
# accuracy on test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
print("Accuracy on training data: ", test_data_accuracy)

Accuracy on training data:  0.8717948717948718


### Predictive System

In [160]:
# see those values that has doesnt have perkinsons
parkinsons_data[parkinsons_data["status"] == 0].head(20)

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
30,phon_R01_S07_1,197.076,206.896,192.055,0.00289,1e-05,0.00166,0.00168,0.00498,0.01098,...,0.01689,0.00339,26.775,0,0.422229,0.741367,-7.3483,0.177551,1.743867,0.085569
31,phon_R01_S07_2,199.228,209.512,192.091,0.00241,1e-05,0.00134,0.00138,0.00402,0.01015,...,0.01513,0.00167,30.94,0,0.432439,0.742055,-7.682587,0.173319,2.103106,0.068501
32,phon_R01_S07_3,198.383,215.203,193.104,0.00212,1e-05,0.00113,0.00135,0.00339,0.01263,...,0.01919,0.00119,30.775,0,0.465946,0.738703,-7.067931,0.175181,1.512275,0.09632
33,phon_R01_S07_4,202.266,211.604,197.079,0.0018,9e-06,0.00093,0.00107,0.00278,0.00954,...,0.01407,0.00072,32.684,0,0.368535,0.742133,-7.695734,0.17854,1.544609,0.056141
34,phon_R01_S07_5,203.184,211.526,196.16,0.00178,9e-06,0.00094,0.00106,0.00283,0.00958,...,0.01403,0.00065,33.047,0,0.340068,0.741899,-7.964984,0.163519,1.423287,0.044539
35,phon_R01_S07_6,201.464,210.565,195.708,0.00198,1e-05,0.00105,0.00115,0.00314,0.01194,...,0.01758,0.00135,31.732,0,0.344252,0.742737,-7.777685,0.170183,2.447064,0.05761
42,phon_R01_S10_1,237.226,247.326,225.227,0.00298,1e-05,0.00169,0.00182,0.00507,0.01752,...,0.03104,0.0074,22.736,0,0.305062,0.654172,-7.31055,0.098648,2.416838,0.095032
43,phon_R01_S10_2,241.404,248.834,232.483,0.00281,1e-05,0.00157,0.00173,0.0047,0.0176,...,0.03017,0.00675,23.145,0,0.457702,0.634267,-6.793547,0.158266,2.256699,0.117399
44,phon_R01_S10_3,243.439,250.912,232.435,0.0021,9e-06,0.00109,0.00137,0.00327,0.01419,...,0.0233,0.00454,25.368,0,0.438296,0.635285,-7.057869,0.091608,2.330716,0.09147
45,phon_R01_S10_4,242.852,255.034,227.911,0.00225,9e-06,0.00117,0.00139,0.0035,0.01494,...,0.02542,0.00476,25.032,0,0.431285,0.638928,-6.99582,0.102083,2.3658,0.102706


In [182]:
input_data = parkinsons_data.iloc[42].drop(["status", "name"]).values
input_data

array([237.226, 247.326, 225.227, 0.00298, 1e-05, 0.00169, 0.00182,
       0.00507, 0.01752, 0.164, 0.01035, 0.01024, 0.01133, 0.03104,
       0.0074, 22.736, 0.305062, 0.654172, -7.31055, 0.098648, 2.416838,
       0.095032], dtype=object)

In [175]:
type(input_data)

numpy.ndarray

In [184]:
input_data = (237.226, 247.326, 225.227, 0.00298, 1e-05, 0.00169, 0.00182,
       0.00507, 0.01752, 0.164, 0.01035, 0.01024, 0.01133, 0.03104,
       0.0074, 22.736, 0.305062, 0.654172, -7.31055, 0.098648, 2.416838,
       0.095032)

# changing input data to a numpy array
input_data_as_numpy_arr = np.asarray(input_data)

# reshape the numpy array as we're predicting for one instance
input_data_reshaped = input_data_as_numpy_arr.reshape(1,-1)


# standardise input predictions are comming wrong
# standardise input
#input_data_std = scaler.transform(input_data_reshaped)

prediction = model.predict(input_data_reshaped)
print(prediction)

if(prediction[0] == 0):
    print("The person does not have Perkinson's Disease.")
else:
    print("The person has Perkinson's Disease.")

[0]
The person does not have Perkinson's Disease.


