## Data Preprocessing

### Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

### Importing the dataset

In [2]:
data = pd.read_csv(r'Parkinsons Train Data_3.csv', header=0)

In [3]:
data.head()

Unnamed: 0,Jitter(local),"Jitter(local, absolute)",Jitter (rap),Jitter (ppq5),Jitter (ddp),Shimmer (local),"Shimmer (local, db)",Shimmer (apq3),Shimmer (apq5),Shimmer (apq11),...,Minimum pitch,Maximum pitch,Number of pulses,Number of periods,Mean period,Standard deviation of period,Fraction of locally unvoiced frames,Number of voice breaks,Degree of voice breaks,status
0,1.488,9e-05,0.9,0.794,2.699,8.334,0.779,4.517,4.609,6.802,...,142.229,187.576,160,159,0.006065,0.000416,0.0,0,0.0,1
1,0.728,3.8e-05,0.353,0.376,1.059,5.864,0.642,2.058,3.18,7.194,...,159.515,234.505,170,169,0.005181,0.000403,2.247,0,0.0,1
2,1.22,7.4e-05,0.732,0.67,2.196,8.719,0.875,4.347,5.166,7.548,...,146.445,211.442,1431,1427,0.006071,0.000474,10.656,1,0.178,1
3,2.502,0.000123,1.156,1.634,3.469,13.513,1.273,5.263,8.771,16.779,...,182.713,220.23,94,92,0.00491,0.00032,0.0,0,0.0,1
4,3.509,0.000167,1.715,1.539,5.145,9.112,1.04,3.102,4.927,12.823,...,182.821,225.162,117,114,0.004757,0.00038,18.182,1,13.318,1


In [4]:
data.shape

(1036, 27)

In [5]:
data.index

RangeIndex(start=0, stop=1036, step=1)

In [6]:
data.columns

Index(['Jitter(local)', 'Jitter(local, absolute)', 'Jitter (rap)',
       'Jitter (ppq5)', 'Jitter (ddp)', 'Shimmer (local)',
       'Shimmer (local, db)', 'Shimmer (apq3)', 'Shimmer (apq5)',
       'Shimmer (apq11)', 'Shimmer (dda)', 'AC', 'NDH', 'HTM', 'Median Pitch',
       'Mean Pitch', 'Standard deviation', 'Minimum pitch', 'Maximum pitch',
       'Number of pulses', 'Number of periods', 'Mean period',
       'Standard deviation of period', 'Fraction of locally unvoiced frames',
       'Number of voice breaks', 'Degree of voice breaks', 'status'],
      dtype='object')

### Checking for null values (missing values)

In [7]:
data.isnull().sum()

Jitter(local)                          0
Jitter(local, absolute)                0
Jitter (rap)                           0
Jitter (ppq5)                          0
Jitter (ddp)                           0
Shimmer (local)                        0
Shimmer (local, db)                    0
Shimmer (apq3)                         0
Shimmer (apq5)                         0
Shimmer (apq11)                        0
Shimmer (dda)                          0
AC                                     0
NDH                                    0
HTM                                    0
Median Pitch                           0
Mean Pitch                             0
Standard deviation                     0
Minimum pitch                          0
Maximum pitch                          0
Number of pulses                       0
Number of periods                      0
Mean period                            0
Standard deviation of period           0
Fraction of locally unvoiced frames    0
Number of voice 

### Selecting independent and dependent variables

In [8]:
X = data.loc[:, data.columns != 'status'].values
y = data.loc[:, 'status'].values

### Spliting the dataset for training and testing

In [27]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 12)

In [28]:
X_train

array([[1.92400e+00, 1.67343e-04, 7.79000e-01, ..., 5.47900e+00,
        3.00000e+00, 6.70400e+00],
       [3.50900e+00, 1.66927e-04, 1.71500e+00, ..., 1.81820e+01,
        1.00000e+00, 1.33180e+01],
       [2.55500e+00, 1.89761e-04, 1.09400e+00, ..., 2.42420e+01,
        2.00000e+00, 1.27050e+01],
       ...,
       [1.03700e+00, 7.75070e-05, 5.27000e-01, ..., 0.00000e+00,
        0.00000e+00, 0.00000e+00],
       [1.21900e+00, 8.18590e-05, 7.41000e-01, ..., 0.00000e+00,
        0.00000e+00, 0.00000e+00],
       [4.26800e+00, 3.00057e-04, 2.02600e+00, ..., 2.50000e+01,
        6.00000e+00, 2.30260e+01]])

In [29]:
X_test

array([[1.28900e+00, 7.31470e-05, 6.23000e-01, ..., 3.50650e+01,
        1.00000e+00, 1.87490e+01],
       [3.32600e+00, 1.80944e-04, 1.66600e+00, ..., 4.16670e+01,
        1.00000e+00, 4.32260e+01],
       [1.74400e+00, 1.22739e-04, 4.09000e-01, ..., 4.00000e+01,
        1.00000e+00, 2.73550e+01],
       ...,
       [3.05000e+00, 1.74775e-04, 1.37800e+00, ..., 1.30430e+01,
        1.00000e+00, 6.27000e+00],
       [5.29100e+00, 2.82661e-04, 2.97200e+00, ..., 3.25000e+01,
        2.00000e+00, 4.14720e+01],
       [4.64000e-01, 2.05700e-05, 2.35000e-01, ..., 0.00000e+00,
        0.00000e+00, 0.00000e+00]])

In [30]:
y_train

array([0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1,

In [31]:
y_test

array([0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
       1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1], dtype=int64)

### Limiting the range of variables -> to compare them commonly

In [32]:
from sklearn.preprocessing import StandardScaler
standard_X = StandardScaler()

In [33]:
X_train = standard_X.fit_transform(X_train)
X_test = standard_X.fit_transform(X_test)

In [34]:
X_train

array([[-0.43050319, -0.0461431 , -0.48523774, ..., -1.05543246,
         1.16273547, -0.37858984],
       [ 0.48503326, -0.04992807,  0.49053754, ..., -0.45270219,
        -0.08770807,  0.06008697],
       [-0.06602148,  0.15782669, -0.15685183, ..., -0.16516811,
         0.5375137 ,  0.01942944],
       ...,
       [-0.94285702, -0.86351443, -0.74794647, ..., -1.31539933,
        -0.71292984, -0.82323595],
       [-0.83772918, -0.82391784, -0.52485255, ..., -1.31539933,
        -0.71292984, -0.82323595],
       [ 0.92345103,  1.16135299,  0.81475347, ..., -0.12920262,
         3.03840078,  0.70397486]])

In [35]:
X_test

array([[-0.77086647, -0.96730854, -0.61472747, ...,  0.35626742,
        -0.07648239,  0.41256772],
       [ 0.32312003,  0.16456519,  0.38887836, ...,  0.67609802,
        -0.07648239,  1.99668928],
       [-0.52650523, -0.44659013, -0.82064467, ...,  0.59534104,
        -0.07648239,  0.96953754],
       ...,
       [ 0.17489211,  0.09979039,  0.11175613, ..., -0.71057741,
        -0.07648239, -0.39505792],
       [ 1.37843838,  1.23259863,  1.64555065, ...,  0.23200727,
         0.52378846,  1.88317254],
       [-1.21393906, -1.51936959, -0.98807268, ..., -1.34243906,
        -0.67675323, -0.80084467]])

## Neural Networks

In [36]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(300,400,300),max_iter=1000)
mlp.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(300, 400, 300), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=1000,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [37]:
y_pred = mlp.predict(X_test)

In [38]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
print("Confusion Matrix")
print(confusion_matrix(y_test,y_pred))
print()

print("Classification report")
print(classification_report(y_test,y_pred))
print()

print("Accuracy Score")
print(accuracy_score(y_test, y_pred))

Confusion Matrix
[[101  27]
 [ 49  82]]

Classification report
              precision    recall  f1-score   support

           0       0.67      0.79      0.73       128
           1       0.75      0.63      0.68       131

    accuracy                           0.71       259
   macro avg       0.71      0.71      0.70       259
weighted avg       0.71      0.71      0.70       259


Accuracy Score
0.7065637065637066


## Saving the model

In [39]:
import joblib
filename = 'Neural Network model.sav'
joblib.dump(mlp, filename)

['Neural Network model.sav']

In [22]:
from sklearn.externals import joblib  
joblib.dump(mlp, "trained-model-nn.pkl")

['trained-model-nn.pkl']

In [40]:
import pandas as pd
pred_data = pd.read_csv(r'Parkinsons Test Data_3.csv', header=0)
pred_data=pred_data.dropna()
print(pred_data.shape)
print(list(pred_data.columns))
pred_data.head()

(189, 26)
['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25']


Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25
0,0.135,7e-06,0.067,0.078,0.202,2.033,0.178,1.074,1.336,1.576,...,0.859,184.502,187.88,183,182,0.005368,2.5e-05,0.0,0,0.0
1,0.143,7e-06,0.073,0.081,0.219,1.236,0.107,0.612,0.904,0.894,...,0.755,198.665,202.214,194,193,0.004988,2e-05,0.0,0,0.0
2,0.162,8e-06,0.087,0.089,0.26,1.338,0.117,0.63,0.948,1.017,...,1.86,197.22,206.06,198,197,0.00494,4.6e-05,0.0,0,0.0
3,0.14,7e-06,0.075,0.089,0.224,1.086,0.094,0.556,0.747,0.823,...,0.931,202.324,206.182,200,199,0.0049,2.3e-05,0.0,0,0.0
4,0.15,7e-06,0.08,0.097,0.24,1.049,0.091,0.533,0.698,0.777,...,0.88,205.407,209.927,204,203,0.00482,2.2e-05,0.0,0,0.0


In [41]:
pred_X = pred_data.loc[:, :].values
pred_X

array([[1.35000e-01, 7.30000e-06, 6.70000e-02, ..., 0.00000e+00,
        0.00000e+00, 0.00000e+00],
       [1.43000e-01, 7.10000e-06, 7.30000e-02, ..., 0.00000e+00,
        0.00000e+00, 0.00000e+00],
       [1.62000e-01, 8.00000e-06, 8.70000e-02, ..., 0.00000e+00,
        0.00000e+00, 0.00000e+00],
       ...,
       [2.81600e+00, 1.53056e-04, 1.09000e+00, ..., 4.52380e+01,
        2.00000e+00, 5.31940e+01],
       [1.14500e+00, 5.30630e-05, 7.03000e-01, ..., 6.90480e+01,
        1.00000e+00, 5.74790e+01],
       [1.59200e+00, 8.91590e-05, 6.53000e-01, ..., 5.52630e+01,
        0.00000e+00, 0.00000e+00]])

In [42]:
pred_X = standard_X.fit_transform(pred_X)

In [43]:
pred_X

array([[-0.70099042, -0.7168957 , -0.67816489, ..., -0.31940471,
        -0.29960235, -0.24794817],
       [-0.69188618, -0.72016035, -0.66496499, ..., -0.31940471,
        -0.29960235, -0.24794817],
       [-0.67026361, -0.70546945, -0.63416524, ..., -0.31940471,
        -0.29960235, -0.24794817],
       ...,
       [ 2.35006831,  1.66231185,  1.57241691, ...,  4.31160964,
         2.05976619,  7.94301349],
       [ 0.44841999,  0.03010391,  0.7210238 , ...,  6.74903949,
         0.88008192,  8.60282983],
       [ 0.95711945,  0.61930693,  0.61102469, ...,  5.33786895,
        -0.29960235, -0.24794817]])

In [44]:
pred_y = mlp.predict(pred_X)
res = pd.DataFrame(pred_y)
res.columns = ["status"]
res.to_csv("NN - predictions - all 2.csv")