### Importing Libraries 

In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn.preprocessing import StandardScaler

### Extracting the zip file (unzipping)

In [None]:
import zipfile

# Location of the downloaded archive; `path` is reused by the listing cell below.
path = "datasets/archive.zip"

# Unpack every member of the archive into the datasets/ folder.
with zipfile.ZipFile(path, mode='r') as archive:
    archive.extractall(path="datasets")
    print("file extracted")


### checking the name list

In [8]:
import zipfile

# Show the member file names stored in the archive referenced by `path`.
with zipfile.ZipFile(path, mode="r") as archive:
    member_names = archive.namelist()
    print(member_names)


['parkinsons.data']


#### As we can see, the dataset has a .data extension rather than .csv, so we rename it to .csv (the file content is already comma-separated, so only the extension changes)

In [9]:
import os

# Extracted file name and the name the rest of the notebook reads from.
old = "datasets/parkinsons.data"
new = "datasets/parkinsons.csv"

# Rename only while the extracted .data file is still present, so re-running
# this cell (or Restart & Run All after a first run) is a no-op instead of
# raising FileNotFoundError. os.replace overwrites an existing target on every
# platform, whereas os.rename fails on Windows when `new` already exists.
if os.path.exists(old):
    os.replace(old, new)


#### Loading the dataset into a pandas DataFrame

In [10]:
# Read the renamed CSV file into a DataFrame; every later cell works on `data`.
data = pd.read_csv(filepath_or_buffer="datasets/parkinsons.csv")

#### Displaying the first five rows of the DataFrame

In [11]:
# Preview the first five rows to check the columns and value formats.
data.head(n=5)

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


### displaying the number of rows and columns

In [13]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(195, 24)

### Viewing the summary information of the DataFrame

In [14]:
# Print each column's name, non-null count and dtype.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              195 non-null    object 
 1   MDVP:Fo(Hz)       195 non-null    float64
 2   MDVP:Fhi(Hz)      195 non-null    float64
 3   MDVP:Flo(Hz)      195 non-null    float64
 4   MDVP:Jitter(%)    195 non-null    float64
 5   MDVP:Jitter(Abs)  195 non-null    float64
 6   MDVP:RAP          195 non-null    float64
 7   MDVP:PPQ          195 non-null    float64
 8   Jitter:DDP        195 non-null    float64
 9   MDVP:Shimmer      195 non-null    float64
 10  MDVP:Shimmer(dB)  195 non-null    float64
 11  Shimmer:APQ3      195 non-null    float64
 12  Shimmer:APQ5      195 non-null    float64
 13  MDVP:APQ          195 non-null    float64
 14  Shimmer:DDA       195 non-null    float64
 15  NHR               195 non-null    float64
 16  HNR               195 non-null    float64
 1

### to check if there is any null value in each column

In [15]:
# Count missing values per column (isna is the same check as isnull).
data.isna().sum()

name                0
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64

### statistical description of the numerical features

In [16]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
count,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,...,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0,195.0
mean,154.228641,197.104918,116.324631,0.00622,4.4e-05,0.003306,0.003446,0.00992,0.029709,0.282251,...,0.046993,0.024847,21.885974,0.753846,0.498536,0.718099,-5.684397,0.22651,2.381826,0.206552
std,41.390065,91.491548,43.521413,0.004848,3.5e-05,0.002968,0.002759,0.008903,0.018857,0.194877,...,0.030459,0.040418,4.425764,0.431878,0.103942,0.055336,1.090208,0.083406,0.382799,0.090119
min,88.333,102.145,65.476,0.00168,7e-06,0.00068,0.00092,0.00204,0.00954,0.085,...,0.01364,0.00065,8.441,0.0,0.25657,0.574282,-7.964984,0.006274,1.423287,0.044539
25%,117.572,134.8625,84.291,0.00346,2e-05,0.00166,0.00186,0.004985,0.016505,0.1485,...,0.024735,0.005925,19.198,1.0,0.421306,0.674758,-6.450096,0.174351,2.099125,0.137451
50%,148.79,175.829,104.315,0.00494,3e-05,0.0025,0.00269,0.00749,0.02297,0.221,...,0.03836,0.01166,22.085,1.0,0.495954,0.722254,-5.720868,0.218885,2.361532,0.194052
75%,182.769,224.2055,140.0185,0.007365,6e-05,0.003835,0.003955,0.011505,0.037885,0.35,...,0.060795,0.02564,25.0755,1.0,0.587562,0.761881,-5.046192,0.279234,2.636456,0.25298
max,260.105,592.03,239.17,0.03316,0.00026,0.02144,0.01958,0.06433,0.11908,1.302,...,0.16942,0.31482,33.047,1.0,0.685151,0.825288,-2.434031,0.450493,3.671155,0.527367


#### Checking how many samples there are in each class of the target (`status`)

In [17]:
# Number of rows per target class; shows how balanced the two classes are.
data.status.value_counts()

status
1    147
0     48
Name: count, dtype: int64

#### 0 => healthy
#### 1 => Parkinson's

In [19]:
# Average of every numeric feature within each status class;
# numeric_only=True leaves out the string-valued 'name' column.
data.groupby(by='status').mean(numeric_only=True)


Unnamed: 0_level_0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,MDVP:APQ,Shimmer:DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE
status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,181.937771,223.63675,145.207292,0.003866,2.3e-05,0.001925,0.002056,0.005776,0.017615,0.162958,...,0.013305,0.028511,0.011483,24.67875,0.442552,0.695716,-6.759264,0.160292,2.154491,0.123017
1,145.180762,188.441463,106.893558,0.006989,5.1e-05,0.003757,0.0039,0.011273,0.033658,0.321204,...,0.0276,0.053027,0.029211,20.974048,0.516816,0.725408,-5.33342,0.248133,2.456058,0.233828


### Data preprocessing

In [31]:
# Separate the input features from the label.
# 'name' holds string identifiers (e.g. phon_R01_S01_1) and 'status' is the
# target, so both are removed from the feature frame.
X = data.drop(columns=['name', 'status'])
y = data['status']

In [32]:
# Check the feature matrix: its dimensions first, then a two-row preview.
print(X.shape)
X.head(n=2)

(195, 22)


Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,MDVP:APQ,Shimmer:DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE
0,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,0.426,...,0.02971,0.06545,0.02211,21.033,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,0.626,...,0.04368,0.09403,0.01929,19.085,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674


In [33]:
# Display the target Series (the `status` column of `data`).
y

0      1
1      1
2      1
3      1
4      1
      ..
190    0
191    0
192    0
193    0
194    0
Name: status, Length: 195, dtype: int64

### Splitting the data into training and testing sets

In [34]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [35]:
# Print the shapes of the four split parts on one line, in split order.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(156, 22) (39, 22) (156,) (39,)


### Data standardization

In [37]:
# Create the scaler; it is fitted on the training features in the next cell.
scaler = StandardScaler()

In [39]:
# Fit the scaler on the training rows only, then apply those same fitted
# statistics to the test rows so the test set does not influence the scaling.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [41]:
# Look at the first four scaled training rows (all columns).
X_train_scaled[:4, :]

array([[-1.06344643, -0.9157626 , -0.33580464, -0.55736615, -0.42216914,
        -0.54572802, -0.53371905, -0.5457972 , -0.80761002, -0.78182843,
        -0.81778696, -0.72094174, -0.68559265, -0.81810818, -0.50697921,
         1.05569199, -1.27819709,  1.0411946 , -0.37859861, -0.42503608,
        -1.26010198, -0.39131778],
       [ 0.58342091,  0.12149404, -0.93052313,  0.19156775, -0.150364  ,
         0.22668457,  0.20738945,  0.22659085,  0.84954945,  0.67225948,
         0.9755243 ,  0.92109609,  0.53691262,  0.97553705,  0.63813485,
        -1.68978323,  0.54036702,  0.19346526,  0.36289997,  1.44380997,
         1.76000577,  0.35952829],
       [-1.07057001, -0.75769779, -0.20592359, -0.29303654, -0.150364  ,
        -0.31525007, -0.14632142, -0.31636473,  0.51571515,  0.40262729,
         0.55630868,  0.58032569,  0.43032115,  0.55662649, -0.03007556,
        -0.86247482,  1.48100262, -0.22335502,  0.32128826,  1.18302374,
         0.59341426,  0.20707672],
       [ 2.08704449

### Model training

In [47]:
# Support vector classifier with a linear kernel and a fixed seed.
model = svm.SVC(
    kernel='linear',
    random_state=42,
)

In [48]:
# Train the classifier on the scaled training features and their labels.
model.fit(X_train_scaled,y_train)

0,1,2
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


### model evaluation
##### on training

In [50]:
# Accuracy on the rows the model was trained on.
prediction_train = model.predict(X_train_scaled)
accuracy = accuracy_score(y_true=y_train, y_pred=prediction_train)
print(accuracy)

0.9038461538461539


### model evaluation
##### on testing

In [52]:
# Accuracy on the held-out test rows, which the model did not see during fitting.
prediction_test = model.predict(X_test_scaled)
accuracy_t = accuracy_score(y_true=y_test, y_pred=prediction_test)
print(accuracy_t)

0.9487179487179487


### Making a predictive system

In [59]:
# Accept one new sample: 22 feature values in the same column order as X.
# It is stored under its own name so the `data` DataFrame loaded earlier is
# not overwritten and earlier cells can still be re-run.
input_data = (117.27400,129.91600,110.40200,0.00752,0.00006,0.00299,0.00469,0.00898,0.02293,0.22100,0.01189,0.01459,0.01948,0.03568,0.00681,22.81700,0.530529,0.817756,-4.608260,0.290024,2.021591,0.314464)
# Convert to a NumPy array.
data_np = np.asarray(input_data)
# Reshape to (1, n_features): a single sample with all of its features.
data_np_reshaped = data_np.reshape(1, -1)
# Label the columns like the training frame. The scaler was fitted on a
# DataFrame, so passing a bare array would emit a feature-name warning; this
# also raises immediately if the number of values does not match X.
input_frame = pd.DataFrame(data_np_reshaped, columns=X.columns)
# Scale with the statistics learned from the training set.
data_scaled = scaler.transform(input_frame)


## making prediction
prediction = model.predict(data_scaled)
# predict returns one label per input row; [0] is the label of our only row.
if prediction[0] == 0:
    print("The person is healthy")
else:
    print("The person has parkison")

The person has parkison


