In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import zscore
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.utils import class_weight
import pickle
from sklearn import svm

In [2]:
parkinsons_data = pd.read_csv('Datasets/parkinsons.csv')
parkinsons_data

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,0.00007,0.00370,0.00554,0.01109,0.04374,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.400,148.650,113.819,0.00968,0.00008,0.00465,0.00696,0.01394,0.06134,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.335590,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.01050,0.00009,0.00544,0.00781,0.01633,0.05233,...,0.08270,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,0.00009,0.00502,0.00698,0.01505,0.05492,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,...,0.10470,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.332180,0.410335
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,phon_R01_S50_2,174.188,230.978,94.261,0.00459,0.00003,0.00263,0.00259,0.00790,0.04087,...,0.07008,0.02764,19.517,0,0.448439,0.657899,-6.538586,0.121952,2.657476,0.133050
191,phon_R01_S50_3,209.516,253.017,89.488,0.00564,0.00003,0.00331,0.00292,0.00994,0.02751,...,0.04812,0.01810,19.147,0,0.431674,0.683244,-6.195325,0.129303,2.784312,0.168895
192,phon_R01_S50_4,174.688,240.005,74.287,0.01360,0.00008,0.00624,0.00564,0.01873,0.02308,...,0.03804,0.10715,17.883,0,0.407567,0.655683,-6.787197,0.158453,2.679772,0.131728
193,phon_R01_S50_5,198.764,396.961,74.904,0.00740,0.00004,0.00370,0.00390,0.01109,0.02296,...,0.03794,0.07223,19.020,0,0.451221,0.643956,-6.744577,0.207454,2.138608,0.123306


In [3]:
parkinsons_data.isnull().sum()

name                0
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64

In [4]:
numerical_columns = ['MDVP:Fo(Hz)','MDVP:Fhi(Hz)','MDVP:Flo(Hz)','MDVP:Jitter(%)','MDVP:Jitter(Abs)','MDVP:RAP','MDVP:PPQ','Jitter:DDP','MDVP:Shimmer','MDVP:Shimmer(dB)','Shimmer:APQ3','Shimmer:APQ5','MDVP:APQ','Shimmer:DDA','NHR','HNR','RPDE','DFA','spread1','spread2','D2','PPE']
z_scores = parkinsons_data[numerical_columns].apply(zscore)
z_scores

Unnamed: 0,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),...,MDVP:APQ,Shimmer:DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE
0,-0.829300,-0.436165,-0.952037,0.334914,0.749759,0.132963,0.760800,0.131755,0.745985,0.739536,...,0.332985,0.607532,-0.067893,-0.193225,-0.807838,1.760814,0.801323,0.480477,-0.210531,0.868886
1,-0.770972,-0.530974,-0.057721,0.715418,1.037674,0.453892,1.276809,0.452684,1.681731,1.768464,...,1.159454,1.548254,-0.137843,-0.634508,-0.387524,1.837562,1.479853,1.311185,0.275077,1.803605
2,-0.909476,-0.723168,-0.109875,0.884991,1.325589,0.720770,1.585687,0.721813,1.202693,1.027636,...,0.699187,1.175323,-0.291633,-0.279760,-0.662075,1.942048,1.141445,1.017682,-0.103629,1.402661
3,-0.909622,-0.649092,-0.114229,0.775389,1.325589,0.578885,1.284076,0.577677,1.340396,1.207698,...,0.806859,1.340229,-0.280719,-0.281346,-0.613134,1.832380,1.440945,1.293840,0.062145,1.806954
4,-0.925657,-0.606245,-0.130608,1.368893,1.901418,1.095750,2.047187,1.096793,1.836448,1.552389,...,1.216839,1.899461,-0.178026,-0.506745,-0.783021,1.909364,1.780940,0.096195,-0.130026,2.267082
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,0.483467,0.371185,-0.508265,-0.337173,-0.401899,-0.228505,-0.311189,-0.227459,0.593395,0.631498,...,0.199282,0.759930,0.069278,-0.536647,-0.483208,-1.090704,-0.785527,-1.256837,0.721944,-0.817703
191,1.339202,0.612690,-0.618218,-0.120037,-0.401899,0.001213,-0.191272,0.002258,-0.116922,-0.099041,...,-0.313046,0.037108,-0.167360,-0.620463,-0.644916,-0.631503,-0.469859,-1.168475,1.054135,-0.418929
192,0.495578,0.470104,-0.968393,1.526058,1.037674,0.991026,0.797139,0.992069,-0.352453,-0.135053,...,-0.438466,-0.294679,2.041513,-0.906799,-0.877441,-1.130853,-1.014154,-0.818079,0.780338,-0.832410
193,1.078761,2.190044,-0.954180,0.243924,-0.113985,0.132963,0.164847,0.131755,-0.358834,-0.212223,...,-0.485202,-0.297970,1.175327,-0.649233,-0.456374,-1.343323,-0.974960,-0.229066,-0.637003,-0.926105


In [5]:
threshold = 3
outliers = (z_scores.abs() > threshold).any(axis=1)
outliers

0      False
1      False
2      False
3      False
4      False
       ...  
190    False
191    False
192    False
193    False
194    False
Length: 195, dtype: bool

In [11]:
data_without_outliers = parkinsons_data[~outliers]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data_without_outliers[numerical_columns])
scaled_data = pd.DataFrame(scaled_features, columns = [col + '_scaled' for col in numerical_columns])
categorical_columns = [col for col in parkinsons_data.columns if col not in numerical_columns]
final_data = pd.concat([data_without_outliers[categorical_columns].reset_index(drop =True), scaled_data], axis=1)
final_data

Unnamed: 0,name,status,MDVP:Fo(Hz)_scaled,MDVP:Fhi(Hz)_scaled,MDVP:Flo(Hz)_scaled,MDVP:Jitter(%)_scaled,MDVP:Jitter(Abs)_scaled,MDVP:RAP_scaled,MDVP:PPQ_scaled,Jitter:DDP_scaled,...,MDVP:APQ_scaled,Shimmer:DDA_scaled,NHR_scaled,HNR_scaled,RPDE_scaled,DFA_scaled,spread1_scaled,spread2_scaled,D2_scaled,PPE_scaled
0,phon_R01_S01_1,1,-0.828460,-0.435532,-0.940444,0.807620,1.294938,0.499457,1.419955,0.497499,...,0.606422,0.863060,0.193944,-0.326809,-0.759782,1.751873,0.973338,0.554993,-0.136010,1.071435
1,phon_R01_S01_2,1,-0.770944,-0.578076,-0.055103,1.431116,1.712884,1.050262,2.224676,1.048431,...,1.721648,2.002769,0.057021,-0.825146,-0.341078,1.828791,1.710893,1.410121,0.387179,2.112318
2,phon_R01_S01_3,1,-0.907521,-0.867037,-0.106734,1.708979,2.130830,1.508301,2.706375,1.510441,...,1.100570,1.550953,-0.244016,-0.424532,-0.614577,1.933509,1.343048,1.107991,-0.020835,1.665836
3,phon_R01_S01_4,1,-0.907665,-0.755664,-0.111044,1.529385,2.130830,1.264787,2.236010,1.263005,...,1.245861,1.750741,-0.222653,-0.426323,-0.565823,1.823598,1.668601,1.392266,0.157768,2.116047
4,phon_R01_S01_5,1,-0.923477,-0.691245,-0.127258,2.501903,2.966721,2.151874,3.426090,2.154161,...,1.799083,2.428266,-0.021638,-0.680864,-0.735059,1.900752,2.038173,0.159416,-0.049275,2.628437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176,phon_R01_S50_2,0,0.466039,0.778303,-0.501126,-0.293663,-0.376844,-0.120924,-0.251824,-0.119158,...,0.426006,1.047695,0.462449,-0.714632,-0.436395,-1.105962,-0.751554,-1.233391,0.868629,-0.806715
177,phon_R01_S50_3,0,1.309867,1.141402,-0.609975,0.062136,-0.376844,0.273337,-0.064811,0.275194,...,-0.265323,0.171977,-0.000759,-0.809285,-0.597483,-0.645744,-0.408425,-1.142431,1.226529,-0.362649
178,phon_R01_S50_4,0,0.477982,0.927026,-0.956636,2.759434,1.712884,1.972137,1.476626,1.974383,...,-0.434562,-0.229992,4.323005,-1.132641,-0.829117,-1.146200,-1.000069,-0.781735,0.931543,-0.823093
179,phon_R01_S50_5,0,1.053050,3.512924,-0.942565,0.658524,0.041102,0.499457,0.490560,0.497499,...,-0.497628,-0.233979,2.627488,-0.841774,-0.409664,-1.359140,-0.957466,-0.175407,-0.595487,-0.927429


In [13]:
X = final_data.drop(columns =['name','status'],axis=1)
Y=final_data['status']
X,Y

(     MDVP:Fo(Hz)_scaled  MDVP:Fhi(Hz)_scaled  MDVP:Flo(Hz)_scaled  \
 0             -0.828460            -0.435532            -0.940444   
 1             -0.770944            -0.578076            -0.055103   
 2             -0.907521            -0.867037            -0.106734   
 3             -0.907665            -0.755664            -0.111044   
 4             -0.923477            -0.691245            -0.127258   
 ..                  ...                  ...                  ...   
 176            0.466039             0.778303            -0.501126   
 177            1.309867             1.141402            -0.609975   
 178            0.477982             0.927026            -0.956636   
 179            1.053050             3.512924            -0.942565   
 180            1.423872             1.261013            -0.872576   
 
      MDVP:Jitter(%)_scaled  MDVP:Jitter(Abs)_scaled  MDVP:RAP_scaled  \
 0                 0.807620                 1.294938         0.499457   
 1          

In [15]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2, stratify=Y, random_state=2)

In [17]:
# print("Class Distribution Before SMOTE:", np.unique(Y_train, return_counts=True))
# smote = SMOTE(random_state=42)
# X_train_resampled, Y_train_resampled = smote.fit_resample(X_train, Y_train)
# print("Class Distribution After SMOTE:", np.unique(Y_train_resampled, return_counts=True))

In [19]:
scaler= StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
base_estimators =[
    ('rf',RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)),
    ('svm', SVC(kernel='rbf', C=100, gamma=0.001, probability=True, class_weight='balanced'))
]

model = StackingClassifier(estimators=base_estimators, final_estimator=LogisticRegression(class_weight='balanced', random_state=42))
model.fit(X_train_scaled, Y_train)
print("Train Accuracy:", model.score(X_train_scaled, Y_train))
print("Test Accuracy:", model.score(X_test_scaled, Y_test))
print("Classification Report:\n", classification_report(Y_test, model.predict(X_test_scaled)))

Train Accuracy: 1.0
Test Accuracy: 0.972972972972973
Classification Report:
               precision    recall  f1-score   support

           0       0.90      1.00      0.95         9
           1       1.00      0.96      0.98        28

    accuracy                           0.97        37
   macro avg       0.95      0.98      0.96        37
weighted avg       0.98      0.97      0.97        37



In [23]:
def predict_heart_disease(input_data):
    np_array = np.asarray(input_data)
    input_data_reshaped = np_array.reshape(1, -1)
    scaled_data = scaler.transform(input_data_reshaped)
    prediction = model.predict(scaled_data)
    probabilities = model.predict_proba(scaled_data)
    print(f"Prediction: {'Parkinson\'s Disease' if prediction[0] == 1 else 'No Parkinson\'s Disease'}")
    print(f"Prediction Probabilities: {probabilities}")

In [25]:
input_data1 = (160, 220, 85, 0.004, 0.00003, 0.002, 0.003, 0.009, 0.015, 0.15, 0.007, 0.010, 0.015, 0.020, 0.01, 25, 0.4, 0.65, -3.0, 0.3, 2.0, 0.1)
predict_heart_disease(input_data1)

Prediction: Parkinson's Disease
Prediction Probabilities: [[0.4803023 0.5196977]]




In [27]:
input_data2 = (200, 240, 90, 0.002, 0.00002, 0.0018, 0.002, 0.0055,0.015, 0.1, 0.004, 0.005, 0.0048, 0.012, 0.009, 22.5,0.4, 0.6, -4.5, 0.1, 2.1, 0.1)
predict_heart_disease(input_data2)

Prediction: Parkinson's Disease
Prediction Probabilities: [[0.48926556 0.51073444]]




In [29]:
filename = 'Models/parkinsons_model.sav'
pickle.dump(model, open(filename,'wb'))

In [31]:
loaded_model = pickle.load(open('Models/heart_disease_model.sav','rb'))