In [52]:
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

In [3]:
parkinsons_data = pd.read_csv('/content/parkinsons.csv')

In [33]:
parkinsons_data.head()

Unnamed: 0,name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,...,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
0,phon_R01_S01_1,119.992,157.302,74.997,0.00784,7e-05,0.0037,0.00554,0.01109,0.04374,...,0.06545,0.02211,21.033,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
1,phon_R01_S01_2,122.4,148.65,113.819,0.00968,8e-05,0.00465,0.00696,0.01394,0.06134,...,0.09403,0.01929,19.085,1,0.458359,0.819521,-4.075192,0.33559,2.486855,0.368674
2,phon_R01_S01_3,116.682,131.111,111.555,0.0105,9e-05,0.00544,0.00781,0.01633,0.05233,...,0.0827,0.01309,20.651,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
3,phon_R01_S01_4,116.676,137.871,111.366,0.00997,9e-05,0.00502,0.00698,0.01505,0.05492,...,0.08771,0.01353,20.644,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
4,phon_R01_S01_5,116.014,141.781,110.655,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,...,0.1047,0.01767,19.649,1,0.417356,0.823484,-3.747787,0.234513,2.33218,0.410335


In [6]:
parkinsons_data.isnull().sum() #ckeck the missing values

name                0
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64

In [7]:
#data pre processing
# Separate features and target variable
X = parkinsons_data.drop(columns=['status', 'name']) #features
Y = parkinsons_data['status'] #target

In [9]:
# Split the data into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [11]:
#scale the feature data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [37]:
# Hyperparameter Tuning using Grid Search
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100], 'max_iter': [100, 200, 300, 400, 500]}
grid_search = GridSearchCV(LogisticRegression(random_state=42), param_grid, cv=5)
grid_search.fit(X_train_scaled, Y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [39]:
# Get the Best hyperparameters from grid search
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

Best Hyperparameters: {'C': 1, 'max_iter': 100}


In [40]:
# Model Training with Logistic Regression
parkinson_model = LogisticRegression(**best_params,random_state=42)
parkinson_model.fit(X_train_scaled, Y_train)

In [41]:
# Model Evaluation
# Accuracy on training data
train_predictions = parkinson_model.predict(X_train_scaled)
accuracy = accuracy_score(Y_train, train_predictions)
print('Accuracy on Training data:', accuracy)

Accuracy on Training data: 0.8717948717948718


In [42]:
# Accuracy on test data
test_predictions = parkinson_model.predict(X_test_scaled)
accuracy = accuracy_score(Y_test, test_predictions)
print('Accuracy on Test data:', accuracy)

Accuracy on Test data: 0.8974358974358975


In [51]:
# Building a Predictive System

sample_input = (150.44000, 163.44100, 144.73600, 0.00396, 0.00003, 0.00206, 0.00233, 0.00619, 0.02551, 0.23700, 0.01321, 0.01574, 0.02148, 0.03964, 0.00611, 23.13300, 0.352396, 0.759320, -6.261446, 0.183218, 2.264226, 0.144105)

# Scale the sample input using the same scaler used during training
# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(sample_input)

# reshape the numpy array
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

scale_data = scaler.transform(input_data_reshaped)
prediction = parkinson_model.predict(scale_data)
print(prediction)


if (prediction[0] == 0):
  print("has not Indicate Parkinsons Disease")

else:
  print("has Indicated Parkinsons Disease")


[1]
has Indicated Parkinsons Disease




In [53]:
#Save the created model
filename = 'parkinsons_disease_prediction_model.sav'
pickle.dump(parkinson_model, open(filename, 'wb'))