In [1]:
pip install matplotlib

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier

ModuleNotFoundError: No module named 'seaborn'

In [None]:
# Load the dataset from a CSV path given relative to the current working directory.
parkinsons_data = pd.read_csv('../datasets/parkinsson_disease.csv')

In [None]:
# Preview the first few rows to sanity-check the load.
parkinsons_data.head()

In [None]:
# (number of rows, number of columns)
parkinsons_data.shape

In [None]:
# Print a per-column summary of the frame (dtypes and non-null counts).
parkinsons_data.info()

In [None]:
# Count missing values in each column.
parkinsons_data.isnull().sum()

In [None]:
# Number of rows per distinct value of the `status` target column (class balance).
parkinsons_data['status'].value_counts()

In [None]:
# Features: every column except the target (`status`) and the `name` column.
X = parkinsons_data.drop(columns=["status", "name"])

# Target: the `status` column.
y = parkinsons_data["status"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified on `y` — confirm that is intended.
xTrain, xTest, yTrain, yTest = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Display the feature matrix (all columns except `status` and `name`).
X

In [None]:
# Display the held-out test features produced by train_test_split.
xTest

In [None]:
# Display the full target series (`status`).
y

In [None]:
# Display the held-out test labels produced by train_test_split.
yTest

In [None]:
# Candidate classifiers. Every model gets the same fixed seed so the comparison
# is reproducible after a kernel restart.
rf_model = RandomForestClassifier(random_state=42)
svm_model = SVC(random_state=42)
xgb_model = XGBClassifier(random_state=42)

# One shuffled 5-fold splitter shared by all models; the integer seed makes it
# yield the same folds for every cross_val_score call below.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Score each candidate on the full dataset and report per-fold and mean scores.
cv_candidates = {
    "Random Forest": rf_model,
    "SVM": svm_model,
    "XGBoost": xgb_model,
}
cv_scores = {}
for model_name, model in cv_candidates.items():
    cv_scores[model_name] = cross_val_score(model, X, y, cv=kfold)
    print(f"{model_name} CV Scores: {cv_scores[model_name]}")
    print(f"{model_name} CV Mean Score: {cv_scores[model_name].mean()}")
    print("--"*40)

# Keep the per-model score arrays available under their original names.
rf_scores = cv_scores["Random Forest"]
svm_scores = cv_scores["SVM"]
xgb_scores = cv_scores["XGBoost"]

Applying the Support Vector Machine Model

In [None]:
# Fit the SVM on the training split and predict each split exactly once, so the
# accuracy and the confusion matrix are computed from the same predictions.
svm_model.fit(xTrain, yTrain)
preds_test = svm_model.predict(xTest)
preds_train = svm_model.predict(xTrain)

print(f"Accuracy on train data by SVM Classifier: {accuracy_score(yTrain, preds_train)*100}")
cf_matrix_train = confusion_matrix(yTrain, preds_train)
plt.figure(figsize=(6,4))
# fmt='d' prints integer counts; the default annotation format switches to
# scientific notation for counts of 100 or more.
sns.heatmap(cf_matrix_train, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix on train data for SVM Classifier")
plt.show()

print(f"Accuracy on test data by SVM Classifier: {accuracy_score(yTest, preds_test)*100}")
cf_matrix = confusion_matrix(yTest, preds_test)
plt.figure(figsize=(6,4))
sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix on test data for SVM Classifier")
plt.show()

In [None]:
# Fit the random forest on the training split and predict each split exactly
# once, so the accuracy and the confusion matrix use the same predictions.
rf_model.fit(xTrain, yTrain)
preds_test = rf_model.predict(xTest)
preds_train = rf_model.predict(xTrain)

print(f"Accuracy on train data by Random Forest Classifier: {accuracy_score(yTrain, preds_train)*100}")

cf_matrix_train = confusion_matrix(yTrain, preds_train)
plt.figure(figsize=(6,4))
# fmt='d' prints integer counts; the default annotation format switches to
# scientific notation for counts of 100 or more.
sns.heatmap(cf_matrix_train, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix on train data for Random Forest Classifier")
plt.show()

print(f"Accuracy on test data by Random Forest Classifier: {accuracy_score(yTest, preds_test)*100}")

cf_matrix = confusion_matrix(yTest, preds_test)
plt.figure(figsize=(6,4))
sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix on test data for Random Forest Classifier")
plt.show()

In [None]:
# Fit XGBoost on the training split and predict each split exactly once.
xgb_model.fit(xTrain, yTrain)
predict_test = xgb_model.predict(xTest)
predict_train = xgb_model.predict(xTrain)

print(f"Accuracy on the train data by XGBoost Classifier: {accuracy_score(yTrain, predict_train) * 100}")
cf_matrix_train = confusion_matrix(yTrain, predict_train)
plt.figure(figsize=(6,4))
# fmt='d' prints integer counts; the default annotation format switches to
# scientific notation for counts of 100 or more.
sns.heatmap(cf_matrix_train, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix on train data for XGBoost Classifier")
plt.show()

print(f"Accuracy on the test data by XGBoost Classifier: {accuracy_score(yTest, predict_test) * 100}")

# Use this cell's own XGBoost predictions (`predict_test`), not `preds_test`,
# which holds predictions left over from an earlier model cell.
cf_matrix = confusion_matrix(yTest, predict_test)
plt.figure(figsize=(6, 4))
sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix on test data for XGBoost Classifier")
plt.show()

Applying the combined model

In [None]:
# Feature values for a single sample, as a 1-D float array.
# NOTE(review): presumably ordered like the columns of X — verify against the dataset.
input_data = np.asarray((
    199.22800, 209.51200, 192.09100, 0.00241, 0.00001, 0.00134,
    0.00138, 0.00402, 0.01015, 0.08900, 0.00504, 0.00641,
    0.00762, 0.01513, 0.00167, 30.94000, 0.432439, 0.742055,
    -7.682587, 0.173319, 2.103106, 0.068501,
))

# Add a leading axis so the sample becomes a one-row 2-D array: (22,) -> (1, 22).
input_data_reshaped = input_data[np.newaxis, :]

In [None]:
# Display the 1-D array of sample feature values.
input_data

In [None]:
# Display the same sample reshaped into a single-row 2-D array.
input_data_reshaped

In [None]:
# Fresh, unfitted base estimators for the ensemble. SVC needs probability=True
# because soft voting combines the estimators' predicted class probabilities.
# All three are seeded so the fitted ensemble is reproducible.
svm_model = SVC(probability=True, random_state=42)
rf_model = RandomForestClassifier(random_state=42)
xgb_model = XGBClassifier(random_state=42)

result = VotingClassifier(
    estimators=[('svm', svm_model), ('rf', rf_model), ('xgb', xgb_model)],
    voting='soft',
)

result.fit(xTrain, yTrain)
prediction = result.predict(input_data_reshaped)

# Label 0 is reported as "not diseased", consistent with the check applied to
# the reloaded model at the end of this notebook.
# NOTE(review): confirm against the dataset's documentation that status == 1
# marks Parkinson's patients.
if prediction[0] == 0:
    print("The person is not diseased with Parkinson's disease")
else:
    print("The person is diseased with Parkinson's disease")

Saving the trained model

In [None]:
import pickle

In [None]:
filename = '../models/trained_model.sav'
# Use a context manager so the file handle is flushed and closed even if
# pickling raises.
with open(filename, 'wb') as model_file:
    pickle.dump(result, model_file)

Loading the saved model

In [None]:
# NOTE: unpickling can execute arbitrary code — only load model files that were
# produced by this notebook or another trusted source.
# The context manager closes the file handle once the model is loaded.
with open('../models/trained_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# Same sample values as the earlier input cell, re-declared here so this cell
# only depends on `loaded_model`.
input_data = (199.22800,209.51200,192.09100,0.00241,0.00001,0.00134,0.00138,0.00402,0.01015,0.08900,0.00504,0.00641,0.00762,0.01513,0.00167,30.94000,0.432439,0.742055,-7.682587,0.173319,2.103106,0.068501)

input_data = np.asarray(input_data)

# Single sample -> one-row 2-D array, the layout predict() expects.
input_data_reshaped = input_data.reshape(1, -1)

# Stored under its own name so the fitted ensemble held in `result` is not
# overwritten; otherwise re-running the save cell would pickle this array.
loaded_prediction = loaded_model.predict(input_data_reshaped)

if loaded_prediction[0] == 0:
    print("The person does not have Parkinson's Disease")
else:
    print("The person has Parkinson's Disease")