Importing the Dependencies

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score

Data Collection & Analysis

In [2]:
# Load the Parkinson's dataset from a CSV file into a pandas DataFrame.
# The path is relative, so it is resolved against the notebook's working directory.
parkinsons_data = pd.read_csv('./parkinsons.csv')

In [None]:
# Preview the first 5 rows of the DataFrame (head() defaults to n=5).
parkinsons_data.head()

In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
parkinsons_data.shape

In [None]:
# Summary of the dataset: column names, non-null counts and dtypes.
parkinsons_data.info()

In [None]:
# Count the missing values in each column (isnull() flags them, sum() totals per column).
parkinsons_data.isnull().sum()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
parkinsons_data.describe()

In [None]:
# Class balance: number of rows for each value of the target column 'status'.
parkinsons_data['status'].value_counts()

1 --> Parkinson's Positive

0 --> Healthy


In [None]:
# Keep only the numeric columns so that mean() is defined for every column.
numeric_data = parkinsons_data.select_dtypes(include=[np.number])

# Per-class feature means. The grouping key is the 'status' Series taken from
# the original frame rather than a column name of `numeric_data`.
numeric_means = numeric_data.groupby(parkinsons_data['status']).mean()
numeric_means

Data Pre-Processing

Separating the features & Target

In [11]:
# Feature matrix: every column except 'name' and the 'status' label.
# (`axis=1` is redundant when `columns=` is given, but harmless.)
X = parkinsons_data.drop(columns=['name','status'], axis=1)
# Target vector: the 'status' column (1 = Parkinson's positive, 0 = healthy).
Y = parkinsons_data['status']

In [None]:
# Inspect the feature matrix as plain text.
print(X)

In [None]:
# Inspect the target labels as plain text.
print(Y)

Splitting the data into training data & test data

In [14]:
# Hold out 20% of the rows as a test set; random_state=2 fixes the shuffle so
# the split is reproducible across runs.
# NOTE(review): the split is not stratified. If the 'status' classes are
# imbalanced, consider passing stratify=Y so both splits keep the class ratio.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

In [15]:
# Sanity-check the split sizes: full feature matrix, training part, test part.
print(X.shape, X_train.shape, X_test.shape)

(195, 22) (156, 22) (39, 22)


Data Standardization

In [16]:
# Scaler that standardizes each feature to zero mean and unit variance.
scaler = StandardScaler()

In [None]:
# Learn the scaling statistics from the training set only, so that no
# information from the test set leaks into preprocessing.
scaler.fit(X_train)

In [18]:
# Apply the fitted scaler to both splits.
# GOTCHA: X_train / X_test are overwritten with their scaled versions, so this
# cell is not idempotent -- running it a second time scales already-scaled
# data. Re-run the train/test split cell first if this cell must be re-executed.
# By default transform() returns NumPy arrays, so the column names are dropped here.
X_train = scaler.transform(X_train)

X_test = scaler.transform(X_test)

In [None]:
# Inspect the standardized training features.
print(X_train)

Model Training

Support Vector Machine Model

In [20]:
# Support Vector Classifier with a linear kernel; every other hyperparameter
# is left at its scikit-learn default.
model = svm.SVC(kernel='linear')

In [None]:
# Fit the SVM on the standardized training features and their labels.
model.fit(X_train, Y_train)

Model Evaluation

Accuracy Score

In [22]:
# Accuracy on the training data: predict on the same samples the model was
# fitted on and compare with their true labels. This measures fit, not
# generalization.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# Report the fraction of training samples classified correctly.
print('Accuracy score of training data : ', training_data_accuracy)

In [24]:
# Accuracy on the held-out test data: predict on samples the model never saw
# during fitting and compare with their true labels.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# Report the fraction of held-out test samples classified correctly.
print('Accuracy score of test data : ', test_data_accuracy)

Building a Predictive System

In [None]:
# Feature values for a single sample, listed in the same order as the columns
# of X (every dataset column except 'name' and 'status').
input_data = (197.07600,206.89600,192.05500,0.00289,0.00001,0.00166,0.00168,0.00498,0.01098,0.09700,0.00563,0.00680,0.00802,0.01689,0.00339,26.77500,0.422229,0.741367,-7.348300,0.177551,1.743867,0.085569)

# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array to a single row: (1, n_features)
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# Attach the training column names before scaling. `scaler` was fitted on a
# DataFrame, so handing it a bare NumPy array makes scikit-learn warn that
# "X does not have valid feature names". Building the frame also raises a
# ValueError right here if the number of values does not match the number of
# feature columns.
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

# standardize the data with the statistics learned from the training set
std_data = scaler.transform(input_data_frame)

# `model` was fitted on the scaler's array output, so an array is the right input here
prediction = model.predict(std_data)
print(prediction)


# Label convention: 0 = healthy, 1 = Parkinson's positive.
if prediction[0] == 0:
    print("The Person does not have Parkinsons Disease")
else:
    print("The Person has Parkinsons")
