In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score

In [None]:
# Load 'parkinsons.csv' into a DataFrame; the path is relative, so the file
# must sit in the notebook's working directory.
df = pd.read_csv('parkinsons.csv')

In [None]:
# Distribution of the target variable: number of rows per 'status' value.
# (The prediction helper later in this notebook reports status 0 as
# "does not have Parkinsons disease" and any other value as "has".)
df['status'].value_counts()

In [None]:
# Per-class mean of every numeric column, grouped by the target ('status').
# numeric_only=True keeps this cell working on pandas >= 2.0, where mean()
# raises a TypeError on non-numeric columns instead of silently skipping them
# (presumably the case for the identifier-like 'name' column -- confirm).
df.groupby('status').mean(numeric_only=True)
# the class means differ distinctly between healthy people and people with Parkinson's

In [None]:
# Build the feature matrix (x) and the target vector (y).
# 'name' and 'status' are never features; the remaining dropped columns are a
# hand-picked exclusion list. To train on every feature instead, drop only
# ['name', 'status'].
x = df.drop(
    columns=[
        'name', 'status',
        'spread1', 'spread2', 'PPE', 'RPDE', 'D2', 'NHR', 'DFA',
    ]
)
y = df['status']

In [None]:
# Per-class means again, this time without the excluded columns.
# 'status' is kept because it is the grouping key.
# Note: despite its name, df_test is derived from the full frame, not from the
# test split.
df_test = df.drop(
    columns=[
        'name',
        'spread1', 'spread2', 'PPE', 'RPDE', 'D2', 'NHR', 'DFA',
    ]
)
df_test.groupby('status').mean()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Standardize the features so they are all on a comparable scale.
# The scaler learns its statistics from the training split only and then
# replaces x_train with the scaled values.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)

In [None]:
# Reuse the scaler that was fitted on the training split: the test data is only
# transformed, never fitted on, so no information from the test set leaks into
# the preprocessing step.
x_test = scaler.transform(x_test)

In [None]:
# Create the classifier: a support-vector machine with a linear kernel
# (it is only instantiated here; fitting happens in a separate cell).
model = svm.SVC(kernel='linear')

In [None]:
# Train the SVM on the (already standardized) training features and their labels.
model.fit(x_train, y_train)

In [None]:
# Model evaluation, part 1: accuracy on the same data the model was trained on.
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(
    y_true=y_train,
    y_pred=x_train_prediction,
)

In [None]:
# Report the accuracy measured on the training split.
print(f'Training data accuracy: {training_data_accuracy}')

In [None]:
# Model evaluation, part 2: accuracy on the held-out test split.
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(
    y_true=y_test,
    y_pred=x_test_prediction,
)
print(f'Test data accuracy: {test_data_accuracy}')

In [None]:
# Building a predictive system
def predict(input_data):
    """Classify one instance as having / not having Parkinson's disease.

    Uses the notebook-level ``scaler`` (already fitted) and ``model``
    (already trained); both must exist before this function is called.

    Parameters
    ----------
    input_data : sequence of numbers
        Feature values for a single instance, in the same column order as the
        feature matrix the scaler was fitted on.

    Returns
    -------
    str
        'The person does not have Parkinsons disease' when the model predicts
        0, otherwise 'The person has Parkinsons disease'.
    """
    # a single instance -> one row: shape (1, n_features)
    features = np.asarray(input_data).reshape(1, -1)

    # A scaler fitted on a DataFrame remembers the column names and warns
    # ("X does not have valid feature names") when handed a bare array.
    # Re-attach the names when they are available and the width matches;
    # otherwise pass the array through unchanged so the scaler reports any
    # shape problem itself.
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None and len(feature_names) == features.shape[1]:
        features = pd.DataFrame(features, columns=list(feature_names))

    # standardize with the training statistics, then classify
    std_data = scaler.transform(features)
    prediction = model.predict(std_data)

    if prediction[0] == 0:
        return 'The person does not have Parkinsons disease'
    return 'The person has Parkinsons disease'

In [None]:
# Example: the 15 feature values of one instance; they must follow the column
# order of the feature matrix used for training.
input_data = [
    132.9588888, 446.4488196, 75.42733589,
    0.028006881, 0.000211396, 0.014384277,
    0.016734109, 0.043152832, 0.194968269,
    1.685448298, 0.089904364, 0.147112444,
    0.207948142, 0.269713092, 10.33065404,
]
print(predict(input_data))