# importing necessary libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# loading csv file

In [None]:
# Load the Parkinson's CSV into the working DataFrame.
parkinsons_csv = "../input/parkinsonsdataset/parkinsons.csv"
df = pd.read_csv(parkinsons_csv)

# getting to know the data

In [None]:
# Peek at the first five rows.
df.head(n=5)

In [None]:
# Summary statistics for the DataFrame's columns.
df.describe()

In [None]:
# Print each column's dtype and non-null count.
df.info()

In [None]:
# List the column labels.
df.columns

# checking for missing (NaN) values

In [None]:
# Draw the boolean null-mask as a heatmap so any missing entry stands out.
null_mask = df.isnull()
sns.heatmap(null_mask, cmap='viridis', cbar=False, yticklabels=False)

# cleaning data

In [None]:
# Remove the 'name' column so it is not passed to the models as a feature.
# Reassigning with errors='ignore' keeps this cell safe to re-run: the
# original in-place drop raised KeyError on a second execution because the
# column was already gone.
df = df.drop(columns=['name'], errors='ignore')

In [None]:
# Cast the target column to boolean.
df['status'] = df['status'].astype(bool)

# exploring how the features relate to status

In [None]:
# Class balance: number of rows for each status value.
sns.countplot(data=df, x='status')

In [None]:
# Side-by-side box plots of the three MDVP frequency columns (Fo, Fhi, Flo).
freq_cols = ['MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)']
plt.figure(figsize=(15,7))
for position, column in enumerate(freq_cols, start=1):
    plt.subplot(1, len(freq_cols), position)
    sns.boxplot(y=column, data=df)
# Lay the figure out once, after every subplot exists; the original re-ran
# tight_layout() on each loop iteration.
plt.tight_layout()

In [None]:
# Distribution of MDVP:Fo(Hz) for each status value.
sns.catplot(data=df, x='status', y='MDVP:Fo(Hz)')

# Machine learning models

## k-nearest neighbors

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix: every column except the target.
X = df.drop(columns='status')

In [None]:
# Scaler with default settings; fitted and applied in the following cells.
scaler = StandardScaler()

In [None]:
# Fit the scaler on all feature columns.
# NOTE(review): this runs on every row, before train_test_split is called
# further down, so test-set statistics influence the scaling — consider
# fitting on the training rows only.
scaler.fit(df.drop(columns='status'))

In [None]:
# Apply the fitted scaler to the same feature columns.
scaler_feature = scaler.transform(df.drop(columns='status'))

In [None]:
# Wrap the scaled array in a DataFrame that carries the feature names of X.
df_feature = pd.DataFrame(data=scaler_feature, columns=X.columns)

In [None]:
# First five rows of the scaled features.
df_feature.head(n=5)

# splitting the data into train and test sets

In [None]:
# Hold out 33% of the scaled rows for testing; the fixed seed makes the
# split repeatable.
X = df_feature
y = df['status']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.33,
    random_state=42,
)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# finding the best value for 'k'

In [None]:
# Fit one KNN classifier per candidate k (1..39) and record the share of
# test rows it misclassifies.
# NOTE(review): k is being compared on the test split, so the score later
# reported for the chosen k is likely optimistic — consider a validation
# split or cross-validation.
err_rate = []
for i in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=i).fit(X_train, y_train)
    predi = knn.predict(X_test)
    err_rate.append((predi != y_test).mean())

In [None]:
# Error rate against k, matching the 1..39 sweep above.
k_values = range(1, 40)
plt.figure(figsize=(10, 6))
plt.grid()
plt.plot(
    k_values,
    err_rate,
    color='blue',
    linestyle='--',
    marker='.',
    markerfacecolor='red',
    markersize=10,
)

# prediction

In [None]:
# Train the final KNN model and predict the held-out rows.
# NOTE(review): presumably k=3 was read off the error-rate plot — confirm.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
prediction = knn.predict(X_test)

In [None]:
# Show the labels the KNN model predicted for the test split.
prediction

# evaluation of the model

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Per-class precision/recall/F1 followed by the raw confusion matrix for
# the KNN predictions. (Heading typo 'Confission' corrected.)
print('Classification report')
print(classification_report(y_test, prediction))
print('\n')
print('Confusion matrix')
print(confusion_matrix(y_test, prediction))

# heatmap for confusion matrix  

In [None]:
# Confusion matrix of the KNN predictions as a heatmap.
knn_cm = confusion_matrix(y_true=y_test, y_pred=prediction)
sns.heatmap(knn_cm, cmap='viridis')

# accuracy of the model

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the KNN predictions as a percentage.
accuracy = accuracy_score(y_test, prediction) * 100
# np.round works whether accuracy_score returns a NumPy scalar or a built-in
# float; the original `accuracy.round()` raises AttributeError on a built-in
# float (returned by newer scikit-learn releases).
print(f'model accuracy is : {np.round(accuracy)} %')

# LogisticRegression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with default settings; fitted in a later cell.
logireg = LogisticRegression()

In [None]:
# Recreate the 67/33 split of the scaled features with the same seed (42).
X = df_feature
y = df['status']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Train the logistic regression on the training split.
logireg.fit(X=X_train, y=y_train)

In [None]:
# Predict the held-out rows with the fitted logistic regression.
prediction_logi = logireg.predict(X=X_test)

In [None]:
# Show the labels the logistic regression predicted for the test split.
prediction_logi

In [None]:
# Per-class precision/recall/F1 for the logistic regression.
logi_report = classification_report(y_true=y_test, y_pred=prediction_logi)
print(logi_report)

In [None]:
# Raw confusion matrix for the logistic regression.
logi_cm = confusion_matrix(y_true=y_test, y_pred=prediction_logi)
print(logi_cm)

In [None]:
# Confusion matrix of the logistic-regression predictions as a heatmap.
logi_cm_plot = confusion_matrix(y_true=y_test, y_pred=prediction_logi)
sns.heatmap(logi_cm_plot, cmap='viridis')

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the logistic-regression predictions as a percentage.
accuracy = accuracy_score(y_test, prediction_logi) * 100
# np.round works whether accuracy_score returns a NumPy scalar or a built-in
# float; the original `accuracy.round()` raises AttributeError on a built-in
# float (returned by newer scikit-learn releases).
print(f'model accuracy is : {np.round(accuracy)} %')

# DecisionTree

In [None]:
# The tree-based models below use the unscaled feature columns; same 67/33
# split and seed as before.
X = df.drop(columns='status')
y = df['status']
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.33,
    random_state=42,
)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Seed the tree so repeated runs build the same model; without a
# random_state the reported scores can change from run to run. 42 matches
# the seed already used for train_test_split.
dtree = DecisionTreeClassifier(random_state=42)

In [None]:
# Train the decision tree on the training split.
dtree.fit(X=X_train, y=y_train)

In [None]:
# Predict the held-out rows with the fitted decision tree.
prediction = dtree.predict(X=X_test)

In [None]:
# Show the labels the decision tree predicted for the test split.
prediction

In [None]:
# Per-class precision/recall/F1 followed by the raw confusion matrix for
# the decision-tree predictions. (Heading typo 'Confission' corrected.)
print('Classification report')
print(classification_report(y_test, prediction))
print('\n')
print('Confusion matrix')
print(confusion_matrix(y_test, prediction))

In [None]:
# Confusion matrix of the decision-tree predictions as a heatmap.
dtree_cm = confusion_matrix(y_true=y_test, y_pred=prediction)
sns.heatmap(dtree_cm, cmap='viridis')

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the decision-tree predictions as a percentage.
accuracy = accuracy_score(y_test, prediction) * 100
# np.round works whether accuracy_score returns a NumPy scalar or a built-in
# float; the original `accuracy.round()` raises AttributeError on a built-in
# float (returned by newer scikit-learn releases).
print(f'model accuracy is : {np.round(accuracy)} %')

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Seed the forest so repeated runs build the same model; without a
# random_state its bootstrap samples and feature choices differ on every
# run. 42 matches the seed already used for train_test_split.
rfc = RandomForestClassifier(random_state=42)

In [None]:
# Train the random forest on the training split.
rfc.fit(X=X_train, y=y_train)

In [None]:
# Predict the held-out rows with the fitted random forest.
prediction = rfc.predict(X=X_test)

In [None]:
# Show the labels the random forest predicted for the test split.
prediction

In [None]:
# Per-class precision/recall/F1 followed by the raw confusion matrix for
# the random-forest predictions. (Heading typo 'Confission' corrected.)
print('Classification report')
print(classification_report(y_test, prediction))
print('\n')
print('Confusion matrix')
print(confusion_matrix(y_test, prediction))

In [None]:
# Plot the random forest's own confusion matrix. The original passed
# `prediction_logi`, so this cell re-drew the logistic-regression matrix
# instead of the random-forest one.
sns.heatmap(confusion_matrix(y_test, prediction), cmap='viridis')

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the random-forest predictions as a percentage.
accuracy = accuracy_score(y_test, prediction) * 100
# np.round works whether accuracy_score returns a NumPy scalar or a built-in
# float; the original `accuracy.round()` raises AttributeError on a built-in
# float (returned by newer scikit-learn releases).
print(f'model accuracy is : {np.round(accuracy)} %')