In [1]:
import pandas as pd
import numpy as np

In [2]:
# load the data
df = pd.read_csv("../parkinsons.data")
df.set_index("name", inplace=True)

In [3]:
# features and target
X = df.loc[:, ["MDVP:Fo(Hz)", "RPDE",
               "PPE", "spread2", "DFA"]].values
y = df["status"]

In [4]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [5]:
scaler = StandardScaler()
scaler.fit(X)
X = scaler.fit_transform(X)

In [11]:
# parameters for SVC
param_grid_svc = {"model__kernel": ["rbf", "linear", "poly", "sigmoid"],
              "model__C": [0.1, 1, 10, 100],
              "model__gamma": [0.001, 0.01, 0.1, 1, 10, "auto", "scale"],
              "model__decision_function_shape": ["ovr", "ovo"]}

In [12]:
# SVC pipeline
svm_pipe = Pipeline([("model", SVC())])

In [13]:
# Grid search best params
search_svc = GridSearchCV(estimator=svm_pipe, cv=5, param_grid=param_grid_svc)
search_svc.fit(X, y)

GridSearchCV(cv=5, estimator=Pipeline(steps=[('model', SVC())]),
             param_grid={'model__C': [0.1, 1, 10, 100],
                         'model__decision_function_shape': ['ovr', 'ovo'],
                         'model__gamma': [0.001, 0.01, 0.1, 1, 10, 'auto',
                                          'scale'],
                         'model__kernel': ['rbf', 'linear', 'poly', 'sigmoid']})

In [14]:
print(f"Best parameters: {search_svc.best_params_},\nBest score: {search_svc.best_score_}")

Best parameters: {'model__C': 1, 'model__decision_function_shape': 'ovr', 'model__gamma': 'auto', 'model__kernel': 'rbf'},
Best score: 0.8512820512820513


In [15]:
from sklearn.ensemble import RandomForestClassifier

In [16]:
# parameters for RandomForestClassifier
param_grid_ran = {"model__n_estimators": [int(x) for x in np.linspace(start=100, stop=500, num=5)],
                  "model__max_features": ("auto", "sqrt"),
                  "model__max_depth": (2, 4, 6),
                  "model__min_samples_split": (1.0, 2, 4),
                  "model__min_samples_leaf": (1, 2),
                  "model__bootstrap": (True, False)}

In [17]:
ran_pipe = Pipeline([("model", RandomForestClassifier())])

In [18]:
search_ran = GridSearchCV(estimator=ran_pipe, cv=3, param_grid=param_grid_ran)
search_ran.fit(X, y)

GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('model', RandomForestClassifier())]),
             param_grid={'model__bootstrap': (True, False),
                         'model__max_depth': (2, 4, 6),
                         'model__max_features': ('auto', 'sqrt'),
                         'model__min_samples_leaf': (1, 2),
                         'model__min_samples_split': (1.0, 2, 4),
                         'model__n_estimators': [100, 200, 300, 400, 500]})

In [19]:
print(f"Best parameters: {search_ran.best_params_},\nBest score: {search_ran.best_score_}")

Best parameters: {'model__bootstrap': False, 'model__max_depth': 6, 'model__max_features': 'auto', 'model__min_samples_leaf': 1, 'model__min_samples_split': 2, 'model__n_estimators': 500},
Best score: 0.8666666666666667


In [20]:
from sklearn.neighbors import KNeighborsClassifier

In [21]:
param_grid_kn = {"model__n_neighbors": (3, 4, 5, 6),
                 "model__weights": ("uniform", "distance"),
                 "model__metric": ("euclidean", "minkowski")}

In [22]:
kn_pipe = Pipeline([("model", KNeighborsClassifier())])
kn_pipe.get_params()

{'memory': None,
 'steps': [('model', KNeighborsClassifier())],
 'verbose': False,
 'model': KNeighborsClassifier(),
 'model__algorithm': 'auto',
 'model__leaf_size': 30,
 'model__metric': 'minkowski',
 'model__metric_params': None,
 'model__n_jobs': None,
 'model__n_neighbors': 5,
 'model__p': 2,
 'model__weights': 'uniform'}

In [23]:
search_kn = GridSearchCV(estimator=kn_pipe, cv=5, param_grid=param_grid_kn)
search_kn.fit(X, y)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('model', KNeighborsClassifier())]),
             param_grid={'model__metric': ('euclidean', 'minkowski'),
                         'model__n_neighbors': (3, 4, 5, 6),
                         'model__weights': ('uniform', 'distance')})

In [24]:
print(f"Best parameters: {search_kn.best_params_},\nBest score: {search_kn.best_score_}")

Best parameters: {'model__metric': 'euclidean', 'model__n_neighbors': 3, 'model__weights': 'distance'},
Best score: 0.8461538461538461


In [25]:
from sklearn.tree import DecisionTreeClassifier

In [26]:
parameters = {'model__max_depth' : (10,30,50,70,90,100)
             ,'model__criterion' : ('gini','entropy')
             ,'model__max_depth' : (3,5,7,9,10)
             ,'model__max_features' : ('auto','sqrt','log2')
             ,'model__min_samples_split' : (2,4,6)}

In [27]:
dt_pipe = Pipeline([("model", DecisionTreeClassifier())])

In [28]:
search_dt = GridSearchCV(estimator=dt_pipe, cv=3, param_grid = parameters)
search_dt.fit(X, y)

GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('model', DecisionTreeClassifier())]),
             param_grid={'model__criterion': ('gini', 'entropy'),
                         'model__max_depth': (3, 5, 7, 9, 10),
                         'model__max_features': ('auto', 'sqrt', 'log2'),
                         'model__min_samples_split': (2, 4, 6)})

In [29]:
print(f"Best parameters: {search_dt.best_params_},\nBest score: {search_dt.best_score_}")

Best parameters: {'model__criterion': 'entropy', 'model__max_depth': 7, 'model__max_features': 'log2', 'model__min_samples_split': 6},
Best score: 0.8871794871794871


In [31]:
from sklearn.ensemble import StackingClassifier

In [38]:
clf1 = search_svc.best_estimator_
clf2 = search_kn.best_estimator_
clf3 = search_dt.best_estimator_
estimators = [("svc", clf1),("kn", clf2), ("dt", clf3)]

In [39]:
stack_model = StackingClassifier(estimators=estimators, final_estimator=clf1)

In [40]:
stack_model.fit(X, y)

StackingClassifier(estimators=[('svc',
                                Pipeline(steps=[('model',
                                                 SVC(C=1, gamma='auto'))])),
                               ('kn',
                                Pipeline(steps=[('model',
                                                 KNeighborsClassifier(metric='euclidean',
                                                                      n_neighbors=3,
                                                                      weights='distance'))])),
                               ('dt',
                                Pipeline(steps=[('model',
                                                 DecisionTreeClassifier(criterion='entropy',
                                                                        max_depth=7,
                                                                        max_features='log2',
                                                                        min_samples_split=6))

In [46]:
from sklearn.ensemble import VotingClassifier

In [47]:
clf1 = search_svc.best_estimator_
clf2 = search_kn.best_estimator_
clf3 = search_dt.best_estimator_
estimators = [("svc", clf1),("kn", clf2), ("dt", clf3)]

In [48]:
vot_model = VotingClassifier(estimators=estimators, voting="soft")

In [49]:
vot_model.fit(X, y)

VotingClassifier(estimators=[('svc',
                              Pipeline(steps=[('model',
                                               SVC(C=1, gamma='auto'))])),
                             ('kn',
                              Pipeline(steps=[('model',
                                               KNeighborsClassifier(metric='euclidean',
                                                                    n_neighbors=3,
                                                                    weights='distance'))])),
                             ('dt',
                              Pipeline(steps=[('model',
                                               DecisionTreeClassifier(criterion='entropy',
                                                                      max_depth=7,
                                                                      max_features='log2',
                                                                      min_samples_split=6))]))],
                 votin