# In [1]:
# Module imports.

import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential

import sklearn as sk
from sklearn import svm
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn import tree
from sklearn import metrics

# Dataframe setup.

# Load the dataset and discard the "name" column before any positional slicing.
df_raw = pd.read_csv("parkinsons.data").drop(columns=["name"])

# Split by position: the first 16 remaining columns are the features and the
# column at position 16 is the target.
# NOTE(review): the target is presumably the "status" column that the final
# comparison reads by name -- confirm against the file header. Any columns
# located after position 16 are not passed to the models.
X, y = df_raw.iloc[:, :16], df_raw.iloc[:, 16]

# Number of rows, reused by the per-row loops further down.
df_rawSize = df_raw.shape[0]

# Logistic regression model.

# Fit with default hyperparameters, then predict the very rows used to fit.
# NOTE(review): because the predictions are made on the fitting data, they
# describe training fit, not held-out performance.
reg_log = LogisticRegression().fit(X, y)
y_pred_reg = reg_log.predict(X)

# Support vector machine model.

# Single-step pipeline whose only stage is an RBF-kernel SVC; the
# hyperparameters are passed straight to the estimator's constructor.
svm_pipe = Pipeline([
    ("model", SVC(C=10,
                  decision_function_shape="ovr",
                  gamma=0.1,
                  kernel="rbf")),
])
svm_pipe.fit(X, y)
# Predictions are made on the same rows the pipeline was fitted on.
y_pred_svm = svm_pipe.predict(X)

# Random forest model.

# Hyperparameters for the forest, collected in one place.
rf_params = {
    "n_estimators": 600,
    "min_samples_split": 10,
    "min_samples_leaf": 4,
    "max_features": "sqrt",
    "max_depth": 110,
    "bootstrap": True,
}
rf_model = RandomForestClassifier(**rf_params)

# Repeated stratified 10-fold splitter (3 repeats, fixed seed).
# NOTE(review): nothing in the visible code consumes `cv`; kept in case it is
# used elsewhere.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

# Fit on every row and predict those same rows.
rf_model.fit(X, y)
y_pred_rf = rf_model.predict(X)

# Neural network model.

# Build the network layer by layer: a 16-wide input, two 16-unit ReLU hidden
# layers and a single sigmoid output unit.
nn_model = Sequential()
nn_model.add(keras.layers.Flatten(input_shape=(16,)))
nn_model.add(keras.layers.Dense(16, activation=tf.nn.relu))
nn_model.add(keras.layers.Dense(16, activation=tf.nn.relu))
nn_model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))

nn_model.compile(optimizer="adam",
                 loss="binary_crossentropy",
                 metrics=["accuracy"])

# Train silently for 500 epochs with one sample per batch, then predict the
# same rows; the output is thresholded into class labels later in the script.
nn_model.fit(X, y, epochs=500, batch_size=1, verbose=0)
y_pred_nn = nn_model.predict(X)

# Decision tree model.

# `max_features="auto"` was deprecated in scikit-learn 1.1 and removed in 1.3,
# where it raises a parameter-validation error. For a classifier tree "auto"
# meant "sqrt", so "sqrt" keeps the same behaviour on every version and matches
# the setting already used for the random forest.
clf_model = tree.DecisionTreeClassifier(max_depth=7,
                                        max_features="sqrt",
                                        min_samples_split=4,
                                        criterion="entropy")
# Fit on every row and predict those same rows.
clf_model.fit(X, y)
y_pred_clf = clf_model.predict(X)

# Neural network binary conversion.

# Each prediction row holds a single sigmoid output; turn it into a 0/1 label.
# An output of exactly 0.5 counts as class 1 (built-in round() would send 0.5
# to 0, which is why a plain round() is not used). A single pass replaces the
# former pop(0)/append rotation, which was quadratic in the number of rows.
# Assumes outputs lie in [0, 1], which the sigmoid output layer provides.
y_pred_nn = [1 if row[0] >= 0.5 else 0 for row in y_pred_nn.tolist()]
    
# Weighting calculations.

def _class_precisions(y_true, y_pred):
    """Return the precision of class "0" and class "1" for one model.

    The classification report is built once and both values are read from it
    (previously the identical report was computed twice per model). A value is
    None when the report has no entry for that class, as before.
    """
    report = metrics.classification_report(y_true, y_pred, output_dict=True)
    return (report.get("0", {}).get("precision", None),
            report.get("1", {}).get("precision", None))


# Per-model precisions, later used as vote weights.
# NOTE(review): these are measured on the same rows the models were fitted on.
y_pred_regZero, y_pred_regOne = _class_precisions(y, y_pred_reg)
y_pred_svmZero, y_pred_svmOne = _class_precisions(y, y_pred_svm)
y_pred_rfZero, y_pred_rfOne = _class_precisions(y, y_pred_rf)
y_pred_nnZero, y_pred_nnOne = _class_precisions(y, y_pred_nn)
y_pred_clfZero, y_pred_clfOne = _class_precisions(y, y_pred_clf)

# Weighted voting occurrence.

# One entry per model: (its predictions, precision for class 1, precision for class 0).
model_votes = (
    (y_pred_reg, y_pred_regOne, y_pred_regZero),
    (y_pred_svm, y_pred_svmOne, y_pred_svmZero),
    (y_pred_rf, y_pred_rfOne, y_pred_rfZero),
    (y_pred_nn, y_pred_nnOne, y_pred_nnZero),
    (y_pred_clf, y_pred_clfOne, y_pred_clfZero),
)

predictions = []
for row_votes in zip(*(model[0] for model in model_votes)):
    # Accumulated support for class 1, out of a possible 500 (5 models x 100).
    totalOne = 0
    for vote, (_, precisionOne, precisionZero) in zip(row_votes, model_votes):
        if vote == 1:
            # A "1" vote counts with the model's class-1 precision.
            totalOne += round(precisionOne * 100)
        else:
            # A "0" vote still adds the share of the model's "0" calls that
            # were actually class 1.
            totalOne += round((1 - precisionZero) * 100)
    # Class 1 wins when it holds at least half of the total weight; an exact
    # tie goes to class 1.
    predictions.append(1 if totalOne / 500 >= 0.5 else 0)

# Comparison of prediction to status.

# Store the ensemble output next to the recorded labels.
df_raw["predictions"] = predictions

# One flag per row: 1 when the ensemble prediction equals "status", else 0.
totalResults = [
    1 if actual == voted else 0
    for actual, voted in zip(df_raw["status"], df_raw["predictions"])
]
        
# Accuracy report.

# Share of rows where the ensemble matched the recorded status, as a percentage.
# NOTE(review): this is measured on the same rows the models were fitted on.
accuracy_pct = sum(totalResults) / len(totalResults) * 100
print(f"The accuracy of this model is {accuracy_pct}%")

# Output: The accuracy of this model is 95.8974358974359%
