# ADNIMERGE Modeling
## (SVM, Naive Bayes, MLP, ...)

In [None]:
# Imports
import numpy as np
import pandas as pd
from google.colab import drive
import matplotlib.pyplot as plt
import sklearn as sk
import math
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPClassifier

## Load Cleaned Data
### (see first modeling notebook for cleaning)

In [None]:
# Mount Google Drive so the cleaned CSV files are reachable from Colab.
drive.mount('/content/drive')

# Locations of the cleaned train and test CSVs on the mounted drive.
train_csv_path = '/content/drive/MyDrive/Data_Science_Alzheimers_ADNI/adnimerge_08Feb2024_cleaned_train_2.csv'
test_csv_path = '/content/drive/MyDrive/Data_Science_Alzheimers_ADNI/adnimerge_08Feb2024_cleaned_test_2.csv'

# Read both splits into DataFrames.
train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)

Mounted at /content/drive


## Support Vector Machine (SVM)

In [None]:
# Target: the diagnosis code column.
outputs = train_df['DX']

# Features: every column of the training frame except the diagnosis code.
inputs = train_df.drop(columns=['DX'])

# Fit a support vector classifier with scikit-learn's default hyperparameters.
# NOTE(review): SVMs are sensitive to feature scale; if the cleaned CSV is not
# already standardized, consider wrapping this in a scaling pipeline — confirm
# against the cleaning notebook.
clf = SVC()
clf.fit(inputs, outputs)
SVC()

In [None]:
# Evaluate the fitted SVM (`clf`) on the test set.

print("SVM Results")

# Build the test feature matrix: drop the diagnosis label, and also drop the
# prediction column this cell adds below if it is already present. Without the
# second drop, re-running the cell would pass an extra column to predict(),
# which no longer matches the features the model was fitted on.
test_inputs = (
    test_df
    .drop(columns=['DX'])
    .drop(columns=['svm_prediction'], errors='ignore')
)

# Store the predictions next to the true labels.
test_df['svm_prediction'] = clf.predict(test_inputs)

# Rows where the predicted diagnosis differs from the true one.
incorrect = test_df[test_df['DX'] != test_df['svm_prediction']]
print("Wrong: ", len(incorrect))

# Number of correctly classified rows.
num_correct = len(test_df) - len(incorrect)

# Accuracy, printed as a fraction between 0 and 1 (not multiplied by 100).
print("Percent Right:", num_correct/len(test_df))

SVM Results
Wrong:  255
Percent Right: 0.6473029045643154


## Naive Bayes Model
Documentation: https://scikit-learn.org/stable/modules/naive_bayes.html

In [None]:
# Reload train and test from disk so the frames no longer carry the
# prediction column added to test_df by the SVM evaluation.
train_df = pd.read_csv('/content/drive/MyDrive/Data_Science_Alzheimers_ADNI/adnimerge_08Feb2024_cleaned_train_2.csv')
test_df = pd.read_csv('/content/drive/MyDrive/Data_Science_Alzheimers_ADNI/adnimerge_08Feb2024_cleaned_test_2.csv')

# input everything but the diagnosis code
inputs = train_df.drop(['DX'], axis=1)

# predict the diagnosis code
outputs = train_df['DX']

# Derive the test features from the freshly reloaded test frame here, rather
# than relying on a `test_inputs` variable left over from an earlier section
# (which would raise NameError if that section had not been run).
test_inputs = test_df.drop(['DX'], axis=1)

# Fit Gaussian Naive Bayes on the training data and predict the test set.
gnb = GaussianNB()
y_pred = gnb.fit(inputs, outputs).predict(test_inputs)

In [None]:
# Evaluate the fitted Gaussian Naive Bayes model (`gnb`) on the test set.

print("Bayes Results")

# Build the test feature matrix: drop the diagnosis label, and also drop the
# prediction column this cell adds below if it is already present. Without the
# second drop, re-running the cell would pass an extra column to predict(),
# which no longer matches the features the model was fitted on.
test_inputs = (
    test_df
    .drop(columns=['DX'])
    .drop(columns=['bayes_prediction'], errors='ignore')
)

# Store the predictions next to the true labels.
test_df['bayes_prediction'] = gnb.predict(test_inputs)

# Rows where the predicted diagnosis differs from the true one.
incorrect = test_df[test_df['DX'] != test_df['bayes_prediction']]
print("Wrong: ", len(incorrect))

# Number of correctly classified rows.
num_correct = len(test_df) - len(incorrect)

# Accuracy, printed as a fraction between 0 and 1 (not multiplied by 100).
print("Percent Right:", num_correct/len(test_df))

Bayes Results
Wrong:  53
Percent Right: 0.9266943291839558


## Multi-Layer Perceptron (MLP)

In [None]:
# Start the MLP section from clean copies of both splits, read back from disk.
train_csv_path = '/content/drive/MyDrive/Data_Science_Alzheimers_ADNI/adnimerge_08Feb2024_cleaned_train_2.csv'
test_csv_path = '/content/drive/MyDrive/Data_Science_Alzheimers_ADNI/adnimerge_08Feb2024_cleaned_test_2.csv'
train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)

# Target: the diagnosis code column.
outputs = train_df['DX']

# Features: every column of the training frame except the diagnosis code.
inputs = train_df.drop(columns=['DX'])

In [None]:
# Multi-layer perceptron with two hidden layers (5 and 2 units), trained with
# the L-BFGS solver; random_state is fixed so repeated runs start from the same
# initial weights. Note this rebinds `clf`, which previously held the SVM.
clf = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(5, 2),
    random_state=1,
)
clf.fit(inputs, outputs)

In [None]:
# Evaluate the fitted MLP (`clf`) on the test set.

print("MLP Results")

# Build the test feature matrix: drop the diagnosis label, and also drop the
# prediction column this cell adds below if it is already present. Without the
# second drop, re-running the cell would pass an extra column to predict(),
# which no longer matches the features the model was fitted on.
test_inputs = (
    test_df
    .drop(columns=['DX'])
    .drop(columns=['mlp_prediction'], errors='ignore')
)

# Store the predictions next to the true labels.
test_df['mlp_prediction'] = clf.predict(test_inputs)

# Rows where the predicted diagnosis differs from the true one.
incorrect = test_df[test_df['DX'] != test_df['mlp_prediction']]
print("Wrong: ", len(incorrect))

# Number of correctly classified rows.
num_correct = len(test_df) - len(incorrect)

# Accuracy, printed as a fraction between 0 and 1 (not multiplied by 100).
print("Percent Right:", num_correct/len(test_df))

MLP Results
Wrong:  269
Percent Right: 0.627939142461964
