# ParkinVision Models

## Define Problem

In [11]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import glob

## Prepare Data

In [12]:
train_healthy = [cv2.imread(file) for file in glob.glob("dataset/spiral/training/healthy/*.png")]
train_healthy_labels = [0 for _ in range(len(train_healthy))]
train_parkinson = [cv2.imread(file) for file in glob.glob("dataset/spiral/training/parkinson/*.png")]
train_parkinson_labels = [1 for _ in range(len(train_parkinson))]
test_healthy = [cv2.imread(file) for file in glob.glob("dataset/spiral/testing/healthy/*.png")]
test_healthy_labels = [0 for _ in range(len(test_healthy))]
test_parkinson = [cv2.imread(file) for file in glob.glob("dataset/spiral/testing/parkinson/*.png")]
test_parkinson_labels = [1 for _ in range(len(test_parkinson))]

train_images = train_healthy + train_parkinson
test_images = test_healthy + test_parkinson
train_labels = train_healthy_labels + train_parkinson_labels
test_labels = test_healthy_labels + test_parkinson_labels

In [13]:
from skimage.data import camera
from skimage.filters import roberts, sobel, sobel_h, sobel_v, scharr, \
    scharr_h, scharr_v, prewitt, prewitt_v, prewitt_h

def features_edge(image):
    edge_roberts = roberts(image)
    edge_sobel = sobel(image)
    edge_scharr = scharr(image)
    edge_prewitt = prewitt(image)

    fig, ax = plt.subplots(ncols=4, sharex=True, sharey=True,
                           figsize=(12, 10))

    ax[0].imshow(edge_roberts, cmap=plt.cm.gray)
    ax[0].set_title('Roberts Edge Detection')

    ax[1].imshow(edge_sobel, cmap=plt.cm.gray)
    ax[1].set_title('Sobel Edge Detection')

    ax[2].imshow(edge_scharr, cmap=plt.cm.gray)
    ax[2].set_title('Scharr Edge Detection')
    
    ax[3].imshow(edge_prewitt, cmap=plt.cm.gray)
    ax[3].set_title('Prewitt Edge Detection')

    for a in ax:
        a.axis('off')

    plt.tight_layout()
    plt.show()
    
    return edge_roberts

# Must call on 2D image
#features_edge(grey)

In [14]:
from skimage.feature import hog
from skimage import data, exposure

def plot_histogram(hog_image):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 10), sharex=True, sharey=True)

    ax1.axis('off')
    ax1.imshow(image, cmap=plt.cm.gray)
    ax1.set_title('Input image')

    # Rescale histogram for better display
    hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

    ax2.axis('off')
    ax2.imshow(hog_image_rescaled, cmap=plt.cm.gray)
    ax2.set_title('Histogram of Oriented Gradients')
    plt.show()
    
def features_hog(image):
    features = hog(image, orientations=9,
                pixels_per_cell=(10, 10), cells_per_block=(2, 2),
                transform_sqrt=True, block_norm="L1")

    
    return features

In [15]:
# Convert OpenCV images to numpy arrays for training
train_data = []

for image in train_images:
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (300, 300))
    
    features = features_hog(image)
    train_data.append(features)

(trainX, trainY) = (np.array(train_data), np.array(train_labels))

In [16]:
test_data = []

for image in test_images:
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (300, 300))
    
    features = features_hog(image)
    test_data.append(features)

(testX, testY) = (np.array(test_data), np.array(test_labels))
    
print(trainX.shape)
print(trainY.shape)
print(testX.shape)

(0,)
(0,)
(0,)


## Evaluate Models

In [18]:
# TODO: Train Naive-Bayes, logistic regression, decision trees (random forest), SVM, maybe try DL with Keras
# TODO: Select model that performs best on validation data
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

models = []
models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('RFC', RandomForestClassifier(n_estimators=100)))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(gamma='auto')))

results = []
names = []
for name, model in models:
    kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
    cv_results = cross_val_score(model, trainX, trainY, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))

LR: 0.692857 (0.155839)
LDA: 0.733929 (0.136569)
KNN: 0.591071 (0.142063)
RFC: 0.680357 (0.089089)
CART: 0.608929 (0.169116)
NB: 0.762500 (0.112387)
SVM: 0.492857 (0.128571)


In [27]:
# Make predictions on validation dataset
model = RandomForestClassifier(n_estimators=250, max_depth=8)
model.fit(trainX, trainY)
predictions = model.predict(testX)

In [28]:
# Evaluate predictions
print(accuracy_score(testY, predictions))
print(confusion_matrix(testY, predictions))
print(classification_report(testY, predictions))

0.8
[[12  3]
 [ 3 12]]
              precision    recall  f1-score   support

           0       0.80      0.80      0.80        15
           1       0.80      0.80      0.80        15

    accuracy                           0.80        30
   macro avg       0.80      0.80      0.80        30
weighted avg       0.80      0.80      0.80        30



In [None]:
from sklearn.model_selection import GridSearchCV

svc = SVC()
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}

clf = GridSearchCV(svc, parameters)
clf.fit(trainX, trainY)
    
print("Best predictions are:", clf.best_params_)

In [None]:
rfc = RandomForestClassifier()
parameters = {
    "n_estimators":[5,10,50,100,250],
    "max_depth":[2,4,8,16,32,None]
    
}

clf = GridSearchCV(rfc, parameters)
clf.fit(trainX, trainY)
    
print("Best predictions are:", clf.best_params_)

In [19]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

In [22]:
input_shape = (300, 300, 1)

model = Sequential()

model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(2, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

NameError: name 'keras' is not defined

## Export Model

In [296]:
import joblib
joblib.dump(model, "model.pkl")

['model.pkl']