In [None]:
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import scipy.io
import scipy
from scipy import signal

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.mixture import GaussianMixture as GMM
from sklearn import preprocessing

In [None]:
# Recording names grouped by database; each name is the stem of a `.mat` file.
DATABASE = {
    'Database 1': ['female_1', 'female_2', 'female_3', 'male_1', 'male_2'],
    'Database 2': ['male_day_1', 'male_day_2', 'male_day_3'],
}

# Class names used as labels.
LABELS = ['Spherical', 'Tip', 'Palmar', 'Lateral', 'Cylindrical', 'Hook']

# Variable name inside a recording -> class name (two channels per class).
COLUMN_MAPPINGS = {
    'cyl_ch1': 'Cylindrical', 'cyl_ch2': 'Cylindrical',
    'hook_ch1': 'Hook', 'hook_ch2': 'Hook',
    'tip_ch1': 'Tip', 'tip_ch2': 'Tip',
    'palm_ch1': 'Palmar', 'palm_ch2': 'Palmar',
    'spher_ch1': 'Spherical', 'spher_ch2': 'Spherical',
    'lat_ch1': 'Lateral', 'lat_ch2': 'Lateral',
}

# Variable names to read from each recording, in mapping (insertion) order.
COLUMNS = list(COLUMN_MAPPINGS)

In [None]:
# Load every recording of the selected database and build one wide table:
# one row per trial, the channels of a grasp laid side by side, followed by
# the 'identifier' (source file) and 'label' (grasp name) columns.
DB_NAME = 'Database 1'

# Channel variable names for each grasp, in COLUMNS order (ch1 before ch2).
channels_by_label = {}
for column_name in COLUMNS:
    channels_by_label.setdefault(COLUMN_MAPPINGS[column_name], []).append(column_name)

# Read each .mat file once; keys are the recording names.
recordings = {
    fname: scipy.io.loadmat(f'/content/{DB_NAME}/{fname}.mat')
    for fname in DATABASE[DB_NAME]
}

new_dfs = []
for lab in LABELS:
    for fname, mat in recordings.items():
        # Join the channels of the SAME recording column-wise so that each row
        # holds both channels of one trial. Splitting the stacked rows in half
        # by position would instead pair channel 1 of one recording with
        # channel 2 of another, because rows are ordered file -> ch1 -> ch2.
        # assumes each channel array is (trials, samples) with matching trial
        # counts across channels -- TODO confirm against the .mat files
        trials = np.hstack([mat[channel] for channel in channels_by_label[lab]])
        frame = pd.DataFrame(trials)
        frame['identifier'] = fname
        frame['label'] = lab
        new_dfs.append(frame)

# ignore_index gives every trial a unique row label.
dataset = pd.concat(new_dfs, ignore_index=True)

# Select features/labels by column name instead of fixed positions so the
# cell does not depend on a particular number of samples per channel.
df_features = dataset.drop(columns=['identifier', 'label'])
df_labels = dataset['label']

assert len(df_features) == len(df_labels)
assert set(df_labels) <= set(LABELS)

In [None]:
#PCA / Isomap transform
# NOTE(review): this cell previously read `df_holt`, which no cell in this
# notebook defines (NameError on Restart & Run All). `df_features` is used
# instead; if a smoothing step was meant to come first, restore it in its own
# cell and feed its output here.
n = 20

# Plain float array so both estimators get numeric input regardless of the
# DataFrame's dtype.
feature_matrix = df_features.to_numpy(dtype=float)

# PCA is fitted on the transposed matrix, so `components_` has one column per
# row of `feature_matrix`; transposing back gives an (n_rows, n) array.
# NOTE(review): `df_pca` is not used by any later cell shown here -- confirm
# whether fitting on the transpose is intended.
pca = PCA(n_components=n).fit(feature_matrix.T)
df_pca = pca.components_.T

# NOTE(review): the embedding is fitted on all rows before the train/test
# split performed later, so test rows influence the features.
iso = Isomap(n_components=n)
df_iso = iso.fit_transform(feature_matrix)

In [None]:
# Integer code (1-6) assigned to each class name.
label_dict = dict(
    Spherical=1,
    Tip=2,
    Palmar=3,
    Lateral=4,
    Cylindrical=5,
    Hook=6,
)

# Integer code for every entry of df_labels, in the same order.
col = list(map(label_dict.__getitem__, df_labels.to_list()))

In [None]:
def _accuracy(predicted, expected):
    """Return the fraction of positions where `predicted` equals `expected`."""
    # Indexing `value_counts()` with [1] on a boolean result is positional and
    # yields the share of the less frequent outcome, so the mean of the
    # element-wise match is used instead.
    return float(np.mean(np.asarray(predicted) == np.asarray(expected)))


def _cluster_size_agreement(true_codes, cluster_ids, n_groups):
    """Return 1 minus the normalised gap between sorted class and cluster sizes.

    Both size vectors are padded to `n_groups` entries so empty classes or
    clusters count as size 0 instead of being dropped by a length mismatch.
    NOTE(review): this compares group sizes only; it does not check that
    individual samples land in the matching cluster, so it is not accuracy.
    """
    true_sizes = np.sort(np.bincount(true_codes, minlength=n_groups))
    cluster_sizes = np.sort(np.bincount(cluster_ids, minlength=n_groups))
    return 1 - np.abs(true_sizes - cluster_sizes).sum() / len(true_codes)


# Per-split scores for each model.
svm_res = []
log_res = []
nn_res = []
nb_res = []
kmeans_res = []
gmm_res = []


# Ten random 80/20 splits; the split index also seeds the stochastic models so
# a re-run reproduces the same numbers.
for i in range(0, 10):
    X_train, X_test, y_train, y_test = train_test_split(df_iso,
                                                        df_labels,
                                                        test_size=0.2,
                                                        random_state=i)

    # Integer codes for the string labels (needed by np.bincount below).
    le = preprocessing.LabelEncoder()
    le.fit(y_train)

    clf_svc = make_pipeline(StandardScaler(), SVC(gamma='auto', C=10))
    clf_log = make_pipeline(StandardScaler(), LogisticRegression(C=10, max_iter=1000))
    clf_nn = make_pipeline(StandardScaler(),
                           MLPClassifier(hidden_layer_sizes=(200, 200, 200),
                                         max_iter=2000,
                                         random_state=i))
    clf_nb = make_pipeline(StandardScaler(), GaussianNB())
    clf_kmeans = make_pipeline(StandardScaler(),
                               KMeans(n_clusters=len(LABELS), random_state=i))
    clf_gmm = make_pipeline(StandardScaler(),
                            GMM(n_components=len(LABELS), random_state=i))

    clfs = [clf_svc,
            clf_log,
            clf_nn,
            clf_nb,
            clf_kmeans,
            clf_gmm]

    for c in clfs:
        c.fit(X_train, y_train)

    svm_labels = clf_svc.predict(X_test)
    log_labels = clf_log.predict(X_test)
    nn_labels = clf_nn.predict(X_test)
    nb_labels = clf_nb.predict(X_test)
    kmeans_labels = clf_kmeans.predict(X_test)
    gmm_labels = clf_gmm.predict(X_test)

    svm_res.append(_accuracy(svm_labels, y_test))
    log_res.append(_accuracy(log_labels, y_test))
    nn_res.append(_accuracy(nn_labels, y_test))
    nb_res.append(_accuracy(nb_labels, y_test))

    y_test_codes = le.transform(y_test)
    kmeans_res.append(_cluster_size_agreement(y_test_codes, kmeans_labels, len(LABELS)))
    gmm_res.append(_cluster_size_agreement(y_test_codes, gmm_labels, len(LABELS)))

# Best score over the ten splits.
# NOTE(review): the maximum over test splits is optimistic; consider reporting
# the mean (and spread) instead.
svm_score = np.max(svm_res)
log_score = np.max(log_res)
nn_score = np.max(nn_res)
nb_score = np.max(nb_res)
kmeans_score = np.max(kmeans_res)
gmm_score = np.max(gmm_res)



In [None]:
# Report the score of each model, one line per model.
score_report = [
    ("SVM Accuracy: ", svm_score),
    ("LR Accuracy: ", log_score),
    ("Neural Network accuracy:", nn_score),
    ("Naive Bayes accuracy", nb_score),
    ("K-Means accuracy:", kmeans_score),
    ("GMM accuracy", gmm_score),
]
for caption, value in score_report:
    print(caption, value)

SVM Accuracy:  0.49444444444444446
LR Accuracy:  0.4388888888888889
Neural Network accuracy: 0.5
Naive Bayes accuracy 0.5
K-Means accuracy: 0.6444444444444444
GMM accuracy 0.7333333333333334
