In [None]:
# imports
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score

In [None]:
# Print the entries of the working directory (handy for checking that the
# dataset folder is where the relative path below expects it).
print(os.listdir(path="."))

In [None]:
# Path of the dataset CSV, relative to the notebook's working directory.
filepath = "./Ninapro_DB1/Ninapro_DB1.csv"

# Load the full CSV into a data frame.
# na_filter=False switches off pandas' missing-value detection while parsing,
# so blank fields are NOT turned into NaN in the resulting frame.
df = pd.read_csv(filepath, na_filter=False)

print("[+] Data frame created")

In [None]:
# # Loading only some rows
# df_periodic = pd.read_csv(filepath_or_buffer=filepath,
#                           skiprows=0,
#                           nrows=1000,
#                           na_filter=False)

# print("[+] Data frame created")

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)
# df_periodic.head()

In [None]:
# Helper that prints a quick structural summary of a data frame.
def show_info(df: pd.DataFrame) -> None:
    """Print the shape, the count of rows containing nulls, and the dtypes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    print("Shape:", df.shape)

    # Boolean mask, one entry per row: True when any column in that row is null.
    row_has_null = df.isnull().any(axis=1)
    rows_with_null = df[row_has_null]
    print("Null Values:", rows_with_null.shape[0])

    print("Data types:\n", df.dtypes)

In [None]:
# Print shape, rows-with-nulls count and dtypes for the loaded frame.
show_info(df=df)
# show_info(df_periodic)

In [None]:
# Pre-processing: remove the columns that are not used further on.
cols = [
    "Unnamed: 0",
    "stimulus",
    "restimulus",
    "repetition",
    "rerepetition",
    "subject",
]
# columns=cols is the same as labels=cols with axis=1; the frame is modified
# in place, so running this cell twice raises KeyError on the second run.
df.drop(columns=cols, inplace=True)

print("[+] Columns dropped:", cols)

In [None]:
# Preview the first five rows of the frame in its current state.
df.head(n=5)

In [None]:
# Column-wise split of the frame: every column except the last is a feature,
# the last column is taken as the label.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

print("features:", X.shape[1])
print("label:", y.shape)

In [None]:
# Number of rows per label value, smallest count first; NaN labels (if any)
# are counted as their own entry.
print(
    y.value_counts(
        sort=True,
        ascending=True,
        dropna=False,
    )
)

In [None]:
# Train / test split (80% / 20%).
# train_test_split returns its outputs grouped per input array, i.e.
# (X_train, X_test, y_train, y_test). Unpacking in any other order silently
# swaps test features with training labels, so the order below matters.
# random_state pins the shuffle so the split is identical after a kernel
# restart and the reported results can be reproduced.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    train_size=0.8,
                                                    shuffle=True,
                                                    random_state=42)

print("Splitted into train(80%) and test (20%)")

In [None]:
# Model definitions (hyper-parameters only; nothing is trained in this cell).

# k-nearest neighbours: 100 neighbours, votes weighted by inverse distance.
KNN = KNeighborsClassifier(
    n_neighbors=100,
    weights="distance",
    algorithm="auto",
    leaf_size=50,
)

# Support vector classifier with a sigmoid kernel and a one-vs-one shaped
# decision function.
SVM = svm.SVC(
    kernel="sigmoid",
    decision_function_shape="ovo",
)

# Random forest of 100 trees, splits scored with log loss.
RNDF = RandomForestClassifier(
    n_estimators=100,
    criterion="log_loss",
)

In [None]:
# Fit the KNN model on X_train / y_train.
KNN.fit(X=X_train, y=y_train)

In [None]:
# KNN predictions for X_test, kept for the confusion matrix later on.
knn_predicts = KNN.predict(X=X_test)

In [None]:
# Fit the SVM model on X_train / y_train.
SVM.fit(X=X_train, y=y_train)

In [None]:
# SVM predictions for X_test, kept for the confusion matrix later on.
svm_predicts = SVM.predict(X=X_test)

In [None]:
# Fit the random forest model on X_train / y_train.
RNDF.fit(X=X_train, y=y_train)

In [None]:
# Random forest predictions for X_test, kept for the confusion matrix later on.
rndf_predicts = RNDF.predict(X=X_test)

In [None]:
# Cross-validated scores
def calc_accuracy(classifier):
    """Print the mean and standard deviation of 10-fold cross-validation scores.

    The scores come from ``cross_val_score`` run over the notebook-level
    ``X`` and ``y`` (the full feature matrix and label column), not over the
    train/test split. The estimator's default scorer is used.

    Parameters
    ----------
    classifier : estimator
        Estimator handed to ``cross_val_score``.

    Returns
    -------
    None
        The mean is printed first, then the standard deviation.
    """
    fold_scores = cross_val_score(classifier, X, y, cv=10)
    for statistic in (fold_scores.mean(), fold_scores.std()):
        print(statistic)

# Confusion matrix
def show_matrix(y_pred):
    """Print the confusion matrix of ``y_pred`` against the notebook-level ``y_test``.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels, compared element-wise with ``y_test``.

    Returns
    -------
    None
        The matrix is written to standard output.
    """
    print(confusion_matrix(y_true=y_test, y_pred=y_pred))

In [None]:
# Cross-validation mean / std for each model, in the order KNN, SVM, random forest.
for model in (KNN, SVM, RNDF):
    calc_accuracy(model)

# Confusion matrix of each model's predictions, in the same order.
for predictions in (knn_predicts, svm_predicts, rndf_predicts):
    show_matrix(predictions)