In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [None]:
# One header-less CSV per gesture; the trailing comment on each path names
# the class stored in that file.
data0, data1, data2, data3 = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        "/kaggle/input/emg-4/0.csv",  # class 0: rock
        "/kaggle/input/emg-4/1.csv",  # class 1: scissors
        "/kaggle/input/emg-4/2.csv",  # class 2: paper
        "/kaggle/input/emg-4/3.csv",  # class 3: ok
    )
)

# Each row holds 8 consecutive readings of all 8 sensors (64 feature columns)
# followed by the class label, which gives 65 columns in total.

# Stack the four per-class frames row-wise into a single dataset.
data = pd.concat([data0, data1, data2, data3], axis="index")
data.shape

In [None]:
# The last column is the class label; everything before it is a feature.
label_column = data.columns[-1]
X = data.drop(columns=label_column)
Y = data[label_column]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, Xtest, Y_train, Ytest = train_test_split(
    X,
    Y,
    train_size=0.8,
    random_state=10,
)

## Using different models for classification

### 1) Linear Discriminant Analysis

In [None]:
# Fit LDA with its default settings on the training split, then score the
# held-out split with a micro-averaged F1.
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, Y_train)
y_pred = lda.predict(Xtest)

f1_lda = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Linear Discriminant Analysis Classifier is", f1_lda)

### 2) Quadratic Discriminant Analysis

In [None]:
# Fit QDA with its default settings on the training split, then score the
# held-out split with a micro-averaged F1.
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, Y_train)
y_pred = qda.predict(Xtest)

f1_qda = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Quadratic Discriminant Analysis Classifier is", f1_qda)

### 3) Naive Bayes

In [None]:
# Fit Gaussian Naive Bayes with its default settings on the training split,
# then score the held-out split with a micro-averaged F1.
gnb = GaussianNB()
gnb.fit(X_train, Y_train)
y_pred = gnb.predict(Xtest)

f1_nb = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Naive Bayes Classifier is", f1_nb)

### 4) Random Forest

In [None]:
# Base estimator for the search; the fixed seed keeps every fit reproducible.
rfc = RandomForestClassifier(random_state=100)

# Hyper-parameter grid explored exhaustively by GridSearchCV.
# 'auto' is intentionally not listed for max_features: for classifiers it was
# just an alias of 'sqrt', it was deprecated in scikit-learn 1.1 and removed in
# 1.3, and passing it on current versions makes the whole search fail with a
# parameter-validation error.
param_grid = {
    'n_estimators': [200, 500],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [4, 5, 6, 7, 8, 9, 10],
    'criterion': ['gini', 'entropy'],
}

# 5-fold cross-validated search over the grid, fitted on the training split only.
CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5)
CV_rfc.fit(X_train, Y_train)

In [None]:
# Display the hyper-parameter combination selected by the random-forest grid search.
CV_rfc.best_params_

In [None]:
# Evaluate the random forest with the best hyper-parameters found by the search.
# GridSearchCV (default refit=True) has already retrained a copy of `rfc` with
# CV_rfc.best_params_ on the full training split, so reuse that model instead of
# hard-coding the parameter values (which would silently go stale whenever the
# grid, the data or the library version changes) and fitting a second time.
rf = CV_rfc.best_estimator_
y_pred = rf.predict(Xtest)
f1_rf = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Random Forest Classifier is", f1_rf)

### 5) Support Vector Classifier

In [None]:
# Base support-vector classifier used as the template for the search.
svc = SVC(random_state=100)

# Candidate regularisation strengths and kernels to compare.
param_grid = dict(
    C=[0.01, 0.1, 1],
    kernel=['linear', 'rbf'],
)

# 5-fold cross-validated search over the grid, fitted on the training split only.
CV_svc = GridSearchCV(svc, param_grid, cv=5)
CV_svc.fit(X_train, Y_train)

In [None]:
# Display the hyper-parameter combination selected by the SVC grid search.
CV_svc.best_params_

In [None]:
# Evaluate the support-vector classifier with the best hyper-parameters found
# by the search. GridSearchCV (default refit=True) has already retrained a copy
# of `svc` with CV_svc.best_params_ on the full training split, so reuse it
# rather than hard-coding the values and fitting again.
# The model gets its own name: storing it in `rf` would overwrite the random
# forest and leave a misleadingly named variable behind.
svc_best = CV_svc.best_estimator_
y_pred = svc_best.predict(Xtest)
f1_svc = f1_score(Ytest, y_pred, average='micro')
print("F1 Score for Support Vector Classifier is", f1_svc)

## Evaluating all models

In [None]:
# Summary table with one row per model and its micro-averaged F1 on the test split.
# The frame is built in a single call: DataFrame.append was removed in pandas 2.0
# (so appending row by row raises AttributeError there), and constructing the
# table at once also keeps "F1 Score" as a proper float column.
model_results = pd.DataFrame(
    {
        "Models": [
            "Linear Discriminant Analysis",
            "Quadratic Discriminant Analysis",
            "Naive Bayes",
            "Random Forest",
            "Support Vector Classifier",
        ],
        "F1 Score": [f1_lda, f1_qda, f1_nb, f1_rf, f1_svc],
    }
)

# Show the models ranked from best to worst score.
model_results.sort_values(by="F1 Score", ascending=False)