In [1]:
import numpy as np
import pandas as pd

In [2]:
import pandas as pd
import glob
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [3]:
class FCLayer:
    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(input_size, output_size) / np.sqrt(input_size + output_size)
        self.bias = np.random.randn(1, output_size) / np.sqrt(input_size + output_size)

    def forward(self, input):
        self.input = input
        return np.dot(input, self.weights) + self.bias

    def backward(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # print((learning_rate * weights_error).shape, self.weights.shape)
        # bias_error = output_error
        self.weights -= (learning_rate * weights_error).reshape(self.weights.shape)
        self.bias -= learning_rate * output_error
        return input_error

In [4]:
class ActivationLayer:
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        self.input = input
        return self.activation(input)

    def backward(self, output_error, learning_rate):
        return output_error * self.activation_prime(self.input)

In [5]:
# bonus
class FlattenLayer:
    def __init__(self, input_shape):
        self.input_shape = input_shape

    def forward(self, input):
        return np.reshape(input, (1, -1))

    def backward(self, output_error, learning_rate):
        return np.reshape(output_error, self.input_shape)

In [6]:
# bonus
class SoftmaxLayer:
    def __init__(self, input_size):
        self.input_size = input_size

    def forward(self, input):
        self.input = input
        tmp = np.exp(input)
        self.output = tmp / np.sum(tmp)
        return self.output

    def backward(self, output_error, learning_rate):
        input_error = np.zeros(output_error.shape)
        out = np.tile(self.output.T, self.input_size)
        return self.output * np.dot(output_error, np.identity(self.input_size) - out)

In [7]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return np.exp(-x) / (1 + np.exp(-x))**2

def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1 - np.tanh(x)**2

def relu(x):
    return np.maximum(x, 0)

def relu_prime(x):
    return np.array(x >= 0).astype('int')

In [8]:
def binary_cross_entropy(y_true, y_pred):
    return -np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred):
    return (y_pred - y_true) / (y_pred * (1 - y_pred))


In [9]:
# Load and label EMG files
def feature_ext(df,w=200,p=20):
    df = np.array(df)
    # Performs segmentation in windows
    X=[]
    for k in range(0, df.shape[0] - w + 1 ,w):

        d = df[k:k+w,:]
        d = np.abs(d) # Absolute value of the EMG data
        # Moving average filter for each channel
        for n in range(d.shape[1]-1):
            d[:,n] = np.convolve(d[:,n], np.ones(p)/p, mode='same')
        X.append(d)
    X = np.sqrt(np.sum(np.array(X)**2, axis=2))
    X = pd.DataFrame(X)

    return X

In [10]:
path_normal = "/kaggle/input/emg-physical-action-data-set/sub/Normal/*.csv"
path_aggressive = "/kaggle/input/emg-physical-action-data-set/sub/Aggressive/*.csv"
idx = 0
df_normal = pd.DataFrame()
for file in glob.glob(path_normal):
    prev_df = pd.read_csv(file)
    df_normal = pd.concat([df_normal,feature_ext(prev_df.drop("label",axis=1))],ignore_index=True)
    
df_aggressive = pd.DataFrame()
for file in glob.glob(path_aggressive):
    prev_df = pd.read_csv(file)
    df_aggressive = pd.concat([df_aggressive,feature_ext(prev_df.drop("label",axis=1))],ignore_index=True)
    

In [11]:
df_normal['target'] = np.zeros(len(df_normal))
df_aggressive['target'] = np.ones(len(df_aggressive))
df_aggressive.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,191,192,193,194,195,196,197,198,199,target
0,1613.077099,1761.820974,1858.121919,1917.632966,1960.344861,2015.945332,2034.698189,2054.740918,2083.145548,2092.393935,...,1814.3297,1715.825755,1627.193037,1944.59328,2569.425111,2491.570648,2211.111723,2446.314967,2271.141753,1.0
1,2173.715139,1991.830976,1912.160885,1970.351327,1964.319021,2028.878703,1907.999416,1896.255612,1880.313516,1932.54385,...,2086.932191,1958.329936,1831.052254,1726.669425,1657.383428,1614.858352,1609.760111,1227.762073,1005.199643,1.0
2,1340.707706,1503.88087,1613.133714,1710.53128,1776.585696,1834.681824,1933.401075,2071.217367,2232.582119,2400.844541,...,2280.839564,2350.654067,2445.286256,2494.98961,2483.953315,2306.748783,2179.629057,2039.503687,2028.868453,1.0
3,2142.546472,2239.617596,2353.998042,2383.500828,2479.851887,2485.464598,2448.767027,2416.537488,2504.919324,2642.183248,...,1830.942167,1772.81431,1689.333163,1571.362339,1524.256004,1372.540173,1302.707069,1560.03301,1451.660897,1.0
4,1105.654763,1128.813065,1223.269802,1299.993353,1389.386783,1404.9831,1363.73725,1354.834502,1636.607119,2200.734369,...,2317.175367,2272.057628,2220.011893,2160.427999,2098.847552,2060.752815,1961.207279,1775.55594,1631.07256,1.0


In [12]:
# stacking teh datasets on top of each other

df = pd.concat([df_normal ,df_aggressive])

# shuffle
df = df.sample(frac=1)


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 956 entries, 136 to 306
Columns: 201 entries, 0 to target
dtypes: float64(201)
memory usage: 1.5 MB


In [14]:
def normalize(x: np.ndarray):
    return (x - np.min(x)) / (np.max(x) - np.min(x))

In [15]:
X = df.drop(columns=['target']).values
X = normalize(X)
y = df['target'].values.astype('int')

In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, shuffle=True)

In [17]:
def accuracy(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)

In [18]:
# unlike the Medium article, I am not encapsulating this process in a separate class
# I think it is nice just like this
network = [
    FCLayer(200, 512),
    ActivationLayer(relu, relu_prime),
    FCLayer(512, 1)
]

epochs = 40
learning_rate = 0.001

# training
for epoch in range(epochs):
    error = 0
    for x, y_true in zip(X_train, y_train):
        # forward
        x = x.reshape(1, -1)
        y_true = y_true.reshape(1, -1)
        output = x
        for layer in network:
            output = layer.forward(output)

        output = sigmoid(output)
        # error (display purpose only)
        error += binary_cross_entropy(y_true, output[0])

        # backward
        output_error = binary_cross_entropy_prime(y_true, output[0])

        output_error = sigmoid_prime(output[0]) * output_error

        for layer in reversed(network):
            # if not isinstance(layer, ActivationLayer):
            #     print(layer.bias.shape, layer.weights.shape)
            output_error = layer.backward(output_error, learning_rate)

    # evaluate
    y_pred = []
    for x in X_test:
        output = x
        for layer in network:
            output = layer.forward(output)
        output = sigmoid(output)
        y_pred.append(output[0] > 0.5)
    y_pred = np.array(y_pred).reshape(-1)
    print('test accuracy=%f' % accuracy(y_test, y_pred), end=', ')

    error /= len(X_train)
    print('%d/%d, error=%f' % (epoch + 1, epochs, error))

test accuracy=0.817708, 1/40, error=0.653321
test accuracy=0.885417, 2/40, error=0.618964
test accuracy=0.890625, 3/40, error=0.601106
test accuracy=0.890625, 4/40, error=0.585440
test accuracy=0.890625, 5/40, error=0.571474
test accuracy=0.885417, 6/40, error=0.558856
test accuracy=0.880208, 7/40, error=0.547372
test accuracy=0.875000, 8/40, error=0.536893
test accuracy=0.875000, 9/40, error=0.527265
test accuracy=0.854167, 10/40, error=0.518464
test accuracy=0.848958, 11/40, error=0.510384
test accuracy=0.848958, 12/40, error=0.502834
test accuracy=0.848958, 13/40, error=0.495665
test accuracy=0.843750, 14/40, error=0.489094
test accuracy=0.833333, 15/40, error=0.483142
test accuracy=0.817708, 16/40, error=0.477842
test accuracy=0.817708, 17/40, error=0.473126
test accuracy=0.812500, 18/40, error=0.468930
test accuracy=0.812500, 19/40, error=0.465335
test accuracy=0.807292, 20/40, error=0.462246
test accuracy=0.802083, 21/40, error=0.459674
test accuracy=0.802083, 22/40, error=0.4575