In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from scipy.stats import skew, kurtosis
from statsmodels.tsa import stattools
from sklearn.model_selection import train_test_split

In [2]:
########################################################################################
#                                                                                      #
# Extract Raw Data Into Classification Data                                            #
# https://github.com/nlathia/pydata_2016/blob/master/Learn/Accelerometer-Explore.ipynb #
#                                                                                      #
########################################################################################

In [3]:
# https://www.sciencedirect.com/science/article/pii/S0893608014000203
# Page 2 Sub-title 3.1 Acceleration synthesization

def magnitude(activity):
    """Return the per-sample Euclidean magnitude of the three axis readings.

    Computes sqrt(x^2 + y^2 + z^2) element-wise from the 'xAxis', 'yAxis'
    and 'zAxis' entries of ``activity``.

    Parameters
    ----------
    activity : mapping of column name -> array-like
        Usually a pandas DataFrame; any mapping whose three axis entries
        support element-wise ``*`` and ``+`` (e.g. NumPy arrays) also works.

    Returns
    -------
    The element-wise magnitude, in whatever container the column arithmetic
    produces (a pandas Series when ``activity`` is a DataFrame).
    """
    x, y, z = activity['xAxis'], activity['yAxis'], activity['zAxis']
    # Vectorised square root instead of a per-element Python lambda.
    return np.sqrt(x * x + y * y + z * z)

In [4]:
def jitter(axis, start, end):
    """Mean absolute sample-to-sample change of ``axis`` over one window.

    Sums ``abs(axis[i] - axis[i-1])`` for every index ``i`` in
    ``[start, min(end, axis.count()))`` and divides by the nominal window
    length ``end - start`` (also when the window is clipped at the end of
    the data).

    Only index 0 is skipped, because it has no predecessor. The previous
    guard tested ``start != 0`` instead of the loop index, so every window
    starting at 0 reported a jitter of exactly 0. Features that were
    extracted with the old code therefore differ for the first window.

    Parameters
    ----------
    axis : indexable with ``count()``
        Values addressed as ``axis[i]``; presumably a pandas Series with
        the default integer index -- confirm against the caller.
    start, end : int
        Window bounds, ``end`` exclusive.

    Returns
    -------
    float

    Raises
    ------
    ZeroDivisionError
        If ``end == start``.
    """
    stop = min(end, axis.count())
    total = float(0)
    # For start >= 1 the difference to the sample just before the window is
    # still included, exactly as before.
    for i in range(max(start, 1), stop):
        total += abs(axis[i] - axis[i - 1])
    return total / (end - start)

In [5]:
def mean_crossing_rate(axis, start, end):
    """Rate at which ``axis`` crosses its mean inside one window.

    For every index ``i`` in ``[start, min(end, axis.count()))`` a crossing
    is counted when ``axis[i-1]`` and ``axis[i]`` lie on different sides of
    the mean. The mean is taken over the whole ``axis``, not over the
    window. The count is divided by ``end - start - 1``.

    Only index 0 is skipped, because it has no predecessor. The previous
    guard tested ``start != 0`` instead of the loop index, so every window
    starting at 0 reported a rate of exactly 0.

    Parameters
    ----------
    axis : indexable with ``mean()`` and ``count()``
        Presumably a pandas Series with the default integer index --
        confirm against the caller.
    start, end : int
        Window bounds, ``end`` exclusive.

    Returns
    -------
    float

    Raises
    ------
    ZeroDivisionError
        If ``end - start == 1``.
    """
    crossings = 0
    mean = axis.mean()
    stop = min(end, axis.count())
    for i in range(max(start, 1), stop):
        # A crossing happens when consecutive samples disagree on
        # "above the mean".
        if (axis[i - 1] > mean) != (axis[i] > mean):
            crossings += 1
    return float(crossings) / (end - start - 1)

In [6]:
def hidden_window_summary(axis, start, end):
    """Summarise one window ``axis[start:end]`` as a list of 14 statistics.

    Returned order: jitter, mean crossing rate, mean, standard deviation,
    variance, minimum, maximum, mean and std of the autocorrelation,
    mean and std of the autocovariance, skewness, kurtosis, and the square
    root of the mean squared deviation from the window mean.

    ``jitter`` and ``mean_crossing_rate`` receive the full ``axis`` plus the
    window bounds; every other statistic is computed on the slice only.
    """
    window = axis[start:end]
    autocorr = stattools.acf(window)      # https://en.wikipedia.org/wiki/Autocorrelation
    autocov = stattools.acovf(window)     # https://en.wikipedia.org/wiki/Autocovariance
    squared_dev = (window - window.mean()) ** 2

    summary = [
        jitter(axis, start, end),               # https://en.wikipedia.org/wiki/Jitter
        mean_crossing_rate(axis, start, end),
    ]
    # Plain descriptive statistics of the window.
    summary.extend([
        window.mean(),
        window.std(),                           # https://en.wikipedia.org/wiki/Standard_deviation
        window.var(),                           # https://en.wikipedia.org/wiki/Variance
        window.min(),
        window.max(),
    ])
    # Spread of the autocorrelation / autocovariance sequences.
    summary.extend([autocorr.mean(), autocorr.std(), autocov.mean(), autocov.std()])
    # Shape of the distribution.
    summary.append(skew(window))                # https://en.wikipedia.org/wiki/Skewness
    summary.append(kurtosis(window))            # https://en.wikipedia.org/wiki/Kurtosis
    summary.append(math.sqrt(squared_dev.mean()))
    return summary

In [7]:
def hidden_windows(df, size=100):
    """Yield ``(start, end)`` bounds of half-overlapping windows over ``df``.

    Windows are ``size`` samples long and advance by ``size // 2``. A window
    is produced for every start below ``df.count()``, so the last windows
    may extend past the end of the data.

    The step is an integer (and at least 1). The previous ``size / 2`` is
    float division in Python 3, so it yielded float indices that callers
    had to cast back. For even sizes, including the default, the windows
    are numerically unchanged.

    Parameters
    ----------
    df : object with ``count()``
        Only ``df.count()`` is read; the caller in this file passes the
        timestamp column.
    size : int
        Window length in samples.

    Yields
    ------
    tuple of (int, int)
        ``(start, start + size)``.
    """
    step = max(1, size // 2)   # never 0, otherwise the loop would not advance
    total = df.count()
    start = 0
    while start < total:
        yield start, start + size
        start += step
In [8]:
def features(activity):
    """Yield one flat feature list per window of ``activity``.

    Windows come from ``hidden_windows`` over the 'timestamp' column. For
    each window the summaries of 'xAxis', 'yAxis', 'zAxis' and 'magnitude'
    are concatenated in that order.
    """
    axis_names = ('xAxis', 'yAxis', 'zAxis', 'magnitude')
    for start, end in hidden_windows(activity['timestamp']):
        lo, hi = int(start), int(end)   # window bounds may arrive as floats
        row = []
        for name in axis_names:
            row.extend(hidden_window_summary(activity[name], lo, hi))
        yield row

In [9]:
# The three functions below only draw charts.
# They do not modify the data.

def plot_axis(ax, x, y, title):
    """Draw ``y`` against ``x`` on ``ax`` with a title and a grid.

    The x axis is hidden. The y limits are the data range widened by one
    standard deviation of ``y`` on each side, and the x limits are the
    exact range of ``x``.
    """
    ax.plot(x, y)
    ax.set_title(title)
    ax.xaxis.set_visible(False)

    margin = np.std(y)
    lower, upper = min(y) - margin, max(y) + margin
    ax.set_ylim([lower, upper])

    left, right = min(x), max(x)
    ax.set_xlim([left, right])
    ax.grid(True)

def plot_activity(activity):
    """Show the x, y and z axis readings of ``activity`` as three stacked plots.

    Each panel plots one axis column against the 'timestamp' column; the
    panels share their x axis.
    """
    fig, axes = plt.subplots(nrows=3, figsize=(15, 10), sharex=True)
    panels = (('xAxis', 'x Axis'), ('yAxis', 'y Axis'), ('zAxis', 'z Axis'))
    for ax, (column, title) in zip(axes, panels):
        plot_axis(ax, activity['timestamp'], activity[column], title)
    plt.subplots_adjust(hspace=0.2)
    plt.show()

def plot_magnitudes(activities, titles, hidden_inputdow=False):
    """Plot the 'magnitude' column of several activities, one panel each.

    Parameters
    ----------
    activities : sequence
        Each item provides 'timestamp' and 'magnitude' columns.
    titles : sequence of str
        ``titles[i]`` is the title of the panel for ``activities[i]``.
    hidden_inputdow : bool
        When truthy, a red vertical line is drawn at the timestamp where
        each window from ``hidden_windows`` starts.
    """
    # squeeze=False keeps `axs` two-dimensional, so a single activity can be
    # indexed the same way as several.
    fig, axs = plt.subplots(nrows=len(activities), figsize=(15, 15), squeeze=False)
    for i, activity in enumerate(activities):
        ax = axs[i, 0]
        plot_axis(ax, activity['timestamp'], activity['magnitude'], titles[i])
        if hidden_inputdow:
            # The earlier code called the undefined name `hidden_inputdows`,
            # which raised NameError as soon as this branch ran.
            for start, _end in hidden_windows(activity['timestamp']):
                ax.axvline(activity['timestamp'][int(start)], color='r')
    plt.subplots_adjust(hspace=0.2)
    plt.show()

In [10]:
# Example contents of the file (Train_1462518004872.csv)
#
# timestamp         xAxis       yAxis       zAxis
# 1462518004907     0.9163807   5.911284    7.6027875
# 1462518004907     0.9163807   5.911284    7.6027875
# 1462518004908     0.9163807   5.911284    7.6027875
# 1462518004908     0.9163807   5.911284    7.6027875
# 1462518004926     0.8822633   5.9561753   7.6943655
# ...
# 1462518115208     2.926313    4.7734394   7.647679
# 1462518115228     3.0298622   4.8895583   7.8547773
# 1462518115250     3.0310593   4.819528    7.8446016
# 1462518115275     2.9005754   4.6124296   7.752425
# 1462518115293     2.7491422   4.648941    7.7159133

# Column names applied to the CSV, which is read as having no header row.
COLUMNS = ['timestamp', 'xAxis', 'yAxis', 'zAxis']
# data_mentah ("raw data"): the recording to classify, loaded as a DataFrame.
data_mentah = pd.read_csv('Train_1462518004872.csv', header=None, names=COLUMNS)

In [11]:
# Before extracting features, make sure the data already contains
# timestamp, xAxis, yAxis, zAxis, magnitude

data_mentah['magnitude'] = magnitude(data_mentah)     # add the magnitude column
data_test = list(features(data_mentah))               # extracted features, one list per window

In [12]:
#############################################################################
#                                                                           #
# Extreme Learning Machine                                                  #
# https://www.kaggle.com/robertbm/extreme-learning-machine-example/notebook #
#                                                                           #
#############################################################################

In [13]:
# Example contents of the file (Dataset_Erick.csv)
#
# labels, ...x_train
#      0, 0.0, 0.0, 0.24989480049999999, ..., 0.01244791475234841
#      0, 0.0110193128, 0.4444444444444444, 0.24805126300000002, ..., 0.011340507786998051
#      0, 0.0092116902999999951, 0.37373737373737376, 0.24554333720000002, ..., 0.008624816223606722
#      0, 0.0083677324999999973, 0.35353535353535354, 0.24606407550000001, ..., 0.008724399979249698
#      0, 0.010606313999999999, 0.37373737373737376, 0.24699781529999998, ..., 0.009699753201686066
#     ...
#      4, 0.062309105999999975, 0.0, 2.8826008860000001, ..., 0.1189263958036313
#      4, 0.076093709000000023, 0.0, 2.903155097, ..., 0.1691570059978957
#      4, 0.067797800000000019, 0.0, 2.8340883649999999, ..., 0.18924480349138034
#      4, 0.042239707000000008, 0.0, 2.7803984030000004, ..., 0.16544506485620625
#      4, 0.017316066000000001, 0.0, 2.7760050519999999, ..., 0.16194180366751312

# Training set: column 0 holds the class label, the remaining columns the features.
data_set = np.loadtxt('Dataset_Erick.csv', delimiter=",")
x_train = data_set[:, 1:] # 2D array: every column of Dataset_Erick from index 1 onward
labels = data_set[:, 0]   # 1D array: only column 0 of Dataset_Erick

In [14]:
# There are 5 activities / classes (0 = STANDING, 1 = WALKING, 2 = RUNNING, 3 = STAIRS, 4 = ON_TRAIN)
# Create a 2D array of size n x m (n = number of dataset rows, m = number of activities) filled with zeros

CLASSES = 5
y_train = np.zeros([len(labels), CLASSES])

In [15]:
# For every dataset row, put a 1 in the column of its activity index (one-hot encoding)

y_train[np.arange(len(labels)), labels.astype(int)] = 1

In [16]:
INPUT_LENGHT = x_train.shape[1]  # number of feature columns per training row
HIDDEN_UNITS = 1000
SEED = 42                        # fixed so Restart & Run All reproduces the same weights

# Random 2D array of size INPUT_LENGHT x HIDDEN_UNITS drawn from a normal
# distribution. A dedicated seeded generator is used so the global NumPy
# random state is left untouched.
hidden_input = np.random.RandomState(SEED).normal(size=[INPUT_LENGHT, HIDDEN_UNITS])

In [17]:
def input_to_hidden(x):
    """Project ``x`` through the random ``hidden_input`` weights and apply ReLU.

    Returns the array ``max(x . hidden_input, 0)``; the clamp is written
    in place into the freshly computed product.
    """
    activations = np.dot(x, hidden_input)          # matrix multiplication
    np.maximum(activations, 0, out=activations)    # ReLU
    return activations

# Referensi ReLU:
# 1. https://en.wikipedia.org/wiki/Rectifier_(neural_networks)

# 2. https://www.kaggle.com/dansbecker/rectified-linear-units-relu-in-deep-learning

In [18]:
# Formula:
# https://www.sciencedirect.com/science/article/pii/S0893608014000203
# Page 3 Sub-title 3.3 RKELM-based recognition model construction
#
# NB: This is plain ELM, so the (1 / lambda) term of the formula is dropped

# K: hidden-layer output for the training features; Kt: its transpose.
K = input_to_hidden(x_train)
Kt = np.transpose(K)

hasil_1 = np.linalg.pinv(np.dot(Kt, K)) # pseudo-inverse of (Kt x K)
hasil_2 = np.dot(Kt, y_train)           # (Kt x T), T being the one-hot targets
B = np.dot(hasil_1, hasil_2)            # output weights used by predict()

In [19]:
# Start classifying

def predict(x):
    """Return the raw class scores for ``x``.

    ``x`` is passed through ``input_to_hidden`` and the result is
    multiplied by the output weights ``B``.
    """
    return np.dot(input_to_hidden(x), B)

p = predict(data_test) # data_test comes from cell In [11]

correct = 0
total = p.shape[0]

# Index of the highest score in every row, stored as floats exactly like the
# zero-initialised array that was filled element by element before.
clasification = np.argmax(p, axis=1).astype(float)

In [20]:
# Print the classification result
clasification

array([4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
       4., 4., 4., 4., 4., 4., 4., 0., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
       4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
       4., 4.])

In [21]:
# Some links about the functions used:
# 1. np.dot (https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.dot.html)
# 2. np.std (https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.std.html)
# 3. np.zeros (https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.zeros.html)
# 4. np.argmax (https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.argmax.html)
# 5. np.loadtxt (https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.loadtxt.html)
# 6. np.maximum (https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.maximum.html)
# 7. np.transpose (https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.transpose.html)
# 8. np.linalg.pinv (https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.linalg.pinv.html)
# 9. np.random.normal (https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.random.normal.html)