# Data import and preparation

In [None]:
import pandas as pd
import numpy as np

# Convert a dataset into a windowed format
#
# Target format (not fully implemented yet, see the TODO in the function).
# For example:
# window = 2
# dataset =
# [
#  [1, 2, 3],
#  [2, 3, 4],
#  [5, 6, 7]
# ]
#
# Should eventually result in:
# [
#  [0, 0, 0, 0, 0, 0, 1, 2, 3],
#  [0, 0, 0, 1, 2, 3, 2, 3, 4],
#  [1, 2, 3, 2, 3, 4, 5, 6, 7]
# ]
def prepare_dataset(dataset, window):
    """Slice ``dataset`` into overlapping windows of ``window`` consecutive rows.

    Every complete window is returned, including the one that ends on the
    last row. The windows are neither zero-padded at the start nor flattened;
    the header comment above describes the format this is meant to grow into.

    Args:
        dataset: Array-like whose first axis is the row (time) axis.
        window: Number of consecutive rows per window; must be at least 1.

    Returns:
        ``numpy.ndarray`` of shape
        ``(len(dataset) - window + 1, window) + dataset.shape[1:]``.
        When ``dataset`` has fewer than ``window`` rows the result is empty
        but keeps the trailing dimensions, i.e. ``(0, window, ...)``.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    # Accept plain nested lists as well as arrays
    dataset = np.asarray(dataset)

    # TODO append the first "window" rows padded with 0s
    n_windows = len(dataset) - window + 1
    if n_windows <= 0:
        # Keep a well-formed shape so callers can still inspect shape[1:]
        return np.empty((0, window) + dataset.shape[1:], dtype=dataset.dtype)

    return np.array([dataset[start:start + window] for start in range(n_windows)])

def load_prepared_dataset(path, window):
    """Load a CSV recording and build the windowed dataset from it.

    Args:
        path: Location of the CSV file, passed to ``pandas.read_csv``.
        window: Window length forwarded to ``prepare_dataset``.

    Returns:
        A tuple ``(data_raw, windowed)``: the complete DataFrame as read from
        disk, and the result of ``prepare_dataset`` applied to the selected
        column.
    """
    # Read the whole recording into memory
    data_raw = pd.read_csv(path)

    # Select the training columns as a 2-D array (rows x selected columns);
    # note that the header name starts with a space.
    # ``.values`` replaces ``DataFrame.as_matrix()``, which was deprecated in
    # pandas 0.23 and removed in pandas 1.0.
    data_selected_raw = data_raw[[" LinAccX (g)"]].values

    return data_raw, prepare_dataset(data_selected_raw, window)
    
# Number of consecutive rows that make up one window
window = 20

# Load data used for training
data_train_raw, data_train = load_prepared_dataset('resources/normal_20170202_2229.csv', window)
# Load data used for testing/validating
data_validate_raw, data_validate = load_prepared_dataset('resources/verify_20170202_2243.csv', window)

# Number of values per observation = number of features * window.
# Count every value behind the first axis instead of only shape[1], so the
# result is right whether each observation is a (window, features) block or
# a single flattened row. Floor division keeps it an int: under Python 3 "/"
# yields a float, which cannot be used as a layer size or reshape dimension.
features = int(np.prod(data_train.shape[1:])) // window

# Data visualization

In [None]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt
from IPython.core.pylabtools import figsize
# Set the default figure size to 16 by 7
figsize(16, 7)

# Plot the raw training signal against its sample index as green dots
train_signal = data_train_raw[" LinAccX (g)"]
sample_index = list(range(len(train_signal)))
plt.plot(sample_index, train_signal, "go")
plt.xlabel('Time')
plt.ylabel('LinAccX')
plt.show()

In [None]:
# Plot the raw validation signal against its sample index as green dots
plt.plot(list(range(len(data_validate_raw[" LinAccX (g)"]))), data_validate_raw[" LinAccX (g)"], "go")
plt.ylabel('LinAccX')
# Label the x axis too, so this plot matches the training-data plot
plt.xlabel('Time')
plt.show()

# Multi-layer perceptron

In [None]:
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon

In [None]:
# Device on which the network parameters live and the batches are processed
model_ctx = mx.cpu()
# On a machine with a GPU, use this instead:
# model_ctx = mx.gpu(0)

# Device context intended for the data (CPU)
data_ctx = mx.cpu()

In [None]:
# Mini-batch size used by the data loader
batch_size = 64
# Input and output widths are both set to the number of features
num_inputs = num_outputs = features
# Total number of training observations
num_examples = data_train.shape[0]

# Serve the training observations in batches of batch_size
data_train_mxnet = gluon.data.DataLoader(data_train, batch_size=batch_size)

In [None]:
# Width of the single hidden layer
num_hidden = 64

# Two-layer perceptron: ReLU hidden layer followed by a linear output layer
net = gluon.nn.Sequential()
with net.name_scope():
    # Create the layers inside the name scope, in the order they are stacked
    hidden_layer = gluon.nn.Dense(num_hidden, activation="relu")
    output_layer = gluon.nn.Dense(num_outputs)
    net.add(hidden_layer)
    net.add(output_layer)

In [None]:
# Initialise the weights from a normal distribution (sigma = 0.1) on the model device
net_params = net.collect_params()
net_params.initialize(mx.init.Normal(sigma=0.1), ctx=model_ctx)

# NOTE(review): softmax cross-entropy is a classification loss, while the
# training loop passes the input batch itself as the label - confirm that
# this is the intended loss.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# Plain stochastic gradient descent with a learning rate of 0.01
trainer = gluon.Trainer(net_params, 'sgd', {'learning_rate': 0.01})

In [None]:
def evaluate_accuracy(data_iterator, net):
    """Run every batch of ``data_iterator`` through ``net`` and return the accuracy metric value.

    NOTE(review): unfinished - ``predictions`` and ``label`` are never
    assigned inside this function (see the TODO below), so unless they exist
    as globals the ``acc.update`` call raises ``NameError``.

    Args:
        data_iterator: Iterable yielding batches that support
            ``as_in_context`` and ``reshape``.
        net: Callable model applied to each reshaped batch.

    Returns:
        Element 1 of ``acc.get()``.
    """
    acc = mx.metric.Accuracy()
    for i, data in enumerate(data_iterator):
        # Move the batch to the model device and reshape it to (-1, features)
        data = data.as_in_context(model_ctx).reshape((-1, features))
        output = net(data)
        # TODO make labels = np.array_fill(data.shape[0], 0)
        # predictions = output - data
        acc.update(preds=predictions, labels=label)
    # acc.get() returns a (name, value) pair in MXNet's metric API; keep only the value
    return acc.get()[1]

In [None]:
# Number of passes over the training data
epochs = 10
# Not used by the loop below; kept in case other cells rely on it
smoothing_constant = .01

for e in range(epochs):
    # Sum of the per-sample losses seen during this epoch
    cumulative_loss = 0
    # Iterate over the loader built from the training data
    # (the name "train_data" is not defined anywhere in this notebook)
    for data in data_train_mxnet:
        # Reshape to (-1, features), the same layout evaluate_accuracy uses,
        # instead of a hard-coded width of 784; int() guards against
        # "features" having been computed as a float.
        data = data.as_in_context(model_ctx).reshape((-1, int(features)))
        with autograd.record():
            # Only the forward pass and the loss need to be recorded
            output = net(data)
            loss = softmax_cross_entropy(output, data)
        loss.backward()
        # Normalise the gradient step by the number of samples in the batch
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
    # Report progress; previously the accumulated loss was silently discarded
    print("Epoch %d. Cumulative loss: %s" % (e, cumulative_loss))

In [None]:
# TODO do predictions on all the data
# Get the errors
# Get threshold as the standard deviation of the errors, sd(errors)

In [None]:
# TODO do predictions on all the test data
# Check against threshold for anomalies
# plot

# Long short-term memory

In [None]:
# Three stacked LSTM layers whose hidden size equals the number of features.
# int() guards against "features" having been computed as a float.
# data_train holds one window per entry of its first axis, i.e. it is
# batch-major, so the layer must use the 'NTC' layout instead of the
# default time-major 'TNC'.
layer = mx.gluon.rnn.LSTM(int(features), 3, layout='NTC')
layer.initialize()
# Gluon layers operate on MXNet NDArrays, not NumPy arrays; nd.array also
# converts the float64 data to MXNet's default float32.
output = layer(nd.array(data_train))