## Generic Timeseries SageMaker Template with Gluon

This is a template for running the human activity recognition example on SageMaker. Refer to `smartphone_human_activity_classification_gluon.ipynb` for the non-SageMaker version.

In [1]:
import os
import boto3
import sagemaker
from sagemaker.mxnet import MXNet
from mxnet import gluon
from sagemaker import get_execution_role

# Session object; used further down to upload the packaged data to S3.
sagemaker_session = sagemaker.Session()

# IAM role handed to the MXNet estimator when it is constructed.
role = get_execution_role()

### Load and Package Data

1. Load your train and test data as numpy arrays
2. Package data as a pickle file and upload to S3

By doing this, we can use the `generic_ts.py` script to run any time-series classification task with SageMaker.

In [2]:
import csv
import numpy as np

def get_labels_from_csv(path):
    """Read the first column of a comma-delimited file as float32 labels.

    Args:
        path: Path to a text file with one record per line; only the first
            field of each record is used.

    Returns:
        1-D ``numpy.ndarray`` of dtype float32 containing the first field of
        every non-empty row, in file order. Empty for an empty file.

    Raises:
        ValueError: If the first field of a row cannot be parsed as a float.
    """
    import sys  # local import: only needed to choose the open() mode

    # The csv module wants a text stream opened with newline='' on Python 3
    # (binary mode makes csv.reader raise on bytes), while Python 2 wants
    # binary mode and has no ``newline`` argument on the builtin open().
    if sys.version_info[0] >= 3:
        csvfile = open(path, 'r', newline='')
    else:
        csvfile = open(path, 'rb')

    with csvfile:
        rd = csv.reader(csvfile, delimiter=',')
        # csv.reader yields [] for blank lines (e.g. a trailing newline);
        # skip them instead of failing on row[0].
        values = [float(row[0]) for row in rd if row]
    return np.array(values).astype('float32')


# File-name prefixes of the signal files; each one is interpolated into a
# train path and a test path below.
INPUT_SIGNAL_TYPES = [
    "body_acc_x_", "body_acc_y_", "body_acc_z_",
    "body_gyro_x_", "body_gyro_y_", "body_gyro_z_",
    "total_acc_x_", "total_acc_y_", "total_acc_z_",
]

# Activity class names; their count is passed to the estimator as 'n_out'.
LABELS = [
    "WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS",
    "SITTING", "STANDING", "LAYING",
]

# Root directory that contains the har_data folder.
path = '../data'

# One file per signal prefix, in INPUT_SIGNAL_TYPES order, for each split.
train, test = [], []
for prefix in INPUT_SIGNAL_TYPES:
    train.append(path + "/har_data/train/%strain.txt" % prefix)
    test.append(path + "/har_data/test/%stest.txt" % prefix)


def load_data(files):
    """Load whitespace-delimited numeric files into one 3-D float32 array.

    Each file is parsed into a 2-D array (one row per non-blank line, one
    column per whitespace-separated value). The per-file arrays are stacked
    and the file axis is moved to the end.

    Args:
        files: Sequence of paths to text files. All files must contain the
            same number of rows and the same number of values per row.

    Returns:
        ``numpy.ndarray`` of dtype float32 with shape
        ``(rows, values_per_row, len(files))``.

    Raises:
        ValueError: If a token cannot be parsed as a float.
    """
    arr = []
    for fname in files:
        with open(fname, 'r') as f:
            # str.split() with no argument splits on any run of whitespace,
            # so leading/trailing padding, tabs and repeated separators are
            # all handled (replace('  ', ' ') only coped with doubled
            # spaces). Blank lines are skipped rather than parsed.
            rows = [line.split() for line in f if line.strip()]
        arr.append(np.array(rows, dtype=np.float32))
    # (files, rows, values) -> (rows, values, files)
    return np.transpose(np.array(arr), (1, 2, 0))

In [3]:
# Stack the per-signal files of each split into a single array.
X_train = load_data(train)
X_test = load_data(test)

# Cell result: shapes are (rows, values per row, number of signal files).
X_train.shape, X_test.shape

((7352, 128, 9), (2947, 128, 9))

In [4]:
# Label files sit in the same train/test directories as the signal files.
y_train_path = path + "/har_data/train/y_train.txt"
y_train = get_labels_from_csv(y_train_path)

y_test_path = path + "/har_data/test/y_test.txt"
y_test = get_labels_from_csv(y_test_path)

In [5]:
# Shift the labels down by one so the smallest class becomes 0.
# The labels are recomputed from the files rather than decremented in place,
# which keeps this cell idempotent: re-running it cannot shift them twice.
y_train = get_labels_from_csv(y_train_path) - 1
y_test = get_labels_from_csv(y_test_path) - 1

print(min(y_train), max(y_train), min(y_test), max(y_test))

(0.0, 5.0, 0.0, 5.0)


In [6]:
%%bash
# -p creates missing parents and succeeds when the directories already
# exist, so this cell can be re-run without failing.
mkdir -p pkl_data/train pkl_data/test

In [7]:
import pickle
# Serialise each split as [features, labels]. The context managers close
# (and therefore flush) the files before the later S3 upload reads them.
with open('pkl_data/train/train.pkl', 'wb') as train_file:
    pickle.dump([X_train, y_train], train_file)
with open('pkl_data/test/test.pkl', 'wb') as test_file:
    pickle.dump([X_test, y_test], test_file)

In [8]:
# Round-trip check: reload the packaged test split (closing the file
# afterwards). X_t / y_t are reused later to query the deployed endpoint.
with open('pkl_data/test/test.pkl', 'rb') as test_file:
    X_t, y_t = pickle.load(test_file)
X_t.shape, y_t.shape

((2947, 128, 9), (2947,))

--- end of data transformation, loading and packaging ----

## Uploading the data

We use the `sagemaker.Session.upload_data` function to upload our datasets to an S3 location. The return value `inputs` identifies the location -- we will use this later when we start the training job.

In [9]:
# Upload the local pkl_data directory under the 'data/har_pkl' key prefix;
# the returned value is passed to fit() when the training job is started.
inputs = sagemaker_session.upload_data(path='pkl_data', key_prefix='data/har_pkl')

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-148886336128


## Execute the cell below to view the training script

In [100]:
!cat generic_ts.py

'''
TimeSeries Classificataion SageMaker Template 
'''

from __future__ import print_function

import logging
import mxnet as mx
from mxnet import gluon, autograd, nd
from mxnet.gluon import nn
import numpy as np
import json
import pickle

logging.basicConfig(level=logging.DEBUG)

import os
def find_file(root_path, file_name):
    for root, dirs, files in os.walk(root_path):
        if file_name in files:
            return os.path.join(root, file_name)

def detach(hidden):
    if isinstance(hidden, (tuple, list)):
        hidden = [i.detach() for i in hidden]
    else:
        hidden = hidden.detach()
    return hidden

class BaseRNNClassifier(mx.gluon.Block):
    '''
    Exensible RNN class with LSTM that can operate with MXNet NDArray iter or DataLoader.
    Includes fit() function to mimic the symbolic fit() function
    '''
    
    @classmethod
    def get_data(cls, batch, iter_type, ctx):
        ''' get data and label from the iterator/datal

## Run the training script on SageMaker

The `MXNet` class allows us to run our training function on SageMaker infrastructure. We need to configure it with our training script, an IAM role, the number of training instances, and the training instance type. In this case we will run our training job on a single `ml.m4.xlarge` instance.

In [117]:
# Hyperparameters handed to the estimator for the generic_ts.py script.
training_hyperparameters = {
    'batch_size': 32,
    'epochs': 4,
    'n_out': len(LABELS),  # number of entries in LABELS
    'rnn_size': 64,
    'n_layer': 1,
    'num_gpus': 0,
}

# Estimator configured for a single ml.m4.xlarge training instance.
m = MXNet(
    "generic_ts.py",
    role=role,
    train_instance_count=1,
    train_instance_type="ml.m4.xlarge",
    hyperparameters=training_hyperparameters,
)

After we've constructed our `MXNet` object, we can fit it using the data we uploaded to S3. SageMaker makes sure our data is available in the local filesystem, so our training script can simply read the data from disk.


In [118]:
# Start the training job on the S3 location returned by upload_data.
m.fit(inputs)

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-148886336128
INFO:sagemaker:Creating training-job with name: sagemaker-mxnet-2018-05-24-23-36-48-789


..................
[31m2018-05-24 23:39:31,613 INFO - root - running container entrypoint[0m
[31m2018-05-24 23:39:31,613 INFO - root - starting train task[0m
[31m2018-05-24 23:39:31,620 INFO - container_support.training - Training starting[0m
[31m2018-05-24 23:39:33,811 INFO - mxnet_container.train - MXNetTrainingEnvironment: {'enable_cloudwatch_metrics': False, 'available_gpus': 0, 'channels': {u'training': {u'TrainingInputMode': u'File', u'RecordWrapperType': u'None', u'S3DistributionType': u'FullyReplicated'}}, '_ps_verbose': 0, 'resource_config': {u'current_host': u'algo-1', u'hosts': [u'algo-1']}, 'user_script_name': u'generic_ts.py', 'input_config_dir': '/opt/ml/input/config', 'channel_dirs': {u'training': u'/opt/ml/input/data/training'}, 'code_dir': '/opt/ml/code', 'output_data_dir': '/opt/ml/output/data/', 'output_dir': '/opt/ml/output', 'model_dir': '/opt/ml/model', 'hyperparameters': {u'sagemaker_program': u'generic_ts.py', u'rnn_size': 64, u'num_gpus': 0, u'n_layer': 

## Deploy the trained model to prepare for predictions

The deploy() method creates an endpoint which serves prediction requests in real-time.

In [119]:
# Create a real-time endpoint for the trained model on one ml.m4.xlarge instance.
predictor = m.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

INFO:sagemaker:Creating model with name: sagemaker-mxnet-2018-05-24-23-36-48-789
INFO:sagemaker:Creating endpoint with name sagemaker-mxnet-2018-05-24-23-36-48-789


---------------------------------------------------------------!

## Make a prediction with data to verify the endpoint is up


In [120]:
# Every 100th sample from the first 2000 test rows; the same slice is
# applied to inputs and labels so the two printed lists line up.
sample_idx = slice(0, 2000, 100)

preds = predictor.predict(X_t[sample_idx])
print('Prediction results:')
print(preds)
print('Ground truth results:')
print(y_t[sample_idx].tolist())

Prediction results:
[4.0, 0.0, 4.0, 1.0, 0.0, 3.0, 1.0, 0.0, 3.0, 2.0, 0.0, 4.0, 2.0, 0.0, 3.0, 1.0, 5.0, 4.0, 0.0, 3.0]
Ground truth results:
[4.0, 0.0, 3.0, 1.0, 0.0, 3.0, 1.0, 0.0, 3.0, 2.0, 0.0, 3.0, 1.0, 0.0, 3.0, 1.0, 5.0, 4.0, 0.0, 3.0]


# Cleaning up
To avoid incurring charges to your AWS account for the resources used in this tutorial you need to delete the **SageMaker Endpoint:**

In [121]:
# Delete the endpoint created by deploy() so it stops incurring charges.
# Reuses the session created at the top of the notebook instead of
# constructing a second one.
sagemaker_session.delete_endpoint(predictor.endpoint)

INFO:sagemaker:Deleting endpoint with name: sagemaker-mxnet-2018-05-24-23-36-48-789
