In [51]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import metrics

import os

# Useful Constants

# Those are separate normalised input features for the neural network
INPUT_SIGNAL_TYPES = [
    "body_acc_x_",
    "body_acc_y_",
    "body_acc_z_",
    "body_gyro_x_",
    "body_gyro_y_",
    "body_gyro_z_",
    "total_acc_x_",
    "total_acc_y_",
    "total_acc_z_"
]

# Output classes to learn how to classify
LABELS = [
    "WALKING", 
    "WALKING_UPSTAIRS", 
    "WALKING_DOWNSTAIRS", 
    "SITTING", 
    "STANDING", 
    "LAYING"
] 


# ## Let's start by downloading the data: 

# In[3]:


import os
# Note: Linux bash commands start with a "!" inside those "ipython notebook" cells

DATA_PATH = "./"
get_ipython().system('python download_dataset.py')
DATASET_PATH = DATA_PATH + "UCI HAR Dataset/"
print("\n" + "Dataset is now located at: " + DATASET_PATH)


# ## Preparing dataset:


TRAIN = "train/"
TEST = "test/"


# Load "X" (the neural network's training and testing inputs)

def load_X(X_signals_paths):
    X_signals = []
    
    for signal_type_path in X_signals_paths:
        file = open(signal_type_path, 'r')
        # Read dataset from disk, dealing with text files' syntax
        X_signals.append(
            [np.array(serie, dtype=np.float32) for serie in [
                row.replace('  ', ' ').strip().split(' ') for row in file
            ]]
        )
        file.close()
    
    return np.transpose(np.array(X_signals), (1, 2, 0))

X_train_signals_paths = [
    DATASET_PATH + TRAIN + "Inertial Signals/" + signal + "train.txt" for signal in INPUT_SIGNAL_TYPES
]
X_test_signals_paths = [
    DATASET_PATH + TEST + "Inertial Signals/" + signal + "test.txt" for signal in INPUT_SIGNAL_TYPES
]

X_train = load_X(X_train_signals_paths)
X_test = load_X(X_test_signals_paths)


# Load "y" (the neural network's training and testing outputs)

def load_y(y_path):
    file = open(y_path, 'r')
    # Read dataset from disk, dealing with text file's syntax
    y_ = np.array(
        [elem for elem in [
            row.replace('  ', ' ').strip().split(' ') for row in file
        ]], 
        dtype=np.int32
    )
    file.close()
    
    # Substract 1 to each output class for friendly 0-based indexing 
    return y_ - 1

y_train_path = DATASET_PATH + TRAIN + "y_train.txt"
y_test_path = DATASET_PATH + TEST + "y_test.txt"

y_train = load_y(y_train_path)
y_test = load_y(y_test_path)


# ## Additionnal Parameters:
# 
# Here are some core parameter definitions for the training. 
# 
# The whole neural network's structure could be summarised by enumerating those parameters and the fact an LSTM is used. 
# dropout=0.2, recurrent_dropout=0.2
# nb_epoch=5, batch_size=64
# 
# Keras LSTM with dropout is used.
# A convolutional layer and max pooling can also be tested, before the LSTM cell.

# In[5]:



# Some debugging info

print("Some useful info to get an insight on dataset's shape and normalisation:")
print("(X shape, y shape, every X's mean, every X's standard deviation)")
print(X_test.shape, y_test.shape, np.mean(X_test), np.std(X_test))
print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")


Downloading...
Dataset already downloaded. Did not download twice.

Extracting...
Dataset already extracted. Did not extract twice.


Dataset is now located at: ../data/UCI HAR Dataset/
Some useful info to get an insight on dataset's shape and normalisation:
(X shape, y shape, every X's mean, every X's standard deviation)
(2947, 128, 9) (2947, 1) 0.09913992 0.39567086
The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.


In [52]:
y_train

array([[4],
       [4],
       [4],
       ...,
       [1],
       [1],
       [1]])

In [40]:
# process batches of 10 days
BATCH_SIZE =64
X=dict()
y=dict()
X['train']=X_train
y['train']=y_train
X['test']=X_test
y['test']=y_test
def next_batch(x, y, ds):
    """get the next batch for training"""

    def as_batch(data, start, count):
        return data[start:start + count]

    for i in range(0, len(x[ds]), BATCH_SIZE):
        yield as_batch(X[ds], i, BATCH_SIZE), as_batch(y[ds], i, BATCH_SIZE)

In [42]:
import time
EPOCHS =10
#Specify the internal-state dimensions of the LSTM cell
H_DIMS = 128
def create_model(features, num_classes, H_DIMS):
    """Create the model for time series prediction"""
    with C.layers.default_options(initial_state = 0.1):
        m = C.layers.Recurrence(C.layers.LSTM(H_DIMS))(features)
        m = C.sequence.last(m)
        m = C.layers.Dropout(0.2)(m)
        m = C.layers.Dense(num_classes)(m)
        return m
# Input variables denoting the features and label data
features = C.sequence.input_variable(shape=input_dim, is_sparse=True)

# Instantiate the sequence classification model
z = create_model(features, num_classes, H_DIMS)

# input sequences
features = C.sequence.input_variable(9)


# expected output (label), also the dynamic axes of the model output
# is specified as the model of the label input
label = C.input_variable(num_classes, dynamic_axes=z.dynamic_axes, name="y")

# the learning rate
learning_rate = 0.005
lr_schedule = C.learning_parameter_schedule(learning_rate)

# loss function
ce = C.cross_entropy_with_softmax(z, label)
pe = C.classification_error(z, label)


# use adam optimizer
momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
learner = C.fsadagrad(z.parameters, 
                      lr = lr_schedule, 
                      momentum = momentum_schedule)
trainer = C.Trainer(z, (ce, pe), [learner])
# training
loss_summary = []

start = time.time()
for epoch in range(0, EPOCHS):
    for x_batch, l_batch in next_batch(X, y, "train"):
        trainer.train_minibatch({features: x_batch, label: l_batch})
        
    if epoch % (EPOCHS / 10) == 0:
        training_loss = trainer.previous_minibatch_loss_average
        loss_summary.append(training_loss)
        print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))

print("Training took {:.1f} sec".format(time.time() - start))

  elif np.issubdtype(sample.dtype, int):
  (sample.dtype, var.uid, str(var.dtype)))


ValueError: Values for 1 required arguments 'Input('Input27565', [#, *], [9])', that the requested output(s) 'Output('aggregateLoss', [], []), Output('Block28362_Output_0', [#], [1]), Output('aggregateEvalMetric', [], [])' depend on, have not been provided.

[CALL STACK]
    > CNTK::Internal::  UseSparseGradientAggregationInDataParallelSGD
    - CNTK::Function::  Forward
    - CNTK::  CreateTrainer
    - CNTK::Trainer::  TotalNumberOfUnitsSeen
    - CNTK::Trainer::  TrainMinibatch (x2)
    - PyInit__cntk_py (x2)
    - PyEval_EvalFrameDefault
    - Py_CheckFunctionResult
    - PyObject_CallFunctionObjArgs
    - PyEval_EvalFrameDefault
    - Py_CheckFunctionResult
    - PyObject_CallFunctionObjArgs
    - PyEval_EvalFrameDefault
    - Py_CheckFunctionResult



In [29]:
import cntk as C
    

# Defines the LSTM model for classifying sequences
def lstm_sequence_classifier(features, num_classes, LSTM_dim):
    classifier = C.layers.Sequential([
                                      C.layers.Recurrence(C.layers.LSTM(LSTM_dim)),
                                      C.sequence.last,
                                      C.layers.Dense(num_classes)])
    return classifier(features)


# Creates and trains a LSTM sequence classification model
#def train_sequence_classifier():
input_dim = 9
hidden_dim = 128
num_classes = 6

# Input variables denoting the features and label data
features = C.sequence.input_variable(shape=input_dim, is_sparse=True)
label = C.input_variable(num_classes)

# Instantiate the sequence classification model
classifier_output = lstm_sequence_classifier(features, num_classes, hidden_dim)

ce = C.cross_entropy_with_softmax(classifier_output, label)
pe = C.classification_error(classifier_output, label)

train_reader = C.io.MinibatchSourceFromData(dict(x=np.array(X_train, np.float32), y=np.array(y_train, np.float32)))
#reader = create_reader(path, True, input_dim, num_classes)


lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)

# Instantiate the trainer object to drive the model training
progress_printer = C.logging.ProgressPrinter(0)
trainer = C.Trainer(classifier_output, (ce, pe),
                    C.sgd(classifier_output.parameters, lr=lr_per_sample),
                    progress_printer)

# Get minibatches of sequences to train with and perform model training
minibatch_size = 200

for i in range(251):
    mb = train_reader.next_minibatch(minibatch_size)
    trainer.train_minibatch(dict(features = mb[train_reader.streams['x']],label = mb[train_reader.streams['y']]))

evaluation_average = copy.copy(train_reader.previous_minibatch_evaluation_average)
loss_average = copy.copy(train_reader.previous_minibatch_loss_average)

#error, _ = train_sequence_classifier()

 average      since    average      since      examples
    loss       last     metric       last              
 ------------------------------------------------------




ValueError: variable with name "features" does not exist in the network. Available variable names: 

In [None]:
classifier_output.

In [None]:

import os
import cntk as C
import copy


# Creates the reader
def create_reader(path, is_training, input_dim, label_dim):
    return C.io.MinibatchSource(C.io.CTFDeserializer(path, C.io.StreamDefs(
        features = C.io.StreamDef(field='x', shape=input_dim,   is_sparse=True),
        labels   = C.io.StreamDef(field='y', shape=label_dim,   is_sparse=False)
    )), randomize=is_training, max_sweeps = C.io.INFINITELY_REPEAT if is_training else 1)


# Defines the LSTM model for classifying sequences
def lstm_sequence_classifier(features, num_classes, embedding_dim, LSTM_dim):
    classifier = C.layers.Sequential([C.layers.Embedding(embedding_dim),
                                      C.layers.Recurrence(C.layers.LSTM(LSTM_dim)),
                                      C.sequence.last,
                                      C.layers.Dense(num_classes)])
    return classifier(features)


# Creates and trains a LSTM sequence classification model
def train_sequence_classifier():
    input_dim = 2000
    hidden_dim = 25
    embedding_dim = 50
    num_classes = 5

    # Input variables denoting the features and label data
    features = C.sequence.input_variable(shape=input_dim, is_sparse=True)
    label = C.input_variable(num_classes)

    # Instantiate the sequence classification model
    classifier_output = lstm_sequence_classifier(features, num_classes, embedding_dim, hidden_dim)

    ce = C.cross_entropy_with_softmax(classifier_output, label)
    pe = C.classification_error(classifier_output, label)

    

    reader = create_reader(path, True, input_dim, num_classes)

    input_map = {
        features : reader.streams.features,
        label    : reader.streams.labels
    }

    lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)

    # Instantiate the trainer object to drive the model training
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(classifier_output, (ce, pe),
                        C.sgd(classifier_output.parameters, lr=lr_per_sample),
                        progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average

    error, _ = train_sequence_classifier()