In [1]:
%pylab inline
%load_ext autoreload
%autoreload 2

Populating the interactive namespace from numpy and matplotlib


In [2]:
import pandas as pd

In [3]:
from sensorcnn.visualization.examples import *
from sensorcnn.dataset.examples import *

### Encode labels

In [4]:
label_to_idx = {l:i for i, l in enumerate(labels)}

def encode(label, n_labels=6):
    """
    Returns a binary vector with all zeros or one at the label position
    
    example:
    -------
    ecnode(4, n_labels=6)
    = np.array(0, 0, 0, 1, 0, 0)
    
    encode(None, n_labels=6)
    = np.array(0, 0, 0, 0, 0, 0)
    """
    y = np.zeros(n_labels)
    if label is not None and 0 <= label < n_labels:
        y[label] = 1
        
    return y

### Label the first 50 samples as setup

In [5]:
def get_setup_labels(example, setup_threshold=50, n_labels=6):
    """Pick the first 50 samples and label them as setup"""
    ys = []
    i = 0
    for l in examples['label']:
        label_index = label_to_idx.get(labels_mapping.get(l))
        if label_index is None:
            i = 0
            ys.append(np.zeros(n_labels))
        else:
            i += 0
            if i > setup_threshold:
                ys.append(np.zeros(n_labels))
            else:
                ys.append(encode(label_index))
    return np.array(ys)

In [6]:
root_directory = "/Users/mostafa/Google Drive/Exercise Data/"
n_features = 3
n_labels = 6
X = np.zeros((0, 3))
Y = np.zeros((0, 6))
for filename in csv_file_iterator(root_directory):
    examples = load_from_csv(filename)
    n_examples = len(examples)
    X = np.append(X, get_features(examples), axis=0)
    ys = get_setup_labels(examples)
    Y = np.append(Y, ys.reshape((n_examples, n_labels)), axis=0)

  if self.run_code(code, result):


In [38]:
x = [x for x in csv_file_iterator(root_directory)][0]
ex = load_from_csv(filename)

In [39]:
ex

Unnamed: 0,x,y,z,label,Unnamed: 5,Unnamed: 6,Unnamed: 7
0,0.159180,-0.286865,-0.912109,,,,
1,0.158447,-0.238770,-0.931396,,,,
2,0.174072,-0.232910,-0.948242,,,,
3,0.177490,-0.205566,-0.962402,,,,
4,0.183350,-0.204590,-0.962402,,,,
5,0.193604,-0.197754,-0.975342,,,,
6,0.201172,-0.165771,-0.992920,,,,
7,0.206055,-0.187500,-0.996826,,,,
8,0.203613,-0.184082,-0.980225,,,,
9,0.203125,-0.191895,-0.974854,,,,


In [7]:
print(X.shape)
print(Y.shape)

(6557118, 3)
(6557118, 6)


In [None]:
np.savez_compressed("/Users/mostafa/Desktop/muvr-6-exercises.npz", X, Y)

### Get exercise setup data only and drop the other samples

In [8]:
def get_labeled(X, Y):
    """Return only examples with setup labels and drop other examples"""
    assert X.shape[0] == Y.shape[0], "X and Y lengthes don't match"
    labeled_xs = []
    labeled_ys = []
    for x, y in zip(X, Y):
        if sum(y) == 0:
            continue

        labeled_xs.append(x)
        labeled_ys.append(y)

    return np.array(labeled_xs), np.array(labeled_ys)

In [9]:
X_labeled, Y_labeled = get_labeled(X, Y)

In [31]:
X_labeled[0]

array([ 0.35644501,  0.68505901, -0.61499   ])

###  Examples augmentation - put each 50 sample into 1 50*3 sample

In [10]:
def augment_examples(X, Y, new_sample_size=50, label_merge=np.average):
    """"""
    assert X.shape[0] == Y.shape[0], "X and Y lengthes don't match"
    n_samples = X.shape[0]
    n_features = X.shape[1]
    n_labels = Y.shape[1]
    end = n_samples - (n_samples % new_sample_size)
    new_X = X[:end, :].reshape((int(end/new_sample_size), int(n_features*new_sample_size)))
    ys = []
    for y in Y[:end, :].reshape((int(end/new_sample_size), int(n_labels*new_sample_size))):
        ys.append(label_merge(y.reshape((new_sample_size, n_labels)), axis=0))

    new_Y = np.array(ys)
    return new_X, new_Y

In [11]:
new_X, new_Y = augment_examples(X_labeled, Y_labeled)

In [32]:
print(new_X.shape)
print(new_Y.shape)
new_Y[0]

(18530, 150)
(18530, 6)


array([ 0.,  0.,  1.,  0.,  0.,  0.])

In [None]:
np.savez_compressed("/Users/mostafa/Desktop/muvr-6-labeledonly-exercises.npz", new_X, new_Y)

### Train an MLP model

In [15]:
from sklearn.cross_validation import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(new_X, new_Y, test_size=0.33, random_state=42)

In [25]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD

model = Sequential()
model.add(Dense(input_dim=150, output_dim=64, init='uniform', activation='tanh'))
model.add(Dense(input_dim=64, output_dim=32, init='uniform', activation='tanh'))
model.add(Dropout(0.3))
model.add(Dense(input_dim=32, output_dim=6, init='uniform', activation='tanh'))

sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_squared_error', optimizer=sgd, metrics=["accuracy"])

In [30]:
model.fit(X_train, Y_train, nb_epoch=30, batch_size=128)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1132779e8>

In [29]:
train_score = model.evaluate(X_train, Y_train, batch_size=128)
test_score = model.evaluate(X_test, Y_test, batch_size=128)
print(train_score)
print(test_score)

[0.024551761824003017, 0.90447039898489445]
[0.026107511551516131, 0.90089942777342025]
