In [40]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from sklearn.cross_validation import train_test_split

In [41]:
# Load the raw dataset from data.csv (expected in the notebook's working directory).
df = pd.read_csv('data.csv')

In [42]:
# Preview the first 10 rows of the raw frame.
df.head(10)

Unnamed: 0,user_id,activity,timestamp,acc_x,acc_y,acc_z
0,33,Jogging,49105960000000.0,-0.694638,12.680544,0.503953
1,33,Jogging,49106060000000.0,5.012288,11.264028,0.953424
2,33,Jogging,49106110000000.0,4.903325,10.882658,-0.081722
3,33,Jogging,49106220000000.0,-0.612916,18.496431,3.023717
4,33,Jogging,49106330000000.0,-1.18497,12.108489,7.205164
5,33,Jogging,49106440000000.0,1.375655,-2.492524,-6.510526
6,33,Jogging,49106540000000.0,-0.612916,10.56939,5.706926
7,33,Jogging,49106650000000.0,-0.503953,13.947236,7.05534
8,33,Jogging,49106760000000.0,-8.430995,11.413852,5.134871
9,33,Jogging,49106870000000.0,0.953424,1.375655,1.648062


In [43]:
# Column dtypes and non-null counts -- used here to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1074741 entries, 0 to 1074740
Data columns (total 6 columns):
user_id      1074741 non-null int64
activity     1074588 non-null object
timestamp    1074588 non-null float64
acc_x        1074588 non-null float64
acc_y        1074588 non-null float64
acc_z        1074587 non-null float64
dtypes: float64(4), int64(1), object(1)
memory usage: 57.4+ MB


In [44]:
# Discard every row that has a NaN in any column; `df` is rebound to the cleaned frame.
df = df.dropna(how="any")

In [45]:
# Re-check non-null counts after dropping incomplete rows.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1074587 entries, 0 to 1074740
Data columns (total 6 columns):
user_id      1074587 non-null int64
activity     1074587 non-null object
timestamp    1074587 non-null float64
acc_x        1074587 non-null float64
acc_y        1074587 non-null float64
acc_z        1074587 non-null float64
dtypes: float64(4), int64(1), object(1)
memory usage: 57.4+ MB


In [46]:
# Optional (disabled): work on a random 4000-row subsample instead of the full frame.
# Note that `xrange` makes this snippet Python 2 only.
#sample = np.random.choice(xrange(df.shape[0]), 4000)

#df_new = df.iloc[sample][['acc_x','acc_y','acc_z','activity']]
# Keep only the three acc_* columns and the activity label; user_id and timestamp are not used further.
df_new = df[['acc_x','acc_y','acc_z','activity']]

In [47]:
# Preview the first 10 rows of the reduced frame.
df_new.head(10)

Unnamed: 0,acc_x,acc_y,acc_z,activity
0,-0.694638,12.680544,0.503953,Jogging
1,5.012288,11.264028,0.953424,Jogging
2,4.903325,10.882658,-0.081722,Jogging
3,-0.612916,18.496431,3.023717,Jogging
4,-1.18497,12.108489,7.205164,Jogging
5,1.375655,-2.492524,-6.510526,Jogging
6,-0.612916,10.56939,5.706926,Jogging
7,-0.503953,13.947236,7.05534,Jogging
8,-8.430995,11.413852,5.134871,Jogging
9,0.953424,1.375655,1.648062,Jogging


In [48]:
# Confirm the reduced frame has the four expected columns and no missing values.
df_new.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1074587 entries, 0 to 1074740
Data columns (total 4 columns):
acc_x       1074587 non-null float64
acc_y       1074587 non-null float64
acc_z       1074587 non-null float64
activity    1074587 non-null object
dtypes: float64(3), object(1)
memory usage: 41.0+ MB


In [49]:
from sklearn.feature_extraction import DictVectorizer
 
def encode_onehot(df, cols):
    """
    Replace the given columns of a pandas DataFrame with their one-hot encoding.

    Every row of ``df[cols]`` is converted to a dict and passed through
    scikit-learn's ``DictVectorizer``. The original ``cols`` are dropped and
    the encoded columns (named by the vectorizer, e.g. ``activity=Jogging``)
    are joined back onto the remaining columns using the frame's index.

    Modified from: https://gist.github.com/kljensen/5452382

    Details:

    http://en.wikipedia.org/wiki/One-hot
    http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html

    @param df pandas DataFrame
    @param cols a list of columns to encode
    @return a new DataFrame with ``cols`` replaced by their one-hot columns;
            the input frame is not modified
    """
    vec = DictVectorizer()

    # `orient` is the supported keyword for to_dict(); the old `outtype`
    # spelling was deprecated and later removed from pandas.
    records = df[cols].to_dict(orient='records')
    encoded = vec.fit_transform(records).toarray()

    # scikit-learn 1.2 removed get_feature_names(); use its replacement when
    # available and fall back to the old method on older releases.
    if hasattr(vec, 'get_feature_names_out'):
        feature_names = list(vec.get_feature_names_out())
    else:
        feature_names = vec.get_feature_names()

    # Re-use the caller's index so the join below lines the rows up one-to-one.
    vec_data = pd.DataFrame(encoded, columns=feature_names, index=df.index)

    return df.drop(cols, axis=1).join(vec_data)


In [50]:
# One-hot encode the `activity` label column; the remaining columns are carried over unchanged.
df_processed = encode_onehot(df_new, cols=['activity'])



In [51]:
# Preview the encoded frame: three acc_* columns followed by one indicator column per activity.
df_processed.head(10)

Unnamed: 0,acc_x,acc_y,acc_z,activity=Downstairs,activity=Jogging,activity=Sitting,activity=Standing,activity=Upstairs,activity=Walking
0,-0.694638,12.680544,0.503953,0,1,0,0,0,0
1,5.012288,11.264028,0.953424,0,1,0,0,0,0
2,4.903325,10.882658,-0.081722,0,1,0,0,0,0
3,-0.612916,18.496431,3.023717,0,1,0,0,0,0
4,-1.18497,12.108489,7.205164,0,1,0,0,0,0
5,1.375655,-2.492524,-6.510526,0,1,0,0,0,0
6,-0.612916,10.56939,5.706926,0,1,0,0,0,0
7,-0.503953,13.947236,7.05534,0,1,0,0,0,0
8,-8.430995,11.413852,5.134871,0,1,0,0,0,0
9,0.953424,1.375655,1.648062,0,1,0,0,0,0


In [52]:
# Split the encoded frame into a feature matrix and a one-hot label matrix (plain NumPy arrays).
feature_cols = ["acc_x", "acc_y", "acc_z"]
label_cols = [
    "activity=Downstairs",
    "activity=Jogging",
    "activity=Sitting",
    "activity=Standing",
    "activity=Upstairs",
    "activity=Walking",
]

df_input = df_processed[feature_cols].values
df_output = df_processed[label_cols].values

In [53]:
# Sanity-check the matrix shapes. A single pre-formatted string passed to print()
# emits the same text under Python 2 and Python 3 (the bare print statement is a
# SyntaxError on Python 3).
print("{} {}".format(df_input.shape, df_output.shape))

(1074587, 3) (1074587, 6)


In [54]:
# Hold out 33% of the rows for testing; random_state=42 makes the shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(df_input, df_output, test_size=0.33, random_state=42)

In [55]:
# Confirm the train/test partitions have matching row counts for features and labels.
# print() with one pre-formatted string emits the same text on Python 2 and Python 3.
print("{} {}".format(X_train.shape, y_train.shape))
print("{} {}".format(X_test.shape, y_test.shape))

(719973, 3) (719973, 6)
(354614, 3) (354614, 6)


## Logistic Regression

In [59]:
import tensorflow as tf
import shutil
import os.path

# Parameters
learning_rate = 0.01
training_epochs = 1000
batch_size = 100  # NOTE: not used below -- every training step feeds the full training set
display_step = 10

# ---------------------------------------------------------------------------
# Training graph: softmax (multinomial logistic) regression, 3 inputs -> 6 classes
# ---------------------------------------------------------------------------
g = tf.Graph()

with g.as_default():
    # tf Graph Input
    x = tf.placeholder(tf.float32, [None, 3]) # 3 inputs
    y = tf.placeholder(tf.float32, [None, 6]) # 6 classes (one-hot)

    # Set model weights
    W = tf.Variable(tf.zeros([3, 6]))
    b = tf.Variable(tf.zeros([6]))

    # Construct model: keep the raw logits so the loss can be computed from them
    logits = tf.matmul(x, W) + b
    pred = tf.nn.softmax(logits) # class probabilities, used for accuracy only

    # Minimize cross entropy. Computing it from the logits avoids the
    # log(softmax(.)) formulation, which yields NaN once a probability
    # underflows to 0.
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
    # Gradient Descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    # Accuracy: fraction of rows whose arg-max prediction matches the label
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Initializing the variables
    init = tf.initialize_all_variables()

    sess = tf.Session()
    try:
        sess.run(init)

        # Training cycle: one full-batch gradient step per "epoch"
        for epoch in range(training_epochs):
            _, c = sess.run([optimizer, cost], feed_dict={x: X_train, y: y_train})

            # Display logs every `display_step` epochs
            if (epoch+1) % display_step == 0:
                print("Epoch: {:04d} cost= {:.9f}".format(epoch + 1, c))

        print("Optimization Finished!")

        # Evaluate on the whole held-out test set
        print("Accuracy: {}".format(accuracy.eval({x: X_test, y: y_test}, sess)))

        # Pull the trained parameters out as NumPy arrays for the export graph
        _W = W.eval(sess)
        _b = b.eval(sess)
    finally:
        # Release the session even if training or evaluation raised
        sess.close()

# ---------------------------------------------------------------------------
# Export graph: same model with the trained parameters frozen as constants
# ---------------------------------------------------------------------------
g_2 = tf.Graph()
with g_2.as_default():
    # Reconstruct graph; "input" / "output" are the node names consumers look up
    x_2 = tf.placeholder("float", [None, 3], name="input")
    W_2 = tf.constant(_W, name="constant_W")
    b_2 = tf.constant(_b, name="constant_b")
    y_2 = tf.nn.softmax(tf.matmul(x_2, W_2) + b_2, name="output")

    sess_2 = tf.Session()
    try:
        # g_2 holds no variables, so this is effectively a no-op; it is kept
        # so the serialized GraphDef contains the same nodes as before.
        init_2 = tf.initialize_all_variables()
        sess_2.run(init_2)

        # Serialize before the test-only ops below are added to the graph
        graph_def = g_2.as_graph_def()

        tf.train.write_graph(graph_def, 'Models','activityModel.pb', as_text=False)

        # Test the frozen model: accuracy should match the training graph's
        y__2 = tf.placeholder("float", [None, 6])
        correct_prediction_2 = tf.equal(tf.argmax(y_2, 1), tf.argmax(y__2, 1))
        accuracy_2 = tf.reduce_mean(tf.cast(correct_prediction_2, "float"))
        print(accuracy_2.eval({x_2: X_test, y__2: y_test}, sess_2))
    finally:
        # The original never closed this session
        sess_2.close()


Epoch: 0010 cost= 1.582609177
Epoch: 0020 cost= 1.560548425
Epoch: 0030 cost= 1.550834179
Epoch: 0040 cost= 1.544809818
Epoch: 0050 cost= 1.540548086
Epoch: 0060 cost= 1.537255883
Epoch: 0070 cost= 1.534567118
Epoch: 0080 cost= 1.532214761
Epoch: 0090 cost= 1.530112386
Epoch: 0100 cost= 1.528159261
Epoch: 0110 cost= 1.526309133
Epoch: 0120 cost= 1.524545670
Epoch: 0130 cost= 1.522824645
Epoch: 0140 cost= 1.521140218
Epoch: 0150 cost= 1.519489288
Epoch: 0160 cost= 1.517881036
Epoch: 0170 cost= 1.516289353
Epoch: 0180 cost= 1.514722109
Epoch: 0190 cost= 1.513172388
Epoch: 0200 cost= 1.511655688
Epoch: 0210 cost= 1.510163188
Epoch: 0220 cost= 1.508684039
Epoch: 0230 cost= 1.507215381
Epoch: 0240 cost= 1.505781174
Epoch: 0250 cost= 1.504349470
Epoch: 0260 cost= 1.502942443
Epoch: 0270 cost= 1.501557469
Epoch: 0280 cost= 1.500190616
Epoch: 0290 cost= 1.498832464
Epoch: 0300 cost= 1.497498512
Epoch: 0310 cost= 1.496189594
Epoch: 0320 cost= 1.494893551
Epoch: 0330 cost= 1.493620038
Epoch: 034

In [None]:
## Multi-Layer Perceptron (MLP)

In [58]:

import tensorflow as tf
import shutil
import os.path


# Parameters
learning_rate = 0.001
training_epochs = 200
batch_size = 100  # NOTE: not used below -- every training step feeds the full training set
display_step = 5

# Network Parameters
n_hidden_1 = 100 # 1st layer number of features
n_hidden_2 = 100 # 2nd layer number of features
n_input = 3 # Number of inputs
n_classes = 6 # Number of classes

# ---------------------------------------------------------------------------
# Training graph: two ReLU hidden layers followed by a linear output layer
# ---------------------------------------------------------------------------
g = tf.Graph()
with g.as_default():
    # model inputs
    x = tf.placeholder("float", shape=[None, n_input])
    y = tf.placeholder("float", shape=[None, n_classes])

    # set model weights
    W_h1 = tf.Variable(tf.random_normal([n_input, n_hidden_1]))
    W_h2 = tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]))
    W_out = tf.Variable(tf.random_normal([n_hidden_2, n_classes]))

    # set model biases
    b1 = tf.Variable(tf.random_normal([n_hidden_1]))
    b2 = tf.Variable(tf.random_normal([n_hidden_2]))
    b_out = tf.Variable(tf.random_normal([n_classes]))

    # Construct Model
    # Hidden layer with RELU activation
    layer_1 = tf.add(tf.matmul(x, W_h1), b1)
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with RELU activation
    layer_2 = tf.add(tf.matmul(layer_1, W_h2), b2)
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation (raw logits)
    pred = tf.matmul(layer_2, W_out) + b_out

    # Define loss and optimizer. The arguments are passed by keyword because
    # later TensorFlow releases reject positional logits/labels here.
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Accuracy: arg-max of the logits equals arg-max of the softmax
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Initializing the variables. This must be created after the optimizer so
    # that the variables Adam adds to the graph are initialized too.
    init = tf.initialize_all_variables()

    sess = tf.Session()
    try:
        sess.run(init)

        # Training cycle: one full-batch Adam step per "epoch"
        for epoch in range(training_epochs):
            _, c = sess.run([optimizer, cost], feed_dict={x: X_train, y: y_train})

            # Display logs every `display_step` epochs
            if (epoch+1) % display_step == 0:
                print("Epoch: {:04d} cost= {:.9f}".format(epoch + 1, c))

        print("Optimization Finished!")

        # Evaluate on the whole held-out test set
        print("Accuracy: {}".format(accuracy.eval({x: X_test, y: y_test}, sess)))

        # Pull the trained parameters out as NumPy arrays for the export graph
        _W_h1 = W_h1.eval(sess)
        _W_h2 = W_h2.eval(sess)
        _W_out = W_out.eval(sess)

        _b1 = b1.eval(sess)
        _b2 = b2.eval(sess)
        _b_out = b_out.eval(sess)
    finally:
        # Release the session even if training or evaluation raised
        sess.close()

# ---------------------------------------------------------------------------
# Export graph: same network with the trained parameters frozen as constants
# ---------------------------------------------------------------------------
g_2 = tf.Graph()
with g_2.as_default():
    # Reconstruct Graph
    # model inputs; "input" / "output" are the node names consumers look up
    x_2 = tf.placeholder("float", shape=[None, n_input], name="input")

    # set model weights
    W_2_h1 = tf.constant(_W_h1, name="constant_W_h1")
    W_2_h2 = tf.constant(_W_h2, name="constant_W_h2")
    W_2_out = tf.constant(_W_out, name="constant_W_out")

    # set model biases
    b_2_1 = tf.constant(_b1, name="constant_b1")
    b_2_2 = tf.constant(_b2, name="constant_b2")
    b_2_out = tf.constant(_b_out, name="constant_b_out")

    # Construct Model
    # Hidden layer with RELU activation
    layer_2_1 = tf.add(tf.matmul(x_2, W_2_h1), b_2_1)
    layer_2_1 = tf.nn.relu(layer_2_1)
    # Hidden layer with RELU activation
    layer_2_2 = tf.add(tf.matmul(layer_2_1, W_2_h2), b_2_2)
    layer_2_2 = tf.nn.relu(layer_2_2)

    # Output layer with linear activation; bias_add is used so the final op
    # can carry the explicit name "output"
    y_2 = tf.nn.bias_add(tf.matmul(layer_2_2, W_2_out), b_2_out, name="output")

    sess_2 = tf.Session()
    try:
        # g_2 holds no variables, so this is effectively a no-op; it is kept
        # so the serialized GraphDef contains the same nodes as before.
        init_2 = tf.initialize_all_variables()
        sess_2.run(init_2)

        # Serialize before the test-only ops below are added to the graph
        graph_def = g_2.as_graph_def()

        tf.train.write_graph(graph_def, 'Models','activityModelMLP.pb', as_text=False)

        # Test the frozen model: accuracy should match the training graph's
        y__2 = tf.placeholder("float", [None, n_classes])
        correct_prediction_2 = tf.equal(tf.argmax(y_2, 1), tf.argmax(y__2, 1))
        accuracy_2 = tf.reduce_mean(tf.cast(correct_prediction_2, "float"))
        print(accuracy_2.eval({x_2: X_test, y__2: y_test}, sess_2))
    finally:
        # The original never closed this session
        sess_2.close()

    


Epoch: 0005 cost= 719.341308594
Epoch: 0010 cost= 623.962402344
Epoch: 0015 cost= 541.754394531
Epoch: 0020 cost= 463.802581787
Epoch: 0025 cost= 392.782958984
Epoch: 0030 cost= 340.336730957
Epoch: 0035 cost= 313.986694336
Epoch: 0040 cost= 294.496337891
Epoch: 0045 cost= 269.572448730
Epoch: 0050 cost= 245.806457520
Epoch: 0055 cost= 226.994155884
Epoch: 0060 cost= 208.649246216
Epoch: 0065 cost= 192.297073364
Epoch: 0070 cost= 177.435470581
Epoch: 0075 cost= 163.729522705
Epoch: 0080 cost= 150.990829468
Epoch: 0085 cost= 138.663467407
Epoch: 0090 cost= 126.714599609
Epoch: 0095 cost= 115.190597534
Epoch: 0100 cost= 104.026679993
Epoch: 0105 cost= 93.251182556
Epoch: 0110 cost= 82.867668152
Epoch: 0115 cost= 74.868316650
Epoch: 0120 cost= 69.509635925
Epoch: 0125 cost= 63.509780884
Epoch: 0130 cost= 58.162509918
Epoch: 0135 cost= 53.186306000
Epoch: 0140 cost= 48.648612976
Epoch: 0145 cost= 45.055343628
Epoch: 0150 cost= 42.104000092
Epoch: 0155 cost= 39.786750793
Epoch: 0160 cost= 3