## Exercise 6 - Research Paper Implementation
#### Time Series Classification Using Multi-Channels Deep Convolutional Neural Networks 
#### Dataset: PAMPA2 dataset (http://archive.ics.uci.edu/ml/datasets/pamap2+physical+activity+monitoring)

### Import Libraries

In [1]:
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
import os
import numpy as np
import tensorflow as tf

### Extract Required Data

In [None]:
def data_split(data):
    data_req = data[(data[1] == 3) | (data[1] == 4) | (data[1] == 12) | (data[1] == 13)]
    data_channel = data_req[[1,38,39,40]]
    data_time = data_req[[0]]
    data_channel.columns = [0,1,2,3]
    return data_time,data_channel

### Data Loading

In [None]:
directory = "G:/DA - Hildeshim/DDA Lab/Exercise 6/Dastet/PAMAP2_Dataset/Protocol/"
for i in os.listdir(directory):
    data = pd.read_csv(directory + i,header=None,sep=" ")
    data_time,data_channel = data_split(data)
    data_channel = data_channel.reset_index()
    data_channel = data_channel.drop(columns="index")
    data_channel.fillna(method="ffill",inplace=True)
    data_channel.to_csv(directory+i[:-4]+".csv",index=None)

### 3D data stats

In [None]:
data_stat = pd.DataFrame(columns=["start","end","Activity"])
new = 3
start = 0
cnt = 0
end = 0
for i,j in enumerate(data_activity[0]):
    if new!=j:
        end = i-1
        data_stat.loc[cnt] = [start]+[end]+[data_activity[0][i-1]]
        start = end
        new = j
        cnt+=1
    
data_stat.loc[4] = [start]+[data_activity.shape[0]]+[data_activity[0][i-1]]
print("Activity details of Subject 1")
data_stat


### IMU Data Plot

In [None]:
fig, ax = plt.subplots(nrows=4, ncols=1,figsize=(30,45))
ax[0].set_title("3D Acceleration Data from IMU Ankle of Subject 1\n",fontsize = 25)
ax[0].plot(data_channel[3])
ax[0].set_xlabel("Time",fontsize=15)
ax[0].set_ylabel("Data",fontsize=15)
ax[1].plot(data_channel[2])
ax[1].set_xlabel("Time",fontsize=15)
ax[1].set_ylabel("Data",fontsize=15)
ax[2].plot(data_channel[1])
ax[2].set_xlabel("Time",fontsize=15)
ax[2].set_ylabel("Data",fontsize=15)
ax[3].plot(data_channel[1])
ax[3].plot(data_channel[2])
ax[3].plot(data_channel[3])
ax[3].set_xlabel("Time",fontsize=15)
ax[3].set_ylabel("3D Data",fontsize=15)

### Normalise Data

In [2]:
def normalise_data(data):
    data[1] = (data[1] - np.mean(data[1])) / np.std(data[1])
    data[2] = (data[2] - np.mean(data[2])) / np.std(data[2])
    data[3] = (data[3] - np.mean(data[3])) / np.std(data[3])
    return data

### One-hot Activities

In [3]:
def get_activity(start,end,act):
    y = np.zeros((4))
    y[np.argmax(np.sum(act[start:end],axis=0))] = 1
    return y

### Read IMU data from CSV

In [4]:
def read_data(path):
    data = None
    activity_data = None
    for file in os.listdir(path):
        data_in = pd.read_csv(path+file)
        data_in.columns = [0,1,2,3]
        data_channel = data_in[[1,2,3]]
        from sklearn.preprocessing import StandardScaler
        data_channel = normalise_data(data_channel)
        data_activity = data_in[[0]]
        activity = np.array(pd.get_dummies(data_activity[0]))
        channel_ip= []
        channel_ac= []
        for i in range(int((data_channel.shape[0] - 256) /128)):
            if i == 0:
                start = 0
                end = 256
            else:
                start += 128
                end +=128
            channel_ip.append(data_channel[start:end])
            channel_ac.append(get_activity(start,end,activity))
        dat = np.stack(channel_ip)
        act = np.stack(channel_ac)
        if data is None:
            data = dat
            activity_data = act
        else:
            data = np.concatenate((data,dat),axis = 0)
            activity_data = np.concatenate((activity_data,act),axis = 0)
    return data,activity_data

In [5]:
train_data_path = "G:/DA - Hildeshim/DDA Lab/Exercise 6/Dastet/PAMAP2_Dataset/Train/"
test_data_path = "G:/DA - Hildeshim/DDA Lab/Exercise 6/Dastet/PAMAP2_Dataset/Test/"
test, activity_test = read_data(test_data_path)
train, activity_train = read_data(train_data_path)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


### Defining Networks

In [7]:
def conv1d(x,weight,bias):
    x = tf.nn.conv1d(x,weight,stride=1,padding="VALID")
    x = tf.nn.bias_add(x,bias)
    x = tf.sigmoid(x)
    return x

def avgpool1d(x):
    x = tf.layers.average_pooling1d(x,pool_size=2,strides=2,padding="VALID")
    return x

def fc_layer(x,weight,bias):
    x = tf.reshape(x,[-1,3*4*61])
    x = tf.add(tf.matmul(x,weight),bias)
    x = tf.sigmoid(x)
    return x

def out_layer(x,weight,bias):
    x = tf.add(tf.matmul(x,weight),bias)
    return x

### Initialise Variables and Placeholders

In [8]:
learning_rate = 0.01
input_len = 256
n_channels = 3
n_class = 4
ip_ch_c1 = 3
op_ch_c1 = 8 * n_channels
op_ch_c2 = 4 * n_channels
ip_ch_fc1 = 61 * op_ch_c2
op_ch_fc1 = 4 * n_channels
epochs = 1500
batch_size = 1
tf.reset_default_graph() 

x = tf.placeholder(tf.float32,[None,input_len,n_channels])
y = tf.placeholder(tf.float32,[None,n_class])

with tf.variable_scope("Weights", reuse=tf.AUTO_REUSE):
    w_conv1 = tf.get_variable('w_c1',shape=(5,ip_ch_c1,op_ch_c1),initializer=tf.random_normal_initializer)
    w_conv2 = tf.get_variable('w_c2',shape=(5,op_ch_c1,op_ch_c2),initializer=tf.random_normal_initializer)
    w_fc = tf.get_variable('w_fc',shape=(ip_ch_fc1,op_ch_fc1),initializer=tf.random_normal_initializer)
    w_out = tf.get_variable('w_out',shape=(op_ch_fc1,n_class),initializer=tf.random_normal_initializer)
    tf.summary.histogram('w1',w_conv1)
    tf.summary.histogram('w2',w_conv2)
    tf.summary.histogram('wfc',w_fc)
    tf.summary.histogram('wout',w_out)
    
with tf.variable_scope("Bias", reuse=tf.AUTO_REUSE):
    b_conv1 = tf.get_variable('B_c1',shape=(op_ch_c1),initializer=tf.random_normal_initializer)
    b_conv2 = tf.get_variable('B_c2',shape=(op_ch_c2),initializer=tf.random_normal_initializer)
    b_fc = tf.get_variable('B_fc',shape=(op_ch_fc1),initializer=tf.random_normal_initializer)
    b_out = tf.get_variable('B_out',shape=(n_class),initializer=tf.random_normal_initializer)


### Defining the Model

In [13]:
def MC_DCNN(x):
    conv1 = conv1d(x,w_conv1,b_conv1)
    avg_pool1 = avgpool1d(conv1)
    conv2 = conv1d(avg_pool1,w_conv2,b_conv2)
    avg_pool2 = avgpool1d(conv2)
    fc = fc_layer(avg_pool2,w_fc,b_fc)
    out = out_layer(fc,w_out,b_out)
    return out

### Initializing optimizers and operations

In [10]:
model = MC_DCNN(x)

predictions = tf.nn.softmax_cross_entropy_with_logits(logits=model,labels=y)
tf.summary.histogram("predictions", predictions)

loss = tf.reduce_mean(predictions)
tf.summary.scalar("Loss_Scalar",loss)
tf.summary.histogram("loss",loss)

optimizer_RMS = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                      decay=0.0005,
                                      momentum=0.9,
                                      epsilon=0.01).minimize(loss)

optimizer_Adam = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                        epsilon=0.01).minimize(loss)

correct_pred = tf.equal(tf.argmax(model, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
tf.summary.scalar("Accuracy",accuracy)
tf.summary.histogram("Accuracy_histogram",accuracy)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



<tf.Tensor 'Accuracy_histogram:0' shape=() dtype=string>

### Training the Model

In [11]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    train_writer = tf.summary.FileWriter("G:/DA - Hildeshim/DDA Lab/Exercise 6/log/1/train",sess.graph)
    test_writer = tf.summary.FileWriter("G:/DA - Hildeshim/DDA Lab/Exercise 6/log/1/test",sess.graph)
    merge = tf.summary.merge_all()    
    sess.run(init)
    counter = 0
    for ep in range(epochs):
        counter += 1
#         for batch in range(len(train)//batch_size):
#             batch_x = train[batch*batch_size:min((batch+1)*batch_size,len(train))]
#             batch_y = activity_train[batch*batch_size:min((batch+1)*batch_size,len(activity_train))]
        assert not np.any(np.isnan(train))
        assert not np.any(np.isnan(activity_train))
        feed = {x : train, y : activity_train}
        summary, cost, _ , acc = sess.run([merge, loss, optimizer_RMS, accuracy], feed_dict = feed)
        train_writer.add_summary(summary,counter)
        if ep%100 == 1:
            print("\nEpoch: "+str(ep)+"\tTraining loss: "+str(cost)+"\tTraining Accuracy: "+str(acc))
            feed = {x : test, y : activity_test}
            su, cost, acc = sess.run([merge, loss, accuracy], feed_dict = feed)
            test_writer.add_summary(su,counter)
            print("\t\tTesting loss: "+str(cost)+"\tTesting Accuracy: "+str(acc))


Epoch: 1	Training loss: 3.434344	Training Accuracy: 0.30448893
		Testing loss: 3.0382938	Testing Accuracy: 0.305949

Epoch: 101	Training loss: 0.74434865	Training Accuracy: 0.7290427
		Testing loss: 0.80719507	Testing Accuracy: 0.6827195

Epoch: 201	Training loss: 0.59156466	Training Accuracy: 0.8053002
		Testing loss: 0.7271629	Testing Accuracy: 0.73796034

Epoch: 301	Training loss: 0.49842468	Training Accuracy: 0.841536
		Testing loss: 0.69391066	Testing Accuracy: 0.75212467

Epoch: 401	Training loss: 0.44140574	Training Accuracy: 0.86479175
		Testing loss: 0.67725956	Testing Accuracy: 0.7790368

Epoch: 501	Training loss: 0.3810678	Training Accuracy: 0.8912926
		Testing loss: 0.6868787	Testing Accuracy: 0.77478755

Epoch: 601	Training loss: 0.33676794	Training Accuracy: 0.91076255
		Testing loss: 0.6974701	Testing Accuracy: 0.7818697

Epoch: 701	Training loss: 0.29369763	Training Accuracy: 0.9302326
		Testing loss: 0.71878636	Testing Accuracy: 0.7691218

Epoch: 801	Training loss: 0.