Note: This code was inspired by https://github.com/ermongroup/Wifi_Activity_Recognition, and some sections have been copied from it verbatim.

In [0]:
# Mount Google Drive at /content/drive (Colab only).
# The later cells address the mounted files through the relative path
# "./drive/My Drive/...", which presumably relies on the working directory
# being /content - TODO confirm.
from google.colab import drive
drive.mount('/content/drive')

Note: place the dataset archive (Dataset.tar.gz) in the WifiActivityRecognition folder of your Google Drive and create these 4 folders in Google Drive; otherwise the code will not find them and will raise an error:  
1.WifiActivityRecognition/  
2.WifiActivityRecognition/input_files  
3.WifiActivityRecognition/input_files_2  
4.WifiActivityRecognition/keras_models

In [0]:
#Extract "Dataset" in WifiActivityRecognition Folder
!tar xvzf "./drive/My Drive/WifiActivityRecognition/Dataset.tar.gz" -C "./drive/My Drive/WifiActivityRecognition/"

In [0]:
#List the sorted annotation files and make sure they follow the same naming scheme (and therefore the same sort order) as the input files
import glob
import pandas as pd
import numpy as np
sorted(glob.glob("./drive/My Drive/WifiActivityRecognition/Dataset/annotation_*bed*.csv"))
#Uncomment the next line to list the matching input files for comparison
#sorted(glob.glob("./drive/My Drive/WifiActivityRecognition/Dataset/input_*bed*.csv"))

In [0]:
#Rename the siamak and sankalp recordings (input_161219*.csv) so that the sorted input and annotation file lists are consistent
#Note: this only works if your folder layout is the same as mine, i.e. WifiActivityRecognition/Dataset
#Otherwise read this block and rename the files yourself
import glob
import os

#The slice offsets are hard-coded for the exact path prefix used in the glob pattern below:
#characters [0:55] are "./drive/My Drive/WifiActivityRecognition/Dataset/input_" and
#characters [55:62] (7 characters, starting with the 161219 prefix) are removed from the name.
PREFIX_END = 39 + 16
SUFFIX_START = 46 + 16

#NOTE: despite its name this list holds the *input* files matched by the pattern.
annot_data = sorted(glob.glob("./drive/My Drive/WifiActivityRecognition/Dataset/input_161219*.csv"))
for old_path in annot_data:
  new_path = old_path[:PREFIX_END] + old_path[SUFFIX_START:]
  #Print exactly the destination path that is handed to os.rename
  print(new_path)
  os.rename(old_path, new_path)

In [0]:
#Convert raw data into sliding windows; a window gets an activity label when more than 60% of its samples carry that label
#These files are sampled at 1kHz and include NoActivity data in each file. After this step we convert them to 500Hz and write a separate file for each activity.
#Do this separately for each activity. You just need to change the activity name in the main section and restart the runtime after each execution to avoid memory errors
#Note: you have to mount Google Drive again each time you restart the runtime and clear the RAM
#Colab needs about ~10GB of RAM for a 4GB csv file and the maximum available RAM is 12GB. This is why each activity has to be processed separately on Colab.

import numpy as np,numpy
import csv
import glob
import os

window_size = 1000  # number of consecutive CSV rows (samples) in one window
threshold = 60  # percentage of a window's rows that must share an activity label for the window to be given that label
slide_size = 200 #less than window_size!!!  (step, in rows, between the starts of consecutive windows)

# Activity labels in priority order. A label's position (counted from 1) is
# its one-hot column; column 0 is reserved for "no activity".
_ACTIVITY_LABELS = ("bed", "fall", "walk", "pickup", "run", "sitdown", "standup")


def _window_starts(n_rows):
    """Return the start row of every sliding window over ``n_rows`` rows.

    Start rows are 0, slide_size, 2*slide_size, ... up to and including
    ``n_rows + 1 - 2 * window_size``. The range is empty when the file is
    too short to hold a single window.
    """
    return range(0, n_rows + 2 - 2 * window_size, slide_size)


def _read_input_windows(path):
    """Cut one input CSV into an array of shape (n_windows, window_size, 90)."""
    with open(path, "r", newline="") as handle:
        samples = np.array([[float(elm) for elm in row] for row in csv.reader(handle)])

    starts = _window_starts(len(samples))
    if len(starts) == 0:
        return np.empty([0, window_size, 90], float)
    # Column 0 is skipped (presumably a timestamp - TODO confirm); columns
    # 1..90 are the features. Stacking once avoids re-copying the growing
    # array for every window.
    return np.stack([samples[k:k + window_size, 1:91] for k in starts])


def _read_window_labels(path):
    """Turn one annotation CSV into one-hot window labels of shape (n_windows, 8)."""
    with open(path, "r", newline="") as handle:
        # The label is read from the first column of each row.
        labels = np.array([row[0] if row else "" for row in csv.reader(handle)], dtype=str)

    starts = _window_starts(len(labels))
    one_hot = np.zeros((len(starts), 8))
    min_count = window_size * threshold / 100
    for row, k in enumerate(starts):
        window = labels[k:k + window_size]
        column = 0  # "no activity" unless one label exceeds the threshold
        for index, name in enumerate(_ACTIVITY_LABELS, start=1):
            if np.count_nonzero(window == name) > min_count:
                column = index
                break
        one_hot[row, column] = 1
    return one_hot


def dataimport(path1, path2):
    """Build sliding-window samples and one-hot labels from CSV files.

    Uses the module-level settings ``window_size``, ``threshold`` and
    ``slide_size``.

    Args:
        path1: glob pattern of the input CSV files. Every row must hold at
            least 91 numeric columns; columns 1..90 are used.
        path2: glob pattern of the annotation CSV files (one label per row).

    Returns:
        A tuple ``(xx, yy)``. ``xx`` has shape (n_windows, window_size * 90):
        each row is one flattened window. ``yy`` has shape (n_windows, 8):
        column 0 marks "no activity" and columns 1-7 mark bed, fall, walk,
        pickup, run, sitdown and standup. A window gets an activity label
        when more than ``threshold`` percent of its rows carry that label.
        Files are processed in sorted order. Files that are too short for a
        single window, or patterns that match no file, contribute zero rows.

    Raises:
        ValueError: if a value cannot be parsed as float or an input file
            yields windows that are not 90 columns wide.
    """
    ###Input data###
    input_parts = [np.empty([0, window_size, 90], float)]
    for f in sorted(glob.glob(path1)):
        print("input_file_name=",f)
        input_parts.append(_read_input_windows(f))
    xx = np.concatenate(input_parts, axis=0)
    # Explicit width (instead of -1) so an empty result reshapes cleanly.
    xx = xx.reshape(len(xx), window_size * 90)

    ###Annotation data###
    label_parts = [np.empty([0, 8], float)]
    for ff in sorted(glob.glob(path2)):
        print("annotation_file_name=",ff)
        label_parts.append(_read_window_labels(ff))
    yy = np.concatenate(label_parts, axis=0)

    print(xx.shape,yy.shape)
    return (xx, yy)


#### Main ####
# Folder on Drive that receives the windowed CSV files. Create it when it is
# missing so that the writes below cannot fail on a missing directory.
output_dir = "./drive/My Drive/WifiActivityRecognition/input_files/"
os.makedirs(output_dir, exist_ok=True)

# Process one activity per run (change the name in this list and restart the
# runtime between runs to stay within the available RAM).
for label in ["bed"]:
    filepath1 = "./drive/My Drive/WifiActivityRecognition/Dataset/input_*" + str(label) + "*.csv"
    filepath2 = "./drive/My Drive/WifiActivityRecognition/Dataset/annotation_*" + str(label) + "*.csv"
    outputfilename1 = output_dir + "xx_" + str(window_size) + "_" + str(threshold) + "_" + label + ".csv"
    outputfilename2 = output_dir + "yy_" + str(window_size) + "_" + str(threshold) + "_" + label + ".csv"

    x, y = dataimport(filepath1, filepath2)
    # One CSV row per window: flattened samples in xx_*, one-hot label in yy_*.
    with open(outputfilename1, "w") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerows(x)
    with open(outputfilename2, "w") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerows(y)
    print(label + "finish!")

In [0]:
#Number of windows (samples) labelled with a particular activity in a label csv file
import numpy as np
import pandas as pd
import csv

#The path points at the folder the windowing step writes its yy_*.csv files to.
#header=None: the label file has no header row, so its first line is data and must be counted too.
file = pd.read_csv("./drive/My Drive/WifiActivityRecognition/input_files/yy_1000_60_bed.csv", header=None)
file = np.array(file)
#Column 1 is the one-hot column for "bed"; the displayed shape is (1, number_of_bed_windows).
np.shape(np.where(file[:,1] == 1))

In [0]:
#Now keep only the activity data, discard the NoActivity data, and downsample to 500Hz to limit RAM usage and avoid memory errors.
#Again, you need to repeat this for every activity file.

import pandas as pd
import numpy as np
import csv
import glob


def csv_write(activities=("walk",), base_dir="./drive/My Drive/WifiActivityRecognition"):
    """Keep only activity windows, downsample them to 500 samples and save them.

    For every activity name, reads ``xx_1000_60_<name>.csv`` and
    ``yy_1000_60_<name>.csv`` from ``<base_dir>/input_files``, keeps every
    second window, drops the windows labelled "no activity" (column 0 of the
    label file equal to 1), keeps every second sample of each remaining
    window and writes the result to
    ``<base_dir>/input_files_2/xxx_500_60_<name>.csv``.

    Args:
        activities: activity names to process. The default processes "walk"
            only, one activity per run to limit RAM usage.
        base_dir: folder that contains ``input_files`` and ``input_files_2``.
    """
    window_size = 500
    threshold = 60
    print("csv file importing...")

    for label in activities:
        SKIPROW = 2 #Skip every 2 rows -> overlap 800ms to 600ms  (To avoid memory error)
        xx_path = base_dir + "/input_files/xx_1000_60_" + str(label) + ".csv"
        yy_path = base_dir + "/input_files/yy_1000_60_" + str(label) + ".csv"

        # Count the rows of the very file that is read below, and close it again.
        with open(xx_path) as handle:
            num_lines = sum(1 for _ in handle)
        skip_idx = [x for x in range(1, num_lines) if x % SKIPROW != 0]

        # The same rows are skipped in both files so samples and labels stay aligned.
        xx = np.array(pd.read_csv(xx_path, header=None, skiprows=skip_idx))
        yy = np.array(pd.read_csv(yy_path, header=None, skiprows=skip_idx))
        print("Read Done")

        # eliminate the NoActivity Data
        rows = np.where(yy[:,0] == 1)
        xx = np.delete(xx, rows, 0)
        print("Eliminate Done Done")

        xx = xx.reshape(len(xx), 1000, 90)

        # 1000 Hz to 500 Hz (To avoid memory error)
        xx = xx[:, ::2, :90]

        # Reshape back to 2-D to save in CSV
        xx = xx.reshape(-1, window_size * 90)
        print("Downsampling Done")
        # NOTE: yy is not filtered, so its printed shape still includes the NoActivity rows.
        print(str(label), "finished...", "xx=", xx.shape, "yy=",  yy.shape)

        #Write Data in CSV Files to be imported to train LSTM
        outputfilename1 = base_dir + "/input_files_2/xxx_" + str(window_size) + "_" + str(threshold) + "_" + label + ".csv"
        with open(outputfilename1, "w") as f:
            writer = csv.writer(f, lineterminator="\n")
            writer.writerows(xx)
        print(label + "finish!")


csv_write()

In [0]:
#Importing Data for LSTM
import pandas as pd
import numpy as np
import glob
import csv
#The position of an activity in this list is its one-hot column in y.
#NOTE: this order is specific to this cell; it differs from the column order used by the windowing step.
ACTIVITIES = ["bed", "fall", "pickup", "run", "sitdown", "standup", "walk"]

x = np.empty(shape = [0,500,90], dtype = float)
y = np.empty(shape = [0,7], dtype = float)
for class_index, i in enumerate(ACTIVITIES):
  f = "./drive/My Drive/WifiActivityRecognition/input_files_2/xxx_500_60_" + str(i) + ".csv"
  print("input_file_name=",f)
  #Read inside a with-block so the file handle is closed again
  with open(f, "r") as handle:
    data = [[ float(elm) for elm in v] for v in csv.reader(handle)]
  #Each CSV row is one flattened window of 500 samples x 90 features
  tmp1 = np.reshape(np.array(data),[-1,500,90])
  x = np.concatenate((x,tmp1),axis = 0)

  #Every window of this file gets the same one-hot label
  yy = np.zeros([len(tmp1),7],float)
  yy[:,class_index] = 1.0
  y = np.concatenate((y,yy),axis = 0)

  print(np.shape(x))
  print(np.shape(y))
  print(str(i) + "done")

In [0]:
# Draw a random ordering of the samples and apply it to data and labels alike.
sample_count = x.shape[0]
perm = np.arange(sample_count)
np.random.shuffle(perm)
x = x[perm]
y = y[perm]

# The first 90% (rounded up) form the training set, the remainder the test set.
div_idx = int(np.ceil(sample_count * 0.9))
x_train = x[:div_idx]
x_test = x[div_idx:]
y_train = y[:div_idx]
y_test = y[div_idx:]

In [0]:
#Save perm to drive (in case training stops and you want to recover the test and training split from drive)
import os

outputfilename1 = "./drive/My Drive/WifiActivityRecognition/perm/perm.csv"
#Create the perm folder on demand so the write cannot fail on a missing directory
os.makedirs(os.path.dirname(outputfilename1), exist_ok=True)
with open(outputfilename1, "w") as f:
  writer = csv.writer(f, lineterminator="\n")
  #One index per line
  writer.writerows([n] for n in perm)
		

In [0]:
#Load perm from drive
#NOTE(review): this reads perm2.csv while the save cell writes perm.csv - confirm which file is intended.
#NOTE: apply this to freshly loaded (unshuffled) x and y; if they were already shuffled in this session,
#applying a stored permutation on top does not reproduce the stored split.
f = "./drive/My Drive/WifiActivityRecognition/perm/perm2.csv"
print("input_file_name=",f)
with open(f, "r") as handle:
  #One index per row. Keep perm 1-D so that x[perm] and y[perm] keep their number of dimensions
  #and no squeeze (which would also drop the sample axis of a one-sample split) is needed.
  perm = np.array([int(row[0]) for row in csv.reader(handle) if row])
x = x[perm]
y = y[perm]

#80% training, 10% validation, 10% test (boundaries rounded up)
train_idx = int(np.ceil(0.8 * x.shape[0]))
val_idx = int(np.ceil(0.9 * x.shape[0]))

x_train, x_val, x_test = x[:train_idx], x[train_idx:val_idx], x[val_idx:]
y_train, y_val, y_test = y[:train_idx], y[train_idx:val_idx], y[val_idx:]

In [0]:
#Machine Learning Model
#Plot Loss
%matplotlib inline

import keras
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Flatten, Dense, Activation

import numpy as np
from matplotlib import pyplot as plt
from IPython.display import clear_output

import tensorflow as tf
import pandas as pd
import glob
import os

from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, GRU, Embedding, LSTM,Dropout
from tensorflow.python.keras.optimizers import Adam,SGD,RMSprop
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from tensorflow.python.keras.layers import BatchNormalization
from keras import metrics


class PlotLosses(keras.callbacks.Callback):
    """Keras callback that redraws the training and validation loss after every epoch."""

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of a training run."""
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []

        self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Record the 'loss' and 'val_loss' entries of ``logs`` and redraw the plot."""
        # None instead of a shared mutable default dict.
        if logs is None:
            logs = {}

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.i += 1

        # Replace the previous cell output instead of stacking one plot per epoch.
        clear_output(wait=True)
        plt.plot(self.x, self.losses, label="loss")
        plt.plot(self.x, self.val_losses, label="val_loss")
        plt.legend()
        plt.show()


plot_losses = PlotLosses()


class PlotLearning(keras.callbacks.Callback):
    """Keras callback that redraws loss (log scale) and accuracy curves after every epoch."""

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of a training run."""
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []
        self.acc = []
        self.val_acc = []
        self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Record the loss/accuracy entries of ``logs`` and redraw both plots."""
        # None instead of a shared mutable default dict.
        if logs is None:
            logs = {}

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        # The accuracy key depends on the Keras version: older releases report
        # 'acc'/'val_acc', newer ones 'accuracy'/'val_accuracy'.
        self.acc.append(logs.get('acc', logs.get('accuracy')))
        self.val_acc.append(logs.get('val_acc', logs.get('val_accuracy')))
        self.i += 1
        f, (ax1, ax2) = plt.subplots(1, 2, sharex=True)

        # Replace the previous cell output instead of stacking one plot per epoch.
        clear_output(wait=True)

        ax1.set_yscale('log')
        ax1.plot(self.x, self.losses, label="loss")
        ax1.plot(self.x, self.val_losses, label="val_loss")
        ax1.legend()

        ax2.plot(self.x, self.acc, label="accuracy")
        ax2.plot(self.x, self.val_acc, label="validation accuracy")
        ax2.legend()

        plt.show()
        # A new figure is created every epoch; release it so figures do not pile up.
        plt.close(f)


plot = PlotLearning()

#Machine Learning Model for TPU
%matplotlib inline
# Build, compile and train the classifier on a Colab TPU, saving checkpoints to Drive.
# NOTE(review): tf.train.AdamOptimizer, tf.logging and tf.contrib are presumably
# TensorFlow 1.x-only APIs - confirm the TensorFlow version of the runtime.
print(keras.__version__)
# One LSTM layer with 200 units over inputs of shape (500, 90); only the last
# output is returned (return_sequences=False) and fed to a 7-way softmax.
model = Sequential()
model.add(LSTM(200, input_shape=(500,90),unit_forget_bias=True,bias_initializer="zeros",return_sequences = False))
model.add(Dense(7,activation = 'softmax'))
optimizer=tf.train.AdamOptimizer(learning_rate=0.0001)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# Address of the TPU worker; raises KeyError when COLAB_TPU_ADDR is not set
# (presumably when the runtime has no TPU attached).
TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
tf.logging.set_verbosity(tf.logging.INFO)

# Wrap the Keras model for execution on the TPU.
tpu_model = tf.contrib.tpu.keras_to_tpu_model(
    model,
    strategy=tf.contrib.tpu.TPUDistributionStrategy(
        tf.contrib.cluster_resolver.TPUClusterResolver(TPU_WORKER)))

tpu_model.summary()
# NOTE(review): the file is named model_arch.json but is written with
# tpu_model.save(...) - confirm that the on-disk format is the intended one.
tpu_model.save('./drive/My Drive/WifiActivityRecognition/keras_models/7-activity-2/model_arch.json', overwrite=True)

# Weights-only checkpoints; the epoch number is part of the file name.
# period=50 presumably saves every 50th epoch - TODO confirm.
mc = keras.callbacks.ModelCheckpoint('./drive/My Drive/WifiActivityRecognition/keras_models/7-activity-2/weights{epoch:08d}.h5', 
                                     save_weights_only=True, period=50)

# validation_split=0.2 holds out part of x_train/y_train for validation; the
# x_val/y_val arrays built by the load-perm cell are not used here.
# batch_size=128 * 8 is presumably 128 per TPU core on 8 cores - TODO confirm.
history = tpu_model.fit(x_train, y_train,
                        epochs=2000,
                        batch_size=128 * 8,
                        validation_split=0.2,shuffle = True,callbacks=[plot,mc])

# Save the model once training has finished.
tpu_model.save('./drive/My Drive/WifiActivityRecognition/keras_models/7-activity-2/model.h5', overwrite=True)


In [0]:
#Make confusion matrix
#Machine Learning Predictor for TPU

%matplotlib inline

import tensorflow as tf
import pandas as pd
import numpy as np
import glob
import os


from matplotlib import pyplot as plt
from IPython.display import clear_output

from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, GRU, Embedding, LSTM,Dropout,TimeDistributed, Bidirectional, Dropout
from tensorflow.python.keras.optimizers import Adam,SGD,RMSprop
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from tensorflow.python.keras.layers import BatchNormalization
from mlxtend.evaluate import confusion_matrix

# Rebuild a model, load trained weights, predict on the test split and
# compute the confusion matrix.
# NOTE(review): this architecture (Bidirectional LSTM + Dropout + 8-way softmax)
# and the weights path (8-activity-3) differ from the training cell (single
# LSTM + 7-way softmax, saved under 7-activity-2), and the labels built by the
# data-import cell have 7 columns - confirm that model, weights and labels
# belong to the same experiment.
model = Sequential()
model.add(Bidirectional(LSTM(200), input_shape=(500, 90), merge_mode = 'concat'))
model.add(Dropout(0.6))
model.add(Dense(8, activation='softmax'))

optimizer=tf.train.AdamOptimizer(learning_rate=0.0001)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

model.load_weights('./drive/My Drive/WifiActivityRecognition/keras_models/8-activity-3/model00001950.h5')

# Truncate the test set to a multiple of 8 samples (presumably one equal share
# per TPU core - TODO confirm). The slicing requires x_test to be 3-D and
# y_test to be 2-D.
n=np.shape(x_test)
n= n[0] - n[0]%8;
x_test = x_test[:n,:,:];
y_test = y_test[:n,:];
pred = model.predict(x_test)

# Compare the arg-max class of the one-hot targets with the arg-max class of
# the predictions; the matrix is displayed as the cell output.
cm = confusion_matrix(y_target=np.argmax(y_test,1),y_predicted=np.argmax(pred,1),binary=False)
cm
