### Import packages

In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
import glob
import time
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

Check if have cuda device (GPU)

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("use", device)

use cuda


### ML model

In [3]:
class Model(nn.Module):
	def __init__(self, n_input = 90, n_hidden = 128, num_layers=1, n_classes = 7):
		super(Model, self).__init__()
			
		self.rnn = nn.LSTM(input_size=n_input, hidden_size=n_hidden, num_layers=num_layers, batch_first=True, bidirectional=False)
		self.classifier = nn.Sequential(
			nn.Linear(n_hidden, n_classes)
		)

	def forward(self, x):
		# (batch_size, n_steps, n_input)
		x, _ = self.rnn(x)
		x = self.classifier(x[:, -1, :])
		return x

### Dataset

Prepare dataset for training

In [4]:
class CSIDataset(Dataset):
	def __init__(
			self, 
			input_path="./Dataset/Data/", 
			processed_path="./Dataset/Processed/", 
			use_processed=True, 
			split="train",
			train_test_ratio=0.8,
			window_size=1000, 
			threshold=60, 
			slide_size=200
			):
		self.window_size = window_size
		self.threshold = threshold
		self.slide_size = slide_size
		# fix seed for reproduce
		np.random.seed(0)

		# create processed data directory
		if not os.path.isdir(processed_path):
			os.makedirs(processed_path)

		# read and concat all data
		self.csi = []
		self.label = []
		for i, label in enumerate (["bed", "fall", "pickup", "run", "sitdown", "standup", "walk"]):
			output_csi_fn = os.path.join(processed_path, label + "_csi.csv")
			output_label_fn = os.path.join(processed_path, label + "_label.csv")
			# use processed, don't need to process again
			if use_processed and os.path.isfile(output_csi_fn) and os.path.isfile(output_label_fn):
				x = pd.read_csv(
					output_csi_fn, 
					header=None, 
					engine="c"
					).to_numpy().reshape((-1, self.window_size, 90))
				y = pd.read_csv(
					output_label_fn, 
					header=None, 
					engine="c"
					).to_numpy()
			else:
				# else, process now
				print("No processed", label, "found, start process")
				x = []
				y = []
				filepath1 = os.path.join(input_path, "input_*" + str(label) + "*.csv")
				filepath2 = os.path.join(input_path, "annotation_*" + str(label) + "*.csv")
				input_csv_files = sorted(glob.glob(filepath1))
				annotation_csv_files = sorted(glob.glob(filepath2))
				# Because naming in the data are inconsistent, we process three part separately.
				input_csv_files_sankalp = [fn for fn in input_csv_files if "sankalp" in fn]
				input_csv_files_siamak = [fn for fn in input_csv_files if "siamak" in fn]
				input_csv_files_date = [fn for fn in input_csv_files if ("siamak" not in fn and "sankalp" not in fn)]
				annotation_csv_files_sankalp = [fn for fn in annotation_csv_files if "sankalp" in fn]
				annotation_csv_files_siamak = [fn for fn in annotation_csv_files if "siamak" in fn]
				annotation_csv_files_date = [fn for fn in annotation_csv_files if ("siamak" not in fn and "sankalp" not in fn)]
				for i in range(len(input_csv_files_sankalp)):
					if not int(input_csv_files_sankalp[i].split('input_')[1].split('.csv')[0].split('_')[-1]) == int(annotation_csv_files_sankalp[i].split('annotation_')[1].split('.csv')[0].split('_')[-1]):
						# file name of input and annotation(label) not match
						raise "error"
					else:
						x_, y_ = self.process_data(input_csv_files_sankalp[i], annotation_csv_files_sankalp[i])
						x.append(x_)
						y.append(y_)
				
				for i in range(len(input_csv_files_siamak)):
					# print(input_csv_files_siamak[i].split('input_')[1].split('.csv')[0], "=", annotation_csv_files_siamak[i].split('annotation_')[1].split('.csv')[0])
					if not int(input_csv_files_siamak[i].split('input_')[1].split('.csv')[0].split('_')[-1]) == int(annotation_csv_files_siamak[i].split('annotation_')[1].split('.csv')[0].split('_')[-1]):
						raise "error"
					else:
						x_, y_ = self.process_data(input_csv_files_siamak[i], annotation_csv_files_siamak[i])
						x.append(x_)
						y.append(y_)
						
				for i in range(len(input_csv_files_date)):
					if not int(input_csv_files_date[i].split('input_')[1].split('.csv')[0].split('_')[-1]) == int(annotation_csv_files_date[i].split('annotation_')[1].split('.csv')[0].split('_')[-1]):
						raise "error"
					else:
						x_, y_ = self.process_data(input_csv_files_date[i], annotation_csv_files_date[i])
						x.append(x_)
						y.append(y_)
				x = np.concatenate(x, axis=0)
				y = np.concatenate(y, axis=0)

				# shuffle x and y together
				idxs = np.arange(x.shape[0])
				np.random.shuffle(idxs)
				x = x[idxs]
				y = y[idxs]

				# save to file for the future use
				np.savetxt(output_csi_fn, x.reshape((-1, self.window_size * 90)), delimiter = ",", fmt='%.4f')
				np.savetxt(output_label_fn, y, delimiter = ",", fmt='%d')
			
			if split == "train":
				# 700 is for balance between 7 class
				self.csi.append(x[:int(x.shape[0] * train_test_ratio)])
				self.label.append(y[:int(x.shape[0] * train_test_ratio)])
			elif split == "test":
				self.csi.append(x[int(x.shape[0] * train_test_ratio):])
				self.label.append(y[int(x.shape[0] * train_test_ratio):])
					
			print("finish", label)
		self.csi = np.concatenate(self.csi, axis=0)
		self.label = np.concatenate(self.label, axis=0)
		self.label = np.squeeze(self.label)
		# some statistic information
		unique, counts = np.unique(self.label, return_counts=True)
		print(dict(zip(unique, counts)))

	def process_data(self, csi_fn, label_fn):
		csi_raw = pd.read_csv(
			csi_fn, 
			header=None, 
			engine="c"
			).to_numpy()
		#data import by slide window
		k = 0
		x_list = []
		while k <= (len(csi_raw) + 1 - 2 * self.window_size):
			# for each row
			# 0: time
			# 1~91: amplitude
			# 91: 181: phase
			single_window = np.array(csi_raw[k:k + self.window_size, 1:91])
			x_list.append(single_window)
			k += self.slide_size
		x = np.dstack(x_list)
		# (window_size, feature, # samples)
		x = np.transpose(x, [2, 0, 1])

		label_raw = pd.read_csv(
			label_fn, 
			header=None, 
			engine="c"
			).to_numpy()
		#data import by slide window
		k = 0
		y_list = []
		while k <= (len(label_raw) + 1 - 2 * self.window_size):
			single_window = label_raw[k:k + self.window_size]
			bed = 0
			fall = 0
			walk = 0
			pickup = 0
			run = 0
			sitdown = 0
			standup = 0
			noactivity = 0
			for j in range(self.window_size):
				if single_window[j] == "bed":
					bed += 1
				elif single_window[j] == "fall":
					fall += 1
				elif single_window[j] == "pickup":
					pickup += 1
				elif single_window[j] == "run":
					run += 1
				elif single_window[j] == "sitdown":
					sitdown += 1
				elif single_window[j] == "standup":
					standup += 1
				elif single_window[j] == "walk":
					walk += 1
				else:
					noactivity += 1

			if bed > self.window_size * self.threshold / 100:
				y_list.append(0)
			elif fall > self.window_size * self.threshold / 100:
				y_list.append(1)
			elif pickup > self.window_size * self.threshold / 100:
				y_list.append(2)
			elif run > self.window_size * self.threshold / 100:
				y_list.append(3)
			elif sitdown > self.window_size * self.threshold / 100:
				y_list.append(4)
			elif standup > self.window_size * self.threshold / 100:
				y_list.append(5)
			elif walk > self.window_size * self.threshold / 100:
				y_list.append(6)
			else:
				y_list.append(-1)
			k += self.slide_size
		y = np.array(y_list)

		# remove no activity
		mask = y != -1
		x = x[mask]
		y = y[mask]
		return x, y

	def __len__(self):
		return self.csi.shape[0]
	
	def __getitem__(self, idx):
		return torch.tensor(self.csi[idx], dtype=torch.float), \
			torch.tensor(self.label[idx], dtype=torch.long)

### Configs

In [5]:
# Parameters
input_path = "./Dataset/Data/"
processed_path = "./Dataset/Processed/"
use_processed = True
window_size = 500
threshold = 60
slide_size = 400

learning_rate = 0.00001
training_epos = 2000
batch_size = 128
display_epo = 50

model = Model()

Dataset instance

training set and testing set

In [6]:
train_set = CSIDataset(
split="train", 
window_size=window_size, 
threshold=threshold, 
slide_size=slide_size, 
use_processed=use_processed, 
input_path=input_path, 
processed_path=processed_path
)
test_set = CSIDataset(
split="test", 
window_size=window_size, 
threshold=threshold, 
slide_size=slide_size, 
use_processed=use_processed, 
input_path=input_path, 
processed_path=processed_path
)

finish bed
finish fall
finish pickup
finish run
finish sitdown
finish standup
finish walk
{0: 536, 1: 375, 2: 405, 3: 980, 4: 340, 5: 252, 6: 1188}
finish bed
finish fall
finish pickup
finish run
finish sitdown
finish standup
finish walk
{0: 135, 1: 94, 2: 102, 3: 246, 4: 85, 5: 64, 6: 298}


### Dataloader, loss function and optimizer

In [7]:
model.to(device)
train_dataloader = DataLoader(train_set, batch_size=batch_size, num_workers=0, shuffle=True)
test_dataloader = DataLoader(test_set, batch_size=batch_size, num_workers=0, shuffle=False)

criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


### Training main loop

In [8]:
train_losses = []
for epoch_index in range(0, training_epos):
    start_time = time.time()

    train_loss = 0
    model.train()
    correct = 0
    for batch in tqdm(train_dataloader, leave=False):
        csi, label = batch
        csi = csi.to(device)
        label = label.to(device)

        outputs = model(csi)
        loss = criterion(outputs, label)
        # calculate prediction accuracy
        correct += torch.sum(outputs.argmax(dim=1) == label).cpu().item()

        train_loss += loss.item()
        loss.backward()
        optimizer.step()

    train_loss = train_loss / len(train_set)
    accuracy = 100 * correct / len(train_set)
    train_losses.append(train_loss)
    end_time = time.time()
    if epoch_index % display_epo == 0:
        print('epo: ', epoch_index, 
            ', train:', train_loss,
            ', acc: ', round(accuracy, 2),
            ', time:', round((end_time - start_time) / 60, 3))


                                               

epo:  0 , train: 0.015981117660093823 , acc:  21.39 , time: 0.03


                                               

epo:  50 , train: 0.011835653397005135 , acc:  47.45 , time: 0.028


                                               

epo:  100 , train: 0.010469683223898442 , acc:  52.72 , time: 0.029


                                               

epo:  150 , train: 0.009496010964237321 , acc:  58.83 , time: 0.027


                                               

epo:  200 , train: 0.008630880268913958 , acc:  64.65 , time: 0.025


                                               

epo:  250 , train: 0.00784421622226704 , acc:  69.16 , time: 0.024


                                               

epo:  300 , train: 0.007144860448032413 , acc:  72.4 , time: 0.027


                                               

epo:  350 , train: 0.006542048171177229 , acc:  74.53 , time: 0.027


                                               

epo:  400 , train: 0.0060475947695461645 , acc:  76.42 , time: 0.026


                                               

epo:  450 , train: 0.005701899309153646 , acc:  77.09 , time: 0.028


                                               

epo:  500 , train: 0.00527367204924912 , acc:  78.93 , time: 0.025


                                               

epo:  550 , train: 0.004878592151888921 , acc:  80.03 , time: 0.026


                                               

epo:  600 , train: 0.004574928078087553 , acc:  81.53 , time: 0.027


                                               

epo:  650 , train: 0.00430157163543252 , acc:  83.19 , time: 0.025


                                               

epo:  700 , train: 0.0040371621775205984 , acc:  84.1 , time: 0.026


                                               

epo:  750 , train: 0.0038287635888973794 , acc:  85.06 , time: 0.026


                                               

epo:  800 , train: 0.0035960293933383616 , acc:  86.09 , time: 0.025


                                               

epo:  850 , train: 0.00340016551089123 , acc:  87.02 , time: 0.026


                                               

epo:  900 , train: 0.003252030347526015 , acc:  87.61 , time: 0.025


                                               

epo:  950 , train: 0.003084918979456661 , acc:  88.22 , time: 0.025


                                               

epo:  1000 , train: 0.0029262617135363774 , acc:  88.84 , time: 0.025


                                               

epo:  1050 , train: 0.0028170846673176496 , acc:  89.28 , time: 0.026


                                               

epo:  1100 , train: 0.002705941899835888 , acc:  89.79 , time: 0.025


                                               

epo:  1150 , train: 0.002579055426750146 , acc:  90.33 , time: 0.026


                                               

epo:  1200 , train: 0.0025045101487893937 , acc:  90.33 , time: 0.025


                                               

epo:  1250 , train: 0.00241546049143779 , acc:  91.02 , time: 0.026


                                               

epo:  1300 , train: 0.002299887487359557 , acc:  91.12 , time: 0.026


                                               

epo:  1350 , train: 0.0021822005071630655 , acc:  91.68 , time: 0.025


                                               

epo:  1400 , train: 0.0020866479370030147 , acc:  92.32 , time: 0.026


                                               

epo:  1450 , train: 0.00198624493285531 , acc:  92.54 , time: 0.025


                                               

epo:  1500 , train: 0.0018947424917272544 , acc:  92.91 , time: 0.026


                                               

epo:  1550 , train: 0.0018170751974907894 , acc:  93.18 , time: 0.025


                                               

epo:  1600 , train: 0.0017346243453563955 , acc:  93.45 , time: 0.025


                                               

epo:  1650 , train: 0.0016644493374202156 , acc:  93.74 , time: 0.023


                                               

epo:  1700 , train: 0.0015950006879756448 , acc:  94.14 , time: 0.024


                                               

epo:  1750 , train: 0.0015253482370018608 , acc:  94.31 , time: 0.024


                                               

epo:  1800 , train: 0.0014689438907800642 , acc:  94.55 , time: 0.023


                                               

epo:  1850 , train: 0.0014117827092437211 , acc:  94.7 , time: 0.024


                                               

epo:  1900 , train: 0.0013575906827826495 , acc:  94.85 , time: 0.024


                                               

epo:  1950 , train: 0.0012918476973156699 , acc:  95.12 , time: 0.024


                                               

### Testing (on testing set)

In [9]:
test_label = []
test_prediction = []
test_loss = 0
model.eval()
start_time = time.time()
for batch in tqdm(test_dataloader, leave=False):
    csi, label = batch
    csi = csi.to(device)
    label = label.to(device)

    outputs = model(csi)
    loss = criterion(outputs, label)
    test_prediction.append(outputs.argmax(dim=1).cpu().detach().numpy())
    test_label.append(label.cpu().detach().numpy())

    test_loss += loss.item()
end_time = time.time()
test_label = np.concatenate(test_label, axis=0)
test_prediction = np.concatenate(test_prediction, axis=0)
accuracy = np.sum(test_label == test_prediction) / test_label.shape[0]
print('test:', test_loss / len(test_set),
    ', acc: ', round(accuracy, 2),
    ', time:', round((end_time - start_time) / 60, 3))
confusion_matrix = np.zeros((7, 7))
for i in range(test_label.shape[0]):
    confusion_matrix[test_label[i], test_prediction[i]] += 1

                                             

test: 0.0024281635123770684 , acc:  0.9 , time: 0.007




### Confusion matrix

In [10]:
print(confusion_matrix)

[[111.   0.   6.   1.   8.   7.   2.]
 [  0.  92.   0.   0.   0.   0.   2.]
 [  4.   2.  85.   7.   0.   0.   4.]
 [  1.   0.   0. 238.   0.   2.   5.]
 [  4.   0.   3.   1.  66.   5.   6.]
 [  4.   1.   1.   2.   5.  49.   2.]
 [  1.   1.   0.   9.   1.   3. 283.]]
