In [1]:
import argparse
import os
import time
import random
import torch
import torch.nn as nn
import numpy as np
from utils import *
from model import *
from sklearn.metrics import recall_score, f1_score, accuracy_score



In [2]:
class Args:
    def __init__(self):
        self.config = 'default'
        self.dataset = 'pamap2'
        self.model = 'UniTS'
        self.log = 'log'
        self.exp = ''
        self.seed = 0
        self.ratio = 0.2
        self.n_gpu = 2
        # no of epochs for pamap2 is set to 25
        self.epochs = 25
        self.lr = 1e-3
        self.batch_size = 64
        self.save = True #'BCE'
        self.test_only = False
        self.input_size = 171 # 24 #256    
        self.input_channel = 27 # 113 #45
        self.hheads = 9
        self.SENSOR_AXIS = 3
        
    
    # def corrupt1(self,x):
    #     print(type(x))
    #     #sigma = 0.2
    #     noise = self.sigma * (torch.randn(x.size()).type_as(x))
    #     return x + noise
    
    
        
args = Args()
args.num_labels=12

args.log_path = os.path.join(args.log, args.dataset)
if not os.path.exists(args.log_path):
    os.mkdir(args.log_path)
torch.cuda.set_device(args.n_gpu)
args.model_save_path = os.path.join(args.log_path, args.model + '_'+ args.config + '.pt')
config = read_config(args.config + '.yaml')


In [3]:
def corrupt1(x, sigma=0.2):
    print(type(x))
    sigma = 0.2
    noise = sigma * (torch.randn(x.size()).type_as(x))
    return x + noise

In [204]:
def corrupt2(x, sigma=[50,80]):
        # print(x.shape)
        # current: torch.Size([18944, 1, 27, 171])
        # expected: torch.Size([171, 64, 27])
        #18979,171,27
        # time * batch_size * feature
        # lambdas reuse the sigma variable, unpack
        #sigma =[40,80]
        lambda_corr = sigma[0] # lambda for missing data period
        lambda_norm = sigma[1] # lambda for normal data periodß
        # corrupted_x = copy.deepcopy(x)
        
        # failure_mat = np.random.uniform(size = x.shape) < failure_rate
        # num_failures = np.sum(failure_mat)
        # failure_durations = np.random.exponential(scale = duration_scale, size = num_failures).astype(int)
        mask = torch.ones_like(x)
        #print(mask.shape)
        # failure_id = 0
        # sample_id is the batch size : 64
        # ch_id is the features: 27
        # mask.shape[0]: 171
        
        for sample_id in range(mask.shape[0]):
            for ch_id in range(mask.shape[2]):  
                ptr = 0
                is_corrupted = False
                while ptr < mask.shape[1]:
                    if is_corrupted:
                        corr_duration = int(np.random.exponential(scale=lambda_corr))
                        #  mask[ptr:min(mask.shape[0], ptr + corr_duration), sample_id, ch_id] = 0
                        mask[sample_id ,ch_id, ptr:min(mask.shape[1], ptr + corr_duration)] = 0
                        ptr = min(mask.shape[1], ptr + corr_duration)
                        is_corrupted = False
                    else:
                        norm_duration = int(np.random.exponential(scale=lambda_norm))
                        ptr = min(mask.shape[1], ptr + norm_duration)
                        is_corrupted = True
        #print(mask)
        return torch.mul(x, mask)   
        
       

In [3]:
def corrupt3(x, sigma=[0.1, 50,80]):
        # print(x.shape)
        # current: torch.Size([18944, 1, 27, 171])
        # expected: torch.Size([171, 64, 27])
        #189882,171,27
        noise = sigma[0] * (torch.randn(x.size()).type_as(x))
        x= x+noise
        # time * batch_size * feature
        # lambdas reuse the sigma variable, unpack
        lambda_corr = sigma[1] # lambda for missing data period
        lambda_norm = sigma[2] # lambda for normal data periodß
        # corrupted_x = copy.deepcopy(x)
        
        # failure_mat = np.random.uniform(size = x.shape) < failure_rate
        # num_failures = np.sum(failure_mat)
        # failure_durations = np.random.exponential(scale = duration_scale, size = num_failures).astype(int)
        mask = torch.ones_like(x)
        #print(mask.shape)

        #print(x.shape)
        # failure_id = 0
        # sample_id is the batch size : 64
        # ch_id is the features: 27
        # mask.shape[0]: 171
        
        for sample_id in range(mask.shape[0]):
            for ch_id in range(mask.shape[2]):  
                ptr = 0
                is_corrupted = False
                while ptr < mask.shape[1]:
                    if is_corrupted:
                        corr_duration = int(np.random.exponential(scale=lambda_corr))
                        #  mask[ptr:min(mask.shape[0], ptr + corr_duration), sample_id, ch_id] = 0
                        mask[sample_id ,ch_id, ptr:min(mask.shape[1], ptr + corr_duration)] = 0
                        ptr = min(mask.shape[1], ptr + corr_duration)
                        is_corrupted = False
                    else:
                        norm_duration = int(np.random.exponential(scale=lambda_norm))
                        ptr = min(mask.shape[1], ptr + norm_duration)
                        is_corrupted = True
        #print(mask)
        return torch.mul(x, mask)   
        
       

In [4]:
'''
Commented our parse_args to use in jupyter notebook
'''
# def parse_args():
#     parser = argparse.ArgumentParser(description='train and test')
#     parser.add_argument('--config', default = 'default', type =str) # Read UniTS hyperparameters
#     parser.add_argument('--dataset', default = 'opportunity_lc', type = str,
#                         choices=['opportunity_lc', 'seizure', 'wifi', 'keti'])
#     parser.add_argument('--model', default='UniTS', type=str,
#                         choices=['UniTS', 'THAT', 'RFNet', 'ResNet', 'MaDNN', 'MaCNN', 'LaxCat', 'static'])
#     parser.add_argument('--seed', default=0, type=int)
#     parser.add_argument('--log', default='log', type=str,
#                         help="Log directory")
#     parser.add_argument('--exp', default='', type=str,
#                         choices = ['','noise','missing_data'])
#     parser.add_argument('--ratio', default=0.2, type=float)
#     parser.add_argument('--n_gpu', default=0, type =int)
    
#     parser.add_argument('--epochs', default = 50, type = int)
#     parser.add_argument('--lr', default = 1e-3, type = float)
#     parser.add_argument('--batch_size', default = 64, type = int)

#     parser.add_argument('--save', action = 'store_true')
#     parser.add_argument('--test_only', action = 'store_true')
#     args = parser.parse_args()
#     config = read_config(args.config + '.yaml')
#     if not os.path.exists(args.log):
#         os.mkdir(args.log)
#     args.log_path = os.path.join(args.log, args.dataset)
#     if not os.path.exists(args.log_path):
#         os.mkdir(args.log_path)
#     torch.cuda.set_device(args.n_gpu)

#     if args.dataset == 'opportunity_lc':
#         args.input_size = 256
#         args.input_channel = 45
#         args.hheads = 9
#         args.SENSOR_AXIS = 3
#     elif args.dataset == 'seizure':
#         args.input_channel = 18
#         args.input_size = 256
#         args.hheads = 6
#         args.SENSOR_AXIS = 1
#     elif args.dataset == 'wifi':
#         args.input_channel = 180
#         args.input_size = 256
#         args.batch_size = 16
#         args.hheads = 9
#         args.SENSOR_AXIS = 3
#     elif args.dataset == 'keti':
#         args.input_channel = 4
#         args.input_size = 256
#         args.hheads = 4
#         args.SENSOR_AXIS = 1
#     args.model_save_path = os.path.join(args.log_path, args.model + '_'+ args.config + '.pt')
#     return args, config

# args, config = parse_args()
log = set_up_logging(args, config)
args.log = log

layer_num:	1

window_list:	[7, 16, 32, 48, 64, 80, 96, 112, 128]

stride_list:	[3, 8, 16, 24, 32, 40, 48, 56, 64]

k_list:	[3, 8, 16, 24, 24, 32, 32, 40, 40]

hidden_channel:	48



In [5]:
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sliding_window import sliding_window
import pickle as cp
import pandas as pd

In [None]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00231/PAMAP2_Dataset.zip --no-check-certificate

In [None]:
import zipfile
with zipfile.ZipFile("PAMAP2_Dataset.zip","r") as zip_ref:
    zip_ref.extractall(".")

In [6]:
window_len = 512 # 512
stride_len = 20 # 100
act_list = [1, 2, 3, 4, 5, 6, 7, 12, 13, 16, 17, 24]
# act_list = [1, 2]
act_labels_txt = ['lay', 'sit', 'std', 'wlk', 'run', 'cyc', 'nord', 'ups', 'dws', 'vac', 'iron', 'rop']


In [7]:
data=[]
user_labels=[]
act_labels=[]

# columns for IMU data
# 4-20 IMU hand
# 21-37 IMU chest
# 38-54 IMU ankle
# 2-4 3D-acceleration data (ms-2), scale: ±16g, resolution: 13-bit
# 8-10 3D-gyroscope data (rad/s)
# 11-13 3D-magnetometer data (μT)
imu_locs = [4,5,6, 10,11,12, 13,14,15, 
            21,22,23, 27,28,29, 30,31,32, 
            38,39,40, 44,45,46, 47,48,49
           ] 

# acc=0,1,2,9,10,11,18,19,20
# gyrp 3,4,5, 12,13,14,21,22,23
# mag 6,7,8,15,16,17, 24,25,26

# scaler = StandardScaler()

for uid in np.arange(1,10):
    path = '....../PAMAP2_Dataset/Protocol/subject10' + str(uid) + '.dat'
    df = pd.read_table(path, sep=' ', header=None)
    act_imu_filter = df.iloc[:, imu_locs] 


    for act_id in range(len(act_list)):
        act_filter =  act_imu_filter[df.iloc[:, 1] == act_list[act_id]]
        act_data = act_filter.to_numpy()
        act_data = np.transpose(act_data)

        # sliding window segmentation
        start_idx = 0
        while start_idx + window_len < act_data.shape[1]:
            window_data = act_data[:, start_idx:start_idx+window_len] 
            downsamp_data = window_data[:, ::3] # downsample from 100hz to 33.3hz
            downsamp_data = np.nan_to_num(downsamp_data) # remove nan
            # downsamp_data = np.transpose(downsamp_data) # dim: seq_len, feature_len

            data.append(downsamp_data)
            user_labels.append(uid)
            act_labels.append(act_id)
            start_idx = start_idx + stride_len
            
data = np.array(data).astype('float32')

normalized_X = np.zeros_like(data) # allocate numpy array for normalized data
for ch_id in range(data.shape[1]): # loop the 27 sensor channels
    ch_data = data[:, ch_id, :] # the data of channel id
    scaler = MinMaxScaler() # maybe different scalers?
    ch_data = scaler.fit_transform(ch_data) # scale the data in this channel to [0,1]
    normalized_X[:, ch_id, :] = ch_data # assign normalized data to normalized_X
normalized_X = np.transpose(normalized_X, (0, 2, 1)) # overwrote X here, changed dimensions into: num_samples, sequence_length, feature_length
        
    # convert list to numpy array
    # normalized_X= normalized_X.reshape(normalized_X.shape[0], 1, normalized_X.shape[1], normalized_X.shape[2]) 
act_labels = np.array(act_labels).astype('float32')

In [8]:
normalized_X=torch.Tensor(normalized_X)

In [9]:
xtrain, xtest, ytrain, ytest = train_test_split(normalized_X, act_labels, test_size=args.ratio, random_state=42)

Create a copy of xtest for evaluation

In [10]:
Xtest = xtest

In [11]:
xtrain = corrupt3(xtrain)
xtest = corrupt3(xtest)

## Our pre-processing

In [12]:
print(config.window_list)
print(config.stride_list)
print(config.k_list)
print(config.layer_num)
print(config.hidden_channel)
# Adjust model parameters based on our preprocessing
# config.window_list = [7,16,32, 48, 64, 80]
config.window_list = [7, 16, 32, 48, 64, 80, 96, 112, 128]
# config.stride_list = [3, 8, 16, 24, 32, 40]
config.stride_list = [3, 8, 16, 24, 32, 40, 48, 56, 64]
# config.k_list = [3, 8, 16, 24, 24, 32]
config.k_list = [3, 8, 16, 24, 24, 32, 32, 40, 40]

[7, 16, 32, 48, 64, 80, 96, 112, 128]
[3, 8, 16, 24, 32, 40, 48, 56, 64]
[3, 8, 16, 24, 24, 32, 32, 40, 40]
1
48


In [13]:
def test(model, xtest, ytest):
    # choose accordingly

    #noise = sigma * (torch.randn(xtest.size()).type_as(xtest))
    #xtest = corrupt3(xtest, sigma)
    #xtest=xtest+noise
    
    y_pred = []
    y_true = []
    with torch.no_grad():
        model.eval()
        for i in range(0, len(xtest), args.batch_size):
            if i + args.batch_size <= len(xtest):
               
                x = torch.Tensor(xtest[i: i+args.batch_size]).cuda()
               # x_new = float(x.item())
                # print(type(ytest[i: i+args.batch_size]))
                y_true += list(ytest[i: i+args.batch_size])
            else:
                x = torch.Tensor(xtest[i:]).cuda()
                y_true += list(ytest[i:])
            out = model(x)
            pred = torch.argmax(out, dim = -1)
            y_pred += pred.cpu().tolist()

    log("Accuracy : " + str(accuracy_score(y_true, y_pred)) +
        "\nWeighted F1 : " + str(f1_score(y_true, y_pred, labels=list(range(args.num_labels)),average='weighted')) )

In [14]:
if args.model == 'UniTS':
    model = UniTS(input_size = args.input_size, sensor_num = args.input_channel, layer_num = config.layer_num,
    window_list = config.window_list,  stride_list = config.stride_list, k_list = config.k_list,
    out_dim = args.num_labels, hidden_channel = config.hidden_channel).cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr = args.lr)
# elif args.model == 'static':
#     model = static_UniTS(input_size = args.input_size, sensor_num = args.input_channel, layer_num = config.layer_num,
#     window_list = config.window_list, stride_list = config.stride_list, k_list = config.k_list,
#     out_dim = args.num_labels, hidden_channel = config.hidden_channel).cuda()

#     optimizer = torch.optim.Adam(model.parameters(), lr = args.lr)
# elif args.model == 'THAT':
#     args.hlayers = 5
#     args.vlayers = 1
#     args.vheads = 16
#     args.K = 10
#     args.sample = 4
#     model = HARTrans(args).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

# elif args.model == 'RFNet':
#     model = RFNet(num_classes = args.num_labels, input_channel = args.input_channel, win_len = args.input_size).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
# elif args.model == 'ResNet':
#     model = ResNet(input_size = args.input_size, input_channel = args.input_channel, num_label = args.num_labels).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)        

# elif args.model == 'MaDNN':
#     model = MaDNN(input_size = args.input_size, input_channel = args.input_channel, num_label = args.num_labels).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)   

# elif args.model == 'MaCNN':
#     model = MaCNN(input_size = args.input_size, input_channel = args.input_channel, num_label = args.num_labels, 
#         sensor_num = int(args.input_channel / args.SENSOR_AXIS)).cuda()
#     optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
# elif args.model == 'LaxCat':
#     model = LaxCat(input_size = args.input_size, input_channel = args.input_channel, num_label = args.num_labels,
#         hidden_dim = 64, kernel_size = 32, stride = 8).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

total_params = sum(p.numel() for p in model.parameters())
log('Total parameters: ' + str(total_params))

if args.test_only:
    if os.path.exists(args.model_save_path):
        model.load_state_dict(torch.load(args.model_save_path))
        # changing while testing
        test(model, xtest, ytest)
        #test(model, xtest, ytest,sigma=[0.3, 60, 80])

    else:
        log("Model state dict not found!")
    # return


Total parameters: 2941078


To load path during testing

In [18]:
model.load_state_dict(torch.load(args.model_save_path))

<All keys matched successfully>

In [None]:
torch.load(args.model_save_path).keys()

odict_keys(['ts_encoders.0.FIC.conv.weight', 'ts_encoders.0.RPC.weight', 'ts_encoders.0.RPC.bias', 'ts_encoders.1.FIC.conv.weight', 'ts_encoders.1.RPC.weight', 'ts_encoders.1.RPC.bias', 'ts_encoders.2.FIC.conv.weight', 'ts_encoders.2.RPC.weight', 'ts_encoders.2.RPC.bias', 'ts_encoders.3.FIC.conv.weight', 'ts_encoders.3.RPC.weight', 'ts_encoders.3.RPC.bias', 'ts_encoders.4.FIC.conv.weight', 'ts_encoders.4.RPC.weight', 'ts_encoders.4.RPC.bias', 'ts_encoders.5.FIC.conv.weight', 'ts_encoders.5.RPC.weight', 'ts_encoders.5.RPC.bias', 'ts_encoders.6.FIC.conv.weight', 'ts_encoders.6.RPC.weight', 'ts_encoders.6.RPC.bias', 'ts_encoders.7.FIC.conv.weight', 'ts_encoders.7.RPC.weight', 'ts_encoders.7.RPC.bias', 'ts_encoders.8.FIC.conv.weight', 'ts_encoders.8.RPC.weight', 'ts_encoders.8.RPC.bias', 'multi_channel_fusion.0.weight', 'multi_channel_fusion.0.bias', 'multi_channel_fusion.1.weight', 'multi_channel_fusion.1.bias', 'multi_channel_fusion.2.weight', 'multi_channel_fusion.2.bias', 'multi_channe

In [None]:
# random.seed(args.seed)
# random.shuffle(xtrain)
# random.seed(args.seed)
# random.shuffle(ytrain)

In [15]:
loss_func = nn.CrossEntropyLoss()
try:
    for ep in range(1, 1+args.epochs):
        model.train()
        epoch_loss = 0
        log("Training epoch : " + str(ep))
        for i in range(0, len(xtrain), args.batch_size):
            if i + args.batch_size <= len(xtrain):
                x = torch.Tensor(xtrain[i: i+args.batch_size]).cuda()

                y = torch.LongTensor(ytrain[i: i+args.batch_size]).cuda()  
            else:
                x = torch.Tensor(xtrain[i:]).cuda()
                y = torch.LongTensor(ytrain[i:]).cuda()                      
            out = model(x)
            loss = loss_func(out, y)
            epoch_loss += loss.cpu().item()

            optimizer.zero_grad()           
            loss.backward()
            optimizer.step()

        log("Training loss : " + str(epoch_loss / (i / args.batch_size + 1)))
        # change while training, sigma value is passed here
        #test(model, xtrain, ytrain,sigma=0.2)
        test(model, xtest, ytest)

        #test(model, xtest, ytest,sigma=[0.2, 50,80])

        log("----------------------------")


except KeyboardInterrupt:
    print('Exiting from training early')
    test(model, xtest, ytest)
if args.save:
    torch.save(model.state_dict(), args.model_save_path)

Training epoch : 1
Training loss : 0.4017756482917367
Accuracy : 0.9347700089572686
Weighted F1 : 0.9349686140213165
----------------------------
Training epoch : 2
Training loss : 0.14318107347464676
Accuracy : 0.9603772590758206
Weighted F1 : 0.9604980870861523
----------------------------
Training epoch : 3
Training loss : 0.09118237012013379
Accuracy : 0.9643289952052269
Weighted F1 : 0.9643566856442122
----------------------------
Training epoch : 4
Training loss : 0.06679324012248923
Accuracy : 0.9753411665525054
Weighted F1 : 0.9754223977915714
----------------------------
Training epoch : 5
Training loss : 0.0517402682595603
Accuracy : 0.9836134675167291
Weighted F1 : 0.9836297500309151
----------------------------
Training epoch : 6
Training loss : 0.04300300183865554
Accuracy : 0.985299541598609
Weighted F1 : 0.9852842866129785
----------------------------
Training epoch : 7
Training loss : 0.034383821941593624
Accuracy : 0.9851414721534327
Weighted F1 : 0.9851596910412089
--

In [16]:
torch.save(model.state_dict(), args.model_save_path)

Evaluation

In [30]:
#xtest_eval = corrupt1(Xtest,sigma=0.05)
xtest_eval = corrupt3(Xtest,sigma=[0.2, 60, 80])


test(model, xtest_eval, ytest)

Accuracy : 0.9905158332894252
Weighted F1 : 0.9905164607783523
