In [2]:
import argparse
import os
import time
import random
import torch
import torch.nn as nn
import numpy as np
from utils import *
from model import *
from sklearn.metrics import recall_score, f1_score, accuracy_score

In [3]:
class Args:
    def __init__(self):
        self.config = 'default'
        self.dataset = 'pamap2'
        self.model = 'UniTS'
        self.log = 'log'
        self.exp = ''
        self.seed = 0
        self.ratio = 0.2
        self.n_gpu = 2
        self.epochs = 25
        self.lr = 1e-3
        # self.sigma = 0.35
        self.batch_size = 64
        self.save = True #'BCE'
        self.test_only = False
        self.input_size = 171 # 24 #256    
        self.input_channel = 27 # 113 #45
        self.hheads = 9
        self.SENSOR_AXIS = 3
        
        # self.momentum = 0.1
        # self.c = 0.01
        # self.svmLR = 1e-4
        # self.Ntest = 100
        # self.gpuNo = 2
        # self.cuda_id = 2
        # self.multimodalZ = False
        # self.window_len = 512
        # self.stride_len = 20
        # self.act_list = [1, 2, 3, 4, 5, 6, 7, 12, 13, 16, 17, 24]
        # self.imSize = 64
        # self.sigma = [60, 80]
        # # self.sigma = 0.3
        # self.random_seed = 0
        # self.train_split = 0.8
        
args = Args()
args.num_labels=12

args.log_path = os.path.join(args.log, args.dataset)
if not os.path.exists(args.log_path):
    os.mkdir(args.log_path)
torch.cuda.set_device(args.n_gpu)
args.model_save_path = os.path.join(args.log_path, args.model + '_'+ args.config + '.pt')
config = read_config(args.config + '.yaml')


In [4]:
'''
Commented our parse_args to use in jupyter notebook
'''
# def parse_args():
#     parser = argparse.ArgumentParser(description='train and test')
#     parser.add_argument('--config', default = 'default', type =str) # Read UniTS hyperparameters
#     parser.add_argument('--dataset', default = 'opportunity_lc', type = str,
#                         choices=['opportunity_lc', 'seizure', 'wifi', 'keti'])
#     parser.add_argument('--model', default='UniTS', type=str,
#                         choices=['UniTS', 'THAT', 'RFNet', 'ResNet', 'MaDNN', 'MaCNN', 'LaxCat', 'static'])
#     parser.add_argument('--seed', default=0, type=int)
#     parser.add_argument('--log', default='log', type=str,
#                         help="Log directory")
#     parser.add_argument('--exp', default='', type=str,
#                         choices = ['','noise','missing_data'])
#     parser.add_argument('--ratio', default=0.2, type=float)
#     parser.add_argument('--n_gpu', default=0, type =int)
    
#     parser.add_argument('--epochs', default = 50, type = int)
#     parser.add_argument('--lr', default = 1e-3, type = float)
#     parser.add_argument('--batch_size', default = 64, type = int)

#     parser.add_argument('--save', action = 'store_true')
#     parser.add_argument('--test_only', action = 'store_true')
#     args = parser.parse_args()
#     config = read_config(args.config + '.yaml')
#     if not os.path.exists(args.log):
#         os.mkdir(args.log)
#     args.log_path = os.path.join(args.log, args.dataset)
#     if not os.path.exists(args.log_path):
#         os.mkdir(args.log_path)
#     torch.cuda.set_device(args.n_gpu)

#     if args.dataset == 'opportunity_lc':
#         args.input_size = 256
#         args.input_channel = 45
#         args.hheads = 9
#         args.SENSOR_AXIS = 3
#     elif args.dataset == 'seizure':
#         args.input_channel = 18
#         args.input_size = 256
#         args.hheads = 6
#         args.SENSOR_AXIS = 1
#     elif args.dataset == 'wifi':
#         args.input_channel = 180
#         args.input_size = 256
#         args.batch_size = 16
#         args.hheads = 9
#         args.SENSOR_AXIS = 3
#     elif args.dataset == 'keti':
#         args.input_channel = 4
#         args.input_size = 256
#         args.hheads = 4
#         args.SENSOR_AXIS = 1
#     args.model_save_path = os.path.join(args.log_path, args.model + '_'+ args.config + '.pt')
#     return args, config

# args, config = parse_args()
log = set_up_logging(args, config)
args.log = log

layer_num:	1

window_list:	[7, 16, 32, 48, 64, 80, 96, 112, 128]

stride_list:	[3, 8, 16, 24, 32, 40, 48, 56, 64]

k_list:	[3, 8, 16, 24, 24, 32, 32, 40, 40]

hidden_channel:	48



In [9]:
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sliding_window import sliding_window
import pickle as cp
import pandas as pd

In [52]:
def prepare_data_PAMAP2(root_path='../../../PAMAP2_Dataset/Protocol/subject10'):
    X=[]
    user_labels=[]
    act_labels=[]

    window_len = 512
    stride_len = 20
    # columns for IMU data
    imu_locs = [4,5,6, 10,11,12, 13,14,15, 
                21,22,23, 27,28,29, 30,31,32, 
                38,39,40, 44,45,46, 47,48,49
            ] 
    
    act_list = [1, 2, 3, 4, 5, 6, 7, 12, 13, 16, 17, 24]

    scaler = MinMaxScaler()
    # scaler = StandardScaler()

    for uid in np.arange(1,10):
        path = root_path + str(uid) + '.dat'
        df = pd.read_table(path, sep=' ', header=None)
        act_imu_filter = df.iloc[:, imu_locs] 

        for act_id in range(len(act_list)):
            act_filter =  act_imu_filter[df.iloc[:, 1] == act_list[act_id]]
            act_data = act_filter.to_numpy()
                
            act_data = np.transpose(act_data)
            # sliding window segmentation
            start_idx = 0
            while start_idx + window_len < act_data.shape[1]:
                window_data = act_data[:, start_idx:start_idx + window_len]
                downsamp_data = window_data[:, ::3] # downsample from 100hz to 33.3hz
                downsamp_data = np.nan_to_num(downsamp_data) # remove nan

                X.append(downsamp_data)
                user_labels.append(uid)
                act_labels.append(act_id)
                start_idx = start_idx + stride_len

    X_n = np.array(X).astype('float32')

    normalized_X = np.zeros_like(X_n) # allocate numpy array for normalized data
    for ch_id in range(X_n.shape[1]): # loop the 27 sensor channels
        ch_data = X_n[:, ch_id, :] # the data of channel id
        scaler = MinMaxScaler() # maybe different scalers?
        ch_data = scaler.fit_transform(ch_data) # scale the data in this channel to [0,1]
        normalized_X[:, ch_id, :] = ch_data # assign normalized data to normalized_X
    normalized_X = np.transpose(normalized_X, (0, 2, 1)) # overwrote X here, changed dimensions into: num_samples, sequence_length, feature_length
        
    # convert list to numpy array
    # normalized_X= normalized_X.reshape(normalized_X.shape[0], 1, normalized_X.shape[1], normalized_X.shape[2]) 
    act_labels = np.array(act_labels).astype('float32')
    # act_labels = act_labels.reshape(act_labels.shape[0],1)
    # act_labels = to_categorical(act_labels, num_classes=len(act_list))

    return normalized_X, act_labels

In [53]:
normalized_X, act_labels = prepare_data_PAMAP2()

In [54]:
X_train, X_test, y_train, y_test = train_test_split(normalized_X, act_labels, test_size=args.ratio, random_state=42)

In [55]:
# print(X_train.shape)
print(y_train)

[ 8.  5. 10. ...  6.  0.  3.]


## Our pre-processing

In [56]:
# from sliding_window import sliding_window
# import pickle as cp
# from tensorflow.keras.utils import to_categorical


In [57]:
# # Number of Sensor Channels used in the OPPORTUNITY dataset.
# NB_SENSOR_CHANNELS = 113

# # Number of classes in which data is classified (or to be classified).
# NUM_CLASSES = 5

# # Length of the sliding window used to segmenting the time-series-data.
# SLIDING_WINDOW_LENGTH = 24

# # Steps of the sliding window used in segmenting the data.
# SLIDING_WINDOW_STEP = 12

# act_labels_txt = ['std', 'wlk', 'sit', 'lie', 'null']

In [58]:
# def load_dataset(filename):

#     f = open(filename, 'rb')
#     data = cp.load(f)
#     f.close()

#     X_train, y_train = data[0]
#     X_test, y_test = data[1]

#     print(" ..from file {}".format(filename))
#     print(" ..reading instances: train {0}, test {1}".format(X_train.shape, X_test.shape))

#     X_train = X_train.astype(np.float32)
#     X_test = X_test.astype(np.float32)

#     # The targets are casted to int8 for GPU compatibility.
#     y_train = y_train.astype(np.uint8)
#     y_test = y_test.astype(np.uint8)

#     return X_train, y_train, X_test, y_test

# print("Loading Data...")
# X_train, y_train, X_test, y_test = load_dataset('../../../data/oppChallenge_gestures.data')

# assert NB_SENSOR_CHANNELS == X_train.shape[1]
# def opp_sliding_window(data_x, data_y, ws, ss):
#     data_x = sliding_window(data_x,(ws,data_x.shape[1]),(ss,1))
#     data_y = np.asarray([[i[-1]] for i in sliding_window(data_y,ws,ss)])
#     return data_x.astype(np.float32), data_y.reshape(len(data_y)).astype(np.uint8)

# # Sensor data is segmented using a sliding window mechanism
# X_test, y_test = opp_sliding_window(X_test, y_test, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
# print(" ..after sliding window (testing): inputs {0}, targets {1}".format(X_test.shape, y_test.shape))

# # Data is reshaped since the input of the network is a 4 dimension tensor
# X_test = X_test.reshape((-1, SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS))
# # X_test = np.transpose(X_test, (0, 2, 1))
# # X_test= X_test.reshape(X_test.shape[0], 1, X_test.shape[1], X_test.shape[2]) # convert list to numpy array

In [59]:
# X_train, y_train = opp_sliding_window(X_train, y_train, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)
# print(" ..after sliding window (training): inputs {0}, targets {1}".format(X_train.shape, y_train.shape))
# X_train = X_train.reshape((-1,SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS))
# # X_train = np.transpose(X_train, (0, 2, 1))
# # X_train= X_train.reshape(X_train.shape[0], 1, X_train.shape[1], X_train.shape[2]) # convert list to numpy array

# X_train.shape


In [60]:
# y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
# y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [61]:
X_train = list(X_train)
X_test = list(X_test)
xtrain, ytrain, xtest, ytest = X_train, y_train, X_test, y_test

In [62]:
# X_train for OPPO (46495, 24, 113)


In [63]:
print(config.window_list)
print(config.stride_list)
print(config.k_list)
print(config.layer_num)
print(config.hidden_channel)
# Adjust model parameters based on our preprocessing
config.window_list = [7,16,32, 48, 64, 80]
config.stride_list = [3, 8, 16, 24, 32, 40]
config.k_list = [3, 8, 16, 24, 24, 32]

[7, 16, 32, 48, 64, 80]
[3, 8, 16, 24, 32, 40]
[3, 8, 16, 24, 24, 32]
1
48


In [64]:
if args.model == 'UniTS':
    model = UniTS(input_size = args.input_size, sensor_num = args.input_channel, layer_num = config.layer_num,
    window_list = config.window_list, stride_list = config.stride_list, k_list = config.k_list,
    out_dim = args.num_labels, hidden_channel = config.hidden_channel).cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr = args.lr)
# elif args.model == 'static':
#     model = static_UniTS(input_size = args.input_size, sensor_num = args.input_channel, layer_num = config.layer_num,
#     window_list = config.window_list, stride_list = config.stride_list, k_list = config.k_list,
#     out_dim = args.num_labels, hidden_channel = config.hidden_channel).cuda()

#     optimizer = torch.optim.Adam(model.parameters(), lr = args.lr)
# elif args.model == 'THAT':
#     args.hlayers = 5
#     args.vlayers = 1
#     args.vheads = 16
#     args.K = 10
#     args.sample = 4
#     model = HARTrans(args).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

# elif args.model == 'RFNet':
#     model = RFNet(num_classes = args.num_labels, input_channel = args.input_channel, win_len = args.input_size).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
# elif args.model == 'ResNet':
#     model = ResNet(input_size = args.input_size, input_channel = args.input_channel, num_label = args.num_labels).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)        

# elif args.model == 'MaDNN':
#     model = MaDNN(input_size = args.input_size, input_channel = args.input_channel, num_label = args.num_labels).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)   

# elif args.model == 'MaCNN':
#     model = MaCNN(input_size = args.input_size, input_channel = args.input_channel, num_label = args.num_labels, 
#         sensor_num = int(args.input_channel / args.SENSOR_AXIS)).cuda()
#     optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
# elif args.model == 'LaxCat':
#     model = LaxCat(input_size = args.input_size, input_channel = args.input_channel, num_label = args.num_labels,
#         hidden_dim = 64, kernel_size = 32, stride = 8).cuda()
#     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

total_params = sum(p.numel() for p in model.parameters())
log('Total parameters: ' + str(total_params))

if args.test_only:
    if os.path.exists(args.model_save_path):
        model.load_state_dict(torch.load(args.model_save_path))
        test(model, xtest, ytest)
    else:
        log("Model state dict not found!")
    # return


Total parameters: 1992630


In [65]:
random.seed(args.seed)
random.shuffle(xtrain)
random.seed(args.seed)
random.shuffle(ytrain)

In [66]:
def test(model, xtest, ytest):
    y_pred = []
    y_true = []
    with torch.no_grad():
        model.eval()
        for i in range(0, len(xtest), args.batch_size):
            if i + args.batch_size <= len(xtest):
                x = torch.Tensor(xtest[i: i+args.batch_size]).cuda()
                # print(type(ytest[i: i+args.batch_size]))
                y_true += list(ytest[i: i+args.batch_size])
            else:
                x = torch.Tensor(xtest[i:]).cuda()
                y_true += list(ytest[i:])
            out = model(x)
            pred = torch.argmax(out, dim = -1)
            y_pred += pred.cpu().tolist()

    log("Accuracy : " + str(accuracy_score(y_true, y_pred)) +
        "\nWeighted F1 : " + str(f1_score(y_true, y_pred, labels=list(range(args.num_labels)),average='weighted')) )

In [67]:
loss_func = nn.CrossEntropyLoss()
try:
    for ep in range(1, 1+args.epochs):
        model.train()
        epoch_loss = 0
        log("Training epoch : " + str(ep))
        for i in range(0, len(xtrain), args.batch_size):
            if i + args.batch_size <= len(xtrain):
                x = torch.Tensor(xtrain[i: i+args.batch_size]).cuda()
                y = torch.LongTensor(ytrain[i: i+args.batch_size]).cuda()  
            else:
                x = torch.Tensor(xtrain[i:]).cuda()
                y = torch.LongTensor(ytrain[i:]).cuda()                      
            out = model(x)
            loss = loss_func(out, y)
            epoch_loss += loss.cpu().item()

            optimizer.zero_grad()           
            loss.backward()
            optimizer.step()

        log("Training loss : " + str(epoch_loss / (i / args.batch_size + 1)))
        test(model, xtest, ytest)
        log("----------------------------")


except KeyboardInterrupt:
    print('Exiting from training early')
    test(model, xtest, ytest)
if args.save:
    torch.save(model.state_dict(), args.model_save_path)

Training epoch : 1
Training loss : 0.22837813920400277
Accuracy : 0.9538437220085357
Weighted F1 : 0.9539509332476872
----------------------------
Training epoch : 2
Training loss : 0.08400580414977685
Accuracy : 0.9726539859845091
Weighted F1 : 0.9727869522095438
----------------------------
Training epoch : 3
Training loss : 0.05026092524370065
Accuracy : 0.9821908425101428
Weighted F1 : 0.9823181681773392
----------------------------
Training epoch : 4
Training loss : 0.03573713502294495
Accuracy : 0.9416723747299647
Weighted F1 : 0.939246228365403
----------------------------
Training epoch : 5
Training loss : 0.029756427915852063
Accuracy : 0.9902523842141314
Weighted F1 : 0.9902329987549917
----------------------------
Training epoch : 6
Training loss : 0.022905549714251162
Accuracy : 0.9936772221929501
Weighted F1 : 0.9936902487255769
----------------------------
Training epoch : 7
Training loss : 0.01972888499169012
Accuracy : 0.9938879814531851
Weighted F1 : 0.9938856541358142

In [68]:
torch.save(model.state_dict(), args.model_save_path)