In [1]:
import os
# Move the working directory one level up before the project imports run.
# NOTE(review): presumably this is what makes the `MAT` package importable from
# the notebook's location — confirm against the repository layout.
# The path is relative, so every re-execution of this cell climbs one more
# directory; run it once per kernel session. The relative dataset paths used
# later in the notebook are resolved against the directory set here.
os.chdir('../')
import numpy as np
from MAT import DEVICE
from MAT.augment import numpy_augment
from MAT.dataset.data_generator import DatasetWindowsFusion
from MAT.models.model_tcn_classifier import TCN
from MAT.common.train import mat_train
from MAT import logger

## hyper-parameters

In [2]:
# gamma: multiplier applied to the per-instance weight of the target-domain
# training windows when the sample weights are computed further down;
# 1.0 leaves the length-balanced target weight unchanged.
gamma = 1.0

## load target data

In [3]:
# location of the target-domain arrays (relative to the current working directory)
target_data_folder = '../Dataset/MotionSense'
target_data_path = os.path.join(target_data_folder, "data_new_map.npy")
target_label_path = os.path.join(target_data_folder, "p_lb_new_map.npy")

# windows: array shape [num windows, window length, num channels]
target_data = np.load(target_data_path)
# labels: array shape [num windows, 2(subject id, categorical label)]
target_label = np.load(target_label_path)

# training subjects: 4 evenly spaced values starting at 1 and stopping before 24,
# truncated to integers (-> 1, 6, 12, 18)
train_subjects = np.unique(
    np.linspace(1, 24, num=4, endpoint=False).astype(int)
)
# boolean mask over windows: True where the window belongs to a training subject
subject_ids = target_label[:, 0]
train_window_idx = np.isin(subject_ids, train_subjects)
valid_window_idx = ~train_window_idx

# train / validation split by subject; the subject-id column is dropped from the labels
# data: array shape [num windows, window length, num channels]
# label: array shape [num windows, 1(categorical label)]
target_train_data, target_valid_data = (
    target_data[train_window_idx],
    target_data[valid_window_idx],
)
target_train_label, target_valid_label = (
    target_label[train_window_idx, 1:],
    target_label[valid_window_idx, 1:],
)

# report the shape of every split
for split_name, split_array in (
    ("Target train data", target_train_data),
    ("Target train label", target_train_label),
    ("Target valid data", target_valid_data),
    ("Target valid label", target_valid_label),
):
    logger.info(f"{split_name}: {split_array.shape}")

2021-09-29 00:29:51,794 [INFO ] 3281918625 <module>   Target train data: (4626, 300, 6)
2021-09-29 00:29:51,795 [INFO ] 3281918625 <module>   Target train label: (4626, 1)
2021-09-29 00:29:51,795 [INFO ] 3281918625 <module>   Target valid data: (22936, 300, 6)
2021-09-29 00:29:51,795 [INFO ] 3281918625 <module>   Target valid label: (22936, 1)


## load source data

In [4]:
# location of the source-domain arrays (relative to the current working directory)
source_data_folder = '../Dataset/MobiActV2'
pick_index_path = os.path.join(source_data_folder, 'mobiactv2_r2_pick_index_one-tenth.npy')
source_data_path = os.path.join(source_data_folder, "data_new.npy")
source_label_path = os.path.join(source_data_folder, "p_lb_new.npy")

# indices of the selected source instances
pick_index = np.load(pick_index_path)

# keep only the selected windows
# data: array shape [num windows, window length, num channels]
source_data = np.load(source_data_path)[pick_index]
# stored labels are [num windows, 2(subject id, categorical label)];
# select the same windows, then drop the subject-id column
# => shape [num windows, 1(categorical label)]
source_label = np.load(source_label_path)[pick_index][:, 1:]

for array_name, array in (("Source data", source_data), ("Source label", source_label)):
    logger.info(f"{array_name}: {array.shape}")

2021-09-29 00:29:52,080 [INFO ] 706849133  <module>   Source data: (11164, 300, 6)
2021-09-29 00:29:52,080 [INFO ] 706849133  <module>   Source label: (11164, 1)


## set weight and mask for each instance of source/target set

In [5]:
def _attach_weight_and_mask(label, weight, mask):
    """Build the [num windows, 3(label, weight, mask)] array for one domain.

    Only the first column of ``label`` is read, so the result is the same
    whether ``label`` is the original [num windows, 1] array or an array that
    already carries weight/mask columns. This keeps the cell idempotent:
    re-running it does not keep appending columns.

    Args:
        label: 2-D array whose column 0 holds the categorical label.
        weight: scalar sample weight written to column 1 of every row.
        mask: scalar multi-task mask written to column 2 of every row.

    Returns:
        New float array of shape [len(label), 3].
    """
    n_windows = len(label)
    return np.concatenate([
        label[:, :1],
        np.full([n_windows, 1], weight, dtype=float),
        np.full([n_windows, 1], mask, dtype=float),
    ], axis=1)


# calculate source and target weights: each domain is weighted by
# (total train windows / windows in that domain); the target weight is
# additionally scaled by gamma
total_train_set_len = len(target_train_label) + len(source_label)
source_weight = total_train_set_len / len(source_label)
target_weight = (total_train_set_len / len(target_train_label)) * gamma

# attach sample weight and multi-task mask to every instance
# array shape [num windows, 3(label, weight, mask)]
source_label = _attach_weight_and_mask(source_label, source_weight, mask=0)  # source mask
target_train_label = _attach_weight_and_mask(target_train_label, target_weight, mask=1)  # target mask

## init dataset loader with augmentation

In [6]:
# merge both domains into a single train set, target windows first
# data array shape [num windows, window length, num channels]
train_data = np.concatenate([target_train_data, source_data])
# label array shape [num windows, 3(label, weight, mask)]
train_label = np.concatenate([target_train_label, source_label])

# rotation augmenter handed to the train set only
rotate_augmenter = numpy_augment.Rotate(
    input_shape=[300, 3],
    rotate_x_range=[0., 20.],
    rotate_y_range=[0., 20.],
    rotate_z_range=[0., 20.],
)

# each Dataset receives a list of 2 modalities: channels 0-2 and channels 3-5
train_modalities = [train_data[:, :, :3], train_data[:, :, 3:]]
valid_modalities = [target_valid_data[:, :, :3], target_valid_data[:, :, 3:]]

train_set = DatasetWindowsFusion(
    train_modalities,
    train_label,
    augment_rate=0.5,
    augmenter=rotate_augmenter,
)
# validation set: target-domain windows only, no augmentation arguments
valid_set = DatasetWindowsFusion(valid_modalities, target_valid_label)

for array_name, array in (("Total train data", train_data), ("Total train label", train_label)):
    logger.info(f"{array_name}: {array.shape}")
for set_name, dataset in (("Train set size", train_set), ("Valid set size", valid_set)):
    logger.info(f"{set_name}: {len(dataset)}")

2021-09-29 00:29:52,112 [INFO ] 1811352841 <module>   Total train data: (15790, 300, 6)
2021-09-29 00:29:52,113 [INFO ] 1811352841 <module>   Total train label: (15790, 3)
2021-09-29 00:29:52,113 [INFO ] 1811352841 <module>   Train set size: 15790
2021-09-29 00:29:52,113 [INFO ] 1811352841 <module>   Valid set size: 22936


## init model

In [7]:
# count the distinct categorical labels (column 0) present in each domain
n_class_source = np.unique(source_label[:, 0]).size
n_class_target = np.unique(target_train_label[:, 0]).size

# TCN channel widths: six entries of 64 followed by two entries of 128
tcn_channels = (64,) * 6 + (128,) * 2

# build the model with one class count per domain (source first, then target)
model = TCN(
    n_classes=[n_class_source, n_class_target],
    how_flatten="spatial attention gap",
    n_tcn_channels=tcn_channels,
    tcn_kernel_size=2,
    dilation_base=2,
    tcn_droprate=0.2,
    use_spatial_dropout=False,
    n_fc_layers=1,
    fc_droprate=0.5,
    use_init_batchnorm=True,
)
# move the model to the project-configured device
model = model.to(DEVICE)

for domain, n_class in (("source", n_class_source), ("target", n_class_target)):
    logger.info(f"Number of {domain} classes: {n_class}")

2021-09-29 00:29:52,144 [INFO ] 2236640326 <module>   Number of source classes: 16
2021-09-29 00:29:52,145 [INFO ] 2236640326 <module>   Number of target classes: 6


## train

In [8]:
# arguments that name the saved outputs and the saving policy
saving_options = dict(
    weights_save_name="param/demo/mat_motionsense",
    only_save_best_of_best=True,
    save_before_early_stop=False,
    curve_save_name=None,
)

# arguments that configure the fitting loop
fit_options = dict(
    learning_rate=1e-3,
    weight_decay=0.,
    batch_size=32,
    max_epoch=100,
    class_weight=None,
    patience=10,
)

# train on the combined source + target train set, validate on the held-out target set
mat_train(
    model,
    train_set=train_set,
    valid_set=valid_set,
    **saving_options,
    **fit_options,
)