In [1]:
import os
# NOTE: this is a relative chdir, so it is NOT idempotent: every re-run of this
# cell moves the kernel one more directory up. Run it exactly once per kernel
# session (Restart Kernel -> Run All is safe).
# Presumably this is what makes the `MAT` package importable below — confirm.
os.chdir('../')
import numpy as np
import torch as tr
import torch.utils.data as tr_data
from MAT import DEVICE
from MAT.augment import numpy_augment
from MAT.dataset.data_generator import DatasetWindowsFusion
from MAT.models.model_tcn_classifier import TCN
from MAT.common.train import train
from MAT import logger

## hyper-parameters

In [2]:
# Divisor (not a fraction): the first random subset holds
# len(data) // train_data_fraction windows, and the later "even selection"
# step picks up to the same number again from the remaining windows.
train_data_fraction = 20

## load data and label, randomly select the first subset

In [3]:
# data, array shape: [num window, window length, channel]
data = np.load("../Dataset/MobiActV2/data_new.npy")
# label, array shape: [num window,] (column 1 of the stored label table)
label = np.load("../Dataset/MobiActV2/p_lb_new.npy")[:, 1]

# The validation set is a full copy of the dataset (it includes the windows
# that end up in the train subset).
valid_data = np.copy(data)
valid_label = np.copy(label)

n_classes = len(np.unique(label))
logger.info(f'num class: {n_classes}')

# Size of the first (random) subset. It must be derived from `data`:
# the original code read len(random_index) before random_index existed,
# which raises NameError on a fresh kernel.
half_subset_size = len(data) // train_data_fraction
assert 0 < half_subset_size <= len(data), \
    f"train_data_fraction={train_data_fraction} gives an empty or oversized subset"

# One shuffled permutation of all window indices, split into two disjoint parts:
#   random_index    -> the windows used for training here
#   remaining_index -> everything else (used later for prediction / selection)
# NOTE: the permutation is unseeded, so the split differs between runs.
shuffled_index = np.random.permutation(len(data))
random_index = shuffled_index[:half_subset_size]
remaining_index = shuffled_index[half_subset_size:]

# keep only a subset as train set (fancy indexing returns a copy)
train_data = data[random_index]
train_label = label[random_index]

# Free the originals; the full arrays stay reachable as valid_data / valid_label.
del data
del label

logger.info(f"train data: {train_data.shape}, {train_data.dtype}")
logger.info(f"train label: {train_label.shape}, {train_label.dtype}")

logger.info(f"valid data: {valid_data.shape}, {valid_data.dtype}")
logger.info(f"valid label: {valid_label.shape}, {valid_label.dtype}")

2021-10-03 17:16:29,386 [INFO ] 2573394594 <module>   num class: 16
2021-10-03 17:16:29,431 [INFO ] 2573394594 <module>   train data: (5582, 300, 6), float32
2021-10-03 17:16:29,432 [INFO ] 2573394594 <module>   train label: (5582,), int64
2021-10-03 17:16:29,432 [INFO ] 2573394594 <module>   valid data: (111649, 300, 6), float32
2021-10-03 17:16:29,433 [INFO ] 2573394594 <module>   valid label: (111649,), int64


## init dataset loader with augmentation

In [4]:
# Build the Dataset objects. Each window array is split along the channel axis
# into a list of 2 modalities: the first three channels and the rest.
rotate_augmenter = numpy_augment.Rotate(
    input_shape=[300, 3],
    rotate_x_range=[0., 20.],
    rotate_y_range=[0., 20.],
    rotate_z_range=[0., 20.],
)
train_modalities = [train_data[:, :, :3], train_data[:, :, 3:]]
# Only the train set is given an augmenter.
train_set = DatasetWindowsFusion(
    train_modalities,
    train_label,
    augment_rate=0.5,
    augmenter=rotate_augmenter,
)

valid_modalities = [valid_data[:, :, :3], valid_data[:, :, 3:]]
valid_set = DatasetWindowsFusion(valid_modalities, valid_label)

for set_name, dataset in (("train set", train_set), ("valid set", valid_set)):
    logger.info(f"{set_name}: {len(dataset)}")

2021-10-03 17:16:29,439 [INFO ] 511878823  <module>   train set: 5582
2021-10-03 17:16:29,440 [INFO ] 511878823  <module>   valid set: 111649


## init model

In [5]:
# Hyper-parameters of the TCN classifier, collected in one place so they can
# be inspected or logged before the model is built.
tcn_config = dict(
    n_classes=n_classes,
    how_flatten="spatial attention gap",
    # six 64-channel entries followed by two 128-channel entries
    n_tcn_channels=(64,) * 6 + (128,) * 2,
    tcn_kernel_size=2,
    dilation_base=2,
    tcn_droprate=0.2,
    use_spatial_dropout=False,
    n_fc_layers=1,
    fc_droprate=0.5,
    use_init_batchnorm=True,
)

# Instantiate and move the model to the project-wide DEVICE.
model = TCN(**tcn_config)
model = model.to(DEVICE)

## train first sub-set

In [6]:
# Options forwarded verbatim to MAT.common.train.train; their exact semantics
# live in that module (NOTE(review): `patience` is presumably the early-stop
# patience in epochs — confirm).
train_options = {
    "train_set": train_set,
    "valid_set": valid_set,
    "weights_save_name": "source_subset_weight",
    "only_save_best_of_best": True,
    "save_before_early_stop": False,
    "curve_save_name": None,
    "learning_rate": 1e-3,
    "weight_decay": 0.,
    "batch_size": 32,
    "max_epoch": 100,
    "class_weight": None,
    "patience": 10,
}

# Train the model on the first random subset, validating on the full dataset.
train(model, **train_options)

test


## make prediction on the remaining data

In [None]:
# prepare data
# `data` / `label` are deleted right after the train split, so index the
# full-size copies kept as `valid_data` / `valid_label` (same rows, same order).
remaining_dataset = DatasetWindowsFusion(
    [valid_data[remaining_index, :, :3], valid_data[remaining_index, :, 3:]],
    valid_label[remaining_index]
)
logger.info(f"remaining data: {len(remaining_dataset)}")
# shuffle=False keeps the predictions aligned row-for-row with `remaining_index`.
remaining_data_loader = tr_data.DataLoader(remaining_dataset, batch_size=32, shuffle=False)


def _to_device(batch):
    """Move a tensor, or every tensor in a list/tuple of modalities, to DEVICE."""
    if isinstance(batch, (list, tuple)):
        return [_to_device(item) for item in batch]
    return batch.to(DEVICE)


# predict
model = model.eval()
batch_scores = []
with tr.no_grad():
    for batch_data, _batch_label in remaining_data_loader:
        # The original moved the (deleted) global `data` instead of the batch.
        # NOTE(review): assumes the model accepts the batch in the structure
        # the dataset yields (single tensor or list of modalities) — confirm.
        # y_pred shape [num window, num class]
        y_pred = model(_to_device(batch_data))
        # Move each batch off the device right away so scores do not pile up there.
        batch_scores.append(y_pred.cpu())

scores = tr.cat(batch_scores).numpy()
logger.info(f"prediction: {scores.shape}")

## even selection: sort the remaining windows by predicted class, then pick evenly spaced ones

In [None]:
# Predicted class per remaining window. New names are used instead of
# overwriting `scores` / `remaining_index`, so this cell can be re-run safely.
predicted_class = scores.argmax(axis=1)
# Stable sort: windows with the same predicted class keep their relative order.
class_order = predicted_class.argsort(kind="stable")
remaining_sorted_by_class = remaining_index[class_order]

n_remaining = len(remaining_sorted_by_class)
if n_remaining == 0:
    raise ValueError("no remaining windows to select from")

# Evenly spaced positions over the class-sorted windows.
# np.linspace includes `stop` by default, so the last valid position is
# n_remaining - 1; using n_remaining (as before) indexes out of bounds.
# `num` is capped so that no window can be picked twice.
even_index = np.linspace(
    start=0,
    stop=n_remaining - 1,
    num=min(half_subset_size, n_remaining),
    dtype=int
)
selected_index = remaining_sorted_by_class[even_index]

# Final subset = first random subset + evenly selected windows.
final_subset_index = np.concatenate([random_index, selected_index])
np.save("mobiactv2_r2_pick_index_one-tenth.npy", final_subset_index)