# Data augmentation

The method used here is based on the following paper:

Germain Forestier et al. "Generating synthetic time series to augment sparse datasets". ICDM 2017.

In [1]:
import sys, os, inspect
# Make the sibling project folders importable from this notebook.
#
# The notebook's directory is taken from the kernel's working directory rather
# than from inspect.getfile(inspect.currentframe()): inside a notebook that call
# returns the kernel's synthetic cell filename, which on newer ipykernel
# versions appears to live under a temporary directory (TODO confirm for the
# version in use), so its parent would not be the project root.
# NOTE(review): assumes the kernel was started in the notebook's own directory,
# which is the Jupyter default.
current_dir = os.path.abspath(os.getcwd())
parent_dir = os.path.dirname(current_dir)
# Each insert goes to position 0, so the last path inserted is searched first.
sys.path.insert(0, parent_dir)
sys.path.insert(0, os.path.join(parent_dir,'spring-break'))
sys.path.insert(0, os.path.join(parent_dir,'Linear Classifier'))

In [18]:
import numpy as np
import random
import copy
import math
from tqdm import tqdm
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from martins.complex_transformer import ComplexTransformer
from FNNLinear import FNNLinear
from FNNSeparated import FNNSeparated
from GAN import Generator, Discriminator
from data_utils import *
import argparse
import logging
import logging.handlers
import pickle
from centerloss import CenterLoss
from DataSetLoader import JoinDataset, SingleDataset
from torch.autograd import Variable
from binaryloss import BinaryLoss

from tslearn.barycenters import dtw_barycenter_averaging
from tslearn.metrics import dtw
from tslearn.neighbors import KNeighborsTimeSeriesClassifier, KNeighborsTimeSeries

In [4]:
# local only
class local_args:
    """Plain attribute container used in place of parsed command-line args.

    Every keyword argument given to the constructor becomes an instance
    attribute with the same name, e.g. ``local_args(task='3E').task == '3E'``.
    """

    def __init__(self, **entries):
        # Copy the keyword arguments straight into the instance namespace.
        vars(self).update(entries)
        
# Settings for a local run, exposed as attributes of `args`.
# In this part of the notebook only data_path, task, batch_size and the two
# *_lbl_percentage entries are read.
#
# The original literal listed 'seed' twice (None, then 0). In a dict literal the
# last value wins, so the effective value was already 0; the dead None entry is
# dropped and 0 is kept at the key's original position.
args = local_args(**{
    'data_path': '../data_unzip',
    'task': '3E',
    'num_class': 50,
    'batch_size': 100,
    'num_per_class': -1,
    'gap': 5,
    'lbl_percentage': 0.7,
    'lr_gan': 1e-4,
    'lr_FNN': 1e-4,
    'lr_encoder': 1e-4,
    'epochs': 2,
    'clip_value': 0.01,
    'n_critic': 4,
    'sclass': 0.7,
    'scent': 1e-2,
    'seed': 0,
    'save_path': '../train_related',
    'model_save_period': 1,
    'lr_centerloss': 1e-3,
    'lr_prototype': 1e-3,
    'sprototype': 1e-2,
    'select_pretrain_epoch': 77,
    'epoch_begin_prototype': 0,
    'sbinary_loss': 1,
    'gpu_num': 0,
    'source_lbl_percentage': 0.7,
    'target_lbl_percentage': 0.7
})

In [5]:
# Keyword arguments shared by every DataLoader built in this cell.
_loader_kwargs = dict(batch_size=args.batch_size, pin_memory=True, num_workers=4)

# --- Target domain -----------------------------------------------------------
# Task name and labeled fraction are common to all four target file names.
_target_tag = (args.task, args.target_lbl_percentage)
labeled_target_x_filename = '/processed_file_not_one_hot_%s_%1.1f_target_known_label_x.npy' % _target_tag
labeled_target_y_filename = '/processed_file_not_one_hot_%s_%1.1f_target_known_label_y.npy' % _target_tag
unlabeled_target_x_filename = '/processed_file_not_one_hot_%s_%1.1f_target_unknown_label_x.npy' % _target_tag
unlabeled_target_y_filename = '/processed_file_not_one_hot_%s_%1.1f_target_unknown_label_y.npy' % _target_tag

# The file names begin with '/', so concatenating them to data_path gives the
# full path. Files are read in the order listed.
labeled_target_x, labeled_target_y, unlabeled_target_x, unlabeled_target_y = (
    np.load(args.data_path + _name)
    for _name in (
        labeled_target_x_filename,
        labeled_target_y_filename,
        unlabeled_target_x_filename,
        unlabeled_target_y_filename,
    )
)

# NOTE: the "unlabled_*" spelling is kept because other cells may use the name.
labeled_target_dataset = SingleDataset(labeled_target_x, labeled_target_y)
unlabled_target_dataset = SingleDataset(unlabeled_target_x, unlabeled_target_y)
# Only the labeled split is shuffled; the unlabeled split keeps file order.
labeled_target_dataloader = DataLoader(labeled_target_dataset, shuffle=True, **_loader_kwargs)
unlabeled_target_dataloader = DataLoader(unlabled_target_dataset, shuffle=False, **_loader_kwargs)

# --- Source domain -----------------------------------------------------------
# Same layout as the target domain, keyed by the source labeled fraction.
_source_tag = (args.task, args.source_lbl_percentage)
labeled_source_x_filename = '/processed_file_not_one_hot_%s_%1.1f_source_known_label_x.npy' % _source_tag
labeled_source_y_filename = '/processed_file_not_one_hot_%s_%1.1f_source_known_label_y.npy' % _source_tag
unlabeled_source_x_filename = '/processed_file_not_one_hot_%s_%1.1f_source_unknown_label_x.npy' % _source_tag
unlabeled_source_y_filename = '/processed_file_not_one_hot_%s_%1.1f_source_unknown_label_y.npy' % _source_tag

labeled_source_x, labeled_source_y, unlabeled_source_x, unlabeled_source_y = (
    np.load(args.data_path + _name)
    for _name in (
        labeled_source_x_filename,
        labeled_source_y_filename,
        unlabeled_source_x_filename,
        unlabeled_source_y_filename,
    )
)

labeled_source_dataset = SingleDataset(labeled_source_x, labeled_source_y)
unlabled_source_dataset = SingleDataset(unlabeled_source_x, unlabeled_source_y)
labeled_source_dataloader = DataLoader(labeled_source_dataset, shuffle=True, **_loader_kwargs)
unlabeled_source_dataloader = DataLoader(unlabled_source_dataset, shuffle=False, **_loader_kwargs)

In [6]:
# Shape check for the labeled source inputs loaded above.
labeled_source_x.shape

(9880, 1600, 2)

In [7]:
# Shape check for the labeled source labels; the length should match the inputs.
labeled_source_y.shape

(9880,)

In [9]:
# Shape check for the labeled target inputs loaded above.
labeled_target_x.shape

(4940, 1600, 2)

In [10]:
# Shape check for the labeled target labels; the length should match the inputs.
labeled_target_y.shape

(4940,)

# ASD (Average Selected with Distance) for labeled source data

In [13]:
# DTW barycenter (DBA) of the labeled source series at indices 1 through 9,
# computed with tslearn's default settings and no weights.
dtw_barycenter_averaging(labeled_source_x[1:10])

array([[ 0.01459876,  0.0234728 ],
       [-0.01661602,  0.06393354],
       [-0.04156876, -0.00610017],
       ...,
       [ 0.15926704,  0.07266202],
       [-0.07154952,  0.18946992],
       [-0.06384353, -0.08018603]])

In [16]:
# DTW distance between labeled source series 0 and series 2.
dtw(labeled_source_x[0], labeled_source_x[2])

14.821511658608665

In [22]:
# Nearest neighbor: brute-force search among labeled source series 1..9 for the
# one with the smallest DTW distance to series 0.
query_series = labeled_source_x[0]  # loop-invariant, so looked up once
# float('inf') replaces the arbitrary large integer sentinel: any real distance
# compares smaller, whatever the scale of the data.
nearest_dist = float('inf')
nearest_ind = 0
for i in range(1, 10):
    d = dtw(query_series, labeled_source_x[i])
    # Strict '<' keeps the first candidate when two distances tie.
    if d < nearest_dist:
        nearest_ind = i
        nearest_dist = d
print(nearest_dist)
print(nearest_ind)

6.671230818715468
4


In [59]:
# ASD weighting for class 1: pick a random reference series t_star from the
# class, then weight every series in the class by its DTW distance to t_star,
#     weight_i = exp(ln(0.5) * DTW(series_i, t_star) / dnn),
# where dnn is the distance from t_star to its nearest neighbor in the class.
# t_star therefore gets weight 1 and its nearest neighbor gets weight 0.5.

class_ind = np.where(labeled_source_y == 1)
class_x = labeled_source_x[class_ind]

# Randomly choose one series from the same class.
# np.random.choice(..., 1) returns a length-1 array; it is kept in that form for
# any later cell that uses it, and a plain int copy is used for comparisons.
t_star_ind = np.random.choice(class_x.shape[0], 1)
t_star_pos = int(t_star_ind[0])
t_star = class_x[t_star_ind,][0]

# Compute the DTW distance of every class member to t_star, tracking the
# nearest neighbor of t_star (excluding t_star itself).
dtw_class_t = np.empty((class_x.shape[0],))
dnn = float('inf')
dnn_ind = float('inf')  # stays inf when the class has no member other than t_star
for i in tqdm(range(class_x.shape[0])):
    dist = dtw(class_x[i], t_star)
    dtw_class_t[i] = dist
    # Compare against the int position, not the length-1 index array.
    if i != t_star_pos and dist < dnn:
        dnn = dist
        dnn_ind = i

if dnn > 0:
    # Also covers dnn == inf (single-member class): every ratio is 0, weight 1.
    weight = np.exp(np.log(0.5) * dtw_class_t / dnn)
else:
    # dnn == 0 means another series is at zero distance from t_star, so the
    # formula would divide by zero and produce NaN. Use its limit as dnn -> 0+:
    # weight 1 for zero-distance series, 0 for all others.
    weight = (dtw_class_t == 0).astype(float)

100%|██████████| 152/152 [00:09<00:00, 15.45it/s]

[ 8.2742165   6.34934706 17.71207496 13.85720654 12.86421968  7.51398101
 13.99496833 19.55985273 17.33340741  9.15442174 17.39403575 15.96675186
  9.31999309 16.18260714 19.21428661 16.92484852 17.17795166 15.50383399
 10.11755003 13.77903165 17.05949879  9.29324511 19.64773425  9.22102775
 15.00020535  8.43175672  8.89351571 13.34332414  8.62486632  8.79396818
  9.14873448  7.59889158 17.29450933 16.88324151 17.97545967 16.59560545
 17.244924   16.81956109 18.69009706 17.1530952  13.8422562  18.47938921
 17.09531338 17.81178189 19.02028469 11.74256487  9.04245024  9.01235184
  9.29247495  9.74614365  6.37005255 15.83027538 18.01183644  8.86286936
  0.          7.98438942 17.07296095  9.07944894  9.18311408 17.94914321
  8.51076187 18.74762616  8.43055972 19.56777982  6.4028544   8.66225559
 18.17711091  7.99152833 16.95694437 18.74314009 16.87868588 17.05193397
  4.9498377  18.82158493 17.66085899  9.91703893 10.9053642  17.78062151
 16.23127137 17.91677394 18.41923017  7.02923527 16




In [66]:
# `weight` is already computed by the previous cell from dtw_class_t and dnn
# (np.exp(np.log(0.5) * dtw_class_t / dnn)); display it instead of repeating
# the formula here.
weight

array([0.2853479 , 0.38200883, 0.06825841, 0.12243095, 0.14231529,
       0.32019525, 0.11990118, 0.05158599, 0.07229043, 0.24971109,
       0.0716292 , 0.08892764, 0.24352278, 0.08606543, 0.05435977,
       0.07690825, 0.07401388, 0.0953909 , 0.21579571, 0.12389017,
       0.07535464, 0.24451201, 0.05090345, 0.24720297, 0.10295721,
       0.27861537, 0.25978327, 0.13234757, 0.27057908, 0.26373246,
       0.24992642, 0.31610102, 0.07271787, 0.07739477, 0.06558728,
       0.08084336, 0.07326641, 0.07814536, 0.05885465, 0.07429324,
       0.12270868, 0.06076451, 0.07494672, 0.06723466, 0.05598184,
       0.16868681, 0.25398497, 0.25514623, 0.24454055, 0.22829138,
       0.38081191, 0.09078622, 0.06522668, 0.26099271, 1.        ,
       0.29816157, 0.07520105, 0.25256472, 0.24862754, 0.0658494 ,
       0.2752991 , 0.05834372, 0.27866592, 0.05152405, 0.37892341,
       0.26905011, 0.0636131 , 0.29783914, 0.07653504, 0.0583834 ,
       0.07744822, 0.07544109, 0.47227165, 0.05769338, 0.06879

In [73]:
# Weighted DTW barycenter of the class-1 series, using `weight` as the
# per-series weights. verbose=True makes tslearn print the cost after each
# DBA iteration.
dba_avg_t_star = dtw_barycenter_averaging(class_x, weights=weight, verbose=True)
dba_avg_t_star

Attempt 1
[DBA] epoch 1, cost: 196.731
[DBA] epoch 2, cost: 120.742
[DBA] epoch 3, cost: 109.526
[DBA] epoch 4, cost: 106.774
[DBA] epoch 5, cost: 105.366
[DBA] epoch 6, cost: 104.431
[DBA] epoch 7, cost: 103.758
[DBA] epoch 8, cost: 103.260
[DBA] epoch 9, cost: 102.917
[DBA] epoch 10, cost: 102.635
[DBA] epoch 11, cost: 102.422
[DBA] epoch 12, cost: 102.252
[DBA] epoch 13, cost: 102.094
[DBA] epoch 14, cost: 101.941
[DBA] epoch 15, cost: 101.818
[DBA] epoch 16, cost: 101.730
[DBA] epoch 17, cost: 101.640
[DBA] epoch 18, cost: 101.563
[DBA] epoch 19, cost: 101.503
[DBA] epoch 20, cost: 101.449
[DBA] epoch 21, cost: 101.409
[DBA] epoch 22, cost: 101.374
[DBA] epoch 23, cost: 101.346
[DBA] epoch 24, cost: 101.316
[DBA] epoch 25, cost: 101.287
[DBA] epoch 26, cost: 101.260
[DBA] epoch 27, cost: 101.229
[DBA] epoch 28, cost: 101.206
[DBA] epoch 29, cost: 101.189
[DBA] epoch 30, cost: 101.172


array([[ 0.05269497, -0.00605071],
       [-0.07568017, -0.08923488],
       [-0.06598281,  0.07270323],
       ...,
       [-0.09785316, -0.12348897],
       [-0.00418212,  0.18491132],
       [ 0.00954557, -0.0525043 ]])

In [71]:
# Shape check for the weighted barycenter computed above.
dba_avg_t_star.shape

(1600, 2)