In [None]:

import os
import copy
import argparse
import pathlib
import torch
import numpy as np
from scipy import signal
import torch.nn as nn
import scipy.spatial
from collections import OrderedDict
import time
import torch.nn.functional as F
import configparser
from scipy.interpolate import CubicSpline
from google.colab import drive
from random import shuffle
import math


# from engine.util import parse_config
# from engine.data_io import load_ucr_pretrain as load_dataset
# from engine.model import get_model
# from engine.train_test import nn_pretrain

In [None]:
# Pick the compute backend in order of preference: CUDA GPU, Apple MPS, CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Mount Google Drive (forcing a remount) and remember the project folder on it.
drive.mount('/mnt/drive', force_remount=True)
drive_path = '/mnt/drive/MyDrive/STAT940'

Mounted at /mnt/drive


# Data Stuff

### Loading Dataset

In [None]:
def _relabel(label):
    label_set = np.unique(label)
    n_class = len(label_set)

    label_re = np.zeros(label.shape[0], dtype=int)
    for i, label_i in enumerate(label_set):
        label_re[label == label_i] = i
    return label_re, n_class


def _normalize_dataset(data):
    data_mu = np.mean(data, axis=2, keepdims=True)
    data_sigma = np.std(data, axis=2, keepdims=True)
    data_sigma[data_sigma <= 0] = 1
    data = (data - data_mu) / data_sigma
    return data

def get_ucr_data_names():
    """Return the dataset names used for pretraining.

    Returns:
        list[str]: a fresh list of 128 dataset folder names, in a fixed
        order (callers may mutate it freely).
    """
    return [
        'Adiac', 'ArrowHead', 'Beef', 'BeetleFly', 'BirdChicken', 'Car',
        'CBF', 'ChlorineConcentration', 'CinCECGTorso', 'Coffee',
        'Computers', 'CricketX', 'CricketY', 'CricketZ',
        'DiatomSizeReduction', 'DistalPhalanxOutlineAgeGroup',
        'DistalPhalanxOutlineCorrect', 'DistalPhalanxTW', 'Earthquakes',
        'ECG200', 'ECG5000', 'ECGFiveDays', 'ElectricDevices', 'FaceAll',
        'FaceFour', 'FacesUCR', 'FiftyWords', 'Fish', 'FordA', 'FordB',
        'GunPoint', 'Ham', 'HandOutlines', 'Haptics', 'Herring',
        'InlineSkate', 'InsectWingbeatSound', 'ItalyPowerDemand',
        'LargeKitchenAppliances', 'Lightning2', 'Lightning7', 'Mallat',
        'Meat', 'MedicalImages', 'MiddlePhalanxOutlineAgeGroup',
        'MiddlePhalanxOutlineCorrect', 'MiddlePhalanxTW', 'MoteStrain',
        'NonInvasiveFetalECGThorax1', 'NonInvasiveFetalECGThorax2',
        'OliveOil', 'OSULeaf', 'PhalangesOutlinesCorrect', 'Phoneme',
        'Plane', 'ProximalPhalanxOutlineAgeGroup',
        'ProximalPhalanxOutlineCorrect', 'ProximalPhalanxTW',
        'RefrigerationDevices', 'ScreenType', 'ShapeletSim', 'ShapesAll',
        'SmallKitchenAppliances', 'SonyAIBORobotSurface1',
        'SonyAIBORobotSurface2', 'StarLightCurves', 'Strawberry',
        'SwedishLeaf', 'Symbols', 'SyntheticControl', 'ToeSegmentation1',
        'ToeSegmentation2', 'Trace', 'TwoLeadECG', 'TwoPatterns',
        'UWaveGestureLibraryAll', 'UWaveGestureLibraryX',
        'UWaveGestureLibraryY', 'UWaveGestureLibraryZ', 'Wafer', 'Wine',
        'WordSynonyms', 'Worms', 'WormsTwoClass', 'Yoga',
        'ACSF1', 'AllGestureWiimoteX', 'AllGestureWiimoteY',
        'AllGestureWiimoteZ', 'BME', 'Chinatown', 'Crop', 'DodgerLoopDay',
        'DodgerLoopGame', 'DodgerLoopWeekend', 'EOGHorizontalSignal',
        'EOGVerticalSignal', 'EthanolLevel', 'FreezerRegularTrain',
        'FreezerSmallTrain', 'Fungi', 'GestureMidAirD1', 'GestureMidAirD2',
        'GestureMidAirD3', 'GesturePebbleZ1', 'GesturePebbleZ2',
        'GunPointAgeSpan', 'GunPointMaleVersusFemale',
        'GunPointOldVersusYoung', 'HouseTwenty', 'InsectEPGRegularTrain',
        'InsectEPGSmallTrain', 'MelbournePedestrian',
        'MixedShapesRegularTrain', 'MixedShapesSmallTrain',
        'PickupGestureWiimoteZ', 'PigAirwayPressure', 'PigArtPressure',
        'PigCVP', 'PLAID', 'PowerCons', 'Rock', 'SemgHandGenderCh2',
        'SemgHandMovementCh2', 'SemgHandSubjectCh2', 'ShakeGestureWiimoteZ',
        'SmoothSubspace', 'UMD',
    ]

def load_ucr_dataset(data_name, data_config):
    """Load one dataset from disk and split it into pretrain/train/valid/test.

    The ``<name>_TRAIN.tsv`` and ``<name>_TEST.tsv`` files are concatenated,
    shuffled with a fixed seed, optionally resampled to ``max_len`` and then
    split per class.

    Args:
        data_name: dataset folder/file prefix.
        data_config: mapping with a ``'data'`` section providing
            ``data_dir``, ``max_len``, ``seed``, ``pretrain_frac``,
            ``train_frac``, ``valid_frac``, ``test_frac``,
            ``is_same_length`` (the string ``'true'``/``'false'``) and
            optionally ``reduced_train``.

    Returns:
        dict with keys ``data_pretrain``, ``data_train``, ``data_valid``,
        ``data_test``, ``label_train``, ``label_valid``, ``label_test``
        (the non-pretrain entries are ``None`` when ``pretrain_frac`` is
        ~1.0), plus ``n_class``, ``n_dim`` and ``data_len`` (axes 1 and 2
        of ``data_pretrain``).
    """
    data_config = data_config['data']
    data_dir = data_config['data_dir']
    max_len = int(data_config['max_len'])
    seed = int(data_config['seed'])
    pretrain_frac = float(data_config['pretrain_frac'])
    train_frac = float(data_config['train_frac'])
    valid_frac = float(data_config['valid_frac'])
    test_frac = float(data_config['test_frac'])
    is_same_length = data_config['is_same_length']
    # Config values arrive as strings; anything other than 'true' is False.
    is_same_length = is_same_length.lower() == 'true'
    # assert pretrain_frac + train_frac + valid_frac + test_frac == 1.0

    # Optional multiplier applied to the per-class training slice (0 = off).
    reduced_train = 0
    if 'reduced_train' in data_config:
        reduced_train = float(data_config['reduced_train'])

    # Prefer the "adjusted" sub-directory; fall back to the plain per-dataset
    # layout when the adjusted TRAIN file does not exist.
    train_path = os.path.join(
        data_dir, 'Missing_value_and_variable_length_datasets_adjusted',
        '{0}', '{0}_TRAIN.tsv')
    test_path = os.path.join(
        data_dir, 'Missing_value_and_variable_length_datasets_adjusted',
        '{0}', '{0}_TEST.tsv')
    if not os.path.isfile(train_path.format(data_name)):
        train_path = os.path.join(data_dir, '{0}', '{0}_TRAIN.tsv')
        test_path = os.path.join(data_dir, '{0}', '{0}_TEST.tsv')

    train_path = train_path.format(data_name)
    test_path = test_path.format(data_name)
    # The original train/test partition is discarded: both files are pooled.
    data = np.concatenate(
        (np.loadtxt(train_path),
         np.loadtxt(test_path), ), axis=0)
    n_data = data.shape[0]
    data_len = data.shape[1] - 1  # first column holds the label

    # NOTE: this reseeds NumPy's *global* RNG, so the shuffle is reproducible
    # but later np.random draws elsewhere are affected too.
    np.random.seed(seed=seed)
    random_vec = np.random.permutation(n_data)
    data = data[random_vec, :]

    label = data[:, 0]
    label = label.astype(int)
    data = data[:, 1:]
    data = np.expand_dims(data, 1)  # -> (n_data, 1, data_len)

    # Resample along the time axis: always to max_len when is_same_length,
    # otherwise only when the series is longer than max_len.
    if is_same_length:
        if data_len != max_len:
            data = signal.resample(
                data, max_len, axis=2)
    else:
        if data_len > max_len:
            data = signal.resample(
                data, max_len, axis=2)

    label, n_class = _relabel(label)
    # pretrain_frac is only used for this check; otherwise the pretrain part
    # is whatever is left after the train/valid/test slices.
    if np.isclose(pretrain_frac, 1.0):
        data_pretrain = data
        data_train = None
        data_valid = None
        data_test = None
        label_train = None
        label_valid = None
        label_test = None
    else:
        data_pretrain = []
        data_train = []
        data_valid = []
        data_test = []
        label_train = []
        label_valid = []
        label_test = []
        # Split class by class so every split contains every class.
        for i in range(n_class):
            data_i = data[label == i, :, :]
            label_i = label[label == i]

            # Each of train/valid/test gets round(frac * n) samples, at
            # least one.
            n_data_i = label_i.shape[0]
            n_train_i = np.round(train_frac * n_data_i)
            n_train_i = int(n_train_i)
            n_train_i = max(n_train_i, 1)

            n_valid_i = np.round(valid_frac * n_data_i)
            n_valid_i = int(n_valid_i)
            n_valid_i = max(n_valid_i, 1)

            n_test_i = np.round(test_frac * n_data_i)
            n_test_i = int(n_test_i)
            n_test_i = max(n_test_i, 1)

            # Can be <= 0 for very small classes; the pretrain slice is then
            # empty.
            n_pretrain_i = n_data_i - n_train_i - n_valid_i - n_test_i

            # Consecutive, non-overlapping index ranges:
            # [train | valid | test | pretrain]
            train_start = 0
            train_end = n_train_i

            valid_start = train_end
            valid_end = valid_start + n_valid_i

            test_start = valid_end
            test_end = test_start + n_test_i

            pretrain_start = test_end
            pretrain_end = pretrain_start + n_pretrain_i

            # Shrink only the end of the training slice (min. one sample).
            # valid_start was fixed above, so the samples dropped from the
            # training range are not reassigned to any other split.
            if reduced_train > 0:
                train_end *= reduced_train
                train_end = np.round(train_end)
                if train_end < 1:
                    train_end = 1
                train_end = int(train_end)

            data_train.append(data_i[train_start:train_end, :, :])
            data_valid.append(data_i[valid_start:valid_end, :, :])
            data_test.append(data_i[test_start:test_end, :, :])
            data_pretrain.append(data_i[pretrain_start:pretrain_end, :, :])

            label_train.append(label_i[train_start:train_end])
            label_valid.append(label_i[valid_start:valid_end])
            label_test.append(label_i[test_start:test_end])

        # Stitch the per-class pieces back together (ordered by class id).
        data_train = np.concatenate(data_train, axis=0)
        data_valid = np.concatenate(data_valid, axis=0)
        data_test = np.concatenate(data_test, axis=0)
        data_pretrain = np.concatenate(data_pretrain, axis=0)
        label_train = np.concatenate(label_train, axis=0)
        label_valid = np.concatenate(label_valid, axis=0)
        label_test = np.concatenate(label_test, axis=0)

    dataset_ = {}
    dataset_['data_pretrain'] = data_pretrain
    dataset_['data_train'] = data_train
    dataset_['data_valid'] = data_valid
    dataset_['data_test'] = data_test
    dataset_['label_train'] = label_train
    dataset_['label_valid'] = label_valid
    dataset_['label_test'] = label_test
    dataset_['n_class'] = n_class
    dataset_['n_dim'] = data_pretrain.shape[1]
    dataset_['data_len'] = data_pretrain.shape[2]
    return dataset_


def load_dataset(data_config):
    """Build one pretraining array from every dataset in the name list.

    Each dataset's pretrain split is z-normalized, right-padded with zeros
    to the longest series length seen, and all of them are stacked along
    axis 0.

    Args:
        data_config: configuration mapping forwarded to ``load_ucr_dataset``.

    Returns:
        np.ndarray: the concatenated pretraining data.
    """
    normalized = []
    lengths = []
    for data_name in get_ucr_data_names():
        dataset = load_ucr_dataset(data_name, data_config)
        normalized.append(_normalize_dataset(dataset['data_pretrain']))
        lengths.append(dataset['data_len'])
    max_len = max([0] + lengths)

    padded = []
    for block in normalized:
        cur_len = block.shape[2]
        if cur_len < max_len:
            # Shorter series are zero-padded at the end of the time axis.
            canvas = np.zeros((block.shape[0], 1, max_len))
            canvas[:, :, :cur_len] = block
            block = canvas
        padded.append(block)
    return np.concatenate(padded, axis=0)


### Augmentation

In [None]:
def jittering(data, strength=0.1, seed=None):
    """Add independent Gaussian noise to every time step.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        strength: noise standard deviation as a multiple of each series'
            own standard deviation (used as-is for constant series).
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Noisy copy with the same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows, n_cols = batch.shape[0], batch.shape[1]

    row_std = np.std(batch, axis=1, keepdims=True)
    noise_std = strength * row_std
    # A constant series has zero std; use the raw strength instead.
    noise_std[row_std[:, 0] == 0, 0] = strength

    if seed is not None:
        np.random.seed(seed=seed)
    jittered = batch + np.random.randn(n_rows, n_cols) * noise_std

    return jittered[0, :] if single else jittered

def add_offset(data, strength=1, seed=None):
    """Shift each series by one random constant.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        strength: offset standard deviation as a multiple of each series'
            own standard deviation (used as-is for constant series).
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Shifted copy with the same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows = batch.shape[0]

    row_std = np.std(batch, axis=1, keepdims=True)
    offset_std = strength * row_std
    offset_std[row_std[:, 0] == 0, 0] = strength

    if seed is not None:
        np.random.seed(seed=seed)
    # One draw per series, broadcast over the whole time axis.
    shifted = batch + np.random.randn(n_rows, 1) * offset_std

    return shifted[0, :] if single else shifted

def add_slope(data, strength=1, seed=None):
    """Add a random linear trend plus a random constant offset.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        strength: scale of both the trend and the offset, as a multiple of
            each series' own standard deviation (used as-is for constant
            series).
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Augmented copy with the same shape as ``data``.
    """
    is_matrix = len(data.shape) == 2
    if not is_matrix:
        data = np.expand_dims(data, 0)

    n_data = data.shape[0]
    data_len = data.shape[1]
    sigma = np.std(data, axis=1, keepdims=True)

    sigma_scaling = strength * sigma
    sigma_scaling[sigma[:, 0] == 0, 0] = strength

    # Ramp 0, 1/L, ..., (L-1)/L. It is built from an integer arange because
    # np.arange with a fractional step does not guarantee the number of
    # elements, which could leave the ramp one element too long and make the
    # addition below fail to broadcast.
    slope = np.arange(data_len) / data_len

    if seed is not None:
        np.random.seed(seed=seed)
    # First draw: per-series slope; second draw: per-series offset.
    noise = np.random.randn(n_data, 1) * sigma_scaling * slope
    noise = noise + np.random.randn(n_data, 1) * sigma_scaling
    data_aug = data + noise

    if not is_matrix:
        data_aug = data_aug[0, :]
    return data_aug

def add_spike(data, strength=3, seed=None):
    """Add a single random spike to each series.

    One time step per series is chosen uniformly at random and a Gaussian
    value is added to it.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        strength: spike standard deviation as a multiple of each series'
            own standard deviation (used as-is for constant series).
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Augmented copy with the same shape as ``data``.
    """
    is_matrix = len(data.shape) == 2
    if not is_matrix:
        data = np.expand_dims(data, 0)

    n_data = data.shape[0]
    data_len = data.shape[1]
    sigma = np.std(data, axis=1, keepdims=True)

    sigma_scaling = strength * sigma
    sigma_scaling[sigma[:, 0] == 0, 0] = strength

    if seed is not None:
        np.random.seed(seed=seed)
    location = np.random.randint(data_len, size=n_data)
    # Use the flat (N,) scale: multiplying an (N,) draw by the (N, 1) column
    # would broadcast to an (N, N) matrix, and adding a row of that matrix
    # to a single element fails for batches with more than one series.
    noise = np.random.randn(n_data) * sigma_scaling[:, 0]

    data_aug = copy.deepcopy(data)
    # (row i, column location[i]) pairs are unique, so a fancy-index += is
    # equivalent to the per-row loop.
    data_aug[np.arange(n_data), location] += noise

    if not is_matrix:
        data_aug = data_aug[0, :]
    return data_aug

def add_step(data, min_ratio=0.1, strength=1, seed=None):
    """Raise a random contiguous segment of each series by a constant.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        min_ratio: lower bound of the segment length as a fraction of ``L``.
        strength: step height as a multiple of each series' own standard
            deviation (used as-is for constant series).
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Augmented copy with the same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows, n_cols = batch.shape[0], batch.shape[1]

    row_std = np.std(batch, axis=1, keepdims=True)
    step_height = strength * row_std
    step_height[row_std[:, 0] == 0, 0] = strength

    if seed is not None:
        np.random.seed(seed=seed)
    # Draw order matters for reproducibility: lengths first, then starts.
    seg_len = (np.random.rand(n_rows) * (1 - min_ratio) + min_ratio) * n_cols
    seg_start = np.random.rand(n_rows) * (n_cols - seg_len)
    seg_end = seg_start + seg_len

    lo = _cleanup(seg_start, 0, n_cols)
    hi = _cleanup(seg_end, 0, n_cols)

    stepped = copy.deepcopy(batch)
    for row, (first, last) in enumerate(zip(lo, hi)):
        stepped[row, first:last] += step_height[row]

    return stepped[0, :] if single else stepped

def cropping(data, min_ratio=0.1, seed=None):
    """Cut a random contiguous segment and stretch it back to full length.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        min_ratio: lower bound of the segment length as a fraction of ``L``.
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Float array with the same shape as ``data``; each row is its cropped
        segment resampled to ``L`` points with ``scipy.signal.resample``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows, n_cols = batch.shape[0], batch.shape[1]

    if seed is not None:
        np.random.seed(seed=seed)
    # Draw order matters for reproducibility: lengths first, then starts.
    seg_len = (np.random.rand(n_rows) * (1 - min_ratio) + min_ratio) * n_cols
    seg_start = np.random.rand(n_rows) * (n_cols - seg_len)
    seg_end = seg_start + seg_len

    lo = _cleanup(seg_start, 0, n_cols)
    hi = _cleanup(seg_end, 0, n_cols)

    cropped = np.zeros((n_rows, n_cols))
    for row in range(n_rows):
        cropped[row, :] = signal.resample(batch[row, lo[row]:hi[row]], n_cols)

    return cropped[0, :] if single else cropped

def flipping(data):
    """Mirror each series around its own mean (vertical flip).

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.

    Returns:
        ``2 * mean - data`` computed per series, same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data

    row_mean = np.mean(batch, axis=1, keepdims=True)
    mirrored = 2 * row_mean - batch

    return mirrored[0, :] if single else mirrored

def inverting(data):
    """Reverse each series along the time axis.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.

    Returns:
        Time-reversed copy with the same shape as ``data``; the input is
        left untouched.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data

    # Flip a private copy so the result never aliases the caller's array.
    reversed_batch = np.flip(copy.deepcopy(batch), axis=1)

    return reversed_batch[0, :] if single else reversed_batch

def _apply_warping(data, n_knot, strength, seed):
    np.random.seed(seed=seed)
    data_len = data.shape[0]
    knot_step = data_len / (n_knot - 1)
    # knot_t = np.arange(knot_step, data_len, knot_step)
    knot_t = np.arange(0, data_len + knot_step, knot_step)
    knot_mag = np.random.randn(n_knot) * strength + 1
    data_aug_t = np.arange(data_len)

    if knot_t.shape[0] != knot_mag.shape[0]:
        knot_t = knot_t[:knot_mag.shape[0]]
    # data_aug = data + CubicSpline(knot_t, knot_mag)(data_aug_t)
    data_aug = data * CubicSpline(knot_t, knot_mag)(data_aug_t)
    return data_aug


def mag_warping(data, strength=1, seed=None):
    """Magnitude-warp each series with its own smooth random envelope.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        strength: knot-magnitude spread as a multiple of each series' own
            standard deviation (used as-is for constant series).
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Float array with the same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows, n_cols = batch.shape[0], batch.shape[1]

    row_std = np.std(batch, axis=1, keepdims=False)
    knot_strength = strength * row_std
    knot_strength[row_std == 0] = strength

    if seed is not None:
        np.random.seed(seed=seed)
    # Per-series knot count and a per-series seed for the helper, which
    # reseeds the global RNG itself.
    knot_counts = np.random.randint(3, high=n_cols, size=n_rows)
    row_seeds = np.random.randint(2 ** 32 - 1, size=n_rows)

    warped = np.zeros((n_rows, n_cols))
    for row in range(n_rows):
        warped[row, :] = _apply_warping(
            batch[row, :], knot_counts[row], knot_strength[row],
            row_seeds[row])

    return warped[0, :] if single else warped

def masking(data, max_ratio=0.5, seed=None):
    """Overwrite a random contiguous segment of each series with its mean.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        max_ratio: upper bound of the segment length as a fraction of ``L``.
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Masked copy with the same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows, n_cols = batch.shape[0], batch.shape[1]
    row_mean = np.mean(batch, axis=1, keepdims=False)

    if seed is not None:
        np.random.seed(seed=seed)
    # Draw order matters for reproducibility: lengths first, then starts.
    seg_len = np.random.rand(n_rows) * max_ratio * n_cols
    seg_start = np.random.rand(n_rows) * (n_cols - seg_len)
    seg_end = seg_start + seg_len

    lo = _cleanup(seg_start, 0, n_cols)
    hi = _cleanup(seg_end, 0, n_cols)

    masked = copy.deepcopy(batch)
    for row, (first, last) in enumerate(zip(lo, hi)):
        masked[row, first:last] = row_mean[row]

    return masked[0, :] if single else masked

def _cleanup(data, value_min, value_max):
    data = np.round(data)
    data[data < value_min] = value_min
    data[data > value_max] = value_max
    data = data.astype(int)
    return data

def scaling(data, strength=1, seed=None):
    """Stretch or shrink each series around its own mean.

    Every series is centered, multiplied by one random factor drawn from
    ``N(1, strength * std)`` and shifted back to its original mean.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        strength: spread of the factor as a multiple of each series' own
            standard deviation (used as-is for constant series).
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Rescaled copy with the same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows = batch.shape[0]

    row_std = np.std(batch, axis=1, keepdims=True)
    row_mean = np.mean(batch, axis=1, keepdims=True)

    factor_std = strength * row_std
    factor_std[row_std[:, 0] == 0, 0] = strength

    if seed is not None:
        np.random.seed(seed=seed)
    factor = np.random.randn(n_rows, 1) * factor_std + 1
    rescaled = (batch - row_mean) * factor + row_mean

    return rescaled[0, :] if single else rescaled

def shifting(data, seed=None):
    """Circularly shift each series by a random number of steps.

    The shift is a rounded draw from ``N(0, L)`` and is applied with
    ``np.roll``, so values wrap around.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Shifted copy with the same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows, n_cols = batch.shape[0], batch.shape[1]

    if seed is not None:
        np.random.seed(seed=seed)
    offsets = np.round(np.random.randn(n_rows) * n_cols).astype(int)

    shifted = copy.deepcopy(batch)
    for row, offset in enumerate(offsets):
        shifted[row, :] = np.roll(shifted[row, :], offset)

    return shifted[0, :] if single else shifted

def smoothing(data, max_ratio=0.5, min_ratio=0.01, seed=None):
    """Smooth each series by down-sampling and up-sampling it again.

    A ratio is drawn uniformly from ``[min_ratio, max_ratio)`` per series;
    the series is resampled to ``ceil(L * ratio)`` points and then back to
    ``L`` points with ``scipy.signal.resample``.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        max_ratio: upper bound of the down-sampling ratio.
        min_ratio: lower bound of the down-sampling ratio.
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Float array with the same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows, n_cols = batch.shape[0], batch.shape[1]

    if seed is not None:
        np.random.seed(seed=seed)
    keep_ratio = np.random.rand(n_rows) * (max_ratio - min_ratio) + min_ratio
    coarse_len = np.ceil(n_cols * keep_ratio).astype(int)

    smoothed = np.zeros((n_rows, n_cols))
    for row in range(n_rows):
        coarse = signal.resample(batch[row, :], coarse_len[row])
        smoothed[row, :] = signal.resample(coarse, n_cols)

    return smoothed[0, :] if single else smoothed

def time_warping(data, min_ratio=0.5, seed=None):
    """Warp time by dropping random samples and resampling to full length.

    For each series a random subset of time indices (between ``min_ratio``
    and all of them) is kept in chronological order, then stretched back to
    ``L`` points with ``scipy.signal.resample``.

    Args:
        data: a single series ``(L,)`` or a batch ``(N, L)``.
        min_ratio: smallest fraction of samples that may be kept.
        seed: if given, NumPy's global RNG is reseeded with it first.

    Returns:
        Float array with the same shape as ``data``.
    """
    single = len(data.shape) != 2
    batch = np.expand_dims(data, 0) if single else data
    n_rows, n_cols = batch.shape[0], batch.shape[1]

    if seed is not None:
        np.random.seed(seed=seed)
    keep_ratio = (1 - min_ratio) * np.random.rand(n_rows) + min_ratio
    keep_count = np.minimum(np.round(n_cols * keep_ratio), n_cols).astype(int)

    warped = np.zeros((n_rows, n_cols))
    for row in range(n_rows):
        # Random subset of indices, restored to chronological order.
        kept = np.sort(np.random.permutation(n_cols)[:keep_count[row]])
        warped[row, :] = signal.resample(batch[row, kept], n_cols)

    return warped[0, :] if single else warped

def _normalize_t(t_, normalize):
    if not torch.is_tensor(t_):
        t_ = torch.from_numpy(t_)
    if len(t_.size()) == 1:
        t_ = torch.unsqueeze(t_, 0)
    if len(t_.size()) == 2:
        t_ = torch.unsqueeze(t_, 1)
    if normalize:
        t_mu = np.mean(t_, axis=2, keepdims=True)
        t_sigma = np.std(t_, axis=2, keepdims=True)
        t_sigma[t_sigma <= 0] = 1.0
        t_ = (t_ - t_mu) / t_sigma
    return t_

# Model Stuff

### Classifier

In [None]:
class Classifier(nn.Module):
    """Encoder followed by a small MLP classification head.

    Args:
        encoder (Module): object exposing ``out_dim`` and
            ``encode(ts, normalize=..., to_numpy=...)``.
        n_class (int): number of output logits.
        n_dim (int, optional): width of the hidden layers. Default: 64.
        n_layer (int, optional): number of linear layers in the head
            (``n_layer - 1`` hidden Linear+ReLU pairs plus the output
            layer). Default: 2.
    """

    def __init__(self, encoder, n_class, n_dim=64, n_layer=2):
        super().__init__()
        self.encoder = encoder
        self.add_module('encoder', encoder)

        head = OrderedDict()
        width = self.encoder.out_dim
        for idx in range(n_layer - 1):
            head[f'linear_{idx:02d}'] = nn.Linear(width, n_dim)
            head[f'relu_{idx:02d}'] = nn.ReLU()
            width = n_dim
        head[f'linear_{n_layer - 1:02d}'] = nn.Linear(width, n_class)
        self.classifier = nn.Sequential(head)

    def forward(self, ts, normalize=True, to_numpy=False):
        """Return class logits for ``ts`` (as a NumPy array if ``to_numpy``)."""
        # The encoder must hand back a tensor so the head can consume it;
        # conversion to NumPy happens only on the final logits.
        features = self.encoder.encode(
            ts, normalize=normalize, to_numpy=False)
        scores = self.classifier(features)
        return scores.cpu().detach().numpy() if to_numpy else scores


def get_classifier(model_config, encoder):
    """Build a ``Classifier`` from the ``'classifier'`` config section.

    Args:
        model_config: mapping whose ``'classifier'`` section provides
            ``n_class``, ``n_dim`` and ``n_layer`` (parsed with ``int``).
        encoder: encoder passed through to ``Classifier``.

    Returns:
        Classifier: the constructed model.
    """
    section = model_config['classifier']
    n_class = int(section['n_class'])
    hidden_dim = int(section['n_dim'])
    depth = int(section['n_layer'])
    return Classifier(encoder, n_class, n_dim=hidden_dim, n_layer=depth)

### TimeFreq

In [None]:
class TimeFreqEncoder(nn.Module):
    def __init__(self, encoder,
                 jitter_strength=0.1, freq_ratio=0.1,
                 freq_strength=0.1, project_norm=None):
        r"""
        The TF-C model described in the paper 'Self-Supervised Contrastive
        Pre-Training For Time Series via Time-Frequency Consistency'. The
        implementation is based on the authors' github repository
        https://github.com/mims-harvard/TFC-pretraining

        Args:
            encoder (Module): The base encoder for both time/frequency-based
                contrastive learning. It is deep-copied twice, so the time
                and frequency branches do not share weights.
            jitter_strength (float, optional): the strength of jitter added
                when creating augmented time series in time domain.
            freq_ratio (float, optional): ratio of perturbed frequencies for
                frequency removal/amplification when creating augmented time
                series in frequency domain.
            freq_strength (float, optional): strength of frequency
                amplification when creating augmented time series in
                frequency domain.
            project_norm (string, optional): If set to ``BN``, the projector
                will use batch normalization. If set to ``LN``, the projector
                will use layer normalization. If set to None, the projector
                will not use normalization. Default: None (no normalization).

        Shape:
            - Input: :math:`(N, C_{in}, L_{in})`.
            - Output: :math:`(N, C_{out})`.
        """
        super(TimeFreqEncoder, self).__init__()
        assert project_norm in ['BN', 'LN', None]

        self.pretrain_name = 'timefreq'
        # Attribute assignment registers the sub-modules.
        self.encoder_t = copy.deepcopy(encoder)
        self.encoder_f = copy.deepcopy(encoder)

        self.jitter_strength = jitter_strength
        self.freq_ratio = freq_ratio
        self.freq_strength = freq_strength

        out_dim_t = self.encoder_t.out_dim
        out_dim_f = self.encoder_f.out_dim
        self.out_dim = out_dim_t + out_dim_f

        # Time projector is created before the frequency projector so the
        # parameter initialisation order (and hence seeded weights) is stable.
        self.projector_t = self._make_projector(out_dim_t, project_norm)
        self.projector_f = self._make_projector(out_dim_f, project_norm)
        self.dummy = nn.Parameter(torch.empty(0))

    @staticmethod
    def _make_projector(dim, project_norm):
        """Build the two-layer projection head ``dim -> 2*dim -> dim``.

        The layer order inside the ``nn.Sequential`` (and therefore the
        ``state_dict`` indices) depends on ``project_norm``.
        """
        if project_norm == 'BN':
            layers = [
                nn.BatchNorm1d(dim),
                nn.ReLU(),
                nn.Linear(dim, dim * 2),
                nn.BatchNorm1d(dim * 2),
                nn.ReLU(),
                nn.Linear(dim * 2, dim),
            ]
        elif project_norm == 'LN':
            layers = [
                nn.ReLU(),
                nn.LayerNorm(dim),
                nn.Linear(dim, dim * 2),
                nn.ReLU(),
                nn.LayerNorm(dim * 2),
                nn.Linear(dim * 2, dim),
            ]
        else:
            layers = [
                nn.ReLU(),
                nn.Linear(dim, dim * 2),
                nn.ReLU(),
                nn.Linear(dim * 2, dim),
            ]
        return nn.Sequential(*layers)

    def forward(self, ts, normalize=True, to_numpy=False, is_augment=False):
        """Encode ``ts`` in the time and the frequency domain.

        Args:
            ts: batch of series, ``(N, C, L)`` (lower-rank input is expanded
                by ``_normalize_t``).
            normalize (bool): z-normalize the input before encoding.
            to_numpy (bool): return NumPy arrays instead of tensors.
            is_augment (bool): jitter the time view and perturb the
                frequency view before encoding.

        Returns:
            tuple: ``(h_t, z_t, h_f, z_f)`` - encoder output and projection
            for the time branch, then for the frequency branch.
        """
        ts_t = _normalize_t(ts, normalize)
        # The FFT and both augmentations are NumPy code, so work on a NumPy
        # array from here on; the base encoder is also fed the NumPy
        # frequency view below.
        if torch.is_tensor(ts_t):
            ts_t = ts_t.detach().cpu().numpy()
        else:
            ts_t = np.asarray(ts_t)
        n_dim = ts_t.shape[1]
        # Magnitude spectrum of the clean (un-jittered) signal.
        ts_f = np.abs(np.fft.fft(ts_t, axis=2))

        if is_augment:
            # The array may share memory with the caller's input (e.g. when
            # normalize is False); copy before writing jitter into it.
            ts_t = ts_t.copy()
            jitter_strength = self.jitter_strength
            for i in range(n_dim):
                ts_t[:, i, :] = jittering(
                    ts_t[:, i, :], strength=jitter_strength,
                    seed=None)

            freq_ratio = self.freq_ratio
            freq_strength = self.freq_strength
            for i in range(n_dim):
                ts_f[:, i, :] = _freq_perturb(
                    ts_f[:, i, :], ratio=freq_ratio, strength=freq_strength,
                    seed=None)

        h_t = self.encoder_t.encode(
            ts_t, normalize=False, to_numpy=False)
        z_t = self.projector_t(h_t)

        h_f = self.encoder_f.encode(
            ts_f, normalize=False, to_numpy=False)
        z_f = self.projector_f(h_f)

        if to_numpy:
            h_t = h_t.cpu().detach().numpy()
            z_t = z_t.cpu().detach().numpy()
            h_f = h_f.cpu().detach().numpy()
            z_f = z_f.cpu().detach().numpy()
        return h_t, z_t, h_f, z_f

    def encode(self, ts, normalize=True, to_numpy=False):
        """Return the concatenated time/frequency projections ``[z_t, z_f]``."""
        _, z_t, _, z_f = self.forward(
            ts, normalize=normalize, to_numpy=False, is_augment=False)
        feature = torch.cat((z_t, z_f), dim=1)
        if to_numpy:
            return feature.cpu().detach().numpy()
        else:
            return feature

def _freq_perturb(data, ratio=0.1, strength=0.1, seed=None):
    n_data = data.shape[0]
    data_len = data.shape[1]

    data_aug = copy.deepcopy(data)
    if seed is not None:
        np.random.seed(seed=seed)
    if ratio < 1:
        mask_remove = np.random.rand(n_data, data_len)
        mask_remove = mask_remove < ratio
        data_aug[mask_remove] = 0.0

        mask_perturb = np.random.rand(n_data, data_len)
        mask_perturb = mask_perturb < ratio

    sigma = np.std(data, axis=1, keepdims=True)
    sigma_scaling = strength * sigma
    sigma_scaling[sigma == 0] = strength

    noise = np.random.rand(n_data, data_len) * sigma_scaling
    if ratio < 1:
        data_aug[mask_perturb] = data_aug[mask_perturb] + noise[mask_perturb]
    else:
        data_aug = data_aug + noise
    return data_aug

def get_timefreq(model_config, encoder):
    """Build a ``TimeFreqEncoder`` from the ``'timefreq'`` config section.

    Args:
        model_config: mapping whose ``'timefreq'`` section provides
            ``jitter_strength``, ``freq_ratio``, ``freq_strength`` (parsed
            with ``float``) and ``project_norm`` (passed through unchanged).
        encoder: base encoder wrapped by the ``TimeFreqEncoder``.

    Returns:
        The wrapped encoder after it has been passed through
        ``load_pretrain`` together with the ``'timefreq'`` section.
    """
    section = model_config['timefreq']
    options = {
        'jitter_strength': float(section['jitter_strength']),
        'freq_ratio': float(section['freq_ratio']),
        'freq_strength': float(section['freq_strength']),
        'project_norm': section['project_norm'],
    }
    wrapped = TimeFreqEncoder(encoder, **options)
    return load_pretrain(model_config['timefreq'], wrapped)

### MixingUp

In [None]:
class MixingUpEncoder(nn.Module):
    def __init__(self, encoder, alpha=1.0):
        r"""
        The MixingUp model described in the paper 'Self-Supervised Representation
        Learning for Time Series'. The implementation is based on the
        github repository https://github.com/mims-harvard/TFC-pretraining

        Args:
            encoder (Module): The base encoder (deep-copied).
            alpha (float, optional): the alpha for beta distribution.
                Default: 1.0.

        Shape:
            - Input: :math:`(N, C_{in}, L_{in})`.
            - Output: :math:`(N, C_{out})`.
        """
        super(MixingUpEncoder, self).__init__()

        self.pretrain_name = 'mixup'
        self.encoder = copy.deepcopy(encoder)
        self.alpha = alpha

        self.out_dim = self.encoder.out_dim
        self.dummy = nn.Parameter(torch.empty(0))

    def forward(self, ts, normalize=True, to_numpy=False, is_augment=False):
        """Encode ``ts``; with ``is_augment`` also encode a mixed-up batch.

        Args:
            ts: batch of series ``(N, C, L)``.
            normalize (bool): forwarded to the base encoder.
            to_numpy (bool): return NumPy arrays instead of tensors.
            is_augment (bool): if false, simply return the embedding of
                ``ts``. If true, mix ``ts`` with a shuffled copy of itself.

        Returns:
            The embedding of ``ts`` when ``is_augment`` is false; otherwise
            ``(ts_emb_0, ts_emb_1, ts_emb_aug, lam)`` - embeddings of the
            original batch, the shuffled batch and their convex combination,
            plus the mixing coefficient drawn from ``Beta(alpha, alpha)``.
        """
        if not is_augment:
            ts_emb = self.encoder.encode(
                ts, normalize=normalize, to_numpy=to_numpy)
            return ts_emb

        alpha = self.alpha

        n_ts = ts.shape[0]
        ts_0 = copy.deepcopy(ts)

        order = np.random.permutation(n_ts)
        # Indexing with an index array already yields a new object, so no
        # second deep copy is needed for the shuffled batch.
        ts_1 = ts[order, :, :]
        lam = np.random.beta(alpha, alpha)

        ts_aug = lam * ts_0 + (1 - lam) * ts_1

        # Always request tensors from the base encoder and convert exactly
        # once below; asking the encoder for NumPy output and then calling
        # .cpu() on the result would fail.
        ts_emb_0 = self.encoder.encode(
            ts_0, normalize=normalize, to_numpy=False)
        ts_emb_1 = self.encoder.encode(
            ts_1, normalize=normalize, to_numpy=False)
        ts_emb_aug = self.encoder.encode(
            ts_aug, normalize=normalize, to_numpy=False)

        if to_numpy:
            ts_emb_0 = ts_emb_0.cpu().detach().numpy()
            ts_emb_1 = ts_emb_1.cpu().detach().numpy()
            ts_emb_aug = ts_emb_aug.cpu().detach().numpy()
        return ts_emb_0, ts_emb_1, ts_emb_aug, lam

    def encode(self, ts, normalize=True, to_numpy=False):
        """Return the base encoder's embedding of ``ts`` (no mixing)."""
        ts_emb = self.encoder.encode(
            ts, normalize=normalize, to_numpy=to_numpy)
        return ts_emb

def get_mixup(model_config, encoder):
    """Wrap ``encoder`` in a ``MixingUpEncoder`` (default ``alpha``).

    Args:
        model_config: mapping with a ``'mixup'`` section, which is handed to
            ``load_pretrain``.
        encoder: base encoder to wrap.

    Returns:
        The wrapped encoder as returned by ``load_pretrain``.
    """
    wrapped = MixingUpEncoder(encoder)
    return load_pretrain(model_config['mixup'], wrapped)



### SimCLR

In [None]:
class SimCLREncoder(nn.Module):
    def __init__(self, encoder):
        r"""
        The SimCLR model described in the paper 'Exploring Contrastive
        Learning in Human Activity Recognition for Healthcare'. The
        implementation is based on the github repository
        https://github.com/mims-harvard/TFC-pretraining

        Args:
            encoder (Module): The base encoder. A deep copy is stored.

        Shape:
            - Input: :math:`(N, C_{in}, L_{in})`.
            - Output: :math:`(N, C_{out})`.
        """
        super(SimCLREncoder, self).__init__()

        self.pretrain_name = 'simclr'
        self.encoder = copy.deepcopy(encoder)

        self.out_dim = self.encoder.out_dim
        self.dummy = nn.Parameter(torch.empty(0))

    def forward(self, ts, normalize=True, to_numpy=False, is_augment=False):
        """
        Encode ``ts``, or an augmented view of it.

        Args:
            ts: Batch of time series, indexed as ``(N, C, L)``.
            normalize (bool, optional): Forwarded to the base encoder.
                Default: ``True``.
            to_numpy (bool, optional): Forwarded to the base encoder.
                Default: ``False``.
            is_augment (bool, optional): If ``True``, the batch is passed
                through ``_augment_ts`` before being encoded.
                Default: ``False``.

        Returns:
            The embedding produced by the base encoder.
        """
        if is_augment:
            # Encode the augmented view. The earlier code computed the
            # augmentation but then encoded the original batch, so the
            # "augmented" embedding was identical to the plain one.
            ts = _augment_ts(ts)

        return self.encoder.encode(
            ts, normalize=normalize, to_numpy=to_numpy)

    def encode(self, ts, normalize=True, to_numpy=False):
        """Embed ``ts`` with the base encoder, without augmentation."""
        ts_emb = self.encoder.encode(
            ts, normalize=normalize, to_numpy=to_numpy)
        return ts_emb


def _augment_ts(ts):
    ts_aug = copy.deepcopy(ts)
    ts_aug = _scaling_transform_vectorized(ts_aug)
    ts_aug = _negate_transform_vectorized(ts_aug)
    return ts_aug


def _scaling_transform_vectorized(X, sigma=0.1):
    """
    Scaling by a random factor
    """
    scaling_factor = np.random.normal(
        loc=1.0, scale=sigma, size=(X.shape[0], 1, X.shape[2]))
    return X * scaling_factor


def _negate_transform_vectorized(X):
    """
    Inverting the signals
    """
    return X * -1

def get_simclr(model_config, encoder):
    """
    Build the SimCLR pre-training wrapper around ``encoder``.

    The base encoder is wrapped in ``SimCLREncoder`` and handed, together
    with the ``'simclr'`` section of ``model_config``, to ``load_pretrain``
    (defined elsewhere in this notebook). Its return value is returned.
    """
    wrapped = SimCLREncoder(encoder)
    section = model_config['simclr']
    return load_pretrain(section, wrapped)


### TimeCLR

In [None]:
class TimeCLREncoder(nn.Module):
    def __init__(self, encoder, aug_bank):
        r"""
        The proposed TimeCLR method.

        Args:
            encoder (Module): The base encoder. A deep copy is stored.
            aug_bank (list): Augmentation callables. Each one is called with
                a single slice ``ts[i, 0, :]`` and its result is written
                back into that slice.

        Shape:
            - Input: :math:`(N, C_{in}, L_{in})`.
            - Output: :math:`(N, C_{out})`.
        """
        super().__init__()

        self.pretrain_name = 'timeclr'
        self.encoder = copy.deepcopy(encoder)

        self.aug_bank = aug_bank
        self.n_aug = len(aug_bank)

        self.out_dim = self.encoder.out_dim
        self.dummy = nn.Parameter(torch.empty(0))

    def forward(self, ts, normalize=True, to_numpy=False, is_augment=False):
        """
        Encode ``ts``; when ``is_augment`` is set, a randomly augmented copy
        of the batch is encoded instead.
        """
        source = self._augment_ts(ts) if is_augment else ts
        return self.encoder.encode(
            source, normalize=normalize, to_numpy=to_numpy)

    def encode(self, ts, normalize=True, to_numpy=False):
        """Embed ``ts`` with the base encoder, without augmentation."""
        return self.encoder.encode(
            ts, normalize=normalize, to_numpy=to_numpy)

    def _augment_ts(self, ts):
        """
        Return a deep copy of ``ts`` in which, for every sample, one
        augmentation picked uniformly at random from the bank has been
        applied to channel 0. Other channels are left as they are.
        """
        augmented = copy.deepcopy(ts)
        bank, n_choice = self.aug_bank, self.n_aug
        for row in range(ts.shape[0]):
            # One independent draw per sample.
            pick = np.random.randint(n_choice)
            augmented[row, 0, :] = bank[pick](augmented[row, 0, :])
        return augmented

def get_timeclr(model_config, encoder):
    """
    Build the TimeCLR pre-training wrapper around ``encoder``.

    Args:
        model_config: Mapping with a ``'timeclr'`` section. Its
            ``'aug_bank_ver'`` entry (converted with ``int``) selects the
            augmentation bank; the section is also passed to
            ``load_pretrain``.
        encoder (Module): The base encoder to wrap.

    Returns:
        Whatever ``load_pretrain`` (defined elsewhere in this notebook)
        returns for the wrapped encoder.

    Raises:
        ValueError: If ``aug_bank_ver`` is not a known version. Only
            version 0 is defined.
    """
    aug_bank_ver = int(model_config['timeclr']['aug_bank_ver'])
    if aug_bank_ver != 0:
        # Previously this fell through with `aug_bank` unbound and failed
        # with an UnboundLocalError further down.
        raise ValueError(
            'Unsupported aug_bank_ver: {} (only 0 is defined)'.format(
                aug_bank_ver))

    aug_bank = [
        lambda x:jittering(x, strength=0.1, seed=None),
        lambda x:smoothing(x, max_ratio=0.5, min_ratio=0.01, seed=None),
        lambda x:mag_warping(x, strength=1, seed=None),
        lambda x:add_slope(x, strength=1, seed=None),
        lambda x:add_spike(x, strength=3, seed=None),
        lambda x:add_step(x, min_ratio=0.1, strength=1, seed=None),
        lambda x:cropping(x, min_ratio=0.1, seed=None),
        lambda x:masking(x, max_ratio=0.5, seed=None),
        lambda x:shifting(x, seed=None),
        lambda x:time_warping(x, min_ratio=0.5, seed=None),
    ]

    encoder_ = TimeCLREncoder(encoder, aug_bank)
    encoder_ = load_pretrain(model_config['timeclr'], encoder_)
    return encoder_


### TS2Vec

In [None]:
class TS2VecEncoder(nn.Module):
    def __init__(self, encoder):
        r"""
        The TS2Vec model described in the paper 'TS2Vec: Towards Universal
        Representation of Time Series'. The implementation is based on the
        github repository https://github.com/mims-harvard/TFC-pretraining

        Args:
            encoder (Module): The base encoder. A deep copy is stored; it
                must provide ``encode_seq``.

        Shape:
            - Input: :math:`(N, C_{in}, L_{in})`.
            - Output: :math:`(N, C_{out})`.
        """
        super().__init__()

        self.pretrain_name = 'ts2vec'
        self.encoder = copy.deepcopy(encoder)

        self.out_dim = self.encoder.out_dim
        self.dummy = nn.Parameter(torch.empty(0))

    def forward(self, ts, normalize=True, to_numpy=False, is_augment=False):
        """
        Encode ``ts``.

        Without augmentation the per-step embeddings from ``encode_seq`` are
        max-pooled over the last axis into one vector per series. With
        augmentation two overlapping random crops of equal length are taken
        from every series and their per-step embeddings are returned as the
        pair ``(ts_emb_l, ts_emb_r)``.
        """
        if not is_augment:
            seq_emb = self.encoder.encode_seq(
                ts, normalize=normalize, to_numpy=False)
            pooled = nn.AdaptiveMaxPool1d(1)(seq_emb)[:, :, 0]
            return pooled.cpu().detach().numpy() if to_numpy else pooled

        n_series, series_len = ts.shape[0], ts.shape[2]

        # One crop length shared by the whole batch.
        crop_len = int(np.random.randint(low=4, high=series_len - 3))
        # Start of the right crop, one draw per series.
        right_start = np.random.randint(
            low=4, high=series_len - crop_len + 1, size=n_series)
        # The left crop starts before the right one but late enough to
        # overlap it, and never before index 0.
        left_floor = np.maximum(right_start - crop_len + 1, 0)
        left_start = np.random.randint(
            low=left_floor, high=right_start, size=n_series)

        left_start = left_start.astype(int)
        right_start = right_start.astype(int)

        left_view = _get_corp(ts, left_start, crop_len)
        right_view = _get_corp(ts, right_start, crop_len)

        left_emb = self.encoder.encode_seq(
            left_view, normalize=normalize, to_numpy=False)
        right_emb = self.encoder.encode_seq(
            right_view, normalize=normalize, to_numpy=False)

        if to_numpy:
            left_emb = left_emb.cpu().detach().numpy()
            right_emb = right_emb.cpu().detach().numpy()
        return left_emb, right_emb

    def encode(self, ts, normalize=True, to_numpy=False):
        """Return the pooled embedding of ``ts`` (no augmentation)."""
        return self.forward(
            ts, normalize=normalize, to_numpy=to_numpy, is_augment=False)


def _get_corp(ts, corp_start, corp_len):
    n_ts = ts.shape[0]
    n_dim = ts.shape[1]
    corp = np.zeros((n_ts, n_dim, corp_len))
    for i in range(n_ts):
        corp[i, :, :] = ts[i, :, corp_start[i]:corp_start[i] + corp_len]
    return corp

def get_ts2vec(model_config, encoder):
    """
    Build the TS2Vec pre-training wrapper around ``encoder``.

    The base encoder is wrapped in ``TS2VecEncoder`` and handed, together
    with the ``'ts2vec'`` section of ``model_config``, to ``load_pretrain``
    (defined elsewhere in this notebook). Its return value is returned.
    """
    wrapped = TS2VecEncoder(encoder)
    section = model_config['ts2vec']
    return load_pretrain(section, wrapped)


### ResNet 1D

In [None]:
class ResNet1D(nn.Module):
    def __init__(self, in_dim=1, out_dim=128, n_dim=64,
                 block_type='standard', norm=None,
                 is_projector=True, project_norm=None):
        r"""
        1D ResNet-based time series encoder

        Args:
            in_dim (int, optional): Number of dimension for the input time
                series. Default: 1.
            out_dim (int, optional): Number of dimension for the output
                representation. Default: 128.
            n_dim (int, optional): Number of base dimension for the
                intermediate representation. Default: 64.
            block_type (string, optional): If set to ``standard``, the encoder
                will use the standard residual block for 1D ResNet. If set to
                ``alternative``, the encoder will use the alternative residual
                block inspired by the paper 'On Layer Normalization in the
                Transformer Architecture'. Default: ``standard``. See 'Deep
                learning for time series classification: a review' for the
                details.
            norm (string, optional): If set to ``BN``, the encoder will
                use batch normalization. If set to ``LN``, the encoder will
                use layer normalization. If set to None, the encoder will
                not use normalization. Default: None (no normalization).
            is_projector (bool, optional): If set to ``False``, the encoder
                will not use additional projection layers. Default: ``True``.
            project_norm (string, optional): If set to ``BN``, the projector
                will use batch normalization. If set to ``LN``, the projector
                will use layer normalization. If set to None, the projector
                will not use normalization. Default: None (no normalization).

        Shape:
            - Input: :math:`(N, C_{in}, L_{in})`, :math:`(N, L_{in})`, or
                :math:`(L_{in})`.
            - Output: :math:`(N, C_{out})`.
        """
        super(ResNet1D, self).__init__()
        assert block_type in ['standard', 'alternative', ]
        assert norm in ['BN', 'LN', None]
        assert project_norm in ['BN', 'LN', None]

        self.in_dim = in_dim
        self.out_dim = out_dim
        self.n_dim = n_dim
        self.is_projector = is_projector

        if block_type == 'standard':
            Block = Block_Standard
        else:
            Block = Block_Alt

        # Assigning a module to an attribute registers it; no explicit
        # add_module call is needed.
        self.in_net = nn.Conv1d(
            in_dim, n_dim, 7, stride=2, padding=3, dilation=1)

        res_net_layer = OrderedDict()
        res_net_layer['block_0'] = Block(n_dim, n_dim, norm)
        res_net_layer['block_1'] = Block(n_dim, n_dim * 2, norm)
        res_net_layer['block_2'] = Block(n_dim * 2, n_dim * 2, norm)
        res_net_layer['pooling'] = nn.AdaptiveAvgPool1d(1)
        # The plain dict is kept so encode_seq can run the blocks without
        # the pooling layer; registration happens through self.res_net.
        self.res_net_layer = res_net_layer
        self.res_net = nn.Sequential(res_net_layer)

        self.out_net = nn.Linear(n_dim * 2, out_dim)
        self.project_norm = project_norm
        if is_projector:
            self.projector = self._make_projector(out_dim, project_norm)
        self.dummy = nn.Parameter(torch.empty(0))

    @staticmethod
    def _make_projector(out_dim, project_norm):
        # Build the two-layer projection head. The layer order determines
        # the state-dict keys, so it must not change.
        if project_norm == 'BN':
            return nn.Sequential(
                nn.BatchNorm1d(out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim * 2),
                nn.BatchNorm1d(out_dim * 2),
                nn.ReLU(),
                nn.Linear(out_dim * 2, out_dim)
            )
        if project_norm == 'LN':
            return nn.Sequential(
                nn.ReLU(),
                nn.LayerNorm(out_dim),
                nn.Linear(out_dim, out_dim * 2),
                nn.ReLU(),
                nn.LayerNorm(out_dim * 2),
                nn.Linear(out_dim * 2, out_dim)
            )
        return nn.Sequential(
            nn.ReLU(),
            nn.Linear(out_dim, out_dim * 2),
            nn.ReLU(),
            nn.Linear(out_dim * 2, out_dim)
        )

    def _project_seq(self, ts_emb):
        # Apply the projector to a channels-last sequence (N, L, C).
        # BatchNorm1d normalizes over dim 1, so the tensor is flipped to
        # channels-first around every batch-norm layer only.
        for layer in self.projector:
            if isinstance(layer, nn.BatchNorm1d):
                ts_emb = layer(ts_emb.transpose(1, 2)).transpose(1, 2)
            else:
                ts_emb = layer(ts_emb)
        return ts_emb

    def forward(self, ts, normalize=True, to_numpy=False):
        """
        Encode ``ts`` into one vector per series.

        Args:
            ts: Input batch; it is passed through ``_normalize_t`` (defined
                elsewhere in this notebook) and moved to the model's device
                as ``float32``.
            normalize (bool, optional): Forwarded to ``_normalize_t``.
                Default: ``True``.
            to_numpy (bool, optional): If ``True``, return a NumPy array
                instead of a tensor. Default: ``False``.
        """
        device = self.dummy.device

        ts = _normalize_t(ts, normalize)
        ts = ts.to(device, dtype=torch.float32)

        ts_emb = self.in_net(ts)
        ts_emb = self.res_net(ts_emb)
        # The adaptive pooling leaves a length-1 last axis; drop it.
        ts_emb = ts_emb[:, :, 0]
        ts_emb = self.out_net(ts_emb)

        if self.is_projector:
            ts_emb = self.projector(ts_emb)

        if to_numpy:
            return ts_emb.cpu().detach().numpy()
        return ts_emb

    def encode(self, ts, normalize=True, to_numpy=False):
        """Alias of :meth:`forward`."""
        return self.forward(ts, normalize=normalize, to_numpy=to_numpy)

    def encode_seq(self, ts, normalize=True, to_numpy=False):
        """
        Encode ``ts`` into one vector per (down-sampled) time step.

        The residual blocks are run without the final pooling layer, and
        the output layer and projector are applied at every step. The
        result is channels-first: ``(N, out_dim, L')``.
        """
        device = self.dummy.device

        ts = _normalize_t(ts, normalize)
        ts = ts.to(device, dtype=torch.float32)

        ts_emb = self.in_net(ts)
        ts_emb = self.res_net_layer['block_0'](ts_emb)
        ts_emb = self.res_net_layer['block_1'](ts_emb)
        ts_emb = self.res_net_layer['block_2'](ts_emb)
        # Linear layers act on the last axis, so go channels-last.
        ts_emb = torch.transpose(ts_emb, 1, 2)
        ts_emb = self.out_net(ts_emb)

        if self.is_projector:
            # The earlier code looked the layers up through an undefined
            # name `projector`, which raised NameError for project_norm='BN'.
            ts_emb = self._project_seq(ts_emb)
        ts_emb = torch.transpose(ts_emb, 1, 2)

        if to_numpy:
            return ts_emb.cpu().detach().numpy()
        return ts_emb


class Block_Standard(nn.Module):
    def __init__(self, in_dim, n_dim, norm):
        """
        Standard residual block: three convolutions (kernel sizes 7, 5, 3),
        each optionally followed by a normalization layer, with ReLU after
        the first two; the shortcut is added and a final ReLU is applied.

        Args:
            in_dim (int): Number of input channels.
            n_dim (int): Number of output channels.
            norm (string or None): ``'BN'`` for batch normalization,
                ``'LN'`` for layer normalization, anything else for none.
        """
        super(Block_Standard, self).__init__()

        main_pass = OrderedDict()
        stages = [(in_dim, 7), (n_dim, 5), (n_dim, 3)]
        for idx, (c_in, kernel) in enumerate(stages):
            # padding = kernel // 2 keeps the sequence length unchanged.
            main_pass['cov_{}'.format(idx)] = nn.Conv1d(
                c_in, n_dim, kernel, stride=1, padding=kernel // 2,
                dilation=1)
            if norm == 'BN':
                main_pass['bn_{}'.format(idx)] = nn.BatchNorm1d(n_dim)
            elif norm == 'LN':
                main_pass['ln_{}'.format(idx)] = LayerNormT(n_dim)
            # No activation after the last stage; it follows the residual
            # addition in forward().
            if idx < len(stages) - 1:
                main_pass['relu_{}'.format(idx)] = nn.ReLU()
        self.main_pass = nn.Sequential(main_pass)

        shortcut = OrderedDict()
        if in_dim != n_dim:
            # A 1x1 convolution matches the channel count of the main pass.
            shortcut['cov_0'] = nn.Conv1d(
                in_dim, n_dim, 1, stride=1, padding=0, dilation=1)
            if norm == 'BN':
                shortcut['bn_0'] = nn.BatchNorm1d(n_dim)
            elif norm == 'LN':
                # This layer used to be written into `main_pass` after the
                # main Sequential had been built, so it was never used.
                # NOTE: this adds `shortcut.ln_0.*` entries to the state
                # dict; checkpoints saved before the fix lack them.
                shortcut['ln_0'] = LayerNormT(n_dim)
        else:
            shortcut['id_0'] = nn.Identity()
        self.shortcut = nn.Sequential(shortcut)

    def forward(self, data):
        """Return ``relu(main_pass(data) + shortcut(data))``."""
        hidden_main = self.main_pass(data)
        hidden_skip = self.shortcut(data)
        return torch.relu(hidden_main + hidden_skip)


class Block_Alt(nn.Module):
    def __init__(self, in_dim, n_dim, norm):
        """
        Alternative (pre-normalization) residual block: three stages of
        optional normalization, convolution (kernel sizes 7, 5, 3) and ReLU.
        The shortcut is added to the result with no activation afterwards.

        Args:
            in_dim (int): Number of input channels.
            n_dim (int): Number of output channels.
            norm (string or None): ``'BN'`` for batch normalization,
                ``'LN'`` for layer normalization, anything else for none.
        """
        super(Block_Alt, self).__init__()

        main_pass = OrderedDict()
        stages = [(in_dim, 7), (n_dim, 5), (n_dim, 3)]
        for idx, (c_in, kernel) in enumerate(stages):
            # Normalization comes first, so it sees the stage's input width.
            if norm == 'BN':
                main_pass['bn_{}'.format(idx)] = nn.BatchNorm1d(c_in)
            elif norm == 'LN':
                main_pass['ln_{}'.format(idx)] = LayerNormT(c_in)
            # padding = kernel // 2 keeps the sequence length unchanged.
            main_pass['cov_{}'.format(idx)] = nn.Conv1d(
                c_in, n_dim, kernel, stride=1, padding=kernel // 2,
                dilation=1)
            main_pass['relu_{}'.format(idx)] = nn.ReLU()
        self.main_pass = nn.Sequential(main_pass)

        shortcut = OrderedDict()
        if in_dim != n_dim:
            if norm == 'BN':
                shortcut['bn_0'] = nn.BatchNorm1d(in_dim)
            elif norm == 'LN':
                # This layer used to be written into `main_pass` after the
                # main Sequential had been built, so it was never used.
                # NOTE: this adds `shortcut.ln_0.*` entries to the state
                # dict; checkpoints saved before the fix lack them.
                shortcut['ln_0'] = LayerNormT(in_dim)
            # A 1x1 convolution matches the channel count of the main pass.
            shortcut['cov_0'] = nn.Conv1d(
                in_dim, n_dim, 1, stride=1, padding=0, dilation=1)
        else:
            shortcut['id_0'] = nn.Identity()
        self.shortcut = nn.Sequential(shortcut)

    def forward(self, data):
        """Return ``main_pass(data) + shortcut(data)``."""
        hidden_main = self.main_pass(data)
        hidden_skip = self.shortcut(data)
        return hidden_main + hidden_skip


class LayerNormT(nn.Module):
    def __init__(self, n_dim):
        """
        Layer normalization over the channel axis of a channels-first
        tensor.

        Args:
            n_dim (int): Number of channels (size of dim 1 of the input).
        """
        super(LayerNormT, self).__init__()
        # Attribute assignment registers the sub-module.
        self.layer_norm = nn.LayerNorm(n_dim)

    def forward(self, data):
        """
        Normalize ``data`` of shape ``(N, C, L)`` over ``C``.

        ``nn.LayerNorm`` works on the last axis, so the tensor is flipped
        to ``(N, L, C)``, normalized, and flipped back.
        """
        data = torch.transpose(data, 1, 2)
        # The result must be kept: the earlier code discarded it, which
        # made this layer return its input unchanged.
        data = self.layer_norm(data)
        data = torch.transpose(data, 1, 2)
        return data

def get_resnet1d(model_config):
    """
    Build a ``ResNet1D`` encoder from the ``'encoder'`` section of
    ``model_config``.

    ``in_dim``, ``out_dim`` and ``n_dim`` are converted with ``int``;
    ``is_projector`` is true only for the (case-insensitive) string
    ``'true'``; ``block_type``, ``norm`` and ``project_norm`` are passed
    through as stored. The new encoder is then handed to ``load_pretrain``
    (defined elsewhere in this notebook) and its return value is returned.
    """
    section = model_config['encoder']
    encoder = ResNet1D(
        in_dim=int(section['in_dim']),
        out_dim=int(section['out_dim']),
        n_dim=int(section['n_dim']),
        block_type=section['block_type'],
        norm=section['norm'],
        is_projector=section['is_projector'].lower() == 'true',
        project_norm=section['project_norm'])
    return load_pretrain(model_config['encoder'], encoder)

### RNN

In [None]:
class ALSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, num_layers=2, bidirectional=True, dropout = 0.0):
        """
        LSTM followed by additive attention pooling over time.

        Args:
            input_size (int, optional): Feature size of every time step.
                Default: 1.
            hidden_size (int, optional): Hidden size of the LSTM (per
                direction). Default: 64.
            num_layers (int, optional): Number of stacked LSTM layers.
                Default: 2.
            bidirectional (bool, optional): Whether the LSTM runs in both
                directions. Default: ``True``.
            dropout (float, optional): Dropout probability passed to
                ``nn.LSTM``. Default: 0.0.
        """
        super(ALSTM, self).__init__()
        self.in_dim = input_size
        self.n_dim = hidden_size
        # `bidirectional` and `dropout` used to be accepted but ignored;
        # with their default values the module is built exactly as before.
        self.directions = 2 if bidirectional else 1
        self.n_layer = num_layers
        self.lstm = nn.LSTM(
            input_size=self.in_dim,
            hidden_size=self.n_dim,
            num_layers=num_layers, batch_first=True,
            dropout=dropout,
            bidirectional=bidirectional
        )

        feature_dim = self.directions * self.n_dim
        self.attention = nn.Linear(feature_dim, 1)
        self.fc = nn.Linear(feature_dim, feature_dim)

    def forward(self, input_data: torch.Tensor):
        """
        Forward computation.

        Args:
            input_data (torch.Tensor): Batch-first input of shape
                ``(N, L, input_size)``.

        Returns:
            tuple: ``(output, attn_weights)`` where ``output`` has shape
            ``(N, directions * hidden_size)`` and ``attn_weights`` has
            shape ``(N, L, 1)`` and sums to one over ``L``.
        """
        # With no initial state given, nn.LSTM starts from zeros, so the
        # module no longer depends on a notebook-level `device` variable.
        rnn_output, _ = self.lstm(input_data)

        # Attention weights: one score per time step, normalized over time.
        attn_weights = F.softmax(self.attention(rnn_output), dim=1)

        # Context vector: attention-weighted sum of the step outputs.
        context = torch.sum(attn_weights * rnn_output, dim=1)

        output = torch.tanh(self.fc(context))
        return output, attn_weights

In [None]:
class AGRU(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, num_layers=2, bidirectional=True, dropout = 0.0):
        """
        GRU followed by additive attention pooling over time.

        Args:
            input_size (int, optional): Feature size of every time step.
                Default: 1.
            hidden_size (int, optional): Hidden size of the GRU (per
                direction). Default: 64.
            num_layers (int, optional): Number of stacked GRU layers.
                Default: 2.
            bidirectional (bool, optional): Whether the GRU runs in both
                directions. Default: ``True``.
            dropout (float, optional): Dropout probability passed to
                ``nn.GRU``. Default: 0.0.
        """
        super(AGRU, self).__init__()
        self.in_dim = input_size
        self.n_dim = hidden_size
        # `bidirectional` and `dropout` used to be accepted but ignored;
        # with their default values the module is built exactly as before.
        self.directions = 2 if bidirectional else 1
        self.n_layer = num_layers
        self.gru = nn.GRU(
            input_size=self.in_dim,
            hidden_size=self.n_dim,
            num_layers=num_layers, batch_first=True,
            dropout=dropout,
            bidirectional=bidirectional
        )

        feature_dim = self.directions * self.n_dim
        self.attention = nn.Linear(feature_dim, 1)
        self.fc = nn.Linear(feature_dim, feature_dim)

    def forward(self, input_data: torch.Tensor):
        """
        Forward computation.

        Args:
            input_data (torch.Tensor): Batch-first input of shape
                ``(N, L, input_size)``.

        Returns:
            tuple: ``(output, attn_weights)`` where ``output`` has shape
            ``(N, directions * hidden_size)`` and ``attn_weights`` has
            shape ``(N, L, 1)`` and sums to one over ``L``.
        """
        # With no initial state given, nn.GRU starts from zeros, so the
        # module no longer depends on a notebook-level `device` variable.
        rnn_output, _ = self.gru(input_data)

        # Attention weights: one score per time step, normalized over time.
        attn_weights = F.softmax(self.attention(rnn_output), dim=1)

        # Context vector: attention-weighted sum of the step outputs.
        context = torch.sum(attn_weights * rnn_output, dim=1)

        output = torch.tanh(self.fc(context))
        return output, attn_weights



In [None]:

class RNNet(nn.Module):
    def __init__(self, in_dim=1, out_dim=128, rnn_type='GRU',
                 n_layer=2, n_dim=64, seq_len = 512, is_projector=True,
                 project_norm=None, dropout=0.0):
        r"""
        RNN-based time series encoder

        Args:
            in_dim (int, optional): Number of dimension for the input time
                series. Default: 1.
            out_dim (int, optional): Number of dimension for the output
                representation. Default: 128.
            rnn_type (string, optional): The type of RNN cell to use. One of
                ``'GRU'``, ``'LSTM'``, ``'AGRU'`` or ``'ALSTM'`` (the last
                two add attention pooling). Default: ``'GRU'``
            n_layer (int, optional): Number of recurrent layers. Default: 2.
            n_dim (int, optional): Number of dimension for the intermediate
                representation. Default: 64.
            seq_len (int, optional): Stored on the instance; not otherwise
                used by this class. Default: 512.
            is_projector (bool, optional): If set to ``False``, the encoder
                will not use additional projection layers. Default: ``True``.
            project_norm (string, optional): If set to ``BN``, the projector
                will use batch normalization. If set to ``LN``, the projector
                will use layer normalization. If set to None, the projector
                will not use normalization. Default: None (no normalization).
            dropout (float, optional): The probability of an element to be
                zeroed for the dropout layers. Default: 0.0.

        Shape:
            - Input: :math:`(N, C_{in}, L_{in})`, :math:`(N, L_{in})`, or
                :math:`(L_{in})`.
            - Output: :math:`(N, C_{out})`.

        Raises:
            ValueError: If ``rnn_type`` is not one of the supported values.
        """
        super(RNNet, self).__init__()
        assert project_norm in ['BN', 'LN', None]

        self.in_dim = in_dim
        self.out_dim = out_dim
        self.n_dim = n_dim
        self.is_projector = is_projector
        self.seq_len = seq_len
        self.rnn_type = rnn_type

        # Assigning a module to an attribute registers it; no explicit
        # add_module call is needed.
        self.in_net = nn.Conv1d(
            in_dim, n_dim, 7, stride=2, padding=3, dilation=1)

        if rnn_type == 'LSTM':
            self.rnn = nn.LSTM(
                input_size=n_dim, hidden_size=n_dim, num_layers=n_layer,
                batch_first=True, dropout=dropout, bidirectional=True)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(
                input_size=n_dim, hidden_size=n_dim, num_layers=n_layer,
                batch_first=True, dropout=dropout, bidirectional=True)
        elif rnn_type == 'ALSTM':
            self.rnn = ALSTM(
                input_size=n_dim, hidden_size=n_dim, num_layers=n_layer,
                dropout=dropout, bidirectional=True)
        elif rnn_type == 'AGRU':
            # This branch used to build an ALSTM. NOTE: checkpoints saved
            # with that behavior hold LSTM weights under this rnn_type.
            self.rnn = AGRU(
                input_size=n_dim, hidden_size=n_dim, num_layers=n_layer,
                dropout=dropout, bidirectional=True)
        else:
            raise ValueError(
                "Unsupported rnn_type: {!r} (expected 'LSTM', 'GRU', "
                "'ALSTM' or 'AGRU')".format(rnn_type))

        self.out_net = nn.Linear(n_dim * 2, out_dim)
        self.project_norm = project_norm
        if is_projector:
            # The projector is stored under the name `is_projector`,
            # replacing the boolean, so the state-dict keys
            # (`is_projector.<idx>.*`) stay the same as before. The
            # attribute is therefore either ``False`` or a non-empty
            # (hence truthy) Sequential.
            self.is_projector = self._make_projector(out_dim, project_norm)
        self.dummy = nn.Parameter(torch.empty(0))

    @staticmethod
    def _make_projector(out_dim, project_norm):
        # Build the two-layer projection head. The layer order determines
        # the state-dict keys, so it must not change.
        if project_norm == 'BN':
            return nn.Sequential(
                nn.BatchNorm1d(out_dim),
                nn.ReLU(),
                nn.Linear(out_dim, out_dim * 2),
                nn.BatchNorm1d(out_dim * 2),
                nn.ReLU(),
                nn.Linear(out_dim * 2, out_dim)
            )
        if project_norm == 'LN':
            return nn.Sequential(
                nn.ReLU(),
                nn.LayerNorm(out_dim),
                nn.Linear(out_dim, out_dim * 2),
                nn.ReLU(),
                nn.LayerNorm(out_dim * 2),
                nn.Linear(out_dim * 2, out_dim)
            )
        return nn.Sequential(
            nn.ReLU(),
            nn.Linear(out_dim, out_dim * 2),
            nn.ReLU(),
            nn.Linear(out_dim * 2, out_dim)
        )

    def forward(self, ts, normalize=True, to_numpy=False):
        """
        Encode ``ts`` into one vector per series.

        Args:
            ts: Input batch; it is passed through ``_normalize_t`` (defined
                elsewhere in this notebook) and moved to the model's device
                as ``float32``.
            normalize (bool, optional): Forwarded to ``_normalize_t``.
                Default: ``True``.
            to_numpy (bool, optional): If ``True``, return a NumPy array
                instead of a tensor. Default: ``False``.
        """
        device = self.dummy.device

        ts = _normalize_t(ts, normalize)
        ts = ts.to(device, dtype=torch.float32)
        ts_emb = self.in_net(ts)
        # The recurrent modules are batch-first: (N, L, C).
        ts_emb = torch.transpose(ts_emb, 1, 2)
        ts_emb, _ = self.rnn(ts_emb)
        if self.rnn_type not in ('ALSTM', 'AGRU'):
            # Plain RNNs return one output per step; the output at the
            # first step is used as the series summary. The attention
            # variants already return one pooled vector per series.
            ts_emb = ts_emb[:, 0, :]
        ts_emb = self.out_net(ts_emb)

        if self.is_projector:
            ts_emb = self.is_projector(ts_emb)

        if to_numpy:
            return ts_emb.cpu().detach().numpy()
        return ts_emb

    def encode(self, ts, normalize=True, to_numpy=False):
        """Alias of :meth:`forward`."""
        return self.forward(ts, normalize=normalize, to_numpy=to_numpy)

    def encode_seq(self, ts, normalize=True, to_numpy=False):
        """
        Encode ``ts`` into one vector per (down-sampled) time step.

        The result is channels-first: ``(N, out_dim, L')``.

        NOTE(review): with ``rnn_type`` ``'ALSTM'`` or ``'AGRU'`` the
        recurrent module returns an already pooled 2-D tensor, so the final
        transpose over dims 1 and 2 fails; per-step embeddings appear to be
        unsupported for the attention variants -- confirm.
        """
        device = self.dummy.device
        projector = self.is_projector

        ts = _normalize_t(ts, normalize)
        ts = ts.to(device, dtype=torch.float32)

        ts_emb = self.in_net(ts)
        ts_emb = torch.transpose(ts_emb, 1, 2)
        ts_emb, _ = self.rnn(ts_emb)
        ts_emb = self.out_net(ts_emb)

        if projector:
            # BatchNorm1d normalizes over dim 1, so the channels-last
            # sequence is flipped around every batch-norm layer only.
            for layer in projector:
                if isinstance(layer, nn.BatchNorm1d):
                    ts_emb = layer(ts_emb.transpose(1, 2)).transpose(1, 2)
                else:
                    ts_emb = layer(ts_emb)
        ts_emb = torch.transpose(ts_emb, 1, 2)

        if to_numpy:
            return ts_emb.cpu().detach().numpy()
        return ts_emb

def get_rnnet(model_config):
    """
    Build an ``RNNet`` encoder from the ``'encoder'`` section of
    ``model_config``.

    ``in_dim``, ``out_dim``, ``n_layer`` and ``n_dim`` are converted with
    ``int`` and ``dropout`` with ``float``; ``is_projector`` is true only
    for the (case-insensitive) string ``'true'``; ``rnn_type`` and
    ``project_norm`` are passed through as stored. The new encoder is then
    handed to ``load_pretrain`` (defined elsewhere in this notebook) and
    its return value is returned.
    """
    section = model_config['encoder']
    settings = dict(
        in_dim=int(section['in_dim']),
        out_dim=int(section['out_dim']),
        rnn_type=section['rnn_type'],
        n_layer=int(section['n_layer']),
        n_dim=int(section['n_dim']),
        is_projector=section['is_projector'].lower() == 'true',
        project_norm=section['project_norm'],
        dropout=float(section['dropout']),
    )
    encoder = RNNet(**settings)
    return load_pretrain(model_config['encoder'], encoder)

### Transformer

In [None]:
class Transformer(nn.Module):
    def __init__(self, in_dim=1, out_dim=128, n_layer=8, n_dim=64, n_head=8,
                 norm_first=False, is_pos=True, is_projector=True,
                 project_norm=None, dropout=0.0):
        r"""
        Transformer-based time series encoder

        Args:
            in_dim (int, optional): Number of dimension for the input time
                series. Default: 1.
            out_dim (int, optional): Number of dimension for the output
                representation. Default: 128.
            n_layer (int, optional): Number of layer for the transformer
                encoder. Default: 8.
            n_dim (int, optional): Number of dimension for the intermediate
                representation. Default: 64.
            n_head (int, optional): Number of head for the transformer
                encoder. Default: 8.
            norm_first: if ``True``, layer norm is done prior to attention and
                feedforward operations, respectively. Otherwise it's done
                after. Default: ``False`` (after).
            is_pos (bool, optional): If set to ``False``, the encoder will
                not use position encoding. Default: ``True``.
            is_projector (bool, optional): If set to ``False``, the encoder
                will not use additional projection layers. Default: ``True``.
            project_norm (string, optional): If set to ``BN``, the projector
                will use batch normalization. If set to ``LN``, the projector
                will use layer normalization. If set to None, the projector
                will not use normalization. Default: None (no normalization).
            dropout (float, optional): The probability of an element to be
                zeroed for the dropout layers. Default: 0.0.

        Shape:
            - Input: :math:`(N, C_{in}, L_{in})`, :math:`(N, L_{in})`, or
                :math:`(L_{in})`.
            - Output of ``forward``/``encode``: :math:`(N, C_{out})`.
            - Output of ``encode_seq``: :math:`(N, C_{out}, L)` where
                :math:`L` is the length produced by the stride-2 input
                convolution.
        """
        super(Transformer, self).__init__()
        assert project_norm in ['BN', 'LN', None]

        self.in_dim = in_dim
        self.out_dim = out_dim
        self.n_dim = n_dim
        self.is_projector = is_projector
        self.is_pos = is_pos
        # Longest sequence for which a positional table has been built so far.
        self.max_len = 0
        self.dropout = dropout

        # Attribute assignment already registers sub-modules and parameters,
        # so no explicit add_module / register_parameter calls are needed.
        self.in_net = nn.Conv1d(
            in_dim, n_dim, 7, stride=2, padding=3, dilation=1)
        transformer = OrderedDict()
        for i in range(n_layer):
            transformer[f'encoder_{i:02d}'] = nn.TransformerEncoderLayer(
                n_dim, n_head, dim_feedforward=n_dim,
                dropout=dropout, batch_first=True,
                norm_first=norm_first)
        self.transformer = nn.Sequential(transformer)

        # Learnable token prepended to every sequence; its output is the
        # sequence-level summary used by ``forward``.
        self.start_token = nn.Parameter(
            torch.randn(1, n_dim, 1))

        self.out_net = nn.Linear(n_dim, out_dim)
        self.project_norm = project_norm
        if is_projector:
            if project_norm == 'BN':
                self.projector = nn.Sequential(
                    nn.BatchNorm1d(out_dim),
                    nn.ReLU(),
                    nn.Linear(out_dim, out_dim * 2),
                    nn.BatchNorm1d(out_dim * 2),
                    nn.ReLU(),
                    nn.Linear(out_dim * 2, out_dim)
                )
            elif project_norm == 'LN':
                self.projector = nn.Sequential(
                    nn.ReLU(),
                    nn.LayerNorm(out_dim),
                    nn.Linear(out_dim, out_dim * 2),
                    nn.ReLU(),
                    nn.LayerNorm(out_dim * 2),
                    nn.Linear(out_dim * 2, out_dim)
                )
            else:
                self.projector = nn.Sequential(
                    nn.ReLU(),
                    nn.Linear(out_dim, out_dim * 2),
                    nn.ReLU(),
                    nn.Linear(out_dim * 2, out_dim)
                )
        # Empty parameter used only to discover the module's current device.
        self.dummy = nn.Parameter(torch.empty(0))

    def _embed(self, ts, normalize):
        r"""
        Shared front end of ``forward`` and ``encode_seq``.

        Normalizes the input, applies the input convolution and (optionally)
        the positional encoding, prepends the start token and runs the
        transformer stack.

        Returns:
            Tensor of shape :math:`(N, L + 1, n\_dim)`; index 0 along dim 1
            is the start token.
        """
        device = self.dummy.device

        ts = _normalize_t(ts, normalize)
        ts = ts.to(device, dtype=torch.float32)

        ts_emb = self.in_net(ts)

        if self.is_pos:
            ts_len = ts_emb.size()[2]
            # The positional table is (re)built lazily whenever a longer
            # sequence than any seen before arrives.
            if ts_len > self.max_len:
                self.max_len = ts_len
                self.pos_net = PositionalEncoding(
                    self.n_dim, ts_len, dropout=self.dropout)
                self.pos_net.to(device)
                # A new module starts in training mode; align it with the
                # parent so dropout is not applied after ``model.eval()``.
                self.pos_net.train(self.training)
            ts_emb = self.pos_net(ts_emb)

        start_tokens = self.start_token.expand(ts_emb.size()[0], -1, -1)
        ts_emb = torch.cat((start_tokens, ts_emb, ), dim=2)
        ts_emb = torch.transpose(ts_emb, 1, 2)
        return self.transformer(ts_emb)

    def _project_seq(self, ts_emb):
        r"""
        Apply the projector to a :math:`(N, L, C)` sequence.

        ``BatchNorm1d`` expects channels on dim 1, so for the ``BN``
        projector the tensor is transposed around every batch-norm layer;
        the other projector variants operate on the last dim directly.
        """
        if self.project_norm != 'BN':
            return self.projector(ts_emb)

        for layer in self.projector:
            if isinstance(layer, nn.BatchNorm1d):
                ts_emb = layer(ts_emb.transpose(1, 2)).transpose(1, 2)
            else:
                ts_emb = layer(ts_emb)
        return ts_emb

    def forward(self, ts, normalize=True, to_numpy=False):
        r"""
        Encode each time series into a single vector.

        Args:
            ts: Input time series (passed through ``_normalize_t``).
            normalize (bool, optional): Forwarded to ``_normalize_t``.
            to_numpy (bool, optional): If ``True`` return a detached numpy
                array on the CPU instead of a tensor.

        Returns:
            The start-token representation of shape :math:`(N, C_{out})`.
        """
        ts_emb = self._embed(ts, normalize)
        # Only the start token (position 0) summarizes the whole sequence.
        ts_emb = ts_emb[:, 0, :]
        ts_emb = self.out_net(ts_emb)

        if self.is_projector:
            ts_emb = self.projector(ts_emb)

        if to_numpy:
            return ts_emb.cpu().detach().numpy()
        else:
            return ts_emb

    def encode(self, ts, normalize=True, to_numpy=False):
        r"""Alias of ``forward``."""
        return self.forward(ts, normalize=normalize, to_numpy=to_numpy)

    def encode_seq(self, ts, normalize=True, to_numpy=False):
        r"""
        Encode each time series into one vector per (down-sampled) time step.

        The start-token representation is added to every per-step
        representation before returning.

        Args:
            ts: Input time series (passed through ``_normalize_t``).
            normalize (bool, optional): Forwarded to ``_normalize_t``.
            to_numpy (bool, optional): If ``True`` return a detached numpy
                array on the CPU instead of a tensor.

        Returns:
            Representation of shape :math:`(N, C_{out}, L)`.
        """
        ts_emb = self._embed(ts, normalize)
        ts_emb = self.out_net(ts_emb)
        if self.is_projector:
            # Previously the BN branch called ``.modules()`` on the boolean
            # ``is_projector`` flag and therefore always raised.
            ts_emb = self._project_seq(ts_emb)

        # The start token must be read *before* it is sliced away; the old
        # order picked the first real time step instead of the start token.
        start_tokens = ts_emb[:, 0:1, :]
        ts_emb = ts_emb[:, 1:, :] + start_tokens
        ts_emb = torch.transpose(ts_emb, 1, 2)

        if to_numpy:
            return ts_emb.cpu().detach().numpy()
        else:
            return ts_emb


class PositionalEncoding(nn.Module):
    def __init__(self, n_dim, max_len, dropout=0.0):
        r"""
        Sinusoidal positional encoding for channel-first sequences.

        Args:
            n_dim (int): Number of channels of the sequence. Both even and
                odd values are supported.
            max_len (int): Number of positions to pre-compute.
            dropout (float, optional): Dropout probability applied after the
                encoding is added. Default: 0.0.

        Shape:
            - Input: :math:`(N, n\_dim, L)` with :math:`L \le max\_len`.
            - Output: same shape as the input.
        """
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(0)
        div_term = torch.exp(
            torch.arange(0, n_dim, 2) * (-math.log(10000.0) / n_dim))
        # One row per frequency, one column per position.
        angle = div_term.unsqueeze(1) * position

        pos_emb = torch.zeros(1, n_dim, max_len)
        pos_emb[0, 0::2, :] = torch.sin(angle)
        # With an odd n_dim there is one fewer cosine row than sine rows, so
        # the frequency table is trimmed to the number of odd channels.
        pos_emb[0, 1::2, :] = torch.cos(angle[:n_dim // 2])
        # Non-persistent: the table is derived data and is kept out of the
        # state dict.
        self.register_buffer('pos_emb', pos_emb, persistent=False)

    def forward(self, x):
        r"""Add the encoding for the first ``x.size(2)`` positions to ``x``."""
        x = x + self.pos_emb[:, :, :x.size()[2]]
        return self.dropout(x)



def get_transformer(model_config):
    r"""
    Build a ``Transformer`` encoder from the ``encoder`` section of a config.

    Args:
        model_config: Mapping whose ``'encoder'`` entry provides ``in_dim``,
            ``out_dim``, ``n_layer``, ``n_dim``, ``n_head``, ``norm_first``,
            ``is_pos``, ``is_projector``, ``project_norm`` and ``dropout``.
            The three boolean flags must be strings; each is treated as true
            only when it equals ``'true'`` case-insensitively.

    Returns:
        Whatever ``load_pretrain`` returns for the freshly built encoder.
    """
    section = model_config['encoder']

    def _is_true(text):
        # Flags are stored as text in the config.
        return text.lower() == 'true'

    encoder = Transformer(
        in_dim=int(section['in_dim']),
        out_dim=int(section['out_dim']),
        n_layer=int(section['n_layer']),
        n_dim=int(section['n_dim']),
        n_head=int(section['n_head']),
        norm_first=_is_true(section['norm_first']),
        is_pos=_is_true(section['is_pos']),
        is_projector=_is_true(section['is_projector']),
        project_norm=section['project_norm'],
        dropout=float(section['dropout']))
    return load_pretrain(model_config['encoder'], encoder)

### WaveNet

In [None]:
class WaveNet(nn.Module):
    def __init__(self, in_dim=1, out_dim=128, n_layer = 10, n_dim=64, norm=None,
                 is_projector=True, project_norm=None):
        r"""
        WaveNet-style time series encoder built from gated, dilated causal
        convolutions.

        Args:
            in_dim (int, optional): Number of dimension for the input time
                series. Default: 1.
            out_dim (int, optional): Number of dimension for the output
                representation. Default: 128.
            n_layer (int, optional): Number of gated layers per block; the
                dilation doubles with every layer and restarts at 1 for each
                of the 4 blocks. Default: 10.
            n_dim (int, optional): Number of channels of the intermediate
                representation. Default: 64.
            norm: Accepted but not used by this class.
            is_projector (bool, optional): If set to ``False``, the encoder
                will not use additional projection layers. Default: ``True``.
            project_norm (string, optional): ``BN``, ``LN`` or None; selects
                the normalization used inside the projector. Default: None.

        Shape:
            - Output of ``forward``/``encode``: :math:`(N, C_{out})`.

        Note:
            ``out_net`` is ``Linear(n_dim * 4, out_dim)`` and is applied
            along the time axis, so the sequence length after the stride-2
            input convolution must equal ``n_dim * 4``.
        """
        super(WaveNet, self).__init__()

        assert project_norm in ['BN', 'LN', None]

        self.in_dim = in_dim
        self.out_dim = out_dim
        self.n_dim = n_dim
        self.is_projector = is_projector
        self.n_layer = n_layer
        self.kernel_size = 2
        self.n_block = 4

        # Input layer (attribute assignment already registers the module).
        self.in_net = nn.Conv1d(
            in_dim, n_dim, 7, stride=2, padding=3, dilation=1)

        # WaveNet stack: one filter/gate/residual/skip convolution per layer.
        self.dilations = []
        self.bias = False
        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()
        self.residual_convs = nn.ModuleList()
        self.skip_convs = nn.ModuleList()

        for _ in range(self.n_block):
            new_dilation = 1
            for _ in range(n_layer):
                self.dilations.append(new_dilation)
                self.filter_convs.append(nn.Conv1d(in_channels=self.n_dim,
                                                   out_channels=self.n_dim,
                                                   kernel_size=self.kernel_size,
                                                   dilation=new_dilation,
                                                   bias=self.bias))

                self.gate_convs.append(nn.Conv1d(in_channels=self.n_dim,
                                                 out_channels=self.n_dim,
                                                 kernel_size=self.kernel_size,
                                                 dilation=new_dilation,
                                                 bias=self.bias))

                self.residual_convs.append(nn.Conv1d(in_channels=self.n_dim,
                                                     out_channels=self.n_dim,
                                                     kernel_size=1,
                                                     bias=self.bias))

                self.skip_convs.append(nn.Conv1d(in_channels=self.n_dim,
                                                 out_channels=self.n_dim,
                                                 kernel_size=1,
                                                 bias=self.bias))
                new_dilation *= 2

        # Collapses the n_dim channels into a single channel.
        self.conv_end = nn.Conv1d(in_channels=self.n_dim,
                                  out_channels=1,
                                  kernel_size=1,
                                  bias=self.bias)
        self.out_net = nn.Linear(n_dim*4, out_dim)

        # Projector
        self.project_norm = project_norm
        if is_projector:
            if project_norm == 'BN':
                self.projector = nn.Sequential(
                    nn.BatchNorm1d(out_dim),
                    nn.ReLU(),
                    nn.Linear(out_dim, out_dim * 2),
                    nn.BatchNorm1d(out_dim * 2),
                    nn.ReLU(),
                    nn.Linear(out_dim * 2, out_dim)
                )
            elif project_norm == 'LN':
                self.projector = nn.Sequential(
                    nn.ReLU(),
                    nn.LayerNorm(out_dim),
                    nn.Linear(out_dim, out_dim * 2),
                    nn.ReLU(),
                    nn.LayerNorm(out_dim * 2),
                    nn.Linear(out_dim * 2, out_dim)
                )
            else:
                self.projector = nn.Sequential(
                    nn.ReLU(),
                    nn.Linear(out_dim, out_dim * 2),
                    nn.ReLU(),
                    nn.Linear(out_dim * 2, out_dim)
                )
        # Empty parameter used only to discover the module's current device.
        self.dummy = nn.Parameter(torch.empty(0))

    def wave_net(self, input):
        r"""
        Run the gated dilated-convolution stack.

        Args:
            input: Tensor of shape :math:`(N, n\_dim, L)`.

        Returns:
            Sum of the skip-connection outputs of all layers, shape
            :math:`(N, n\_dim, L)`.
        """
        x = input
        output = None
        for i in range(self.n_block * self.n_layer):
            # Left-only padding keeps the convolution causal and the length
            # unchanged.
            causal_padding = ((self.kernel_size - 1) * self.dilations[i], 0)
            padded_x = F.pad(x, causal_padding)

            # torch.tanh / torch.sigmoid replace the deprecated F.tanh and
            # F.sigmoid; the results are identical.
            filtered = torch.tanh(self.filter_convs[i](padded_x))
            gate = torch.sigmoid(self.gate_convs[i](padded_x))
            z = filtered * gate

            x = x + self.residual_convs[i](z)
            skip = self.skip_convs[i](z)
            output = skip if output is None else skip + output
        return output

    def forward(self, ts, normalize=True, to_numpy=False):
        r"""
        Encode each time series into a single vector.

        Args:
            ts: Input time series (passed through ``_normalize_t``).
            normalize (bool, optional): Forwarded to ``_normalize_t``.
            to_numpy (bool, optional): If ``True`` return a detached numpy
                array on the CPU instead of a tensor.

        Returns:
            Representation of shape :math:`(N, C_{out})`.
        """
        device = self.dummy.device

        ts = _normalize_t(ts, normalize)
        ts = ts.to(device, dtype=torch.float32)

        ts_emb = self.in_net(ts)
        ts_emb = F.relu(self.wave_net(ts_emb))
        ts_emb = F.relu(self.conv_end(ts_emb))
        # conv_end leaves one channel; drop it so the time axis becomes the
        # feature axis consumed by out_net.
        ts_emb = ts_emb[:, 0, :]
        ts_emb = self.out_net(ts_emb)

        if self.is_projector:
            ts_emb = self.projector(ts_emb)

        if to_numpy:
            return ts_emb.cpu().detach().numpy()
        else:
            return ts_emb

    def encode(self, ts, normalize=True, to_numpy=False):
        r"""Alias of ``forward``."""
        return self.forward(ts, normalize=normalize, to_numpy=to_numpy)

    def encode_seq(self, ts, normalize=True, to_numpy=False):
        r"""
        Per-time-step encoding is not available for this architecture.

        The previous implementation could never succeed: it read
        ``self.projector`` even when no projector exists, reduced the
        features to a 2-D ``(N, L)`` tensor and then transposed dims 1 and
        2 of that tensor. ``conv_end`` collapses the channel axis and
        ``out_net`` consumes the time axis, so no existing layer can produce
        an ``out_dim``-sized vector per time step.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            'WaveNet.encode_seq is not supported: the network collapses the '
            'channel axis (conv_end) and consumes the time axis (out_net), '
            'so no per-time-step representation of size out_dim exists.')

def get_wavenet(model_config):
    r"""
    Build a ``WaveNet`` encoder from the ``encoder`` section of a config.

    Args:
        model_config: Mapping whose ``'encoder'`` entry provides ``in_dim``,
            ``out_dim``, ``n_layer``, ``n_dim``, ``is_projector`` and
            ``project_norm``. ``is_projector`` must be a string; it is
            treated as true only when it equals ``'true'``
            case-insensitively.

    Returns:
        Whatever ``load_pretrain`` returns for the freshly built encoder.
    """
    section = model_config['encoder']
    encoder = WaveNet(
        in_dim=int(section['in_dim']),
        out_dim=int(section['out_dim']),
        n_layer=int(section['n_layer']),
        n_dim=int(section['n_dim']),
        is_projector=section['is_projector'].lower() == 'true',
        project_norm=section['project_norm'])
    return load_pretrain(model_config['encoder'], encoder)

### TCNN

In [None]:
class GatedActivation(nn.Module):
    r"""Parameter-free gated activation: ``tanh(x) * sigmoid(x)``."""

    def __init__(self):
        super(GatedActivation, self).__init__()

    def forward(self, x):
        # The same tensor feeds both the tanh branch and the sigmoid gate.
        squashed = x.tanh()
        gate = x.sigmoid()
        return squashed * gate

class ResidualBlock(nn.Module):
    def __init__(self, n_dim, dilation):
        r"""
        Residual block of two causal dilated convolutions.

        Args:
            n_dim (int): Number of input and output channels.
            dilation (int): Dilation of both kernel-size-4 convolutions.

        Shape:
            - Input: :math:`(N, n\_dim, L)`.
            - Output: :math:`(N, n\_dim, L)`.
        """
        super(ResidualBlock, self).__init__()
        self.dilation = dilation
        self.kernel_size = 4
        self.n_dim = n_dim
        self.conv1 = nn.Conv1d(in_channels=self.n_dim, out_channels=self.n_dim, kernel_size=self.kernel_size, stride=1, dilation=self.dilation)
        self.activation = GatedActivation()
        # A single batch-norm layer is shared by both convolution stages.
        self.batchnorm = nn.BatchNorm1d(self.n_dim, momentum=0.6)
        self.conv2 = nn.Conv1d(in_channels=self.n_dim, out_channels=self.n_dim, kernel_size=self.kernel_size, stride=1, dilation=self.dilation)
        self.residual = nn.Conv1d(in_channels=self.n_dim, out_channels=self.n_dim, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        residual = self.residual(x)
        # Left-only padding of dilation * (kernel_size - 1) keeps each
        # convolution causal and length-preserving.
        causal_padding = (self.dilation * (self.kernel_size - 1), 0)

        # Stage 1: previously conv1 was fed the *unpadded* input, which
        # shortened (or crashed on) the sequence.
        out = self.conv1(F.pad(x, causal_padding))
        out = self.batchnorm(out)
        out = self.activation(out)

        # Stage 2: previously the raw input was padded again here, which
        # threw away the stage-1 result entirely.
        out = self.conv2(F.pad(out, causal_padding))
        out = self.batchnorm(out)
        out = self.activation(out)

        return out + residual

class TemporalConvNet(nn.Module):
    def __init__(self, in_dim=1, out_dim=128, n_layer = 1, n_dim=64, norm=None,
                 is_projector=True, project_norm=None):
        r"""
        Temporal convolutional encoder made of stacked ``ResidualBlock`` s.

        Args:
            in_dim (int, optional): Number of dimension for the input time
                series. Default: 1.
            out_dim (int, optional): Output size of ``out_net``. Default: 128.
            n_layer (int, optional): Number of groups of residual blocks;
                every group holds two blocks with dilation 2 and 4.
                Default: 1.
            n_dim (int, optional): Number of channels of the intermediate
                representation. Default: 64.
            norm: Accepted but not used by this class.
            is_projector (bool, optional): If set to ``False``, no projector
                is built or applied. Default: ``True``.
            project_norm (string, optional): ``BN``, ``LN`` or None; selects
                the normalization used inside the projector. Default: None.

        Note:
            ``out_net`` is ``Linear(n_dim * 4, out_dim)`` applied to the last
            axis of the residual-stack output.
            NOTE(review): that last axis is the time axis and the channel
            axis is kept, so ``forward`` yields a 3-D tensor rather than one
            vector per series -- confirm this is intended.
        """
        super(TemporalConvNet, self).__init__()

        assert project_norm in ['BN', 'LN', None]
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.n_dim = n_dim
        self.is_projector = is_projector
        self.n_layer = n_layer
        self.kernel_size = 2
        self.n_block = 2

        # Input layer
        self.in_net = nn.Conv1d(
            in_dim, n_dim, 7, stride=2, padding=3, dilation=1)

        # Residual stack: within each group the dilation is 2, 4, ...
        self.layers = nn.ModuleList()
        self.dilations = []
        for _ in range(self.n_layer):
            for level in range(1, self.n_block + 1):
                dilation = 2 ** level
                self.dilations.append(dilation)
                self.layers.append(ResidualBlock(self.n_dim, dilation))

        self.out_net = nn.Linear(n_dim*4, out_dim)

        # Projector
        self.project_norm = project_norm
        if is_projector:
            hidden_dim = out_dim * 2
            if project_norm == 'BN':
                stages = [
                    nn.BatchNorm1d(out_dim),
                    nn.ReLU(),
                    nn.Linear(out_dim, hidden_dim),
                    nn.BatchNorm1d(hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, out_dim),
                ]
            elif project_norm == 'LN':
                stages = [
                    nn.ReLU(),
                    nn.LayerNorm(out_dim),
                    nn.Linear(out_dim, hidden_dim),
                    nn.ReLU(),
                    nn.LayerNorm(hidden_dim),
                    nn.Linear(hidden_dim, out_dim),
                ]
            else:
                stages = [
                    nn.ReLU(),
                    nn.Linear(out_dim, hidden_dim),
                    nn.ReLU(),
                    nn.Linear(hidden_dim, out_dim),
                ]
            self.projector = nn.Sequential(*stages)
        # Empty parameter used only to discover the module's current device.
        self.dummy = nn.Parameter(torch.empty(0))

    def forward(self, ts, normalize=True, to_numpy=False):
        r"""
        Run the encoder.

        Args:
            ts: Input time series (passed through ``_normalize_t``).
            normalize (bool, optional): Forwarded to ``_normalize_t``.
            to_numpy (bool, optional): If ``True`` return a detached numpy
                array on the CPU instead of a tensor.
        """
        ts = _normalize_t(ts, normalize)
        ts = ts.to(self.dummy.device, dtype=torch.float32)

        ts_emb = self.in_net(ts)
        n_stage = self.n_block * self.n_layer
        for idx in range(n_stage):
            ts_emb = self.layers[idx](ts_emb)

        ts_emb = self.out_net(ts_emb)
        if self.is_projector:
            ts_emb = self.projector(ts_emb)

        if not to_numpy:
            return ts_emb
        return ts_emb.cpu().detach().numpy()

    def encode(self, ts, normalize=True, to_numpy=False):
        r"""Alias of ``forward``."""
        return self.forward(ts, normalize=normalize, to_numpy=to_numpy)



def get_tcn(model_config):
    r"""
    Build a ``TemporalConvNet`` encoder from the ``encoder`` section of a
    config.

    Args:
        model_config: Mapping whose ``'encoder'`` entry provides ``in_dim``,
            ``out_dim``, ``n_layer``, ``n_dim``, ``is_projector`` and
            ``project_norm``. ``is_projector`` must be a string; it is
            treated as true only when it equals ``'true'``
            case-insensitively.

    Returns:
        Whatever ``load_pretrain`` returns for the freshly built encoder.
    """
    section = model_config['encoder']
    encoder = TemporalConvNet(
        in_dim=int(section['in_dim']),
        out_dim=int(section['out_dim']),
        n_layer=int(section['n_layer']),
        n_dim=int(section['n_dim']),
        is_projector=section['is_projector'].lower() == 'true',
        project_norm=section['project_norm'])
    return load_pretrain(model_config['encoder'], encoder)

### Loss Functions

In [None]:
# Hierarchical Loss
class HierContrastLoss(nn.Module):
    r"""
    Module wrapper around ``hierarchical_contrastive_loss``.

    Args:
        alpha (float, optional): Forwarded as ``alpha``. Default: 0.5.
        temporal_unit (int, optional): Forwarded as ``temporal_unit``.
            Default: 0.
    """

    def __init__(self, alpha=0.5, temporal_unit=0):
        super(HierContrastLoss, self).__init__()
        self.alpha = alpha
        self.temporal_unit = temporal_unit

    def forward(self, data_i, data_j):
        # Dims 1 and 2 of both views are swapped before the loss is computed.
        view_i = data_i.transpose(1, 2)
        view_j = data_j.transpose(1, 2)
        return hierarchical_contrastive_loss(
            view_i, view_j,
            alpha=self.alpha, temporal_unit=self.temporal_unit)


def hierarchical_contrastive_loss(z1, z2, alpha=0.5, temporal_unit=0):
    r"""
    Average a mix of instance and temporal contrastive losses over a pyramid
    of max-pooled copies of the inputs.

    At every level the instance loss is weighted by ``alpha`` and the
    temporal loss by ``1 - alpha``; the temporal term is only added from
    level ``temporal_unit`` onwards. Dim 1 of both inputs is halved by max
    pooling after each level until its size is 1, where only the instance
    term is added.

    Args:
        z1, z2: The two views; dim 1 is the axis that gets pooled.
        alpha (float, optional): Weight of the instance term. Default: 0.5.
        temporal_unit (int, optional): First level at which the temporal
            term is used. Default: 0.

    Returns:
        Scalar tensor: accumulated loss divided by the number of levels.
    """
    def _halve(z):
        # max_pool1d pools the last axis, hence the transposes.
        return F.max_pool1d(z.transpose(1, 2), kernel_size=2).transpose(1, 2)

    temporal_weight = 1 - alpha
    total = torch.tensor(0., device=z1.device)
    level = 0

    while z1.size(1) > 1:
        if alpha != 0:
            total += alpha * instance_contrastive_loss(z1, z2)
        if level >= temporal_unit and temporal_weight != 0:
            total += temporal_weight * temporal_contrastive_loss(z1, z2)
        level += 1
        z1, z2 = _halve(z1), _halve(z2)

    # Coarsest level: a single step is left, so only instances are compared.
    if z1.size(1) == 1:
        if alpha != 0:
            total += alpha * instance_contrastive_loss(z1, z2)
        level += 1
    return total / level


def instance_contrastive_loss(z1, z2):
    r"""
    Contrast each instance against the other instances of the batch,
    independently for every index along dim 1.

    Args:
        z1, z2: Two views of shape ``B x T x C``.

    Returns:
        Scalar tensor; zero when the batch holds a single instance.
    """
    n_batch = z1.size(0)
    if n_batch == 1:
        return z1.new_tensor(0.)

    stacked = torch.cat([z1, z2], dim=0).transpose(0, 1)  # T x 2B x C
    sim = stacked @ stacked.transpose(1, 2)  # T x 2B x 2B

    # Drop the self-similarity diagonal: entries below it keep their column,
    # entries above it shift one column to the left.  T x 2B x (2B-1)
    below = sim.tril(diagonal=-1)[:, :, :-1]
    above = sim.triu(diagonal=1)[:, :, 1:]
    neg_log_prob = -F.log_softmax(below + above, dim=-1)

    idx = torch.arange(n_batch, device=z1.device)
    # Positive pairs are (i, B + i) and (B + i, i); the "- 1" accounts for
    # the removed diagonal column.
    first_to_second = neg_log_prob[:, idx, n_batch + idx - 1].mean()
    second_to_first = neg_log_prob[:, n_batch + idx, idx].mean()
    return (first_to_second + second_to_first) / 2


def temporal_contrastive_loss(z1, z2):
    r"""
    Contrast each index along dim 1 against the other indices of the same
    instance.

    Args:
        z1, z2: Two views of shape ``B x T x C``.

    Returns:
        Scalar tensor; zero when ``T`` is 1.
    """
    n_step = z1.size(1)
    if n_step == 1:
        return z1.new_tensor(0.)

    stacked = torch.cat([z1, z2], dim=1)  # B x 2T x C
    sim = stacked @ stacked.transpose(1, 2)  # B x 2T x 2T

    # Drop the self-similarity diagonal: entries below it keep their column,
    # entries above it shift one column to the left.  B x 2T x (2T-1)
    below = sim.tril(diagonal=-1)[:, :, :-1]
    above = sim.triu(diagonal=1)[:, :, 1:]
    neg_log_prob = -F.log_softmax(below + above, dim=-1)

    step = torch.arange(n_step, device=z1.device)
    # Positive pairs are (t, T + t) and (T + t, t); the "- 1" accounts for
    # the removed diagonal column.
    first_to_second = neg_log_prob[:, step, n_step + step - 1].mean()
    second_to_first = neg_log_prob[:, n_step + step, step].mean()
    return (first_to_second + second_to_first) / 2

# MixUp Loss
class MixupLoss(nn.Module):
    r"""
    Contrastive loss for mixup-augmented embeddings.

    Args:
        tau (float, optional): Temperature dividing the logits. Default: 0.5.
    """

    def __init__(self, tau=0.5):
        super(MixupLoss, self).__init__()
        self.tau = tau

    def forward(self, ts_emb_0, ts_emb_1, ts_emb_aug, lam):
        r"""
        Args:
            ts_emb_0, ts_emb_1: Embeddings of the two source batches.
            ts_emb_aug: Embeddings of the mixed batch.
            lam: Weight of the first source in the soft targets; the second
                source receives ``1 - lam``.

        Returns:
            Scalar soft-label cross entropy.
        """
        n_sample = ts_emb_0.size()[0]
        device = ts_emb_0.device

        source_0 = F.normalize(ts_emb_0)
        source_1 = F.normalize(ts_emb_1)
        mixed = F.normalize(ts_emb_aug)

        # Soft targets: sample k of the mixed batch should match sample k of
        # source 0 with weight lam and sample k of source 1 with 1 - lam.
        identity = torch.eye(n_sample)
        labels = torch.cat((lam * identity, (1 - lam) * identity), 1)
        labels = labels.to(device)

        logits = torch.cat(
            (torch.mm(mixed, source_0.T), torch.mm(mixed, source_1.T)), 1)
        return _cross_entropy(logits / self.tau, labels)


def _cross_entropy(logits, labels):
    logits = nn.LogSoftmax(dim=1)(logits)
    loss = torch.mean(torch.sum(-labels * logits, 1))
    return loss

#NTXent Loss

def _dot_similarity(x):
    return torch.mm(x, x.T)


def _cosine_similarity(x):
    return torch.nn.CosineSimilarity(dim=-1)(
        x.unsqueeze(1), x.unsqueeze(0))


def _get_mask(batch_size, device):
    diag_0 = np.eye(2 * batch_size)
    diag_1 = np.eye(2 * batch_size, k=-batch_size)
    diag_2 = np.eye(2 * batch_size, k=batch_size)

    mask = diag_0 + diag_1 + diag_2
    mask = 1 - mask
    mask = torch.from_numpy(mask)
    mask = mask.to(device, dtype=torch.bool)
    return mask


class NTXentLossPoly(nn.Module):
    def __init__(self, temperature=0.2, is_cosine=True):
        r"""
        modified from the implementation of NTXentLoss_poly from
        https://github.com/mims-harvard/TFC-pretraining

        Args:
            temperature (float, optional): Divides the logits. Default: 0.2.
            is_cosine (bool, optional): Use cosine similarity when ``True``,
                dot-product similarity otherwise. Default: ``True``.
        """
        super(NTXentLossPoly, self).__init__()
        self.temperature = temperature
        self.is_cosine = is_cosine

    def _get_similarity(self, data):
        if self.is_cosine:
            return _cosine_similarity(data)
        return _dot_similarity(data)

    def forward(self, data_i, data_j):
        r"""
        Args:
            data_i, data_j: Two views with one row per sample; row ``k`` of
                one view is the positive of row ``k`` of the other.

        Returns:
            Scalar: mean cross entropy plus the polynomial correction term.
        """
        n_pair = data_i.size()[0]
        n_row = 2 * n_pair
        device = data_i.device

        similarity = self._get_similarity(
            torch.cat((data_i, data_j, ), dim=0))

        # Positives sit on the two diagonals offset by the batch size.
        positive = torch.cat(
            (torch.diag(similarity, n_pair), torch.diag(similarity, -n_pair), ),
            dim=0).unsqueeze(1)

        # Everything except self-pairs and positives is a negative.
        negative = similarity[_get_mask(n_pair, device)].view(
            n_row, n_row - 2)

        # Column 0 holds the positive, so the target class is always 0.
        logits = torch.cat((positive, negative), dim=1) / self.temperature
        labels = torch.zeros(n_row).to(device, dtype=torch.long)
        cross_entropy = F.cross_entropy(logits, labels, reduction='sum')

        one_hot = torch.zeros((n_row, n_row - 1))
        one_hot[:, 0] = 1
        one_hot = one_hot.to(device)
        poly_loss = torch.mean(one_hot * F.softmax(logits, dim=-1))

        return (cross_entropy / n_row +
                n_pair * (1 / n_pair - poly_loss))


class NTXentLoss(nn.Module):
    def __init__(self, temperature=0.2, is_cosine=True):
        r"""
        modified from the implementation of NTXentLoss from
        https://github.com/mims-harvard/TFC-pretraining

        Args:
            temperature (float, optional): Divides the logits. Default: 0.2.
            is_cosine (bool, optional): Use cosine similarity when ``True``,
                dot-product similarity otherwise. Default: ``True``.
        """
        super(NTXentLoss, self).__init__()
        self.temperature = temperature
        self.is_cosine = is_cosine

    def _get_similarity(self, data):
        if self.is_cosine:
            return _cosine_similarity(data)
        return _dot_similarity(data)

    def forward(self, data_i, data_j):
        r"""
        Args:
            data_i, data_j: Two views with one row per sample; row ``k`` of
                one view is the positive of row ``k`` of the other.

        Returns:
            Scalar: cross entropy averaged over the ``2 * batch`` rows.
        """
        n_pair = data_i.size()[0]
        n_row = 2 * n_pair
        device = data_i.device

        similarity = self._get_similarity(
            torch.cat((data_i, data_j, ), dim=0))

        # Positives sit on the two diagonals offset by the batch size.
        positive = torch.cat(
            (torch.diag(similarity, n_pair), torch.diag(similarity, -n_pair), ),
            dim=0).unsqueeze(1)

        # Everything except self-pairs and positives is a negative.
        negative = similarity[_get_mask(n_pair, device)].view(
            n_row, n_row - 2)

        # Column 0 holds the positive, so the target class is always 0.
        logits = torch.cat((positive, negative), dim=1) / self.temperature
        labels = torch.zeros(n_row).to(device, dtype=torch.long)
        summed = F.cross_entropy(logits, labels, reduction='sum')
        return summed / n_row


### Model Script

In [None]:
def load_pretrain(model_config, encoder):
    r"""
    Optionally load pre-trained weights into ``encoder``.

    Args:
        model_config: Mapping that may contain ``'pre_train_model'``, the
            path of a checkpoint holding a ``'model_state_dict'`` entry.
        encoder: Module whose ``load_state_dict`` receives those weights.

    Returns:
        The same ``encoder`` object, with weights loaded when a checkpoint
        path is configured and untouched otherwise.
    """
    if 'pre_train_model' in model_config:
        # NOTE: torch.load unpickles the file; only point this at
        # checkpoints from a trusted source.
        checkpoint = torch.load(
            model_config['pre_train_model'], map_location='cpu')
        encoder.load_state_dict(checkpoint['model_state_dict'])
    return encoder

def get_model(model_config):
    r"""
    Assemble a model from the substrings found in the configured model name.

    The name stored at ``model_config['model']['model_name']`` is scanned
    for, in this order:

    1. an encoder keyword (``rnnet``, ``resnet1d``, ``transform``,
       ``wavenet``) -- required, the first match wins;
    2. an optional pre-training wrapper keyword (``timefreq``, ``ts2vec``,
       ``mixup``, ``simclr``, ``timeclr``) -- the first match wins;
    3. the optional keyword ``classifier``, which wraps the result in a
       classifier and copies ``model_config['n_class']`` into the
       ``classifier`` section.

    When the encoder's ``in_dim`` is ``None`` it is filled in from
    ``model_config['in_dim']``.

    Args:
        model_config: Nested, mutable configuration mapping. It is updated
            in place as described above.

    Returns:
        The assembled model.

    Raises:
        Exception: If the model name contains no known encoder keyword.
    """
    model_name = model_config['model']['model_name']
    print(f'get model for {model_name}')

    # Identity comparison is the idiomatic (and safest) test for None.
    if model_config['encoder']['in_dim'] is None:
        model_config['encoder']['in_dim'] = model_config['in_dim']

    # NOTE(review): get_tcn is defined in this file but no branch below
    # selects it -- confirm whether a 'tcn' keyword should be supported.
    if 'rnnet' in model_name:
        print('  get rnnet')
        encoder = get_rnnet(model_config)
    elif 'resnet1d' in model_name:
        print('  get resnet1d')
        encoder = get_resnet1d(model_config)
    elif 'transform' in model_name:
        print('  get transform')
        encoder = get_transformer(model_config)
    elif 'wavenet' in model_name:
        print('  get wavenet')
        encoder = get_wavenet(model_config)
    else:
        raise Exception(f'unknown encoder name: {model_name}')

    # Optional pre-training wrapper around the encoder.
    if 'timefreq' in model_name:
        print('  get timefreq')
        encoder = get_timefreq(model_config, encoder)
    elif 'ts2vec' in model_name:
        print('  get ts2vec')
        encoder = get_ts2vec(model_config, encoder)
    elif 'mixup' in model_name:
        print('  get mixup')
        encoder = get_mixup(model_config, encoder)
    elif 'simclr' in model_name:
        print('  get simclr')
        encoder = get_simclr(model_config, encoder)
    elif 'timeclr' in model_name:
        print('  get timeclr')
        encoder = get_timeclr(model_config, encoder)

    # Optional classification head.
    if 'classifier' in model_name:
        print('  get classifier')
        model_config['classifier']['n_class'] = model_config['n_class']
        model = get_classifier(model_config, encoder)
    else:
        model = encoder
    return model


# Config Files

In [None]:
def write_config(config_dict, config_path):
    r"""
    Write a nested dictionary to ``config_path`` in INI format.

    Args:
        config_dict: Mapping of section name to a mapping of option name to
            value. Values are stored as their string representation.
        config_path: Destination file. Missing parent directories are
            created; an existing file is overwritten.
    """
    config_parser = configparser.ConfigParser()
    for section, options in config_dict.items():
        config_parser[section] = options

    # Create the target folder on first use instead of failing with
    # FileNotFoundError; an empty dirname means "current directory".
    parent_dir = os.path.dirname(config_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(config_path, 'w') as f:
        config_parser.write(f)

def get_dataset(ucr_dir):
    r"""
    Write the dataset configuration file ``ucr_00.config``.

    The file is placed in the ``config_files`` folder under the module-level
    ``drive_path`` and contains a single ``data`` section.

    Args:
        ucr_dir: Value stored as ``data_dir``.
    """
    data_section = {
        'data_dir': ucr_dir,
        'max_len': 512,
        'seed': 666,
        # The four fractions below add up to 1.
        'pretrain_frac': 0.5,
        'train_frac': 0.3,
        'valid_frac': 0.1,
        'test_frac': 0.1,
        'is_same_length': 'True',
    }
    config_path = os.path.join(
        drive_path, 'config_files', 'ucr_00.config')
    write_config({'data': data_section}, config_path)

def get_dist_classifier():
    r"""
    Write the two distance-classifier configuration files.

    ``dist_0000.config`` uses the metric ``ed`` and ``dist_0001.config``
    uses the metric ``dtw``; both go to the ``config_files`` folder under
    the module-level ``drive_path``.
    """
    variants = (
        ('dist_0000.config', 'ed'),
        ('dist_0001.config', 'dtw'),
    )
    for file_name, metric in variants:
        config_path = os.path.join(drive_path, 'config_files', file_name)
        write_config({'model': {'metric': metric}}, config_path)


def get_gru_setting():
    r"""
    Return the encoder settings for the ``GRU`` recurrent encoder.

    ``in_dim``, ``is_projector`` and ``project_norm`` are stored as the
    strings ``'None'``/``'False'``, not as Python objects.
    """
    return {
        'in_dim': 'None',
        'out_dim': 128,
        'rnn_type': 'GRU',
        'n_layer': 2,
        'n_dim': 64,
        'is_projector': 'False',
        'project_norm': 'None',
        'dropout': 0.0,
    }


def get_lst_setting():
    """Return a fresh dict of default encoder options with ``rnn_type='LSTM'``.

    ``'None'`` and ``'False'`` are kept as strings because the dict ends up in
    an INI file; callers override individual entries before writing.
    """
    return dict(
        in_dim='None',
        out_dim=128,
        rnn_type='LSTM',
        n_layer=2,
        n_dim=64,
        is_projector='False',
        project_norm='None',
        dropout=0.0,
    )

def get_alst_setting():
    """Return a fresh dict of default encoder options with ``rnn_type='ALSTM'``.

    Same layout as the plain recurrent settings plus a ``seq_len`` entry.
    ``'None'`` and ``'False'`` are strings because the dict ends up in an INI
    file.
    """
    return dict(
        in_dim='None',
        out_dim=128,
        rnn_type='ALSTM',
        n_layer=2,
        n_dim=64,
        is_projector='False',
        project_norm='None',
        dropout=0.0,
        seq_len=512,
    )

def get_agru_setting():
    """Return a fresh dict of default encoder options with ``rnn_type='AGRU'``.

    Same layout as the plain recurrent settings plus a ``seq_len`` entry.
    ``'None'`` and ``'False'`` are strings because the dict ends up in an INI
    file.
    """
    return dict(
        in_dim='None',
        out_dim=128,
        rnn_type='AGRU',
        n_layer=2,
        n_dim=64,
        is_projector='False',
        project_norm='None',
        dropout=0.0,
        seq_len=512,
    )


def get_r1d_setting():
    """Return a fresh dict of default options for the ``r1d`` encoder.

    The config generators pair this setting with ``resnet1d`` model names.
    ``'None'`` and ``'False'`` are strings because the dict ends up in an INI
    file.
    """
    return dict(
        in_dim='None',
        out_dim=128,
        n_dim=64,
        block_type='alternative',
        norm='None',
        is_projector='False',
        project_norm='None',
    )


def get_trf_setting():
    """Return a fresh dict of default options for the ``trf`` encoder.

    The config generators pair this setting with ``transform`` model names.
    ``'None'``, ``'True'`` and ``'False'`` are strings because the dict ends up
    in an INI file.
    """
    return dict(
        in_dim='None',
        out_dim=128,
        n_layer=4,
        n_dim=64,
        n_head=8,
        norm_first='True',
        is_pos='True',
        is_projector='False',
        project_norm='None',
        dropout=0.0,
    )

def get_wvnt_setting():
    """Return a fresh dict of default options for the ``wvnt`` encoder.

    The config generators pair this setting with ``wavenet`` model names.
    ``'None'`` and ``'False'`` are strings because the dict ends up in an INI
    file.
    """
    return dict(
        in_dim='None',
        out_dim=128,
        n_dim=64,
        n_layer=4,
        norm='None',
        is_projector='False',
        project_norm='None',
    )


def get_timefreq_setting():
    """Return a fresh dict of default options for the ``[timefreq]`` section.

    Holds the jitter strength, frequency ratio and frequency strength values
    plus ``project_norm``, which callers replace with the chosen norm.
    """
    return dict(
        jitter_strength=0.1,
        freq_ratio=0.1,
        freq_strength=0.1,
        project_norm='None',
    )


def get_ts2vec_setting():
    """Return a fresh ``[ts2vec]`` section holding only the ``ph`` entry."""
    section = dict(ph=0)
    return section


def get_mixup_setting():
    """Return a fresh ``[mixup]`` section holding only the ``ph`` entry."""
    section = dict(ph=0)
    return section


def get_simclr_setting():
    """Return a fresh ``[simclr]`` section holding only the ``ph`` entry."""
    section = dict(ph=0)
    return section


def get_timeclr_setting():
    """Return a fresh ``[timeclr]`` section with ``aug_bank_ver`` set to 0."""
    return dict(aug_bank_ver=0)


def get_classifier_setting():
    """Return a fresh ``[classifier]`` section (``n_dim=64``, ``n_layer=2``)."""
    return dict(n_dim=64, n_layer=2)


def get_train_setting():
    """Return a fresh ``[train]`` section with the default optimisation values.

    Keys: learning rate ``lr``, ``batch_size``, number of epochs ``n_epoch``
    and number of checkpoints ``n_ckpt``.
    """
    return dict(
        lr=0.001,
        batch_size=64,
        n_epoch=250,
        n_ckpt=50,
    )

def get_base_classifier():
    """Write the classifier config (without pre-training) for every encoder.

    One ``<prefix>_0000.config`` file is written per encoder variant under
    ``<drive_path>/config_files``. Each file has ``[model]``, ``[classifier]``,
    ``[encoder]`` and ``[train]`` sections; the encoder's projector is enabled
    and its ``project_norm`` is set to ``'LN'``.
    """
    norm = 'LN'
    config_id = 0

    # (file prefix, model name, encoder defaults, also set encoder['norm']?)
    # Only the r1d variant overrides the encoder's own 'norm' entry; the wvnt
    # encoder keeps the 'None' default here.
    variants = (
        ('gru_c', 'classifier_rnnet', get_gru_setting, False),
        ('agru_c', 'classifier_rnnet', get_agru_setting, False),
        ('alst_c', 'classifier_rnnet', get_alst_setting, False),
        ('lst_c', 'classifier_rnnet', get_lst_setting, False),
        ('r1d_c', 'classifier_resnet1d', get_r1d_setting, True),
        ('trf_c', 'classifier_transform', get_trf_setting, False),
        ('wvnt_c', 'classifier_wavenet', get_wvnt_setting, False),
    )

    for prefix, model_name, encoder_fun, set_encoder_norm in variants:
        config_dict = {}
        config_dict['model'] = {'model_name': model_name}
        config_dict['classifier'] = get_classifier_setting()
        config_dict['encoder'] = encoder_fun()
        config_dict['train'] = get_train_setting()

        encoder = config_dict['encoder']
        encoder['is_projector'] = 'True'
        encoder['project_norm'] = norm
        if set_encoder_norm:
            encoder['norm'] = norm

        config_path = os.path.join(
            drive_path, 'config_files', f'{prefix}_{config_id:04d}.config')
        write_config(config_dict, config_path)


def _get_pretrain_setting(short_name, setting_fun, setting_str,
                          pretrain_setting):
    """Write the pre-training config for one encoder / method pair.

    Parameters
    ----------
    short_name
        Encoder abbreviation used in the file name.
    setting_fun
        Callable returning the encoder's default option dict.
    setting_str
        Encoder part of the model name (``'<method>_<setting_str>'``).
    pretrain_setting
        Two-item sequence ``[file tag, method name]``.

    The file is written to
    ``<drive_path>/config_files/<short_name>_<file tag>_0000.config``. A method
    name outside the five known ones adds no method section.
    """
    file_tag, method = pretrain_setting[0], pretrain_setting[1]
    norm = 'LN'
    config_id = 0

    encoder = setting_fun()
    if 'norm' in encoder:
        encoder['norm'] = norm
    encoder['in_dim'] = 1

    config_dict = {
        'model': {'model_name': f'{method}_{setting_str}'},
        'encoder': encoder,
    }

    # Methods that share the same handling: own section + encoder projector.
    projector_methods = {
        'ts2vec': get_ts2vec_setting,
        'mixup': get_mixup_setting,
        'simclr': get_simclr_setting,
        'timeclr': get_timeclr_setting,
    }
    if method == 'timefreq':
        # The encoder widths are halved for timefreq and the norm is set on
        # the timefreq section instead of the encoder.
        for dim_key in ('out_dim', 'n_dim'):
            if dim_key in encoder:
                encoder[dim_key] = int(encoder[dim_key] / 2)
        section = get_timefreq_setting()
        section['project_norm'] = norm
        config_dict['timefreq'] = section
    elif method in projector_methods:
        config_dict[method] = projector_methods[method]()
        encoder['is_projector'] = 'True'
        encoder['project_norm'] = norm

    train = get_train_setting()
    train['batch_size'] = 256
    config_dict['train'] = train

    config_path = os.path.join(
        drive_path, 'config_files',
        f'{short_name}_{file_tag}_{config_id:04d}.config')
    write_config(config_dict, config_path)


def _get_classifier_setting(short_name, setting_fun, setting_str,
                            pretrain_setting):
    """Write the fine-tuning (classifier) config for one encoder / method pair.

    Parameters
    ----------
    short_name
        Encoder abbreviation used in the file name and checkpoint name.
    setting_fun
        Callable returning the encoder's default option dict.
    setting_str
        Encoder part of the model name
        (``'classifier_<method>_<setting_str>'``).
    pretrain_setting
        Two-item sequence ``[file tag, method name]``.

    The method section also records ``pre_train_model``, the path of the
    pre-trained checkpoint ``<short_name>_<tag>_0000_0249.npz`` under
    ``<drive_path>/model/ucr_00_pretrain``. The file is written to
    ``<drive_path>/config_files/<short_name>_<file tag>_c_0000.config``. A
    method name outside the five known ones adds no method section.
    """
    file_tag, method = pretrain_setting[0], pretrain_setting[1]
    norm = 'LN'
    config_id = 0
    pretrain_data = 'ucr_00_pretrain'

    classifier = get_classifier_setting()
    encoder = setting_fun()
    if 'norm' in encoder:
        encoder['norm'] = norm
    encoder['in_dim'] = 1

    config_dict = {
        'model': {'model_name': f'classifier_{method}_{setting_str}'},
        'classifier': classifier,
        'encoder': encoder,
    }

    # method name -> (tag used in the checkpoint file name, section factory)
    methods = {
        'timefreq': ('tf', get_timefreq_setting),
        'ts2vec': ('tv', get_ts2vec_setting),
        'mixup': ('mu', get_mixup_setting),
        'simclr': ('sc', get_simclr_setting),
        'timeclr': ('tc', get_timeclr_setting),
    }
    if method in methods:
        ckpt_tag, section_fun = methods[method]
        section = section_fun()
        if method == 'timefreq':
            # The encoder widths are halved for timefreq and the norm is set
            # on the timefreq section instead of the encoder.
            for dim_key in ('out_dim', 'n_dim'):
                if dim_key in encoder:
                    encoder[dim_key] = int(encoder[dim_key] / 2)
            section['project_norm'] = norm
        else:
            encoder['is_projector'] = 'True'
            encoder['project_norm'] = norm
        section['pre_train_model'] = os.path.join(
            drive_path, 'model', pretrain_data,
            f'{short_name}_{ckpt_tag}_0000_0249.npz')
        config_dict[method] = section

    config_dict['train'] = get_train_setting()

    config_path = os.path.join(
        drive_path, 'config_files',
        f'{short_name}_{file_tag}_c_{config_id:04d}.config')
    write_config(config_dict, config_path)


def get_pretrain_model():
    """Write pre-training and fine-tuning configs for every method/encoder pair.

    All pre-training configs are written first (method by method, encoders in
    a fixed order), followed by all classifier configs in the same order.
    """
    pretrain_settings = [
        ['tf', 'timefreq', ],
        ['tv', 'ts2vec', ],
        ['mu', 'mixup', ],
        ['sc', 'simclr', ],
        ['tc', 'timeclr', ],
    ]

    # (short name, encoder defaults, encoder part of the model name)
    architectures = (
        ('agru', get_agru_setting, 'rnnet'),
        ('gru', get_gru_setting, 'rnnet'),
        ('lst', get_lst_setting, 'rnnet'),
        ('alst', get_alst_setting, 'rnnet'),
        ('r1d', get_r1d_setting, 'resnet1d'),
        ('trf', get_trf_setting, 'transform'),
        ('wvnt', get_wvnt_setting, 'wavenet'),
    )

    for write_one in (_get_pretrain_setting, _get_classifier_setting):
        for pretrain_setting in pretrain_settings:
            for short_name, setting_fun, setting_str in architectures:
                write_one(
                    short_name, setting_fun,
                    setting_str, pretrain_setting)



In [None]:
# Locations on the mounted drive: the UCR archive and the generated configs.
ucr_dir = os.path.join(drive_path, 'UCRArchive_2018')
config_dir = os.path.join(drive_path, 'config_files')

# write_config() does not create directories, so make sure the target exists.
path = pathlib.Path(config_dir)
path.mkdir(parents=True, exist_ok=True)

# Write every config file: data, distance baselines, classifiers trained
# without pre-training, and the pre-training / fine-tuning pairs.
get_dataset(ucr_dir)
get_dist_classifier()
get_base_classifier()
get_pretrain_model()

# Pretraining Script

### Utils

In [None]:
def parse_config(config_path, verbose=True):
    """Parse an INI-style config file into nested ``OrderedDict`` objects.

    Parameters
    ----------
    config_path
        Path of the config file.
    verbose
        When true, print the path and every ``section.option=value`` pair.

    Returns
    -------
    OrderedDict
        Maps each section name to an ``OrderedDict`` of its options. Values
        stay strings, except the literal ``'None'`` which becomes ``None``.
        Iterating the parser also yields its default section, so the result
        always contains a ``'DEFAULT'`` entry.

    Raises
    ------
    FileNotFoundError
        If ``config_path`` cannot be read.
    """
    parser = configparser.ConfigParser()
    # ConfigParser.read() skips files it cannot open and returns the list of
    # files it did read. Fail here rather than return an empty config that
    # only surfaces later as a confusing KeyError.
    if not parser.read(config_path):
        raise FileNotFoundError(
            f'config file can not be read: {config_path}')
    if verbose:
        print(config_path)
    config_dict = OrderedDict()
    for key_0 in parser:
        config_dict[key_0] = OrderedDict()
        for key_1 in parser[key_0]:
            val = parser[key_0][key_1]
            if val == 'None':
                val = None
            config_dict[key_0][key_1] = val
            if verbose:
                print(f'  {key_0}.{key_1}={val}')
    return config_dict

def _get_checkpoint(n_ckpt, n_epoch):
    if n_ckpt >= n_epoch:
        ckpts = np.arange(n_epoch)
    else:
        ckpts = np.arange(1, n_ckpt + 1)
        ckpts = n_epoch * (ckpts / n_ckpt) - 1
    ckpts = ckpts.astype(int)
    ckpts_dict = {}
    for ckpt in ckpts:
        ckpts_dict[ckpt] = 0

    last_ckpt = n_epoch - 1
    if last_ckpt not in ckpts_dict:
        ckpts_dict[last_ckpt] = 0
    return ckpts_dict

def _get_start_epoch(model_path, ckpts):
    start_epoch = 0
    for i in ckpts:
        model_path_i = model_path.format(i)
        if os.path.isfile(model_path_i):
            start_epoch = i

    model_path_i = model_path.format(start_epoch)
    if not os.path.isfile(model_path_i):
        return start_epoch

    try:
        pkl = torch.load(model_path_i, map_location='cpu')
    except:
        print(f'{model_path_i} can not be opened. It is removed!')
        os.remove(model_path_i)
        start_epoch = _get_start_epoch(model_path, ckpts)
    return start_epoch

def _timefreq_encoder_forward(model, idx_batch, data):
    """Compute the 'timefreq' pre-training loss for one batch.

    The batch ``data[idx_batch, :, :]`` is passed through ``model.forward``
    once without and once with augmentation; each pass returns four tensors
    (named ``h_t, z_t, h_f, z_f`` here -- presumably time- and
    frequency-branch outputs, confirm against the model). The loss is
    ``0.2 * (loss(h_t, h_t_aug) + loss(h_f, h_f_aug)) + loss(z_t, z_f)`` with
    ``NTXentLossPoly``; the augmented ``z`` outputs are unused.
    """
    batch = data[idx_batch, :, :]
    shared_kwargs = dict(normalize=False, to_numpy=False)

    h_t, z_t, h_f, z_f = model.forward(
        batch, is_augment=False, **shared_kwargs)
    h_t_aug, _, h_f_aug, _ = model.forward(
        batch, is_augment=True, **shared_kwargs)

    criterion = NTXentLossPoly()
    time_loss = criterion(h_t, h_t_aug)
    freq_loss = criterion(h_f, h_f_aug)
    cross_loss = criterion(z_t, z_f)
    return 0.2 * (time_loss + freq_loss) + cross_loss


def _simclr_encoder_forward(model, idx_batch, data):
    """Compute the 'simclr' pre-training loss for one batch.

    The batch ``data[idx_batch, :, :]`` is passed twice through
    ``model.forward`` with augmentation enabled and the two outputs are
    compared with ``NTXentLoss``.
    """
    batch = data[idx_batch, :, :]
    # Two separate augmented passes over the same batch.
    views = [
        model.forward(
            batch, normalize=False, to_numpy=False, is_augment=True)
        for _ in range(2)
    ]
    return NTXentLoss()(views[0], views[1])


def _timeclr_encoder_forward(model, idx_batch, data):
    """Compute the 'timeclr' pre-training loss for one batch.

    The batch ``data[idx_batch, :, :]`` is passed twice through
    ``model.forward`` with augmentation enabled and the two outputs are
    compared with ``NTXentLossPoly``.
    """
    batch = data[idx_batch, :, :]
    # Two separate augmented passes over the same batch.
    views = [
        model.forward(
            batch, normalize=False, to_numpy=False, is_augment=True)
        for _ in range(2)
    ]
    return NTXentLossPoly()(views[0], views[1])


def _ts2vec_encoder_forward(model, idx_batch, data):
    """Compute the 'ts2vec' pre-training loss for one batch.

    A single augmented ``model.forward`` call on ``data[idx_batch, :, :]``
    returns two outputs, which are compared with ``HierContrastLoss``.
    """
    batch = data[idx_batch, :, :]
    emb_left, emb_right = model.forward(
        batch, normalize=False, to_numpy=False, is_augment=True)
    return HierContrastLoss()(emb_left, emb_right)


def _mixup_encoder_forward(model, idx_batch, data):
    """Compute the 'mixup' pre-training loss for one batch.

    A single augmented ``model.forward`` call on ``data[idx_batch, :, :]``
    returns four values, which are handed unchanged and in the same order to
    ``MixupLoss``.
    """
    batch = data[idx_batch, :, :]
    forward_out = model.forward(
        batch, normalize=False, to_numpy=False, is_augment=True)
    emb_0, emb_1, emb_mixed, lam = forward_out
    return MixupLoss()(emb_0, emb_1, emb_mixed, lam)


### Pretraining Loop

In [None]:
def nn_pretrain(data, model, model_path, train_config, device):
    """Pre-train ``model`` on ``data`` with checkpointing and resume support.

    Parameters
    ----------
    data
        Training samples; ``data.shape[0]`` is the number of samples and the
        loss helpers take batches as ``data[idx_batch, :, :]``.
    model
        Model exposing ``pretrain_name`` (one of ``'timefreq'``, ``'ts2vec'``,
        ``'mixup'``, ``'simclr'``, ``'timeclr'``), which selects the loss
        helper used for every batch.
    model_path
        Format string with one positional field for the epoch index.
    train_config
        Mapping with ``'lr'``, ``'batch_size'``, ``'n_epoch'`` and
        ``'n_ckpt'``; values are converted with ``float`` / ``int``.
    device
        Device the model is moved to.

    Raises
    ------
    Exception
        If ``model.pretrain_name`` is not one of the known methods.

    Notes
    -----
    An epoch whose mean loss is not finite is repeated from the state the
    model and optimizer had before that epoch; this repeats for as long as
    the loss stays non-finite. A checkpoint is written only once the epoch
    has finished with a finite loss.
    """
    model.to(device)
    model.train()

    pretrain_name = model.pretrain_name
    lr = float(train_config['lr'])
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', verbose=True)

    n_data = data.shape[0]
    batch_size = int(train_config['batch_size'])
    n_iter = np.ceil(n_data / batch_size)
    n_iter = int(n_iter)
    n_epoch = int(train_config['n_epoch'])
    n_ckpt = int(train_config['n_ckpt'])

    ckpts = _get_checkpoint(n_ckpt, n_epoch)
    start_epoch = _get_start_epoch(model_path, ckpts)

    loss_train = np.zeros(n_epoch)
    toc_train = np.zeros(n_epoch)
    for i in range(start_epoch, n_epoch):
        if start_epoch != 0 and i == start_epoch:
            print(f'resume training from epoch {i + 1:d}')
        model_path_i = model_path.format(i)
        if os.path.isfile(model_path_i):
            # This epoch was already trained: restore its state and move on.
            print(f'loading {model_path_i}')
            pkl = torch.load(model_path_i, map_location='cpu')
            loss_train = pkl['loss_train']
            toc_train = pkl['toc_train']
            loss_epoch = loss_train[i]
            toc_epoch = toc_train[i]

            model.load_state_dict(
                pkl['model_state_dict'])
            model.to(device)
            model.train()

            optimizer.load_state_dict(
                pkl['optimizer_state_dict'])
            # The scheduler state is not part of the checkpoint, so a fresh
            # scheduler is attached to the restored optimizer.
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', verbose=True)
            print((f'epoch {i + 1}/{n_epoch}, '
                   f'loss={loss_epoch:0.4f}, '
                   f'time={toc_epoch:0.2f}.'))
            continue

        # Snapshot used to roll back when the epoch ends with a bad loss.
        model_state_dict_old = copy.deepcopy(
            model.state_dict())
        optimizer_state_dict_old = copy.deepcopy(
            optimizer.state_dict())
        while True:
            tic = time.time()
            loss_epoch = 0
            idx_order = np.random.permutation(n_data)
            for j in range(n_iter):
                optimizer.zero_grad()

                idx_start = j * batch_size
                idx_end = (j + 1) * batch_size
                if idx_end > n_data:
                    idx_end = n_data
                idx_batch = idx_order[idx_start:idx_end]

                # Pad a short final batch with samples from the start of the
                # permutation so the batch size stays constant when possible.
                batch_size_ = idx_end - idx_start
                if batch_size_ < batch_size:
                    n_fill = batch_size - batch_size_
                    idx_fill = idx_order[:n_fill]
                    idx_batch = np.concatenate(
                        (idx_batch, idx_fill, ), axis=0)

                if pretrain_name == 'timefreq':
                    loss = _timefreq_encoder_forward(
                        model, idx_batch, data)
                elif pretrain_name == 'ts2vec':
                    loss = _ts2vec_encoder_forward(
                        model, idx_batch, data)
                elif pretrain_name == 'mixup':
                    loss = _mixup_encoder_forward(
                        model, idx_batch, data)
                elif pretrain_name == 'simclr':
                    loss = _simclr_encoder_forward(
                        model, idx_batch, data)
                elif pretrain_name == 'timeclr':
                    loss = _timeclr_encoder_forward(
                        model, idx_batch, data)
                else:
                    raise Exception(
                        f'unknown pretrain name: {pretrain_name}')

                loss.backward()
                optimizer.step()
                loss_epoch += loss.item()

            loss_epoch /= n_iter
            toc_epoch = time.time() - tic

            print((f'epoch {i + 1}/{n_epoch}, '
                   f'loss={loss_epoch:0.4f}, '
                   f'time={toc_epoch:0.2f}.'))

            if np.isfinite(loss_epoch):
                break

            # Non-finite loss: discard this attempt and retry the epoch.
            print('restart model training...')
            model.load_state_dict(
                model_state_dict_old)
            model.to(device)
            model.train()

            optimizer.load_state_dict(
                optimizer_state_dict_old)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', verbose=True)

        loss_train[i] = loss_epoch
        toc_train[i] = toc_epoch

        # Saved only after the finite-loss check above, so a diverged model
        # is never written to disk and later picked up by the resume logic.
        if i in ckpts or i == n_epoch-1:
            pkl = {}
            pkl['loss_train'] = loss_train
            pkl['toc_train'] = toc_train
            pkl['model_state_dict'] = model.state_dict()
            pkl['optimizer_state_dict'] = optimizer.state_dict()
            torch.save(pkl, model_path_i)

        scheduler.step(loss_epoch)


In [None]:
def pretrain_ucr(data_config_name, method_name):
    """Pre-train the model described by ``method_name`` on the UCR data.

    Both arguments are config file names (without the ``.config`` suffix)
    under ``<drive_path>/config_files``. Checkpoints are written to
    ``<drive_path>/model/<data_config_name>_pretrain`` as
    ``<method_name>_<epoch>.npz`` with a four-digit epoch index. The
    module-level ``device`` is used for training.
    """
    config_dir = os.path.join(drive_path, 'config_files')
    data_config = parse_config(
        os.path.join(config_dir, f'{data_config_name}.config'))
    method_config = parse_config(
        os.path.join(config_dir, f'{method_name}.config'))

    model_dir = os.path.join(
        drive_path, 'model', f'{data_config_name}_pretrain')
    pathlib.Path(model_dir).mkdir(parents=True, exist_ok=True)

    # The epoch placeholder is filled in by the training loop.
    epoch_field = '{0:04d}'
    model_path = os.path.join(
        model_dir, f'{method_name}_{epoch_field}.npz')

    dataset = load_dataset(data_config)
    # Input dimensionality and series length are taken from the loaded data.
    method_config['in_dim'] = dataset.shape[1]
    method_config['data_len'] = dataset.shape[2]
    nn_pretrain(
        dataset, get_model(method_config), model_path,
        method_config['train'], device)


In [None]:
# Method config names have the form '<encoder>_<method>_<id>', for example:
#   SimCLR + transformer -> trf_sc_0000
#   SimCLR + ResNet1D    -> r1d_sc_0000
# NOTE(review): 'tcn_sc_0000' is not written by the config generators in the
# "Config Files" section; presumably it is created elsewhere -- confirm.
# The recorded run of this cell ended with a CUDA out-of-memory error.

data_name = 'ucr_00'
method_name = 'tcn_sc_0000'
pretrain_ucr(data_name, method_name)

/mnt/drive/MyDrive/STAT940/config_files/ucr_00.config
  data.data_dir=/mnt/drive/MyDrive/STAT940/UCRArchive_2018
  data.max_len=512
  data.seed=666
  data.pretrain_frac=0.5
  data.train_frac=0.3
  data.valid_frac=0.1
  data.test_frac=0.1
  data.is_same_length=True
/mnt/drive/MyDrive/STAT940/config_files/tcn_sc_0000.config
  model.model_name=simclr_temporalnet
  encoder.in_dim=1
  encoder.out_dim=128
  encoder.n_layer=1
  encoder.n_dim=64
  encoder.norm=LN
  encoder.is_projector=True
  encoder.project_norm=LN
  simclr.ph=0
  train.lr=0.001
  train.batch_size=256
  train.n_epoch=250
  train.n_ckpt=100
get model for simclr_temporalnet
  get temporalnet
  get simclr




OutOfMemoryError: CUDA out of memory. Tried to allocate 8.00 GiB. GPU 0 has a total capacity of 15.77 GiB of which 6.63 GiB is free. Process 17307 has 9.14 GiB memory in use. Of the allocated memory 8.73 GiB is allocated by PyTorch, and 38.78 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

# Fine-tuning / Testing Models

### Utils

In [None]:
def nn_train(dataset, model, model_path,
             train_config, device):
    """Train a classifier on the training split with checkpoint/resume support.

    Parameters
    ----------
    dataset
        Mapping with ``'data_train'`` and ``'label_train'``; the data is
        passed through ``_normalize_dataset`` before training.
    model
        Model whose ``forward(batch, normalize=False, to_numpy=False)`` output
        is used as logits for a cross-entropy loss.
    model_path
        Format string with one positional field for the epoch index.
    train_config
        Mapping with ``'lr'``, ``'batch_size'``, ``'n_epoch'`` and
        ``'n_ckpt'``; values are converted with ``float`` / ``int``.
    device
        Device the model and labels are moved to.
    """
    samples = dataset['data_train']
    targets = dataset['label_train']
    samples = _normalize_dataset(samples)

    model.to(device)
    model.train()

    optimizer = torch.optim.AdamW(
        model.parameters(), lr=float(train_config['lr']))

    n_data = samples.shape[0]
    batch_size = int(train_config['batch_size'])
    n_iter = int(np.ceil(n_data / batch_size))
    n_epoch = int(train_config['n_epoch'])
    n_ckpt = int(train_config['n_ckpt'])

    ckpts = _get_checkpoint(n_ckpt, n_epoch)
    start_epoch = _get_start_epoch(model_path, ckpts)

    loss_train = np.zeros(n_epoch)
    toc_train = np.zeros(n_epoch)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(start_epoch, n_epoch):
        if epoch == start_epoch and start_epoch != 0:
            print(f'resume training from epoch {epoch + 1:d}')

        ckpt_file = model_path.format(epoch)
        if os.path.isfile(ckpt_file):
            # This epoch was already trained: restore its state and move on.
            print(f'loading {ckpt_file}')
            state = torch.load(ckpt_file, map_location='cpu')
            loss_train = state['loss_train']
            toc_train = state['toc_train']
            loss_epoch = loss_train[epoch]
            toc_epoch = toc_train[epoch]

            model.load_state_dict(state['model_state_dict'])
            model.to(device)
            model.train()

            optimizer.load_state_dict(state['optimizer_state_dict'])
            print((f'epoch {epoch + 1}/{n_epoch}, '
                   f'loss={loss_epoch:0.4f}, '
                   f'time={toc_epoch:0.2f}.'))
            continue

        tic = time.time()
        loss_epoch = 0
        idx_order = np.random.permutation(n_data)
        for step in range(n_iter):
            optimizer.zero_grad()

            idx_start = step * batch_size
            idx_end = min(idx_start + batch_size, n_data)
            idx_batch = idx_order[idx_start:idx_end]

            # Pad a short final batch with samples from the start of the
            # permutation so the batch size stays constant when possible.
            n_fill = batch_size - (idx_end - idx_start)
            if n_fill > 0:
                idx_batch = np.concatenate(
                    (idx_batch, idx_order[:n_fill], ), axis=0)

            data_batch = samples[idx_batch, :, :]
            label_batch = torch.from_numpy(targets[idx_batch])
            label_batch = label_batch.to(device, dtype=torch.long)

            logit = model.forward(
                data_batch, normalize=False, to_numpy=False)

            loss = criterion(logit, label_batch)
            loss.backward()
            optimizer.step()
            loss_epoch += loss.item()

        loss_epoch /= n_iter
        toc_epoch = time.time() - tic

        loss_train[epoch] = loss_epoch
        toc_train[epoch] = toc_epoch
        if epoch in ckpts:
            torch.save(
                {
                    'loss_train': loss_train,
                    'toc_train': toc_train,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                },
                ckpt_file)

        print((f'epoch {epoch + 1}/{n_epoch}, '
               f'loss={loss_epoch:0.4f}, '
               f'time={toc_epoch:0.2f}.'))

def _get_predict(data, label, model, train_config):
    n_data = data.shape[0]

    batch_size = int(train_config['batch_size'])
    n_iter = np.ceil(n_data / batch_size)
    n_iter = int(n_iter)

    tic = time.time()
    predict = np.zeros(n_data, dtype=int)
    for i in range(n_iter):
        idx_start = i * batch_size
        idx_end = (i + 1) * batch_size
        if idx_end > n_data:
            idx_end = n_data

        data_batch = data[idx_start:idx_end, :, :]
        logit = model.forward(
            data_batch, normalize=False, to_numpy=True)
        predict[idx_start:idx_end] = np.argmax(logit, axis=1)
    predict_time = time.time() - tic
    acc = np.sum(predict == label) / n_data
    return predict, acc, predict_time

def nn_eval(dataset, model, model_path, result_path, train_config, device):
    """Evaluate every checkpoint on the validation and test splits.

    Parameters
    ----------
    dataset
        Mapping with ``'data_valid'``, ``'data_test'``, ``'label_valid'`` and
        ``'label_test'``; both data splits are passed through
        ``_normalize_dataset``.
    model
        Model that receives each checkpoint's ``'model_state_dict'``.
    model_path, result_path
        Format strings with one positional field for the epoch index
        (checkpoint to read / ``.npz`` result to write).
    train_config
        Mapping with ``'n_epoch'``, ``'n_ckpt'`` and ``'batch_size'``.
    device
        Device the model is moved to before predicting.

    Result files that already exist are reported and skipped; unreadable ones
    are deleted first so they are recomputed.
    """
    data_valid = dataset['data_valid']
    data_test = dataset['data_test']

    label_valid = dataset['label_valid']
    label_test = dataset['label_test']

    data_valid = _normalize_dataset(data_valid)
    data_test = _normalize_dataset(data_test)

    n_epoch = int(train_config['n_epoch'])
    n_ckpt = int(train_config['n_ckpt'])
    ckpts = list(_get_checkpoint(n_ckpt, n_epoch))
    ckpts = ckpts[::-1]
    # Checkpoints are visited in random order.
    shuffle(ckpts)

    # Pass 1: drop result files that can not be opened.
    for i in ckpts:
        result_path_i = result_path.format(i)
        if not os.path.isfile(result_path_i):
            continue

        try:
            # The context manager closes the archive handle right away.
            with np.load(result_path_i, allow_pickle=True):
                pass
        except Exception:
            # Not a bare `except:` -- KeyboardInterrupt / SystemExit must not
            # delete a result file that may be valid.
            print(f'{result_path_i} can not be opened. It is removed!')
            os.remove(result_path_i)

    # Pass 2: report cached results, compute the missing ones.
    for i in ckpts:
        model_path_i = model_path.format(i)
        result_path_i = result_path.format(i)
        if os.path.isfile(result_path_i):
            with np.load(result_path_i, allow_pickle=True) as result:
                acc_valid = result['acc_valid']
                acc_test = result['acc_test']
                time_valid = result['time_valid']
                time_test = result['time_test']
            print((f'{result_path_i}, {acc_valid:0.4f}, {acc_test:0.4f}, '
                  f'{time_valid+time_test:0.2f}'))
            continue

        pkl = torch.load(model_path_i, map_location='cpu')
        model.load_state_dict(
            pkl['model_state_dict'])
        model.to(device)
        model.eval()

        predict_valid, acc_valid, time_valid = _get_predict(
            data_valid, label_valid, model, train_config)
        predict_test, acc_test, time_test = _get_predict(
            data_test, label_test, model, train_config)

        np.savez(result_path_i,
                 label_valid=label_valid,
                 label_test=label_test,
                 predict_valid=predict_valid,
                 predict_test=predict_test,
                 acc_valid=acc_valid,
                 acc_test=acc_test,
                 time_valid=time_valid,
                 time_test=time_test)
        print((f'{result_path_i}, {acc_valid:0.4f}, {acc_test:0.4f}, '
               f'{time_valid+time_test:0.2f}'))

def get_agg_result(result_path, result_agg_path, train_config):
    """Pick the checkpoint with the best validation accuracy and store it.

    Parameters
    ----------
    result_path
        Format string with one positional field for the epoch index,
        pointing at the per-checkpoint ``.npz`` result files.
    result_agg_path
        Path of the aggregated ``.npz`` file (``acc_valid``, ``acc_test``,
        ``epoch``).
    train_config
        Mapping with ``'n_epoch'`` and, normally, ``'n_ckpt'``. Without
        ``'n_ckpt'`` every epoch is expected to have a result file.

    Returns
    -------
    The selected epoch: the stored ``epoch`` entry when the aggregate file
    already exists, otherwise the newly selected epoch. ``None`` is returned,
    and nothing is written, while any checkpoint result is still missing.
    """
    if os.path.isfile(result_agg_path):
        with np.load(result_agg_path) as pkl:
            return pkl['epoch']

    n_epoch = int(train_config['n_epoch'])
    n_ckpt = int(train_config.get('n_ckpt', n_epoch))

    acc_valid_bsf = None
    acc_test_bsf = None
    epoch_bsf = None
    # Result files exist only for checkpoint epochs, so iterate over those.
    # Looping over every epoch would bail out at the first non-checkpoint
    # epoch and never aggregate when n_ckpt < n_epoch.
    for ckpt in _get_checkpoint(n_ckpt, n_epoch):
        i = int(ckpt)
        result_path_ = result_path.format(i)
        if not os.path.isfile(result_path_):
            # Evaluation is incomplete; try again later.
            return None

        with np.load(result_path_) as pkl:
            acc_valid = pkl['acc_valid']
            acc_test = pkl['acc_test']

        # `None` marks "nothing seen yet"; a 0 sentinel would be confused
        # with a genuine accuracy of 0. Ties keep the earliest epoch.
        if acc_valid_bsf is None or acc_valid > acc_valid_bsf:
            acc_valid_bsf = acc_valid
            acc_test_bsf = acc_test
            epoch_bsf = i

    if epoch_bsf is None:
        return None

    np.savez(result_agg_path,
             acc_valid=acc_valid_bsf,
             acc_test=acc_test_bsf,
             epoch=epoch_bsf)
    return epoch_bsf


### Fine-tuning Loop

In [None]:
def fine_tune(data_config_name, method_name, dataset_order = 1):
    """Train, evaluate and aggregate one method on every UCR dataset.

    Both configs are read from ``<drive_path>/config_files/<name>.config``.
    For each dataset name returned by ``get_ucr_data_names()`` the output
    directories ``result``, ``result_agg`` and ``model`` under
    ``drive_path`` are created, and the dataset is skipped when its
    aggregate result file already exists. Otherwise a model is built from
    the method config (extended with the dataset's ``n_dim``, ``n_class``
    and ``data_len``) and passed through ``nn_train``, ``nn_eval`` and
    ``get_agg_result``.

    Args:
        data_config_name: Base name of the data config file; also used as
            the prefix of the per-dataset output directories.
        method_name: Base name of the method config file; also used as the
            prefix of the result and model file names.
        dataset_order: ``-1`` processes the datasets in reverse order,
            ``0`` in shuffled order; any other value keeps the order
            returned by ``get_ucr_data_names()``.

    Returns:
        None.
    """
    config_dir = os.path.join(drive_path, 'config_files')
    data_config = parse_config(
        os.path.join(config_dir, f'{data_config_name}.config'))
    method_config = parse_config(
        os.path.join(config_dir, f'{method_name}.config'))

    dataset_names = get_ucr_data_names()
    if dataset_order == -1:
        dataset_names = dataset_names[::-1]
    elif dataset_order == 0:
        shuffle(dataset_names)

    # Only CUDA is considered here; everything else runs on the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Left as a literal placeholder in the file names below; the epoch
    # index is filled in later via str.format.
    epoch_fmt = '{0:04d}'
    for dataset_name in dataset_names:
        run_tag = f'{data_config_name}_{dataset_name}'
        result_dir, result_agg_dir, model_dir = (
            os.path.join(drive_path, kind, run_tag)
            for kind in ('result', 'result_agg', 'model'))

        # Directories are created even for datasets that end up skipped.
        for out_dir in (result_dir, result_agg_dir, model_dir):
            pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)

        result_path = os.path.join(
            result_dir, f'{method_name}_{epoch_fmt}.npz')
        result_agg_path = os.path.join(
            result_agg_dir, f'{method_name}.npz')
        model_path = os.path.join(
            model_dir, f'{method_name}_{epoch_fmt}.npz')

        # An existing aggregate file marks this dataset as finished.
        if os.path.isfile(result_agg_path):
            continue

        dataset = load_ucr_dataset(dataset_name, data_config)

        # Work on a copy so dataset-specific fields do not leak into the
        # config used for the next dataset.
        method_config_ = copy.deepcopy(method_config)
        method_config_['in_dim'] = dataset['n_dim']
        method_config_['n_class'] = dataset['n_class']
        method_config_['data_len'] = dataset['data_len']

        model = get_model(method_config_)
        train_config = method_config_['train']
        nn_train(dataset, model, model_path, train_config, device)
        nn_eval(dataset, model, model_path, result_path, train_config, device)
        get_agg_result(result_path, result_agg_path, train_config)

In [None]:
# Base names of the config files to use; fine_tune resolves them to
# <drive_path>/config_files/<name>.config.
data_name = 'ucr_00'
method_name = 'alst_sc_c_0000'
# Runs with the default dataset_order (1), i.e. datasets in listed order.
fine_tune(data_name, method_name)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
epoch 165/250, loss=0.0001, time=0.02.
epoch 166/250, loss=0.0001, time=0.02.
epoch 167/250, loss=0.0001, time=0.02.
epoch 168/250, loss=0.0001, time=0.02.
epoch 169/250, loss=0.0001, time=0.02.
epoch 170/250, loss=0.0001, time=0.02.
epoch 171/250, loss=0.0001, time=0.02.
epoch 172/250, loss=0.0001, time=0.02.
epoch 173/250, loss=0.0001, time=0.02.
epoch 174/250, loss=0.0001, time=0.02.
epoch 175/250, loss=0.0001, time=0.02.
epoch 176/250, loss=0.0001, time=0.02.
epoch 177/250, loss=0.0001, time=0.02.
epoch 178/250, loss=0.0001, time=0.02.
epoch 179/250, loss=0.0001, time=0.02.
epoch 180/250, loss=0.0001, time=0.02.
epoch 181/250, loss=0.0001, time=0.02.
epoch 182/250, loss=0.0001, time=0.02.
epoch 183/250, loss=0.0000, time=0.02.
epoch 184/250, loss=0.0001, time=0.02.
epoch 185/250, loss=0.0001, time=0.02.
epoch 186/250, loss=0.0001, time=0.02.
epoch 187/250, loss=0.0000, time=0.02.
epoch 188/250, loss=0.0001, time=0.02.