## Dynamic Time Warping
* 1NN + bespoke distance metric

In [2]:
import pandas as pd
import numpy as np
import scipy
import matplotlib
from matplotlib import pyplot as plt
import os
import re
import time
import sklearn
from glob import glob
from sklearn.neighbors import KNeighborsClassifier
os.getcwd()

'/Users/kangshuoli/Documents/VScode_workspace/GR5398/doc'

#### Calculate the optimal distance with dynamic programming (bottom-up evaluation of the DTW recurrence)

In [5]:
def _accumulated_warping_cost(cost):
    '''
    Input:
    cost -> m * m numpy array, cost[i, j] is the local cost of aligning
            time step i of the first series with time step j of the second

    Output:
    minimal accumulated cost over all warping paths from (0, 0) to (m-1, m-1)
    using the steps (i-1, j), (i, j-1) and (i-1, j-1)
    '''
    acc = cost.copy()  # work on a copy so the caller's matrix is untouched
    n_rows, n_cols = acc.shape
    # cells on the first row / first column can only be reached along that edge
    for j in range(1, n_cols):
        acc[0, j] += acc[0, j - 1]
    for i in range(1, n_rows):
        acc[i, 0] += acc[i - 1, 0]
        for j in range(1, n_cols):
            acc[i, j] += min(acc[i - 1, j], acc[i, j - 1], acc[i - 1, j - 1])
    return acc[-1, -1]


def DTW(a, b, MTSC = True, method = "dependent"):
    '''
    Dynamic time warping distance between two multivariate time series,
    using the squared Euclidean distance as local cost.

    Input:
    a -> time series m * d numpy array,
    b -> time series m * d numpy array,
    MTSC -> default is multivariate time series (must be True),
    method -> "dependent" (DTWd, default): one warping path shared by all d
              dimensions, local cost is the squared distance between the
              d-dimensional points;
              "independent" (DTWi): each dimension is warped on its own and
              the d univariate DTW distances are summed

    Output:
    optimal distance

    Raises:
    ValueError if the series differ in length or number of dimensions, are
    not 2-D, are empty, MTSC is False, or method is unknown
    '''
    import numpy as np
    a = np.asarray(a, dtype = float)
    b = np.asarray(b, dtype = float)
    if a.shape[0] != b.shape[0]:
        raise ValueError("Time length are not the same!")
    if not MTSC:
        raise ValueError("Input should be MTS!")
    if a.ndim != 2 or b.ndim != 2:
        raise ValueError("Input should be m * d arrays!")
    if a.shape[1] != b.shape[1]:
        # without this check numpy would silently broadcast a d = 1 series
        raise ValueError("Number of dimensions are not the same!")
    if method not in ("dependent", "independent"):
        # an unknown method used to fall through and return 0 silently
        raise ValueError("method should be 'dependent' or 'independent'!")
    if a.shape[0] == 0:
        raise ValueError("Time series are empty!")

    # sq_diff[i, j, k] = (a[i, k] - b[j, k]) ** 2 for EVERY pair (i, j).
    # The pairwise cost between two different series is not symmetric in
    # (i, j), so the full matrix is needed, not a mirrored upper triangle.
    sq_diff = (a[:, None, :] - b[None, :, :]) ** 2

    if method == "dependent":
        return _accumulated_warping_cost(sq_diff.sum(axis = 2))
    # independent warping: one DTW per dimension, then add them up
    return sum(
        (_accumulated_warping_cost(sq_diff[:, :, k]) for k in range(sq_diff.shape[2])),
        0.0,
    )

def NN_DTW(mts_train, mts_test, label_train, label_test, distance = None):
    '''
    Classify every test series with the label of its nearest training series.

    Input:
    mts_train: training set of multivariate time series, indexed as
               mts_train[j, :, :] for the j-th series
    mts_test: test set of multivariate time series, indexed the same way
    label_train: labels for training set, in the same order as mts_train
                 (pandas Series with any index, or a plain sequence)
    label_test: labels for test set; only its index is used, to index the output
    distance: optional callable (test_series, train_series) -> number;
              defaults to DTW with its default settings

    Use 1NN classifier

    Output:
    label_pred: pandas Series of labels predicted by the 1NN, indexed like label_test
    '''
    import math
    if distance is None:
        distance = DTW
    # Training labels must be looked up by POSITION j. Plain [] on a pandas
    # Series is label-based, which gives wrong labels or a KeyError as soon as
    # the index is not 0..n-1 (e.g. after a shuffled train/test split).
    train_labels = label_train.iloc if hasattr(label_train, "iloc") else label_train
    label_pred = []
    for i in range(mts_test.shape[0]): # for each test data
        query = mts_test[i,:,:]
        curr_min_distance = math.inf
        min_distance_label = None
        for j in range(mts_train.shape[0]): # for each training data
            curr_distance = distance(query, mts_train[j,:,:])
            # "<=" means that on ties the LAST nearest training series wins
            if curr_distance <= curr_min_distance:
                curr_min_distance = curr_distance
                min_distance_label = train_labels[j]
        label_pred.append(min_distance_label)
    return pd.Series(label_pred, index = label_test.index)