In [1]:
import numpy as np
from scipy.spatial import distance
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

In [2]:
# Seed NumPy's legacy global RNG once, up front, so that any NumPy-based
# randomness later in the notebook is repeatable across reruns.
seed = 0
np.random.seed(seed=seed)

# Prep Train Data

In [3]:
# Training features: every column of the file except column 0
# (column 0 is loaded separately as y_train).
X_train = np.loadtxt(fname='data/ECG200_TRAIN')[:, 1:]

In [4]:
# Training labels: only column 0 of the training file.
y_train = np.loadtxt(fname='data/ECG200_TRAIN', usecols=0)

In [5]:
# Sanity check: show the first training series.
print(X_train[0, :])

[ 0.50205548  0.54216265  0.72238348  1.4288852   2.1365158   2.281149
  1.9362737   1.46889     1.0088451   0.38028224 -0.29677967 -0.51392868
 -0.25564469 -0.10720254 -0.28782655 -0.41800901 -0.31916313 -0.2603787
 -0.35035721 -0.50548599 -0.71088709 -0.82391982 -0.89970154 -1.1539497
 -1.2298306  -1.044091   -1.2020312  -1.3921949  -1.1301083  -1.1798666
 -1.6492718  -1.7265754  -1.6083704  -1.6628022  -1.6506724  -1.6973094
 -1.8386968  -1.8025962  -1.7805361  -1.8251665  -1.6447633  -1.4238097
 -1.3921949  -1.3604156  -1.2001781  -0.91863234 -0.68591581 -0.66794346
 -0.51272154 -0.10169069  0.06395426  0.08261431  0.23760718  0.17479318
  0.12320539  0.5033942   0.6838702   0.47499476  0.53279711  0.72354995
  0.6644198   0.64793559  0.75705403  0.7320716   0.62021499  0.60629677
  0.67795016  0.68908363  0.59880728  0.54264656  0.58180737  0.63063068
  0.6644198   0.6863178   0.65738251  0.65088944  0.72173837  0.73970645
  0.69148052  0.69522947  0.7501145   0.85384485  0.916018

In [6]:
# Sanity check: show the label that belongs to the first training series.
print(y_train[0])

-1.0


# Prep Test Data

In [7]:
# Test features: every column of the file except column 0
# (column 0 is loaded separately as y_test).
X_test = np.loadtxt(fname='data/ECG200_TEST')[:, 1:]

In [8]:
# Test labels: only column 0 of the test file.
y_test = np.loadtxt(fname='data/ECG200_TEST', usecols=0)

In [9]:
# Sanity check: show the first test series.
print(X_test[0, :])

[ 0.4248307   1.417371    2.6664697   3.2957564   2.262586    0.16504829
 -0.95887355  0.28243436  0.84212633  0.68287365  1.0811545   0.72195807
  0.1097286  -0.6959863  -0.66167509 -0.83923654 -0.9344658  -0.89456506
 -0.92344405 -1.087212   -1.112597   -1.1296782  -1.2008301  -1.5381487
 -1.5338706  -1.4671187  -2.0761806  -1.8549098  -1.9811204  -2.0394123
 -1.7922777  -1.984411   -1.945186   -1.7050863  -1.3239691  -1.1460494
 -0.86285965 -0.35569031 -0.20469707  0.18286321  0.5176051   0.61876238
  0.82189443  0.97593788  0.84240691  1.1848391   1.1726587   0.82393546
  0.88328054  0.77614415  0.87428119  0.79460665  0.55369498  0.67931716
  0.6130278   0.62255797  0.46331937  0.58537876  0.62205506  0.31377112
  0.42132057  0.63884571  0.47035729  0.29973327  0.24724335  0.31969066
  0.29951892  0.20076289  0.15224845  0.06925952  0.2903854   0.0961461
  0.3793962   0.79095196  0.60614418  0.80085582  0.69571698  0.20422311
  0.20495593 -0.04510588 -0.18384252  0.11568678 -0.098

In [10]:
# Sanity check: show the label that belongs to the first test series.
print(y_test[0])

1.0


In [11]:
# Confirm the scalar type of a single sample value (row 0, column 1).
print(type(X_test[0, 1]))

<class 'numpy.float64'>


# Train Model

In [12]:
def DTW(a, b):
    """Return the dynamic-time-warping (DTW) distance between two sequences.

    The local cost of aligning ``a[i]`` with ``b[j]`` is the distance that
    ``scipy.spatial.distance.cdist`` reports between the two values when each
    is treated as a one-feature point (its default metric is Euclidean).
    The result is the smallest cumulative local cost over all warping paths
    that start at ``(0, 0)``, end at ``(len(a) - 1, len(b) - 1)`` and advance
    by one step in ``a``, in ``b``, or in both at every move.

    Parameters
    ----------
    a, b : array_like
        Numeric sequences to compare. Each input is converted to a float
        array and flattened, so lists and tuples are accepted as well as
        NumPy arrays. The two sequences may have different lengths.

    Returns
    -------
    numpy.float64
        The cumulative cost of the cheapest warping path.
    """
    # Column vectors: cdist expects 2-D input of shape (n_points, n_features).
    a = np.asarray(a, dtype=float).reshape(-1, 1)
    b = np.asarray(b, dtype=float).reshape(-1, 1)
    an = a.shape[0]
    bn = b.shape[0]
    pointwise_distance = distance.cdist(a, b)

    # Plain ndarray instead of np.matrix: np.matrix is discouraged by NumPy
    # and its per-element indexing is needlessly slow inside the double loop.
    # Row 0 / column 0 form an infinite-cost border so that every path is
    # forced to start from the single zero-cost origin cell.
    cumdist = np.full((an + 1, bn + 1), np.inf)
    cumdist[0, 0] = 0.0

    for ai in range(an):
        for bi in range(bn):
            # Cheapest way to reach (ai, bi): from above, from the left, or
            # diagonally. np.min is kept so that NaNs propagate as before.
            minimum_cost = np.min((cumdist[ai, bi + 1],
                                   cumdist[ai + 1, bi],
                                   cumdist[ai, bi]))
            cumdist[ai + 1, bi + 1] = pointwise_distance[ai, bi] + minimum_cost

    return cumdist[an, bn]

In [13]:
# 1-nearest-neighbour classifier that compares series with the custom DTW
# metric. Only the first five training rows are used for fitting.
clf = KNeighborsClassifier(n_neighbors=1, metric=DTW, n_jobs=-1)
clf.fit(X_train[:5], y_train[:5])

# Test Model

In [14]:
# Predict labels for the first five test series only.
y_pred = clf.predict(X_test[:5])

In [15]:
# Per-class precision / recall / F1 on the same five test series that were predicted.
print(classification_report(y_test[:5], y_pred[:5]))

              precision    recall  f1-score   support

        -1.0       0.50      1.00      0.67         1
         1.0       1.00      0.75      0.86         4

    accuracy                           0.80         5
   macro avg       0.75      0.88      0.76         5
weighted avg       0.90      0.80      0.82         5

