In [1]:
import numpy as np
from fastdtw import fastdtw
from sklearn.base import BaseEstimator, ClassifierMixin

In [2]:
class KnnDtwClassifier(BaseEstimator, ClassifierMixin):
    """k-nearest-neighbour lookup over sequences using a DTW distance.

    The distance between the query sequence and every stored training
    sequence is computed with Dynamic Time Warping, and the ``n_neighbors``
    training sequences with the smallest distance are selected.

    ``fastdtw`` is used because it is fast (``pip install fastdtw``); if
    speed is not a concern, ``dtw-python`` is an alternative
    (``pip install dtw-python``).

    Note that ``predict`` returns the labels of *all* ``n_neighbors``
    nearest training sequences (nearest first), not a single voted label.
    Majority voting (e.g. over 3 neighbours) is left to the caller.

    Parameters
    ----------
    n_neighbors : int, default=1
        Number of nearest training sequences to report.
    """
    def __init__(self, n_neighbors=1):
        self.n_neighbors = n_neighbors
        # Training data; replaced wholesale by every call to fit().
        self.features = []
        self.labels = []

    def get_distance(self, x, y):
        """Return the DTW distance between sequences ``x`` and ``y``.

        ``fastdtw`` returns a tuple whose first element is the distance;
        only that element is used here.
        """
        return fastdtw(x, y)[0]

    def fit(self, X, y=None):
        """Store the training sequences ``X`` and their labels ``y``.

        Each call replaces any previously stored training data, so fitting
        the same instance twice does not duplicate samples.

        Parameters
        ----------
        X : indexable collection of sequences
            ``X[i]`` is the sequence belonging to the i-th label.
        y : iterable of labels
            Required; one label per training sequence.

        Returns
        -------
        self

        Raises
        ------
        TypeError
            If ``y`` is not provided.
        """
        if y is None:
            raise TypeError(
                "KnnDtwClassifier.fit requires labels: y must not be None")
        labels = list(y)
        # Build fresh lists rather than appending to the existing ones, so
        # a refit starts from scratch instead of accumulating old samples.
        self.features = [X[index] for index in range(len(labels))]
        self.labels = labels
        return self

    def _nearest(self, X):
        """Return ``(all_distances, indices_of_nearest)`` for one query.

        ``indices_of_nearest`` holds at most ``n_neighbors`` positions into
        the stored training data, ordered by increasing distance.
        """
        dist = np.array([self.get_distance(X, seq) for seq in self.features])
        indices = dist.argsort()[:self.n_neighbors]
        return dist, indices

    def predict(self, X):
        """Return the labels of the nearest training sequences to ``X``.

        ``X`` is a single query sequence (not a batch). The result is an
        array of up to ``n_neighbors`` labels, nearest first.
        """
        _, indices = self._nearest(X)
        return np.array(self.labels)[indices]

    def predict_ext(self, X):
        """Return ``(distances, indices)`` of the nearest training sequences.

        ``X`` is a single query sequence. ``distances`` are the DTW
        distances to the selected neighbours (ascending) and ``indices``
        are their positions in the training data passed to ``fit``.
        """
        dist, indices = self._nearest(X)
        return (dist[indices],
                indices)

In [3]:
import pandas as pd
import numpy as np
import os

# Module-level containers that load_data() fills in place.
# *_labels hold one class label per sample, *_raw the normalised flattened
# sequences, and *_hist the 20-bin histogram counts of those sequences.
train_labels, test_labels = [], []
train_data_raw, train_data_hist = [], []
test_data_raw, test_data_hist = [], []

# Utility function for normalizing numpy arrays
def normalize(v):
    """Return ``v`` divided by ``np.linalg.norm(v)``.

    When the norm is zero the input is handed back unchanged, which avoids
    a division by zero.
    """
    magnitude = np.linalg.norm(v)
    return v if magnitude == 0 else v / magnitude

# Loading all data for training and testing from TXT files
def load_data(data_dir="data"):
    """Load every sequence file under ``data_dir`` into the module-level lists.

    The directory is read as ``data_dir/<label>/<file>``: the name of each
    sub-directory is used as the class label of the files inside it. Every
    file is parsed as header-less, whitespace-delimited text, flattened to
    1-D and normalised with ``normalize``; a 20-bin histogram of the result
    is stored alongside it.

    The six module-level lists (``train_labels``, ``train_data_raw``,
    ``train_data_hist`` and their ``test_*`` counterparts) are emptied first
    and then refilled in place, so calling this function again (for example
    by re-running the cell) does not duplicate samples.

    Parameters
    ----------
    data_dir : str, default="data"
        Root directory that contains one sub-directory per class label.
    """
    # Start from a clean slate; clear() keeps the same list objects alive so
    # existing references to the module-level lists stay valid.
    for bucket in (train_labels, test_labels,
                   train_data_raw, train_data_hist,
                   test_data_raw, test_data_hist):
        bucket.clear()

    # os.listdir() order is arbitrary; sort for a reproducible sample order.
    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            # Ignore stray files sitting next to the label directories.
            continue
        for filename in sorted(os.listdir(label_dir)):
            # sep=r"\s+" is the documented equivalent of the deprecated
            # delim_whitespace=True.
            frame = pd.read_csv(os.path.join(label_dir, filename),
                                sep=r"\s+",
                                header=None)
            sequence = normalize(np.ravel(frame))
            histogram = np.histogram(sequence, bins=20)[0]

            # NOTE(review): files whose name starts with "TEST" are put into
            # the *training* lists and all other files into the test lists.
            # This mirrors the existing behaviour; confirm it is intended.
            if filename.startswith("TEST"):
                train_labels.append(label)
                train_data_raw.append(sequence)
                train_data_hist.append(histogram)
            else:
                test_labels.append(label)
                test_data_raw.append(sequence)
                test_data_hist.append(histogram)

In [4]:
# Read the data directory and fill the module-level train/test lists.
load_data()

In [5]:
# Label each raw test sequence with its single nearest training sequence
# (DTW distance, n_neighbors=1) and print it next to the expected label.
clf1 = KnnDtwClassifier(n_neighbors=1).fit(train_data_raw, train_labels)

for expected_label, sequence in zip(test_labels, test_data_raw):
    print("KnnDtwClassifier prediction for " +
          str(expected_label) + " = " + str(clf1.predict(sequence)))

KnnDtwClassifier prediction for top = ['bottom']
KnnDtwClassifier prediction for top = ['bottom']
KnnDtwClassifier prediction for top = ['bottom']
KnnDtwClassifier prediction for top = ['bottom']


In [6]:
# Same training data, but report the three nearest training sequences.
clf2 = KnnDtwClassifier(n_neighbors=3).fit(train_data_raw, train_labels)

def classify2():
    """Print the nearest neighbours of every test sequence and the voted label.

    For each sequence in ``test_data_raw`` the neighbours found by ``clf2``
    are looked up in ``train_labels`` and a vote is taken over their labels.
    The winner is the first label (scanning from nearest to farthest) whose
    running count exceeds every count seen so far.
    """
    label_lookup = np.array(train_labels)
    for true_label, sequence in zip(test_labels, test_data_raw):
        distances, neighbor_idx = clf2.predict_ext(sequence)
        neighbors = label_lookup[neighbor_idx]

        # Choose the majority label among the neighbours.
        votes = {}
        winner, winner_votes = "", -1
        for label in neighbors:
            votes[label] = votes.get(label, 0) + 1
            if votes[label] > winner_votes:
                winner, winner_votes = label, votes[label]

        print("KnnDtwClassifier neighbors for " + str(true_label) + " = " + str(neighbors))
        print("KnnDtwClassifier distances to " + str(neighbors) + " = " + str(distances))
        print("choosing =======>", winner)

# Run classify2 once and report how long it took (IPython %time line magic).
%time classify2()

KnnDtwClassifier neighbors for top = ['bottom' 'left' 'bottom']
KnnDtwClassifier distances to ['bottom' 'left' 'bottom'] = [2.12481937 2.52281546 3.0358696 ]
KnnDtwClassifier neighbors for top = ['bottom' 'left' 'bottom']
KnnDtwClassifier distances to ['bottom' 'left' 'bottom'] = [1.87337145 2.60465578 2.84903565]
KnnDtwClassifier neighbors for top = ['bottom' 'left' 'bottom']
KnnDtwClassifier distances to ['bottom' 'left' 'bottom'] = [1.53842123 2.10486484 2.48618451]
KnnDtwClassifier neighbors for top = ['bottom' 'bottom' 'bottom']
KnnDtwClassifier distances to ['bottom' 'bottom' 'bottom'] = [1.92439724 1.93794823 2.24289277]
Wall time: 529 ms
