In [44]:
import numpy as np
from fastdtw import fastdtw
from sklearn.base import BaseEstimator, ClassifierMixin
import pickle

In [45]:
class KnnDtwClassifier(BaseEstimator, ClassifierMixin):
    """pehle DTW laga kr distance nikaal re
    fir dekhre ki konsa sbse paas hai using k nearest neighbour
    mai personally 3 nearest neighbour nikaal kr usme nearest na dekh k majority pick kr raha
    fastdtw use kar re kyuki??? obviously wo fast hai isliye...
    pip install fastdtw
    or if we dont want fast then
    pip install dtw-python
    """
    def __init__(self, n_neighbors=1):
        self.n_neighbors = n_neighbors
        self.features = []
        self.labels = []

    def get_distance(self, x, y):
        return fastdtw(x, y)[0]

    def fit(self, X, y=None):
        for index, l in enumerate(y):
            self.features.append(X[index])
            self.labels.append(l)
        return self

    def predict(self, X):
        dist = np.array([self.get_distance(X, seq) for seq in self.features])
        indices = dist.argsort()[:self.n_neighbors]
        return np.array(self.labels)[indices]

    def predict_ext(self, X):
        dist = np.array([self.get_distance(X, seq) for seq in self.features])
        indices = dist.argsort()[:self.n_neighbors]
        return (dist[indices],
                indices)

In [46]:
import pandas as pd
import numpy as np
import os

train_labels = []
test_labels = []
train_data_raw = []
test_data_raw = []

# Utility function for normalizing numpy arrays
def normalize(v):
    norm = np.linalg.norm(v)
    if norm == 0:
        return v
    return v / norm

# Loading all data for training and testing from TXT files
def load_data():
    for d in os.listdir("data"):
        for f in os.listdir(os.path.join("data", d)):
            if f.startswith("TEST"):
                train_labels.append(d)
                tr = normalize(np.ravel(pd.read_csv(os.path.join("data", d, f),
                                                    delim_whitespace=True,
                                                    header=None)))
                train_data_raw.append(tr)
            else:
                test_labels.append(d)
                df=pd.read_csv(os.path.join("data", d, f),
                                                delim_whitespace=True,
                                                header=None)
                td = normalize(np.ravel(df))
                print(td)
                test_data_raw.append(td)

In [47]:
def update_pickle(clf3,train_labels):
    dir=os.path.dirname(os.path.realpath('__file__')) #getting relative path
    dirname = os.path.join(dir,'models')
    # print(dirname)
    with open('DTW_model','wb') as f:
        pickle.dump(clf3,f)
    with open('train_labels','wb') as f:
        pickle.dump(train_labels,f)

In [48]:
load_data()
clf3 = KnnDtwClassifier(3)
clf3.fit(train_data_raw, train_labels)
update_pickle(clf3,train_labels)

[0.03404223 0.18100501 0.16490315 0.03671104 0.17886996 0.15855731
 0.03439807 0.17676456 0.16519969 0.02903079 0.17166416 0.17424401
 0.02532409 0.16840226 0.17344337 0.02176567 0.16629687 0.16849123
 0.0194527  0.16333151 0.16686028 0.01936374 0.16018824 0.17136762
 0.01797003 0.15713393 0.16632652 0.0156867  0.15538438 0.16128543
 0.01666527 0.15235973 0.16175989 0.01696181 0.15069912 0.16718648
 0.01954166 0.1475262  0.17866238 0.01797003 0.14218857 0.17892927
 0.01589428 0.13702886 0.18002644 0.01601289 0.13403385 0.19141339
 0.01708042 0.12991201 0.20585465 0.01550878 0.12555295 0.20481677]
[ 0.02181419  0.19226576  0.10792758  0.02373986  0.19377018  0.11316299
  0.02542481  0.19473302  0.12832761  0.02524428  0.19325867  0.13915949
  0.02578588  0.19009937  0.13461612  0.02349915  0.18492415  0.12892938
  0.02049029  0.17830467  0.13458602  0.01724073  0.17189581  0.14794534
  0.01579648  0.16380199  0.15534712  0.01474338  0.15495597  0.15053296
  0.00992922  0.14604976  0.161

In [49]:
def open_pickle():
    with open('DTW_model','rb') as f:
        clf3=pickle.load(f)
    with open('train_labels','rb') as f:
        TL=pickle.load(f)
        return [clf3,TL]

In [50]:
t=open_pickle()
clf3=t[0]
def classify2():
    for index, t in enumerate(test_data_raw):
        res = clf3.predict_ext(t)
        nghs = np.array(train_labels)[res[1]]
        #chossing majority of 3
        d=dict()
        max=-1
        val=""
        for i in nghs:
            if i in d:
                d[i]+=1
            else:
                d[i]=1
            if d[i]>max:
                max=d[i]
                val=i
        print("KnnDtwClassifier neighbors for " + str(test_labels[index]) + " = " + str(nghs))
        print("KnnDtwClassifier distances to " + str(nghs) + " = " + str(res[0]))
        print("choosing =======>",val)

%time classify2()

KnnDtwClassifier neighbors for top = ['bottom' 'left' 'bottom']
KnnDtwClassifier distances to ['bottom' 'left' 'bottom'] = [2.12481937 2.52281546 3.0358696 ]
KnnDtwClassifier neighbors for top = ['bottom' 'left' 'bottom']
KnnDtwClassifier distances to ['bottom' 'left' 'bottom'] = [1.87337145 2.60465578 2.84903565]
KnnDtwClassifier neighbors for top = ['bottom' 'left' 'bottom']
KnnDtwClassifier distances to ['bottom' 'left' 'bottom'] = [1.53842123 2.10486484 2.48618451]
KnnDtwClassifier neighbors for top = ['bottom' 'bottom' 'bottom']
KnnDtwClassifier distances to ['bottom' 'bottom' 'bottom'] = [1.92439724 1.93794823 2.24289277]
Wall time: 483 ms


# for KNN=1 uncomment

In [51]:
# # Raw sequence labeling with KnnDtwClassifier and KNN=1
# clf1 = KnnDtwClassifier(1)
# clf1.fit(train_data_raw, train_labels)

# for index, t in enumerate(test_data_raw):
#     print("KnnDtwClassifier prediction for " + 
#           str(test_labels[index]) + " = " + str(clf1.predict(t)))

In [52]:
data_predict=['0.2825157940387726 -0.950498104095459 10.125079154968262', '3.952826976776123 -0.2932897210121155 9.527725219726562', '6.803123950958252 0.5422866940498352 8.166622161865234', '8.81784439086914 2.1092917919158936 2.5438392162323', '9.634267807006836 2.738966703414917 -1.5861586332321167']
arr=list()
import pandas as pd
for i in data_predict:
    x=list(map(float,i.split()))
    print(x)
    arr.append(x)
print(arr)
df=pd.DataFrame(arr)
print(df)

[0.2825157940387726, -0.950498104095459, 10.125079154968262]
[3.952826976776123, -0.2932897210121155, 9.527725219726562]
[6.803123950958252, 0.5422866940498352, 8.166622161865234]
[8.81784439086914, 2.1092917919158936, 2.5438392162323]
[9.634267807006836, 2.738966703414917, -1.5861586332321167]
[[0.2825157940387726, -0.950498104095459, 10.125079154968262], [3.952826976776123, -0.2932897210121155, 9.527725219726562], [6.803123950958252, 0.5422866940498352, 8.166622161865234], [8.81784439086914, 2.1092917919158936, 2.5438392162323], [9.634267807006836, 2.738966703414917, -1.5861586332321167]]
          0         1          2
0  0.282516 -0.950498  10.125079
1  3.952827 -0.293290   9.527725
2  6.803124  0.542287   8.166622
3  8.817844  2.109292   2.543839
4  9.634268  2.738967  -1.586159


In [53]:
td = normalize(np.ravel(df))
print(td)

[ 0.01245191 -0.04189328  0.44626363  0.17422115 -0.01292677  0.41993521
  0.29984821  0.02390133  0.35994449  0.38864716  0.0929672   0.11211991
  0.42463109  0.12072017 -0.06991006]


In [55]:
res = clf3.predict_ext(td)
nghs = np.array(train_labels)[res[1]]
#chossing majority of 3
d=dict()
max=-1
val=""
for i in nghs:
    if i in d:
        d[i]+=1
    else:
        d[i]=1
    if d[i]>max:
        max=d[i]
        val=i
print("KnnDtwClassifier neighbors for " + " = " + str(nghs))
print("KnnDtwClassifier distances to " + str(nghs) + " = " + str(res[0]))
print("choosing =======>",val)

KnnDtwClassifier neighbors for  = ['left' 'left' 'top']
KnnDtwClassifier distances to ['left' 'left' 'top'] = [1.42352399 1.62954169 1.76674948]
