In [54]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.metrics import classification_report

In [2]:
#Data import
train_df = pd.read_csv("./Lightning7/Lighting7_TRAIN.txt", header=None)
test_df = pd.read_csv("./Lightning7/Lighting7_TEST.txt",header=None)
all_data = pd.concat([train_df, test_df]) #combining data to experiment with different train/test splits

In [19]:
classes = {i:all_data.loc[all_data[0]==i] for i in range(7)}

In [81]:
def euclidean_dist(t1, t2,w=0):
    return math.sqrt(sum((t1-t2)**2))



In [82]:
def DTWDistance(s1, s2,w):
    DTW={}
    
    w = max(w, abs(len(s1)-len(s2)))
    
    for i in range(-1,len(s1)):
        for j in range(-1,len(s2)):
            DTW[(i, j)] = float('inf')
    DTW[(-1, -1)] = 0
  
    for i in range(len(s1)):
        for j in range(max(0, i-w), min(len(s2), i+w)):
            dist= (s1[i]-s2[j])**2
            DTW[(i, j)] = dist + min(DTW[(i-1, j)],DTW[(i, j-1)], DTW[(i-1, j-1)])
    return math.sqrt(DTW[len(s1)-1, len(s2)-1])

In [83]:
def LB_Keogh(s1,s2,r):
    LB_sum=0
    for ind,i in enumerate(s1):
        
        lower_bound=min(s2[(ind-r if ind-r>=0 else 0):(ind+r)])
        upper_bound=max(s2[(ind-r if ind-r>=0 else 0):(ind+r)])
        
        if i>upper_bound:
            LB_sum=LB_sum+(i-upper_bound)**2
        elif i<lower_bound:
            LB_sum=LB_sum+(i-lower_bound)**2
    
    return math.sqrt(LB_sum)

In [84]:
def knn(train,test,w,dist_func):
    preds=[]
    for ind,i in enumerate(test):
        min_dist=float('inf')
        closest_seq=[]
        #print ind
        for j in train:
            if LB_Keogh(i[1:],j[1:],5)<min_dist:
                dist=dist_func(i[1:],j[1:],w)
                if dist<min_dist:
                    min_dist=dist
                    closest_seq=j
        preds.append(closest_seq[0])
    return classification_report(test[:,0],preds)

In [89]:
print (knn(train_df.values, test_df.values, 30,euclidean_dist))

             precision    recall  f1-score   support

        0.0       0.75      0.60      0.67        10
        1.0       0.20      0.11      0.14         9
        2.0       0.56      0.83      0.67         6
        3.0       0.25      0.14      0.18         7
        4.0       0.50      0.30      0.37        10
        5.0       0.59      0.84      0.70        19
        6.0       0.71      0.83      0.77        12

avg / total       0.54      0.58      0.54        73



In [88]:
print (knn(train_df.values, test_df.values, 30,DTWDistance))

             precision    recall  f1-score   support

        0.0       0.73      0.80      0.76        10
        1.0       0.80      0.44      0.57         9
        2.0       0.50      0.67      0.57         6
        3.0       0.46      0.86      0.60         7
        4.0       1.00      0.20      0.33        10
        5.0       0.77      0.89      0.83        19
        6.0       0.92      0.92      0.92        12

avg / total       0.77      0.71      0.69        73

