In [2]:
import numpy as np
import pandas as pd
import os

In [3]:
def read(filepath):
    """Parse one sample file into a single-row DataFrame.

    The layout is taken from what the parser reads:

    * line 2 (index 1): the 4th space-separated token is parsed as the
      integer ``number``;
    * line 3 (index 2): the 4th space-separated token is parsed as the
      integer ``id``;
    * lines 6 onwards (index 5+): comma-separated floats with at least four
      values per row. Each row is stored as ``[v[3], v[0], v[1], v[2]]``,
      i.e. the 4th value is moved to the front (presumably a timestamp
      followed by coordinates -- TODO confirm against the dataset docs).

    Parameters
    ----------
    filepath : str
        Path of the file to parse.

    Returns
    -------
    pandas.DataFrame or None
        A frame with columns ``number``, ``id`` and ``time_sequence`` holding
        exactly one row (index 0), or ``None`` when the file cannot be opened
        or read (a message is printed in that case).

    Raises
    ------
    IndexError, ValueError
        If the header or a data row does not match the layout above.
    """
    try:
        # The context manager closes the handle even if reading fails.
        with open(filepath, 'r') as handle:
            lines = [line.strip() for line in handle]
    except IOError:
        print("Unable to read dataset file!\n")
        return None

    number = int(lines[1].split(" ")[3])
    sample_id = int(lines[2].split(" ")[3])

    matrix = []
    for raw in lines[5:]:
        if not raw:
            # Blank lines (e.g. a trailing newline) carry no sample.
            continue
        values = np.array(raw.split(",")).astype(np.float64)
        matrix.append([values[3], *values[:3]])

    df = pd.DataFrame(columns=['number', 'id', 'time_sequence'])
    df.loc[0] = [number, sample_id, matrix]
    return df

In [4]:
# Build the path from its components: the original backslash literal only
# worked on Windows and relied on invalid escape sequences (`\S`, `\D`).
directory = os.path.join('Sketch-Data-master', 'SketchData', 'SketchData', 'Domain01')

# Collect one single-row frame per sample file and concatenate once at the
# end; DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
frames = []
# os.listdir returns entries in arbitrary order; sorting keeps row positions
# (which the positional cross-validation folds rely on) reproducible.
for filename in sorted(os.listdir(directory)):
    f = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(f):
        sample = read(f)
        # read() returns None when the file could not be opened; skip it.
        if sample is not None:
            frames.append(sample)

if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=['number', 'id', 'time_sequence'])
print(df)

    number  id                                      time_sequence
0        1   1  [[6.0, 0.042075, 0.036799, 0.25838], [37.0, 0....
1        1   1  [[27.0, -0.030138, 0.061892, 0.356577], [58.0,...
2       10   1  [[9.0, 0.102607, 0.069246, 0.302644], [41.0, 0...
3       10  10  [[38.0, 0.125652, 0.054037, 0.366598], [69.0, ...
4        1   2  [[11.0, 0.018762, 0.056202, 0.334956], [60.0, ...
..     ...  ..                                                ...
995     10  10  [[30.0, 0.113368, 0.034153, 0.334738], [63.0, ...
996     10  10  [[32.0, 0.111792, 0.018394, 0.323929], [62.0, ...
997     10  10  [[47.0, 0.120695, 0.035941, 0.33836], [78.0, 0...
998     10  10  [[30.0, 0.129327, 0.026097, 0.343372], [79.0, ...
999     10  10  [[8.0, 0.128029, 0.035957, 0.35456], [53.0, 0....

[1000 rows x 3 columns]


In [19]:
def cross_validation_split(n_samples=1000, n_folds=10):
    """Split the positions ``0 .. n_samples-1`` into contiguous folds.

    No shuffling is performed: fold ``i`` is a contiguous run of positions.
    With the defaults this yields ten folds of 100 positions each.

    Parameters
    ----------
    n_samples : int, optional
        Total number of positions to split (default 1000).
    n_folds : int, optional
        Number of folds (default 10). When ``n_samples`` is not divisible by
        ``n_folds`` the fold sizes differ by at most one.

    Returns
    -------
    list of (list of int, list of int)
        One ``(fold, other)`` pair per fold, where ``fold`` holds the
        positions of that fold and ``other`` all remaining positions, both in
        increasing order.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1 or ``n_samples`` is negative.
    """
    if n_folds < 1:
        raise ValueError("n_folds must be at least 1")
    if n_samples < 0:
        raise ValueError("n_samples must be non-negative")

    dataset_split = []
    for i in range(n_folds):
        # Integer boundaries spread any remainder evenly over the folds.
        start = i * n_samples // n_folds
        stop = (i + 1) * n_samples // n_folds
        fold = list(range(start, stop))
        other = list(range(start)) + list(range(stop, n_samples))
        dataset_split.append((fold, other))
    return dataset_split


In [38]:
def DTWdistance(data1, data2, w=None):
    """Dynamic-time-warping distance between two sequences of points.

    The accumulated-cost matrix has one extra leading row and column acting
    as the "empty prefix" sentinel, so that the first element of each
    sequence contributes to the cost like every other element.

    Parameters
    ----------
    data1, data2 : sequence
        Sequences whose elements are accepted by ``distance`` (array-likes of
        equal length).
    w : int or None, optional
        Half-width of the warping window: element ``i`` of ``data1`` may only
        be aligned with elements ``j`` of ``data2`` with ``|i - j| <= w``.
        It is widened to ``abs(len(data1) - len(data2))`` when smaller, so
        the end of both sequences is always reachable. ``None`` (default)
        applies no window constraint.

    Returns
    -------
    float
        The accumulated cost of the cheapest alignment. ``0.0`` for two empty
        sequences and ``inf`` when exactly one sequence is empty.
    """
    n = len(data1)
    m = len(data2)
    if w is None:
        w = max(n, m)
    w = max(w, abs(n - m))

    # Cells outside the window stay at infinity and are never chosen.
    DTW = np.full((n + 1, m + 1), np.inf)
    DTW[0, 0] = 0.0

    for i in range(1, n + 1):
        # Inclusive on both ends: j in [i - w, i + w], clipped to [1, m].
        for j in range(max(1, i - w), min(m, i + w) + 1):
            cost = distance(data1[i - 1], data2[j - 1])
            DTW[i, j] = cost + min(DTW[i - 1, j],      # insertion
                                   DTW[i, j - 1],      # deletion
                                   DTW[i - 1, j - 1])  # match

    return DTW[n, m]

def distance(a, b):
    """Return the Euclidean distance between two equally sized array-likes."""
    return np.linalg.norm(np.array(a) - np.array(b))

In [7]:
def get_neighbors(train, test_row, num_neighbors, w=10):
    """Return the training sequences closest to ``test_row`` under DTW.

    Parameters
    ----------
    train : iterable
        Training sequences, each accepted by ``DTWdistance``.
    test_row : sequence
        The query sequence.
    num_neighbors : int
        Maximum number of neighbours to return.
    w : int, optional
        Warping-window half-width forwarded to ``DTWdistance`` (default 10).

    Returns
    -------
    list
        Up to ``num_neighbors`` elements of ``train``, ordered by increasing
        distance to ``test_row``. Fewer are returned when ``train`` holds
        fewer than ``num_neighbors`` sequences.
    """
    # DTWdistance needs the window argument; omitting it raised a TypeError.
    distances = [(train_row, DTWdistance(test_row, train_row, w))
                 for train_row in train]
    distances.sort(key=lambda tup: tup[1])
    # Slicing (instead of indexing 0..num_neighbors-1) tolerates short inputs.
    return [train_row for train_row, _ in distances[:num_neighbors]]

In [57]:
def dist_matrix(x, w=10):
    """Compute the full pairwise DTW distance matrix of a set of sequences.

    Every ordered pair ``(i, j)`` is evaluated, so the cost is quadratic in
    ``len(x)`` on top of the cost of each ``DTWdistance`` call.

    Parameters
    ----------
    x : sequence
        Indexable collection of sequences (anything supporting ``len`` and
        integer indexing), each accepted by ``DTWdistance``.
    w : int, optional
        Warping-window half-width forwarded to ``DTWdistance`` (default 10).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(x), len(x))`` where entry ``[i, j]`` is
        ``DTWdistance(x[i], x[j], w)``.
    """
    size = len(x)
    dist_m = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            dist_m[i, j] = DTWdistance(x[i], x[j], w)
        # Progress marker: one line of output per completed row.
        print("z")
    return dist_m



In [58]:
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

n_neighbors = 10

# Precompute the pairwise DTW distances between all sequences once; the
# classifier below consumes sub-blocks of this matrix for every fold.
X = df["time_sequence"].array
X = dist_matrix(X)
print(X)
# Cast the labels to a plain integer dtype: an object-dtype label column can
# be rejected by scikit-learn as an unknown target type.
y = df["number"].astype(int)

fold_scores = []
for test_ind, train_ind in cross_validation_split():
    # With metric="precomputed", fit() expects the square train-vs-train
    # block and predict()/score() the (n_test, n_train) test-vs-train block.
    # np.ix_ builds the cross product of the index lists; indexing with
    # X[rows, cols] would pair them element-wise and return a 1-D vector.
    x_train = X[np.ix_(train_ind, train_ind)]
    y_train = y.iloc[train_ind]
    x_test = X[np.ix_(test_ind, train_ind)]
    y_test = y.iloc[test_ind]

    clf = neighbors.KNeighborsClassifier(n_neighbors, metric="precomputed")
    clf.fit(x_train, y_train)
    fold_scores.append(clf.score(x_test, y_test))

print("Mean accuracy over folds:", np.mean(fold_scores))

z


KeyboardInterrupt: 