In [318]:
import csv
import matplotlib.pyplot as plt
import numpy as np
from scipy.fftpack import fft
from scipy.cluster.vq import vq, kmeans, whiten
from scipy.interpolate import interp1d
from sklearn.cross_validation import KFold

Xcon = []
Ycon = []
Zcon = []
target = []
with open('dataqm30.csv', 'rb') as csvfile:
    letter = ''
    reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in reader:
        letter = row[-1]
        target.append(letter)
        del row[-1]
        frow = np.array(map(float, row))
        Xcon.append(np.array(frow[range(0,len(row),3)]))
        Ycon.append(np.array(frow[range(1,len(row),3)]))
        Zcon.append(np.array(frow[range(2,len(row),3)]))
target = np.array(target)

In [31]:
def inter(Obs, ndots=100):
    interpolated = []
    for obs in Obs:
        x = np.linspace(0, 100, num=obs.shape[0], endpoint=True)
        f = interp1d(x, obs, kind='slinear')
        xnew = np.linspace(0, 100, num=ndots, endpoint=True)
        interpolated.append(f(xnew))
    return interpolated

In [32]:
def partition_magic(Obs, n):
    P = []
    for x in Obs:
        xm = np.zeros(n)
        parts = [len(x)/n]*n
        for i in range(int(round(len(x)%n))):
            parts[i] += 1
        parts_sum = 0
        for i in range(len(parts)):
            part = parts[i]
            xm[i] = np.mean(x[parts_sum:parts_sum+part])
            parts_sum += part
        P.append(xm)
    return np.array(P)

In [33]:
def windowing_and_fourier(X, win_size, win_step):
    Y = []
    features = []
    for x in X:
        win = np.hamming(win_size)
        y = []
        for j in xrange(0, x.shape[0]-win_size, win_step):
            a = win * x[j:j+win_size]
            a = np.abs(fft(a)) # amplitude spectrum
            #print a.shape
            y.append(a)
            features.append(a)
        Y.append(np.array(y))
    return np.array(Y), features

In [34]:
def diff_sign(X, win_size):
    Y = []
    features = []
    for x in X:
        y = []
        for j in xrange(0, x.shape[0]-win_size, win_size):
            a = x[j:j+win_size]
            #a=np.log(a*a)
            y.append(a)
            features.append(a)
        Y.append(np.array(y))
    return np.array(Y), features

In [35]:
def diff_part(Obs, n):
    P = []
    features= []
    for x in Obs:
        dx = np.diff(x)
        ddx = np.diff(x)
        xm = []
        parts = [len(x)/n]*n
        for i in range(int(round(len(x)%n))):
            parts[i] += 1
        parts_sum = 0
        for i in range(len(parts)):
            part = parts[i]
            a = x[parts_sum:parts_sum+part]
            #a = np.nan_to_num(np.concatenate((np.abs(fft(a))[np.abs(fft(a)).argsort()[:2]], 
            a = np.nan_to_num([np.mean(a),
                               #x[parts_sum]-x[parts_sum+part-1],
                               np.std(a),
                               np.mean(dx[parts_sum:parts_sum+part-1]),
                               np.mean(ddx[parts_sum:parts_sum+part-2])])#))
            xm.append(a)
            parts_sum += part
            features.append(a)
        xm = np.array(xm)
        P.append(xm)
    return np.array(P), features

In [36]:
def quantization(Obs, k=2, method='wf', win_size=10, win_step=10):
    coded_obs = []
    Obs = inter(Obs)
    if method=='wf':
        Obs, features = windowing_and_fourier(Obs, win_size, win_step)
        codebook, distortion = kmeans(whiten(features),k)
        for o in Obs:
            codes, disr = vq(o, codebook)
            coded_obs.append(codes)
    if method=='ds':
        Obs, features = diff_sign(Obs, win_size)
        codebook, distortion = kmeans(whiten(features),k)
        for o in Obs:
            codes, disr = vq(o, codebook)
            coded_obs.append(codes)
    if method=='dp':
        Obs, features = diff_part(Obs, win_size)
        codebook, distortion = kmeans(whiten(features),k)
        for o in Obs:
            codes, disr = vq(o, codebook)
            coded_obs.append(codes)
    return np.array(coded_obs)

In [37]:
coded_X = quantization(Xcon, k=25, method='ds', win_size=10)
coded_Y = quantization(Ycon, k=25, method='ds', win_size=10)
coded_Z = quantization(Zcon, k=25, method='ds', win_size=10)
#coded_X = quantization(Xcon, k=70, method='wf', win_size=20, win_step=10)
#coded_Y = quantization(Y, 8)
#coded_Z = quantization(Z, 8)

In [40]:
kf = KFold(len(Xcon), n_folds=10, shuffle=True)
coonter = 0

for train_index, test_index in kf:
    #print("TRAIN:", train_index, "TEST:", test_index)
    coonter += 1
    X_train, X_test = coded_X[train_index], coded_X[test_index]
    Y_train, Y_test = coded_Y[train_index], coded_Y[test_index]
    Z_train, Z_test = coded_Z[train_index], coded_Z[test_index]
    target_train, target_test = target[train_index], target[test_index]
    write_csv('X', X_train, X_test, target_train, target_test, coonter)
    write_csv('Y', Y_train, Y_test, target_train, target_test, coonter)
    write_csv('Z', Y_train, Y_test, target_train, target_test, coonter)

In [39]:
def write_csv(axis, X_train, X_test, target_train, target_test, n):
    f = open('cv//train'+axis+str(n)+'.csv','w')
    str1 = ''
    for i in range(X_train.shape[0]):
        str1 = ''
        for val in X_train[i]:
            str1 += str(val)+','
        str1 += target_train[i][1]+'\n'
        f.write(str1)
    f.close()
    f = open('cv//test'+axis+str(n)+'.csv','w')
    str1 = ''
    for i in range(X_test.shape[0]):
        str1 = ''
        for val in X_test[i]:
            str1 += str(val)+','
        str1 += target_test[i][1]+'\n'
        f.write(str1)
    f.close()

In [337]:
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import GradientBoostingClassifier
 
def interfu(Obs, ndots=100):
    interpolated = []
    for obs in Obs:
        l = len(obs)
        step = 1.0*l/ndots
        xnew = []
        i = 0
        while (i < l-1):
            a = int(i + 0.5)
            xnew.append(obs[a])
            i = i + step
        interpolated.append(xnew)
    return interpolated
def interfuf(Obs, ndots=100):
    interpolated = []
    for obs in Obs:
        l = len(obs)
        step = 1.0*l/ndots
        xnew = []
        i = 0
        while (i < l-1):
            a = int(i + 0.5)
            if (i==0):
                m = (obs[a]+obs[a+1])/2.0
            if (i==l-2):
                m = (obs[a-1]+obs[a])/2.0
            if (i>0) and (i<l-2):
                m = (obs[a-1]+obs[a]+obs[a+1])/3.0
            xnew.append(m)
            i = i + step
        interpolated.append(xnew)
    return interpolated
#clf = GradientBoostingClassifier()
#clf = MultinomialNB()
clf = KNeighborsClassifier(n_neighbors=1)
#clf = LogisticRegression()
#clf = RandomForestClassifier(n_estimators=159)
#clf = SVC(C=10.9, kernel='rbf')
meta_scores = []

X = interfuf(Xcon,20)
Y = interfuf(Ycon,20)
#clf = SVC(C=1, kernel='linear')
C = []
for i in range(len(X)):
    C.append(np.concatenate((X[i],Y[i])))

scores = cross_validation.cross_val_score(clf, C, target, cv=10)
print np.mean(scores)
meta_scores.append(np.mean(scores))


0.951377351006


In [338]:
f = open('dataset.csv','w')
str1 = ''
for i in range(len(C)):
    str1 = ''
    for val in C[i]:
        str1 += str(val)+','
    str1 += target[i][1]+'\n'
    f.write(str1)
f.close()

In [300]:
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
def euc_kernel(X, Y):
    kern = []
    for x in X:
        row = []
        for y in Y:
            distance = 0
            for i in range(20):
                distance += euclidean([x[i], x[i+19]],[y[i], y[i+19]])
            #distance,b,c = dtw(x, y)
            row.append(np.exp(-distance/20.0))
            #row.append(-distance)
        kern.append(np.array(row))
    #print kern
    return np.array(kern)
clf = SVC(C=10.0, kernel=euc_kernel)
from sklearn.metrics import accuracy_score
kf = KFold(len(Xcon), n_folds=10, shuffle=True)
coonter = 0
X = inter(Xcon,20)
Y = inter(Ycon,20)
C = []
for i in range(len(X)):
    C.append(np.concatenate((X[i],Y[i])))
C = np.array(C)
counter = 0       
for train_index, test_index in kf:
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = C[train_index], C[test_index]
    target_train, target_test = target[train_index], target[test_index]
    clf.fit(X_train, target_train)
    print accuracy_score(clf.predict(X_test),target_test)
#gram = np.dot(X, X.T)
#clf = SVC(kernel='precomputed')


1.0
0.9875
0.95
0.925
0.9625
0.9875
0.95
0.9625
0.95
1.0


In [226]:
from collections import Counter
kf = KFold(len(Xcon), n_folds=10, shuffle=True)
X = inter(Xcon,20)
Y = inter(Ycon,20)
C = []
for i in range(len(X)):
    C.append(np.vstack((X[i],Y[i])).T)
C = np.array(C)
scores = []
k = 3
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
for train_index, test_index in kf:
    #print len(test_index)
    target_test = target[test_index]
    predicted = []
    for a in test_index:
        min_dist = 100000
        best_target = ''
        distanceX = []
        distanceY = []
        for b in train_index:
            distance, path = fastdtw(X[a], X[b], dist=euclidean)
            distanceX.append(distance)
            distance, path = fastdtw(Y[a], Y[b], dist=euclidean)
            distanceY.append(distance)
        distanceY = np.array(distanceY)
        distanceX = np.array(distanceX)
        arr = distanceX + distanceY
        #print target[train_index[arr.argsort()[:k]]]
        b = Counter(target[train_index[arr.argsort()[:k]]])
        #print b.most_common(1)[0][0]
        predicted.append(b.most_common(1)[0][0])
    acc = 0
    #print target_test, predicted
    for i in range(len(target_test)):
        if target_test[i] == predicted[i]:
            acc += 1
    acc = 1.0*acc/len(target_test)
    print acc
    scores.append(acc)
print np.mean(scores)

0.95
0.9625


KeyboardInterrupt: 

In [220]:
from hmmlearn.hmm import GaussianHMM
def HMM_acc(X_train, X_test, Y_train, Y_test, Z_train, Z_test, target_train, target_test, n=10):
    d_train = {}
    for l in target_train:
        d_train[l] = []
    for i in range(len(target_train)):
        d_train[target_train[i]].append(np.column_stack([X_train[i], Y_train[i]]))
    d_test = {}
    for l in target_test:
        d_test[l] = []
    for i in range(len(target_test)):
        d_test[target_test[i]].append(np.column_stack([X_test[i], Y_test[i]]))
    d_models = {}
    for k in d_train:
        train_set = np.concatenate(d_train[k])
        length = [d_train[k][0].shape[0]]*len(d_train[k])
        d_models[k] = GaussianHMM(n_components=n, n_iter=500).fit(train_set, length)
    acc = 0
    for k in d_test:
        for obs in d_test[k]:
            mprob = -100000
            let = ''
            for m in d_models:
                lprob = d_models[m].score(obs)
                if lprob>mprob:
                    mprob = lprob
                    let = m
            if k == let:
                acc += 1
    return 1.0*acc/X_test.shape[0]

In [221]:
X = np.array(inter(Xcon,20))
Y = np.array(inter(Ycon,20))
Z = np.array(inter(Zcon,20))
kf = KFold(len(X), n_folds=10, shuffle=True)
scores = []
for train_index, test_index in kf:
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]
    Z_train, Z_test = Z[train_index], Z[test_index]
    target_train, target_test = target[train_index], target[test_index]
    scores.append(HMM_acc(X_train, X_test, Y_train, Y_test, Z_train, Z_test, target_train, target_test, n=14))
    print np.mean(scores)
print np.mean(scores)

0.9875
0.975
0.966666666667
0.971875
0.9725
0.975
0.976785714286
0.971875
0.966666666667
0.965
0.965
