In [1]:
import numpy as np
import matplotlib.pylab as plt
import pandas as pd
from sklearn.neighbors import LSHForest
import numpy as np
from scipy.stats import mode
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from sklearn.decomposition import RandomizedPCA

In [2]:
class LSH_KNN:

    def __init__(self, **kwargs):
        self.n_neighbors = kwargs['n_neighbors']
        self.lsh = LSHForest(**kwargs)

    def fit(self, X, y):
        self.y = y
        self.lsh.fit(X)

    def predict(self, X):
        _, indices = self.lsh.kneighbors(X, n_neighbors = self.n_neighbors)
        print indices
        votes, _ = mode(self.y[indices], axis=1)
        return votes.flatten()

In [3]:
dataframe = pd.read_csv("transformed_WISDM.csv", header=None)
dataframe.head()
dataframe = dataframe.sample(frac=1).reset_index(drop=True)
y_train = dataframe.ix[:, 44]
X_train = dataframe.drop(dataframe.columns[[0, 44]], axis=1)
x_numpy = X_train.values

min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x_numpy)
dct = {"Jogging" : 1, "LyingDown" : 2, "Sitting" : 3, "Stairs" : 4, "Standing" : 5, "Walking" : 6}
labels = {1:"Jogging", 2:"LyingDown", 3:"Sitting", 4:"Stairs", 5:"Standing", 6:"Walking"}
y_train = y_train.replace(dct)
y_transformed = y_train.values

In [4]:
X_train, X_test, y_train, y_test = train_test_split(x_scaled, y_transformed, test_size=0.30, random_state=42)

In [5]:
pca = RandomizedPCA(n_components=34,whiten=True).fit(X_train)
x_pca = pca.transform(X_train)
x_test_pca = pca.transform(X_test)

In [6]:
m = LSH_KNN(n_neighbors=1)

In [7]:
m.fit(x_pca, y_train)
label= m.predict(x_test_pca)

[[ 339]
 [ 552]
 [3442]
 ..., 
 [1119]
 [2744]
 [2273]]


In [8]:
print y_test.shape

(1631,)


In [9]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, confusion_matrix
print classification_report(label, y_test, target_names=[l for l in labels.values()])

             precision    recall  f1-score   support

    Jogging       0.81      0.89      0.85        38
  LyingDown       0.93      0.97      0.95       180
    Sitting       0.94      0.93      0.94       443
     Stairs       0.71      0.85      0.77        60
   Standing       0.94      0.93      0.94       260
    Walking       0.96      0.94      0.95       650

avg / total       0.94      0.94      0.94      1631



In [10]:
conf_mat = confusion_matrix(label, y_test)

fig = plt.figure(figsize=(6,6))
width = conf_mat.shape[1]
height = conf_mat.shape[0]

res = plt.imshow(conf_mat, cmap='summer', interpolation='nearest')
for i, row in enumerate(conf_mat):
    for j, c in enumerate(row):
        if c>0:
            plt.text(j-.2, i+.1, c, fontsize=16)
            
cb = fig.colorbar(res)
plt.title('Confusion Matrix')
_ = plt.xticks(range(6), [l for l in labels.values()], rotation=90)
_ = plt.yticks(range(6), [l for l in labels.values()])
plt.show()

In [11]:
print accuracy_score(y_test, label)

0.935009196812


In [12]:
print confusion_matrix(y_test, label)

[[ 34   0   0   0   0   8]
 [  1 175  10   0   2   1]
 [  1   4 412   2   9   8]
 [  2   0   0  51   0  19]
 [  0   1  10   0 243   4]
 [  0   0  11   7   6 610]]
