In [1]:
%matplotlib inline
import sys

import numpy as np
import seaborn as sns
import pandas as pd
import xgboost as xgb

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import features

from dataset import read_datasets, read_leap_motion_data
from features import extract_features, extract_lp_features
from train import precision_for, train_clf, feature_precisions, predict_img
from utils import print_log, set_cache, get_cache
from img_utils import get_sdists, find_edge_of_img



In [2]:
# Dataset Load
# `persons` is iterated per person and indexed as persons[person][gesture][sample]
# by the feature-extraction cell.
dataset_dir = 'LP_data/dataset'
persons = read_leap_motion_data(dataset_dir)

Trying fetch cache for persons
Using cache for persons
Time for >>read_leap_motion_data<<: 0.55 s


In [3]:
# Feature Extraction
# Visit every person, gesture G1..G10 and sample 1..20; empty (falsy) records are skipped.
# Collected per kept sample:
#   X_img   - the record's 'leye' entry
#   X       - the feature vector returned by extract_lp_features for the record's 'json' entry
#   y       - zero-based gesture label (gesture number - 1)
#   indices - (person, gesture key, sample number), to trace a row back to its record
X, y, X_img, indices = [], [], [], []
for person in persons:
    for gesture_no in range(1, 11):
        gesture_key = 'G{}'.format(gesture_no)
        for sample_no in range(1, 21):
            record = persons[person][gesture_key][sample_no]
            if record:
                X_img.append(record['leye'])
                X.append(extract_lp_features(record['json']))
                y.append(gesture_no - 1)
                indices.append((person, gesture_key, sample_no))

X = np.array(X)
y = np.array(y)

# The scale factor is the largest value found in columns 5-9, but it is applied
# to every column from 5 onwards (not only to columns 5-9).
max_dist = np.max(X[:, 5:10])
X[:, 5:] = X[:, 5:] / max_dist

# Persist features with the label appended as the last column, then display the frame.
labelled_rows = np.concatenate((X, y.reshape((-1, 1))), axis=1)
all_data = pd.DataFrame(labelled_rows)
all_data.to_csv('caches/feature_set.csv', index=False, float_format='%.3f')
all_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0
1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0
5,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0
6,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0
7,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0
8,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0
9,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0


In [4]:
# Show the normalisation constant: the maximum over feature columns 5-9 that the
# feature-extraction cell used to scale columns 5 onwards.
print(max_dist)

340.701547861


In [5]:
# Hold out 15% of the samples for testing. The numerical features, the images and
# the labels are split together so that rows stay aligned across the three outputs.
# A fixed random_state makes the partition (and therefore every accuracy reported
# below) reproducible across kernel restarts.
X_train, X_test, X_img_train, X_img_test, y_train, y_test = train_test_split(
    X, X_img, y, test_size=0.15, random_state=42)

In [6]:
# Train Best clf for numerical data

features_selection = features.F_A | features.F_D | features.F_T
clf = train_clf(X_train, y_train, features_selection, test_size=0.15, ITER_TIMES=150)

# Column span of each feature group inside X. A group's columns are dropped
# before prediction when its flag is absent from features_selection.
group_columns = (
    (features.F_T, range(15, 25)),
    (features.F_E, range(10, 15)),
    (features.F_D, range(5, 10)),
    (features.F_A, range(0, 5)),
)
deletes = []
for group_flag, column_span in group_columns:
    if features_selection & group_flag:
        continue
    deletes.extend(column_span)

# Predict on the reduced train matrix first, then on the reduced test matrix.
train_reduced = np.delete(X_train, deletes, axis=1)
h_numerical = clf.predict(xgb.DMatrix(train_reduced))
test_reduced = np.delete(X_test, deletes, axis=1)
h_numerical_test = clf.predict(xgb.DMatrix(test_reduced))

print()
# First line is the train accuracy, second line is the test accuracy.
for predicted, truth in ((h_numerical, y_train), (h_numerical_test, y_test)):
    print('Accuracy: {:.2%}'.format(accuracy_score(predicted, truth)))
#set_cache(clf, 'prfediction_model')

Round  14/150: Current accuracy 91.27%, Best accuracy 94.28%

KeyboardInterrupt: 

In [None]:
# Score the numerical classifier on the complete feature matrix (train and test
# rows together), dropping the same columns listed in `deletes`.
A = np.delete(X, deletes, axis=1)
all_matrix = xgb.DMatrix(A)
h_all = clf.predict(all_matrix)
all_accuracy = accuracy_score(h_all, y)
print('Accuracy: {:.2%}'.format(all_accuracy))

In [None]:
# Template matching for pixel data
# This cell has to run to completion: h_img and h_img_test are consumed by the
# combine cell and by the final summary cell.


def _predict_images(images):
    """Run predict_img on every image and return the predictions in input order.

    A one-line progress counter is rewritten in place on stdout ('\r' returns the
    cursor to the start of the line). The counter is flushed right after it is
    written so it is visible while predict_img works on the current image.
    """
    total = len(images)
    predictions = []
    for position, pixels in enumerate(images, 1):
        sys.stdout.write('Processing {:3d} / {:3d}\r'.format(position, total))
        sys.stdout.flush()
        predictions.append(predict_img(pixels))
    return predictions


h_img = _predict_images(X_img_train)
h_img_test = _predict_images(X_img_test)
print_log('\nTemplate matching process finished.')
# First line is the train accuracy, second line is the test accuracy.
print('Accuracy: {:.2%}'.format(accuracy_score(h_img, y_train)))
print('Accuracy: {:.2%}'.format(accuracy_score(h_img_test, y_test)))

In [None]:
# Combine prediction for numerical data and prediction for pixel data


def _as_column(predictions):
    """Return the predictions as a 2-D array with a single column."""
    return np.array(predictions).reshape(-1, 1)


# Two-column frames: column 0 holds the numerical-model prediction, column 1 the
# template-matching prediction for the same sample.
df = pd.DataFrame(
    np.concatenate((_as_column(h_numerical), _as_column(h_img)), axis=1))
df_test = pd.DataFrame(
    np.concatenate((_as_column(h_numerical_test), _as_column(h_img_test)), axis=1))

dtrain = xgb.DMatrix(df, label=y_train)
dtest = xgb.DMatrix(df_test, label=y_test)

# specify parameters via map
param = {
    'max_depth': 2,
    'eta': 1,
    'silent': 1,
    'objective': 'multi:softmax',
    'num_class': 10,
}
num_round = 20
# Both sets are passed as the watch list to xgb.train.
watch_list = [(dtrain, 'train'), (dtest, 'test')]
clf_combine = xgb.train(param, dtrain, num_round, watch_list)

h_combine = clf_combine.predict(dtest)
combined_accuracy = accuracy_score(h_combine, y_test)
print('Accuracy: {:.2%}'.format(combined_accuracy))

In [None]:
# Side-by-side accuracy summary of the numerical model, the template matcher and
# the combined model. Each row is (message template, predictions, true labels).
summary_rows = (
    ('Accuracy on numerical train set : {:.2%}', h_numerical, y_train),
    ('Accuracy on numerical test set  : {:.2%}', h_numerical_test, y_test),
    ('Accuracy on pixel train set     : {:.2%}', h_img, y_train),
    ('Accuracy on pixel test set      : {:.2%}', h_img_test, y_test),
    ('Accuracy on combined test set   : {:.2%}', h_combine, y_test),
)
for template, predicted, truth in summary_rows:
    print(template.format(accuracy_score(predicted, truth)))