In [None]:
from __future__ import division, print_function

import os
import sys
import shutil
import collections
import itertools
from glob import glob

%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import scipy
import scipy.interpolate
import cPickle

In [None]:
DATASET_NAME = 'JIGSAWS'

# Input locations: per-trial kinematics files and gesture transcriptions.
DATA_DIR = os.path.join(os.path.expanduser('~'), 'Data', 'JIGSAWS', 'Suturing')
KINEMATICS_DIR = os.path.join(DATA_DIR, 'kinematics', 'AllGestures')
LABELS_DIR = os.path.join(DATA_DIR, 'transcriptions')

# Original gesture ids (7 is not in this list) and the contiguous 0-based ids
# they are remapped to. CLASSES keeps the original 'G<id>' names, in the same order.
ORIG_CLASS_IDS = [1, 2, 3, 4, 5, 6, 8, 9, 10, 11]
NEW_CLASS_IDS = list(range(len(ORIG_CLASS_IDS)))
# The loop variable is deliberately not called `id`: on Python 2 a list
# comprehension leaks its variable into the namespace, which would clobber
# the `id` builtin for the rest of the notebook.
CLASSES = ['G%d' % class_id for class_id in ORIG_CLASS_IDS]
NUM_CLASSES = len(CLASSES)

# Standard JIGSAWS experimental setup. In particular, it's the only
# recognition setup that exists, corresponding to
# JIGSAWS/Experimental/Suturing/unBalanced/GestureRecognition/UserOut
# (User H's 2nd trial is left out because no video was available for labeling.)

USER_TO_TRIALS = {
    'B': [1, 2, 3, 4, 5],
    'C': [1, 2, 3, 4, 5],
    'D': [1, 2, 3, 4, 5],
    'E': [1, 2, 3, 4, 5],
    'F': [1, 2, 3, 4, 5],
    'G': [1, 2, 3, 4, 5],
    'H': [1,    3, 4, 5],
    'I': [1, 2, 3, 4, 5]
}

ALL_USERS = sorted(USER_TO_TRIALS.keys())

# Columns are listed 1-based and converted to the 0-based indices that
# np.loadtxt's `usecols` expects.
# NOTE(review): the two 7-column groups presumably correspond to the two
# slave manipulators (left, then right) -- confirm against the JIGSAWS readme.
KINEMATICS_USECOLS = [c-1 for c in [39, 40, 41, 51, 52, 53, 57, 58, 59, 60, 70, 71, 72, 76]]
# The same 7 names are repeated once per column group (14 names in total).
KINEMATICS_COL_NAMES = ['pos_x', 'pos_y', 'pos_z', 'vel_x', 'vel_y', 'vel_z', 'gripper']*2

# Transcription columns: start frame, end frame, gesture string such as 'G11'.
LABELS_USECOLS = [0, 1, 2]
LABELS_COL_NAMES = ['start_frame', 'end_frame', 'string_label']
# Strip the leading 'G' from the gesture string and parse the integer id.
LABELS_CONVERTERS = {2: lambda string_label: int(string_label.replace('G', ''))}

# Layout of the exported arrays: the kinematics columns followed by one label column.
STANDARDIZED_COL_NAMES = KINEMATICS_COL_NAMES + ['label']
# The standardized pickle is written next to the raw data.
STANDARDIZED_DIR = DATA_DIR
STANDARDIZED_PATH = os.path.join(DATA_DIR, 'standardized_data.pkl')

In [None]:
# Show the gesture class names ('G<id>' for each original class id).
print(CLASSES)

In [None]:
def get_trial_name(user, trial):
    """Return the trial identifier 'Suturing_<user><trial>'.

    The trial number is zero-padded to three digits, e.g. ('B', 1) gives
    'Suturing_B001'.
    """
    name_fields = (user, trial)
    return 'Suturing_%s%03d' % name_fields

In [None]:
# Map each user id to the list of trial names built from that user's trial numbers.
USER_TO_TRIAL_NAMES = {
    user: [get_trial_name(user, trial) for trial in trials]
    for user, trials in USER_TO_TRIALS.items()
}

In [None]:
# Print each user (in sorted order) next to that user's trial names.
for user_id in ALL_USERS:
    trial_names = USER_TO_TRIAL_NAMES[user_id]
    print(user_id, '   ', trial_names)

In [None]:
# Flatten the per-user trial-name lists into one list. The lists themselves are
# sorted first (lists compare element by element), so the flattened order does
# not depend on dict iteration order.
ALL_TRIAL_NAMES = list(
    itertools.chain.from_iterable(sorted(USER_TO_TRIAL_NAMES.values()))
)
print(ALL_TRIAL_NAMES)

In [None]:
def load_kinematics(trial_name, kinematics_dir=None, usecols=None):
    """Load the selected kinematics columns of one trial as a float64 array.

    Parameters
    ----------
    trial_name : str
        Trial identifier; the file read is '<kinematics_dir>/<trial_name>.txt'.
    kinematics_dir : str, optional
        Directory holding the kinematics text files. Defaults to KINEMATICS_DIR.
    usecols : sequence of int, optional
        0-based column indices to read. Defaults to KINEMATICS_USECOLS.

    Returns
    -------
    numpy.ndarray
        Whatever np.loadtxt parses from the file for the selected columns.
    """
    # Resolve defaults at call time so the module-level configuration is only
    # needed when the caller does not override it.
    if kinematics_dir is None:
        kinematics_dir = KINEMATICS_DIR
    if usecols is None:
        usecols = KINEMATICS_USECOLS
    path = os.path.join(kinematics_dir, trial_name + ".txt")
    # np.float was only an alias for the builtin float (i.e. float64); the alias
    # is deprecated in NumPy 1.20 and removed in 1.24, so name the dtype directly.
    return np.loadtxt(path, dtype=np.float64, usecols=usecols)

In [None]:
def load_kinematics_and_labels(trial_name):
    """Load one trial's kinematics and attach a per-frame gesture label column.

    The transcription file '<LABELS_DIR>/<trial_name>.txt' provides
    (start_frame, end_frame, gesture) rows. Frames are numbered from 1 and both
    segment ends are inclusive. Frames covered by no segment keep label 0 and
    are dropped from the result.

    Returns a 2-D array: the kinematics columns followed by one label column.
    """
    kinematics = load_kinematics(trial_name)

    transcription_path = os.path.join(LABELS_DIR, trial_name + '.txt')
    segments = np.genfromtxt(transcription_path, dtype=np.int32, converters=LABELS_CONVERTERS,
                             usecols=LABELS_USECOLS)

    # One label per kinematics row; 0 means "no gesture annotated".
    num_frames = kinematics.shape[0]
    frame_numbers = np.arange(1, num_frames + 1, dtype=np.int32)
    frame_labels = np.zeros(frame_numbers.shape, dtype=np.int32)
    for first_frame, last_frame, gesture_id in segments:
        in_segment = np.logical_and(frame_numbers >= first_frame,
                                    frame_numbers <= last_frame)
        frame_labels[in_segment] = gesture_id

    # Append the labels as a trailing column, then keep only annotated frames.
    labeled = np.concatenate([kinematics, frame_labels[:, np.newaxis]], axis=1)
    is_annotated = frame_labels != 0
    return labeled[is_annotated, :]

In [None]:
def load_kinematics_and_new_labels(trial_name):
    """Load a labeled trial and remap its label column to the new class ids.

    Each value in ORIG_CLASS_IDS found in the last column is replaced by the
    value at the same position in NEW_CLASS_IDS. Labels not listed in
    ORIG_CLASS_IDS are left unchanged.

    Returns the array from load_kinematics_and_labels with the last column remapped.
    """
    data = load_kinematics_and_labels(trial_name)
    # Compare against a snapshot of the original labels. Remapping in place
    # while comparing against the column being rewritten is order-dependent:
    # a freshly written new id could equal a not-yet-processed original id and
    # be remapped a second time.
    orig_labels = data[:, -1].copy()
    for orig, new in zip(ORIG_CLASS_IDS, NEW_CLASS_IDS):
        data[orig_labels == orig, -1] = new
    return data

In [None]:
def downsample(data, factor=6):
    """Keep every `factor`-th row of a 2-D array, starting with the first row.

    All columns are kept. The result comes from basic slicing of `data`.
    """
    every_nth_row = slice(None, None, factor)
    return data[every_nth_row, :]

In [None]:
# Original data is at 30 Hz; keeping every 6th frame leaves 5 Hz.

# Trial name -> downsampled array of kinematics plus remapped label column.
all_data = {}
for trial_name in ALL_TRIAL_NAMES:
    trial_data = load_kinematics_and_new_labels(trial_name)
    all_data[trial_name] = downsample(trial_data, factor=6)

In [None]:
# Sanity check: across all trials, the label column (index 14, the last one)
# must contain exactly the new class ids 0 .. len(CLASSES)-1.
# list(...) is needed on Python 3, where dict.values() is a view and range()
# is not a list; on Python 2 both wrappers are no-ops.
all_data_concat = np.concatenate(list(all_data.values()), axis=0)
unique_labels = np.unique(all_data_concat[:, 14])
assert unique_labels.tolist() == list(range(len(CLASSES))), \
    'unexpected label set: %r' % (unique_labels.tolist(),)

In [None]:
print('Data shapes for all trials:')

# Trial names are unique dict keys, so sorting the keys gives the same order
# as sorting the (name, array) pairs.
for trial_name in sorted(all_data):
    print('%s' % trial_name, all_data[trial_name].shape)

In [None]:
# Plot one trial: the first three columns (x, y, z) as line plots, plus a strip
# of points colored by the per-frame label (column 14).
# next(iter(...)) picks the same first value as `.values()[0]` did on Python 2,
# and also works on Python 3 where dict views cannot be indexed.
example = next(iter(all_data.values()))
fig, ax = plt.subplots(nrows=4, ncols=1, sharex=True, figsize=(10, 7))

for i, title in enumerate(['x', 'y', 'z']):
    ax[i].plot(example[:, i])
    ax[i].set_title(title)
    ax[i].set_yticklabels([])

# Label strip: one point per frame at constant height, colored by class id.
x = range(example.shape[0])
y = np.ones(example.shape[0])
c = example[:, 14]
ax[3].scatter(x, y, c=c, lw=0, vmin=0, vmax=NUM_CLASSES)
ax[3].set_yticklabels([])
ax[3].axis('tight')

In [None]:
# One label strip per trial (sorted by trial name): a point per frame at
# constant height, colored by the class id stored in column 14.
fig, ax_list = plt.subplots(nrows=len(all_data), ncols=1, sharex=True, figsize=(50, 75))
for trial_ax, (trial_name, data) in zip(ax_list, sorted(all_data.items())):
    num_frames = data.shape[0]
    trial_ax.scatter(range(num_frames), np.ones(num_frames), c=data[:, 14],
                     lw=0, vmin=0, vmax=NUM_CLASSES)
    trial_ax.set_title(trial_name)

In [None]:
def export_standardized(export_dict):
    """Pickle `export_dict` to STANDARDIZED_PATH.

    STANDARDIZED_DIR is created first if it does not exist yet.
    """
    if not os.path.exists(STANDARDIZED_DIR):
        os.makedirs(STANDARDIZED_DIR)
    # Close the file deterministically so the pickle is fully flushed before
    # anything reads it back. The mode stays 'w' (text) to remain consistent
    # with the reader, which opens the file with 'r'.
    with open(STANDARDIZED_PATH, 'w') as standardized_file:
        cPickle.dump(export_dict, standardized_file)

In [None]:
# Everything a downstream consumer needs: dataset metadata, the user/trial
# index, and the per-trial arrays keyed by trial name.
export_dict = {
    'dataset_name': DATASET_NAME,
    'classes': CLASSES,
    'num_classes': NUM_CLASSES,
    'col_names': STANDARDIZED_COL_NAMES,
    'all_users': ALL_USERS,
    'user_to_trial_names': USER_TO_TRIAL_NAMES,
    'all_trial_names': ALL_TRIAL_NAMES,
    'all_data': all_data,
}
export_standardized(export_dict)

In [None]:
%reset -f

import os
import cPickle

# The namespace was just reset, so the paths are rebuilt here.
DATA_DIR = os.path.join(os.path.expanduser('~'), 'Data', 'JIGSAWS', 'Suturing')
STANDARDIZED_PATH = os.path.join(DATA_DIR, 'standardized_data.pkl')

# NOTE: unpickling can execute arbitrary code; only load a file that this
# notebook (or another trusted source) wrote.
# The file is closed deterministically; mode 'r' matches the 'w' used on export.
with open(STANDARDIZED_PATH, 'r') as standardized_file:
    standardized_dict = cPickle.load(standardized_file)
print(standardized_dict.keys())

# Expose every key of the pickle as a top-level variable. globals() is used
# because writing to the dict returned by locals() is not guaranteed to work;
# at the top level of a cell both refer to the same namespace.
globals().update(standardized_dict)

In [None]:
# `classes` was injected into the namespace from the reloaded pickle.
print(classes)

In [None]:
# Column names of the per-trial arrays, as stored in the pickle.
print(col_names)

In [None]:
# User ids stored in the pickle.
print(all_users)

In [None]:
# Flat list of trial names stored in the pickle.
print(all_trial_names)

In [None]:
# Trial names recorded for user 'B'.
print(user_to_trial_names['B'])

In [None]:
# Element type of one reloaded trial array.
print(all_data['Suturing_B001'].dtype)

In [None]:
# Shape of the same reloaded trial array.
print(all_data['Suturing_B001'].shape)