Data Pre-processing for DHG Dataset

In [None]:
# ---------------------------------------------------------
# Download DHG dataset
# ---------------------------------------------------------

if download_dhg:
  !mkdir dataset_dhg1428
  !wget http://www-rech.telecom-lille.fr/DHGdataset/DHG2016.zip
  !unzip DHG2016.zip -d dataset_dhg1428


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_19.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_2.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_20.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_21.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_22.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_23.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_24.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_25.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_26.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_27.png  
  inflating: dataset_dhg1428/gesture_9/finger_2/subject_18/essai_3/depth_28.png  
  inflating: dataset_dhg1428/gestu

In [None]:
# ---------------------------------------------------------
# Utils
# ---------------------------------------------------------
import glob
import os
import pickle

import numpy
from scipy import ndimage as ndimage
from sklearn.model_selection import train_test_split


def resize_gestures(input_gestures, final_length=100):
    """
    Bring every gesture to a common number of time steps by interpolation.

    Input:
        - input_gestures: list of numpy.ndarray tensors,
          each shaped (duration, channels)
          (channels = 66 = 3 * 22 if recorded in 3D)
        - final_length: number of time steps every gesture is resampled to
    Output:
        - output_gestures: one numpy.ndarray tensor
          shaped (len(input_gestures), final_length, channels)
    """
    resampled_gestures = []
    for sequence in input_gestures:
        n_channels = numpy.size(sequence, 1)

        # Each channel is a 1-D time series that is zoomed on its own, so the
        # interpolation only ever runs along the time axis.
        resampled_channels = []
        for channel_idx in range(n_channels):
            resampled_channels.append(
                ndimage.zoom(sequence.T[channel_idx],
                             final_length / len(sequence),
                             mode='reflect')
            )

        # Channels were stacked as rows; transpose back to (time, channels).
        resampled_gestures.append(numpy.array(resampled_channels).T)

    output_gestures = numpy.array(resampled_gestures)
    return output_gestures


def load_gestures(dataset='dhg', root='./dataset_dhg1428', version_x='3D', version_y='both', resize_gesture_to_length=100):
    """
    Load skeleton gesture sequences and their labels from an extracted dataset.

    Input:
        - dataset: 'dhg' or 'shrec'; selects the skeleton file name
          ('skeleton_*.txt' for dhg, 'skeletons_*.txt' for shrec).
        - root: directory containing the
          gesture_*/finger_*/subject_*/essai_* tree. It must contain
          'dataset_dhg' (resp. 'dataset_shrec') in its path. For 'dhg', when
          root is not an existing directory but '/content/dataset_dhg1428'
          is, that directory is used instead (this used to be hard-coded).
        - version_x: '3D' reads the '*_world.txt' files, any other value
          reads the '*_image.txt' files.
        - version_y: '14' / 14, '28' / 28 or 'both'; selects the labels returned.
        - resize_gesture_to_length: if not None, every gesture is resampled
          to this many time steps with resize_gestures.
    Output:
        - (gestures, labels_14), (gestures, labels_28) or
          (gestures, labels_14, labels_28) depending on version_y.
          gestures is the result of resize_gestures when resizing is
          requested, otherwise a list with one numpy.ndarray per file.
          labels_14 is the integer parsed from the 'gesture_*' directory;
          labels_28 equals labels_14 when the 'finger_*' directory is
          finger_1 and 14 + labels_14 otherwise.
    Raises:
        - ValueError: unknown dataset or version_y.
        - AssertionError: root does not match the requested dataset.
        - FileNotFoundError: no skeleton file matches under root.
    """
    # Validate the cheap arguments before reading any file from disk.
    if dataset not in ('dhg', 'shrec'):
        raise ValueError("dataset must be 'dhg' or 'shrec', got %r" % (dataset,))
    if version_y not in ('14', 14, '28', 28, 'both'):
        raise ValueError("version_y must be 14, 28 or 'both', got %r" % (version_y,))

    # Honor the caller's root. The old Colab location is kept only as a
    # fallback so calls that passed a non-existent root keep working.
    legacy_colab_root = '/content/dataset_dhg1428'
    if dataset == 'dhg' and not os.path.isdir(root) and os.path.isdir(legacy_colab_root):
        root = legacy_colab_root

    if dataset == 'dhg':
        assert 'dataset_dhg' in root, "root %r does not look like a DHG dataset directory" % (root,)
    if dataset == 'shrec':
        assert 'dataset_shrec' in root, "root %r does not look like a SHREC dataset directory" % (root,)

    file_prefix = 'skeleton' if dataset == 'dhg' else 'skeletons'
    file_suffix = 'world' if version_x == '3D' else 'image'
    pattern = os.path.join(root, 'gesture_*', 'finger_*', 'subject_*', 'essai_*',
                           '%s_%s.txt' % (file_prefix, file_suffix))

    # Sorted so gestures and labels come back in a deterministic order.
    gestures_filenames = sorted(glob.glob(pattern))
    if not gestures_filenames:
        raise FileNotFoundError("no gesture file matches %r" % (pattern,))

    gestures = [numpy.genfromtxt(f) for f in gestures_filenames]
    if resize_gesture_to_length is not None:
        gestures = resize_gestures(gestures, final_length=resize_gesture_to_length)

    labels_14 = []
    labels_28 = []
    for filename in gestures_filenames:
        # .../gesture_G/finger_F/subject_S/essai_E/<file>: the gesture
        # directory is 5th from the end and the finger directory 4th.
        parts = os.path.normpath(filename).split(os.sep)
        gesture_id = int(parts[-5].split('_')[1])
        n_fingers_used = int(parts[-4].split('_')[1])
        labels_14.append(gesture_id)
        labels_28.append(gesture_id if n_fingers_used == 1 else 14 + gesture_id)

    if version_y == '14' or version_y == 14:
        return gestures, labels_14
    elif version_y == '28' or version_y == 28:
        return gestures, labels_28
    return gestures, labels_14, labels_28


def write_data(data, filepath):
    """
    Pickle `data` into the file at `filepath`.

    Input:
        - data: any picklable object
        - filepath: destination path; the file is opened in binary write mode
    """
    with open(filepath, 'wb') as pickle_handle:
        pickle.dump(data, pickle_handle)


def load_data(filepath='./dhg_data.pckl'):
    """
    Load a pickled train/test split from `filepath`.

    Input:
        - filepath: path to a pickle holding a dict with the keys
          'x_train', 'x_test', 'y_train_14', 'y_train_28',
          'y_test_14' and 'y_test_28'
    Output:
        - (x_train, x_test, y_train_14, y_train_28, y_test_14, y_test_28)
    Raises:
        - KeyError: the pickled dict is missing one of the keys above.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you created
    # yourself or otherwise trust.
    # The context manager closes the file even when unpickling fails.
    with open(filepath, 'rb') as pickle_handle:
        # encoding='latin1' tells pickle how to decode 8-bit strings from
        # pickles written by Python 2.
        data = pickle.load(pickle_handle, encoding='latin1')
    return data['x_train'], data['x_test'], data['y_train_14'], data['y_train_28'], data['y_test_14'], data['y_test_28']

In [None]:
# ---------------------------------------------------------
# Save the data
# ---------------------------------------------------------
# root points at the directory the download cell extracts the archive into.
gestures, labels_14, labels_28 = load_gestures(dataset='dhg',
                                               root='./dataset_dhg1428',
                                               version_x='3D',
                                               version_y='both',
                                               resize_gesture_to_length=100)

# Train/test split for the dataset: 70/30.
# A fixed random_state makes the saved split reproducible across runs.
# train_test_split returns (train, test) pairs in the order of its inputs.
x_train, x_test, y_train_14, y_test_14, y_train_28, y_test_28 = train_test_split(
    gestures, labels_14, labels_28, test_size=0.30, random_state=42)

# Every gesture must end up in exactly one of the two splits.
assert len(x_train) + len(x_test) == len(gestures)

# Save the dataset
data = {
    'x_train': x_train,
    'x_test': x_test,
    'y_train_14': y_train_14,
    'y_train_28': y_train_28,
    'y_test_14': y_test_14,
    'y_test_28': y_test_28
}
write_data(data, filepath='dhg_data.pckl')

In [None]:
# ---------------------------------------------------------
# Save in Google Drive
# ---------------------------------------------------------
try:
  # Connect Google Colab instance to Google Drive
  from google.colab import drive
  drive.mount('/gdrive')

  # Save the dataset on Google Drive
  !cp dhg_data.pckl /gdrive/My\ Drive/dhg_data.pckl

except:
  print("You're not in a Google Colab!")

Mounted at /gdrive


In [None]:
# Read the saved split back from disk; load_data returns the six arrays in
# this order (both train label sets come before the test label sets).
(x_train, x_test,
 y_train_14, y_train_28,
 y_test_14, y_test_28) = load_data('dhg_data.pckl')