
Data Pre-processing for SHREC Dataset

In [None]:
# ---------------------------------------------------------
# Download SHREC DATASET
# ---------------------------------------------------------

# Fetch and unpack the SHREC 2017 hand-gesture archive (the wget log below
# reports a ~5.9 GB download).
# NOTE(review): `download_shrec_17` is not defined in the visible cells; it
# presumably comes from an earlier configuration cell -- confirm before running.
if download_shrec_17:
  # Create the extraction target in the current working directory.
  !mkdir dataset_shrec2017
  # Download the archive and store it locally as SHREC2017.tar.gz.
  !wget http://www-rech.telecom-lille.fr/shrec2017-hand/HandGestureDataset_SHREC2017.tar.gz -O SHREC2017.tar.gz
  # Unpack the gzip-compressed tarball into dataset_shrec2017/.
  !tar -xzf SHREC2017.tar.gz -C dataset_shrec2017


--2021-03-25 12:26:30--  http://www-rech.telecom-lille.fr/shrec2017-hand/HandGestureDataset_SHREC2017.tar.gz
Resolving www-rech.telecom-lille.fr (www-rech.telecom-lille.fr)... 193.48.251.100
Connecting to www-rech.telecom-lille.fr (www-rech.telecom-lille.fr)|193.48.251.100|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6305395695 (5.9G) [application/x-gzip]
Saving to: ‘SHREC2017.tar.gz’


2021-03-25 12:35:37 (11.0 MB/s) - ‘SHREC2017.tar.gz’ saved [6305395695/6305395695]



In [None]:
# ---------------------------------------------------------
# Utils
# ---------------------------------------------------------
import glob
import os
import pickle

import numpy
from scipy import ndimage as ndimage
from sklearn.model_selection import train_test_split


def resize_gestures(input_gestures, final_length=100):
    """
    Bring every gesture to the same number of time steps.

    Each channel of each gesture is resampled on its own with
    scipy.ndimage.zoom (mode='reflect'), using the zoom factor
    final_length / duration of that gesture.

    Input:
        - input_gestures: list of numpy.ndarray tensors.
          Each tensor has a shape: (duration, channels)
          channels = 66 = 3 * 22 if recorded in 3D
        - final_length: number of time steps every gesture has after resizing.
    Output:
        - output_gestures: one numpy.ndarray tensor.
          The output tensor has a shape: (len(input_gestures), final_length, channels)
    """
    resampled = []
    for gesture in input_gestures:
        n_channels = numpy.size(gesture, 1)
        # Factor that maps this gesture's own duration onto final_length.
        factor = final_length / len(gesture)
        per_channel = [
            ndimage.zoom(gesture.T[channel], factor, mode='reflect')
            for channel in range(n_channels)
        ]
        # per_channel is (channels, final_length); transpose back to (final_length, channels).
        resampled.append(numpy.array(per_channel).T)

    output_gestures = numpy.array(resampled)
    return output_gestures


def load_gestures(dataset='shrec', root='./dataset_shrec2017', version_x='3D', version_y='both', resize_gesture_to_length=100):
    """
    Load the skeleton sequences and their labels from a DHG / SHREC folder tree.

    Files are looked up with the pattern
    <root>/gesture_*/finger_*/subject_*/essai_*/<skeleton file>.

    Input:
        - dataset: 'shrec' or 'dhg'. Selects the skeleton file name
          ('skeletons_*.txt' for shrec, 'skeleton_*.txt' for dhg).
        - root: directory that contains the gesture_* folders. For 'shrec',
          if nothing is found directly under root, the sub-folder
          root/HandGestureDataset_SHREC2017 and then the legacy Colab
          location are tried as well.
        - version_x: '3D' reads the *_world.txt files, any other value reads
          the *_image.txt files.
        - version_y: '14' / 14, '28' / 28 or 'both'. Selects which labels are returned.
        - resize_gesture_to_length: if not None, all gestures are resized to
          this length with resize_gestures; if None the raw, variable-length
          arrays are returned as a list.
    Output:
        - (gestures, labels_14) if version_y is '14' / 14
        - (gestures, labels_28) if version_y is '28' / 28
        - (gestures, labels_14, labels_28) if version_y is 'both'
          labels_14 is the number in the gesture_<n> folder name; labels_28
          equals labels_14 when the finger_<k> folder has k == 1 and
          14 + labels_14 otherwise.
    Raises:
        - ValueError: unknown dataset or version_y.
        - FileNotFoundError: no skeleton file matches under any candidate root.
    """
    # Location produced by the download cell on Google Colab. This function used
    # to overwrite `root` with it unconditionally (so the argument was ignored
    # and dataset='dhg' could never pass its path check); it is kept only as a
    # last-resort fallback so existing shrec calls keep working.
    legacy_shrec_root = '/content/dataset_shrec2017/HandGestureDataset_SHREC2017'

    if dataset not in ('dhg', 'shrec'):
        raise ValueError("dataset must be 'dhg' or 'shrec', got {!r}".format(dataset))
    if version_y not in ('14', 14, '28', 28, 'both'):
        raise ValueError("version_y must be 14, 28 or 'both', got {!r}".format(version_y))

    # dhg names its files 'skeleton_*', shrec 'skeletons_*'.
    stem = 'skeleton' if dataset == 'dhg' else 'skeletons'
    suffix = 'world' if version_x == '3D' else 'image'
    skeleton_file = '{}_{}.txt'.format(stem, suffix)

    candidate_roots = [root]
    if dataset == 'shrec':
        candidate_roots.append(os.path.join(root, 'HandGestureDataset_SHREC2017'))
        candidate_roots.append(legacy_shrec_root)

    # Use the first candidate root that actually contains skeleton files.
    gestures_filenames = []
    for candidate in candidate_roots:
        pattern = os.path.join(candidate, 'gesture_*', 'finger_*', 'subject_*', 'essai_*', skeleton_file)
        gestures_filenames = sorted(glob.glob(pattern))
        if gestures_filenames:
            break
    if not gestures_filenames:
        raise FileNotFoundError(
            "No '{}' files found for dataset '{}' under any of: {}".format(
                skeleton_file, dataset, candidate_roots))

    gestures = [numpy.genfromtxt(f) for f in gestures_filenames]
    if resize_gesture_to_length is not None:
        gestures = resize_gestures(gestures, final_length=resize_gesture_to_length)

    labels_14 = []
    labels_28 = []
    for filename in gestures_filenames:
        # .../gesture_<g>/finger_<k>/subject_<s>/essai_<e>/<file>
        parts = os.path.normpath(filename).split(os.sep)
        gesture_id = int(parts[-5].split('_')[1])
        finger_id = int(parts[-4].split('_')[1])
        labels_14.append(gesture_id)
        labels_28.append(gesture_id if finger_id == 1 else 14 + gesture_id)

    if version_y == '14' or version_y == 14:
        return gestures, labels_14
    if version_y == '28' or version_y == 28:
        return gestures, labels_28
    return gestures, labels_14, labels_28


def write_data(data, filepath):
    """
    Pickle `data` and store it in the file at `filepath`.

    Input:
        - data: the object to serialize with pickle.
        - filepath: destination path; the file is opened in binary write mode,
          so existing content is replaced.
    """
    with open(filepath, mode='wb') as sink:
        pickle.dump(data, sink)


def load_data(filepath='./shrec_data.pckl'):
    """
    Read a pickled dataset dictionary and return its train / test parts.

    Input:
        - filepath: path of a pickle file holding a dict with the keys
          'x_train', 'x_test', 'y_train_14', 'y_train_28', 'y_test_14', 'y_test_28'.
    Output:
        - the tuple (x_train, x_test, y_train_14, y_train_28, y_test_14, y_test_28)
    Raises:
        - KeyError: if one of the expected keys is missing from the pickled dict.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only open files
    # that come from a trusted source.
    # The context manager closes the file even when unpickling fails.
    with open(filepath, 'rb') as input_file:
        data = pickle.load(input_file, encoding='latin1')
    return data['x_train'], data['x_test'], data['y_train_14'], data['y_train_28'], data['y_test_14'], data['y_test_28']

In [None]:
# ---------------------------------------------------------
# Save the Data
# ---------------------------------------------------------
# `root` points at the folder the download cell extracts the archive into
# (the previous '/tmp/dataset_shrec2017/' is not where the data is unpacked).
gestures, labels_14, labels_28 = load_gestures(dataset='shrec',
                                               root='./dataset_shrec2017/HandGestureDataset_SHREC2017',
                                               version_x='3D',
                                               version_y='both',
                                               resize_gesture_to_length=100)

# Test Train Split for Dataset 70/30
# train_test_split returns a (train, test) pair per input, in input order:
# gestures, then labels_14, then labels_28.
# random_state is fixed so the split is the same on every run.
x_train, x_test, y_train_14, y_test_14, y_train_28, y_test_28 = train_test_split(
    gestures, labels_14, labels_28, test_size=0.30, random_state=42)

# Save the dataset
data = {
    'x_train': x_train,
    'x_test': x_test,
    'y_train_14': y_train_14,
    'y_train_28': y_train_28,
    'y_test_14': y_test_14,
    'y_test_28': y_test_28
}
write_data(data, filepath='shrec_data.pckl')

In [None]:
# ---------------------------------------------------------
# Save in Google Drive
# ---------------------------------------------------------
try:
  # google.colab can only be imported inside a Google Colab runtime
  from google.colab import drive
except ImportError:
  print("You're not in a Google Colab!")
else:
  import shutil

  # Connect Google Colab instance to Google Drive
  drive.mount('/gdrive')

  # Save your dataset on Google Drive.
  # shutil.copy raises if shrec_data.pckl is missing, instead of the silent
  # "cp: cannot stat" message the shell command produced.
  shutil.copy('shrec_data.pckl', '/gdrive/My Drive/shrec_data.pckl')

Mounted at /gdrive
cp: cannot stat 'shrec_data.pckl': No such file or directory


In [None]:
# Reload the pickled split. The names follow the order load_data returns:
# x_train, x_test, y_train_14, y_train_28, y_test_14, y_test_28.
x_train, x_test, y_train_14, y_train_28, y_test_14, y_test_28 = load_data('shrec_data.pckl')

In [None]:
#-----------------------------------
# Printing parameters to cross check
#------------------------------------
# One value per line: train tensor shape, test tensor shape,
# number of train labels, number of test labels.
print(x_train.shape, x_test.shape, len(y_train_14), len(y_test_14), sep='\n')


(1960, 100, 66)
(840, 100, 66)
1960
840
[24, 20, 3, 15, 4, 22, 1, 15, 11, 5, 15, 10, 28, 21, 27, 17, 20, 4, 27, 21, 25, 25, 21, 28, 10, 10, 10, 13, 18, 26, 18, 20, 4, 7, 24, 8, 12, 26, 12, 28, 21, 20, 3, 8, 4, 27, 18, 24, 16, 24, 13, 1, 2, 18, 13, 26, 9, 23, 12, 5, 16, 1, 13, 5, 9, 24, 8, 20, 9, 10, 17, 18, 25, 27, 4, 6, 6, 21, 13, 15, 5, 9, 27, 5, 23, 2, 11, 16, 21, 28, 23, 4, 18, 18, 23, 26, 19, 28, 1, 6, 20, 17, 26, 22, 8, 6, 12, 28, 11, 22, 16, 21, 3, 4, 28, 6, 16, 20, 28, 14, 9, 1, 27, 23, 28, 14, 13, 10, 23, 23, 10, 10, 21, 21, 23, 1, 6, 10, 10, 9, 21, 23, 15, 8, 4, 4, 15, 6, 27, 14, 26, 28, 14, 4, 26, 3, 8, 18, 21, 23, 9, 18, 17, 15, 17, 15, 4, 3, 25, 10, 27, 18, 4, 16, 15, 26, 4, 13, 21, 24, 3, 17, 11, 19, 1, 28, 10, 13, 20, 1, 3, 20, 26, 3, 14, 3, 2, 5, 23, 9, 9, 27, 10, 2, 15, 17, 26, 10, 17, 18, 3, 20, 3, 20, 8, 7, 13, 1, 23, 19, 11, 2, 3, 26, 23, 14, 8, 24, 20, 5, 4, 4, 25, 26, 11, 5, 17, 4, 12, 14, 2, 12, 11, 26, 12, 9, 16, 14, 10, 9, 23, 16, 8, 21, 7, 21, 23, 22, 25, 6, 1