# データセットの作成
+ chainer学習用のデータセットを作成.
+ shapeはできるだけ変更しないで, {中間出力, 関節角度(量子化済み＆量子化前),}

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import time
import threading
import h5py


from logging import getLogger, basicConfig, DEBUG, INFO
logger = getLogger(__name__)
LOG_FMT = "{asctime} | {levelname:<5s} | {name} | {message}"
basicConfig(level=INFO, format=LOG_FMT, style="{")

In [2]:
def resampling(X,th_min=-1., th_max=1.):
    """ 量子化の実行
    
    Args.
    -----
    - x: float
    - th_min/th_max: float, threshhold [unit=degree]
    """
    _X = X.copy()
    _X[X < th_min] = -1.
    _X[X > th_max] =  1.
    _X[(X >= th_min) & (X<= th_max)] = 0
    return _X

In [3]:
th_min= -0.5
th_max=  0.5

def threading_clbk(ps):
    (path_in, path_out,) = ps
    
    
    logger.info("Start: Load from {}".format(path_in))
    # Load File
    with h5py.File(path_in, 'r') as f:
        A  = np.array(f["action"],)
        FC = np.array(f["fc"])
    logger.info("Start: A={}, FC={} [from {}]".format(A.shape, FC.shape, path_in))
        
    # 量子化 & Onehot Encoding
    As = resampling(A,th_min=th_min, th_max=th_max)

    shape = list(As.shape) + [3]
    As_onehot = np.eye(3)[As.ravel().astype(int)+1]
    As_onehot  = As_onehot.reshape(shape)
    
    # Write
    with h5py.File(path_out, 'w') as f:
        f.create_dataset("fc", data=FC)
        f.create_group('action')
        f["action"].create_dataset("raw", data=A)
        #f["action"].create_dataset("resampled", data=As)
        f["action"].create_dataset("onehot", data=As_onehot)
    logger.info("Finish: Write to {}".format(path_out))
    return True

In [4]:
""" Apply for single episode
"""
import os


path_in  = os.path.join("/root/dataStore/grasp_v1/episodes", "epoch0.h5")
path_out = os.path.join("/root/dataStore/grasp_v1/Inputs", "epoch0.h5")

threading_clbk( (path_in, path_out,))

2019-01-09 09:50:04,795 | INFO  | __main__ | Start: Load from /root/dataStore/grasp_v1/episodes/epoch0.h5
2019-01-09 09:50:04,799 | INFO  | __main__ | Start: A=(10, 2, 100, 21), FC=(10, 2, 100, 256) [from /root/dataStore/grasp_v1/episodes/epoch0.h5]


OSError: Unable to create file (unable to open file: name = '/root/dataStore/grasp_v1/Inputs/epoch0.h5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)

In [35]:
""" Execute in paralel
"""
import glob

dir_in  = "/root/dataStore/tmp2/episodes"
dir_out = "/root/dataStore/tmp2/Inputs"

file_list = list(glob.glob(os.path.join(dir_in, "*.h5")))
file_list.sort()
print(file_list)

file_list = [(path_in, os.path.join(dir_out, path_in.split("/")[-1])) for path_in in file_list]
display(file_list)

['/root/dataStore/tmp2/episodes/epoch0.h5', '/root/dataStore/tmp2/episodes/epoch1.h5', '/root/dataStore/tmp2/episodes/epoch2.h5']


[('/root/dataStore/tmp2/episodes/epoch0.h5',
  '/root/dataStore/tmp2/Inputs/epoch0.h5'),
 ('/root/dataStore/tmp2/episodes/epoch1.h5',
  '/root/dataStore/tmp2/Inputs/epoch1.h5'),
 ('/root/dataStore/tmp2/episodes/epoch2.h5',
  '/root/dataStore/tmp2/Inputs/epoch2.h5')]

In [37]:
# Load files using Threading
from concurrent.futures import ThreadPoolExecutor

thread_list   = []
max_worker = 5
logger.info("Start Load OPP Dataset [{}files]".format(len(file_list)))    
with ThreadPoolExecutor(max_workers=max_worker) as executor:
    ret = executor.map(threading_clbk, file_list)
logger.info("Thread ... Finish!! [Results={}]".format(len(list(ret))))
logger.info("Finish!!")

2018-12-27 07:55:41,312 | INFO  | __main__ | Start Load OPP Dataset [3files]
2018-12-27 07:55:41,313 | INFO  | __main__ | Start: Load from /root/dataStore/tmp2/episodes/epoch0.h5
2018-12-27 07:55:41,313 | INFO  | __main__ | Start: Load from /root/dataStore/tmp2/episodes/epoch1.h5
2018-12-27 07:55:41,313 | INFO  | __main__ | Start: Load from /root/dataStore/tmp2/episodes/epoch2.h5
2018-12-27 07:55:41,317 | INFO  | __main__ | Start: A=(10, 2, 100, 20), FC=(10, 2, 100, 256) [from /root/dataStore/tmp2/episodes/epoch0.h5]
2018-12-27 07:55:41,320 | INFO  | __main__ | Start: A=(10, 2, 100, 20), FC=(10, 2, 100, 256) [from /root/dataStore/tmp2/episodes/epoch1.h5]
2018-12-27 07:55:41,321 | INFO  | __main__ | Start: A=(10, 2, 100, 20), FC=(10, 2, 100, 256) [from /root/dataStore/tmp2/episodes/epoch2.h5]
2018-12-27 07:55:41,326 | INFO  | __main__ | Finish: Write to /root/dataStore/tmp2/Inputs/epoch0.h5
2018-12-27 07:55:41,330 | INFO  | __main__ | Finish: Write to /root/dataStore/tmp2/Inputs/epoch1.

## 作成したデータの確認

In [42]:
dir_in  = "/root/dataStore/tmp2/episodes"
dir_out = "/root/dataStore/tmp2/Inputs"

import os
import pandas as pd
import numpy as np
import h5py

filename = os.path.join("/root/dataStore", "test", "Inputs", "test", "epoch8.h5")
with h5py.File(filename, 'r') as f:
    A = np.array(f["action/resampled"],)
print(A.shape)

(10, 2, 100, 20)


In [44]:
A[:2]

array([[[[ 1., -1.,  0., ...,  1.,  1., -1.],
         [ 0., -1.,  0., ...,  0.,  1., -1.],
         [ 0., -1., -1., ..., -1.,  1., -1.],
         ...,
         [-1., -1.,  0., ..., -1.,  1., -1.],
         [-1., -1.,  0., ..., -1.,  1., -1.],
         [-1., -1.,  0., ..., -1.,  1., -1.]],

        [[ 1.,  1.,  1., ...,  1.,  1., -1.],
         [ 1.,  1.,  1., ...,  1.,  1., -1.],
         [ 1.,  1.,  1., ...,  1.,  1., -1.],
         ...,
         [ 1.,  1.,  1., ...,  1.,  1.,  1.],
         [ 1.,  1.,  1., ...,  1.,  1.,  1.],
         [ 1.,  1.,  1., ...,  1.,  1.,  1.]]],


       [[[-1.,  1.,  1., ...,  1.,  1., -1.],
         [-1.,  1.,  1., ...,  0.,  0., -1.],
         [-1.,  1.,  1., ..., -1.,  1., -1.],
         ...,
         [-1.,  1.,  0., ...,  0.,  1., -1.],
         [-1.,  1.,  0., ...,  0.,  1., -1.],
         [-1.,  1.,  0., ...,  0.,  1., -1.]],

        [[ 1.,  1.,  1., ..., -1.,  1., -1.],
         [ 1., -1.,  1., ..., -1.,  1., -1.],
         [ 1., -1.,  0., ..., 