## Setting

In [1]:
import os, json, glob, sys, time
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
import scipy.ndimage

sys.path.append('./utils/')
from rfdata import RFdata

%matplotlib inline

In [2]:
MOUNT_PATH = "/run/user/1000/gvfs/smb-share:server=azlab-fs01,share=東研究室/個人work/富井/"
dir_dataset = MOUNT_PATH + "PYUSCT_train/dataset008/"
MODEL_PATH = MOUNT_PATH + "PYUSCT_train/dataset008/ml_model/"

In [3]:
%ls $MODEL_PATH

[0m[01;32mclf_GPC_rbf_iso.pkl[0m*              [01;32mPCA_model.pickle[0m*
[01;32mclf_GPC_rbf_iso_t100_r128.pkl[0m*    [01;32mPCA_model_reduced_t100_r128.pickle[0m*
[01;32mclf_GPC_rbf_iso_t100_r64.pkl[0m*     [01;32mPCA_model_reduced_t100_r64.pickle[0m*
[01;32mclf_GPC_rbf_iso_T4_t100_r64.pkl[0m*  [01;32mPCA_model_reduced_T4_t100_r64.pickle[0m*
[01;32mclf_GPC_rbf_iso_T8_t100_r64.pkl[0m*  [01;32mPCA_model_reduced_T8_t100_r64.pickle[0m*
[01;32mclf_GPC_rbf_iso_time100.pkl[0m*      [01;32mPCA_model_reduced_time100.pickle[0m*


In [9]:
p = {
    "input": {
        "sim_path" : os.path.join(MOUNT_PATH, "nb_usctsim/sim_005/"), 
        "model_path" : os.path.join(MOUNT_PATH, "PYUSCT_train/dataset008/ml_model/PCA_model.pickle"),
        "size" : [512, 512],
        "offset" : [256, 256],
        "interval": [1,1,1] # Transducer, receiver, time
    },
    
    "output" : {
        "path" : os.path.join(MOUNT_PATH, "PYUSCT_train/dataset013/"),
    }
}

In [5]:
sim_result_dirs = glob.glob(os.path.join(p["input"]["sim_path"], "trial*"))
sim_result_dirs.sort()
for sim_result_dir in sim_result_dirs:
    tmp = sim_result_dir.split("/")
    print(tmp[-1])

trial_001
trial_002
trial_003
trial_004
trial_005
trial_006
trial_007
trial_008
trial_009
trial_010


In [11]:
def batch_from_sim_to_pca_1m1(fid, input_path, output_path, model_path, indices, interval):
    import numpy as np
    import string
    import os, json, glob, sys, time
    sys.path.append('utils/')
    from rfdata import RFdata

    sfid = str(fid).zfill(3)
    
    print("{}  Thread {}: start".format(time.ctime(), sfid))
    
    ## Initial RFdata
    rf = RFdata(input_path)
    print("raw data loaded.")

    ## Load model
    from sklearn.externals import joblib
    from sklearn.decomposition import PCA

    pca = joblib.load(model_path) 
    print("Thread {}: PCA model loaded.".format(sfid))


    
    ## Define transfer function
    def dimension_reduce_rf_point(pca, rf, ix, iy):
        offsets = np.arange(-100, 100, interval[2])
        _, subset = rf.getPointSubset((ix,iy), offsets)
        # have to be a parameter
        return pca.transform(subset[::interval[0],::interval[1],:].reshape(1, -1))

    
    
    res = np.empty((len(indices), 800))
    cnt = 0;
    print("Thread {}: processing".format(sfid))
    for (ix, iy) in indices:
        res[cnt] = dimension_reduce_rf_point(pca, rf, ix, iy)
        cnt += 1
        if (cnt % 1000 == 0):
            print("Thread {}: {} points completes {}".format(sfid, cnt, time.ctime()))
        if (cnt % 2000 == 0):
            return
    
    print("Thread {}: Saving file.".format(sfid))
    np.save(output_path + "part{}_size{}.npy".format(sfid, cnt), res)
    print("Thread {}: File saved.".format(sfid))
    
    print("{}  Thread {}: completed.".format(time.ctime(), sfid))

In [7]:
def preprocess_raw_1m1_trial_4thread(trial_id, input_path, model_path, output_path, size, offset, interval):
    #trial_id = sys.argv[1]
    print("{} Process of {} start.".format(time.ctime(), trial_id)) 

    # debug
    print(input_path)
    print(output_path)
    print(model_path)
    
    # create indices
    indices = np.indices((size[0],size[1]))
    indices[0] += offset[0]
    indices[1] += offset[1]
    indices = indices.transpose(1,2,0)
    indices = indices.reshape(-1, 2)      
    

    # multiply threads
    import threading
    batch_size = size[0] * size[1] // 4

    class myThread (threading.Thread):
        def __init__(self, threadID, input_path, output_path, model_path, indices, interval):
            threading.Thread.__init__(self)
            self.threadID = threadID
            self.input_path = input_path
            self.output_path = output_path
            self.model_path = model_path
            self.indices = indices
            self.interval = interval
        def run(self):
            batch_from_sim_to_pca_1m1(self.threadID,
                                      self.input_path,
                                      self.output_path,
                                      self.model_path,
                                      self.indices,
                                      self.interval,
                                     )

    thread1 = myThread(0, input_path, output_path, model_path, indices[:batch_size], interval)
    thread2 = myThread(1, input_path, output_path, model_path, indices[batch_size:batch_size*2], interval)
    thread3 = myThread(2, input_path, output_path, model_path, indices[batch_size*2:batch_size*3], interval)
    thread4 = myThread(3, input_path, output_path, model_path, indices[batch_size*3:batch_size*4], interval)

    thread1.start()
    thread2.start()
    thread3.start()
    thread4.start()
    thread1.join()
    thread2.join()
    thread3.join()
    thread4.join()

    print("Exiting Main Thread")
          
    print("{} Process of {} completed.".format(time.ctime(), trial_id))
    print("\n")



## Main

In [None]:
model_list = [".pickle", "_reduced_time100.pickle", "_reduced_t100_r128.pickle", "_reduced_t100_r64.pickle", "reduced_T8_t100_r64.pickle", "reduced_T4_t100_r64.pickle"]

In [12]:
for sim_result_dir in sim_result_dirs:
    # mkdir for output
    trial_id = sim_result_dir.split("/")[-1]
    out_dir = os.path.join(p["output"]["path"], trial_id)
    if not os.path.exists(out_dir): 
        os.makedirs(os.path.join(out_dir, "input"))   # 入力データ
        os.makedirs(os.path.join(out_dir, "output"))  # 出力データ      
        os.makedirs(os.path.join(out_dir, "sa"))      # 参照用SA
    # save dataX
    ## TODO
    preprocess_raw_1m1_trial_4thread(trial_id,
                                     sim_result_dir, 
                                     p["input"]["model_path"],
                                     os.path.join(out_dir, "input"),
                                     p["input"]["size"],
                                     p["input"]["offset"],
                                     p["input"]["interval"],
                                    )
    break

Tue May  1 12:43:36 2018 Process of trial_001 start.
/run/user/1000/gvfs/smb-share:server=azlab-fs01,share=東研究室/個人work/富井/nb_usctsim/sim_005/trial_001
/run/user/1000/gvfs/smb-share:server=azlab-fs01,share=東研究室/個人work/富井/PYUSCT_train/dataset013/trial_001/input
/run/user/1000/gvfs/smb-share:server=azlab-fs01,share=東研究室/個人work/富井/PYUSCT_train/dataset008/ml_model/PCA_model.pickle
Tue May  1 12:43:36 2018  Thread 000: startTue May  1 12:43:36 2018  Thread 001: startTue May  1 12:43:36 2018  Thread 002: start

Tue May  1 12:43:36 2018  Thread 003: start

raw data loaded.
raw data loaded.
raw data loaded.
raw data loaded.




Thread 003: PCA model loaded.
Thread 003: processing
Thread 001: PCA model loaded.
Thread 001: processing
Thread 000: PCA model loaded.
Thread 000: processing
Thread 002: PCA model loaded.
Thread 002: processing
Thread 002: 1000 points completes Tue May  1 13:08:42 2018
Thread 001: 1000 points completes Tue May  1 13:08:49 2018
Thread 003: 1000 points completes Tue May  1 13:08:51 2018
Thread 000: 1000 points completes Tue May  1 13:09:27 2018
Thread 002: 2000 points completes Tue May  1 13:25:09 2018
Thread 003: 2000 points completes Tue May  1 13:25:22 2018
Thread 001: 2000 points completes Tue May  1 13:25:25 2018
Thread 000: 2000 points completes Tue May  1 13:25:40 2018Exiting Main Thread

Tue May  1 13:25:40 2018 Process of trial_001 completed.


