### Get Pelvis data - LPF + Norm (save Figure + csv)
### Get oneHot encoded Label 
### Interpolation (Univariate Interpolation)

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np 
from IPython.display import clear_output

from scipy import interpolate
from scipy.interpolate import UnivariateSpline  # missing value -> np.nan -> to zero -> 
from scipy.signal import butter, lfilter, filtfilt  

from sklearn.model_selection import KFold  # keeps the x_data / y_data label distribution (folded together)
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf 

# Fix the NumPy RNG seed so repeated runs of the notebook are reproducible.
np.random.seed(7)
np.set_printoptions(precision=4, suppress=True)  # print 4 decimal places instead of scientific notation

# Dataset location is assembled from three path fragments.
rootDir = "/DockerProjects/Dataset/TUG/trainSet/HMMpaper"
expertFolder = "/" + "LSTM2HMM_saveResults_illness_KYH"  # --- the part to change per run
viewFolder = "/" + "0_sideView"

print(" [Home Dir.] Current path", os.getcwd())  

datasetDir = rootDir + expertFolder + viewFolder
os.chdir(datasetDir) ; print(" Current workaing path [dataset] ", os.getcwd())  # -- switch the working directory to where the dataset lives

 [Home Dir.] Current path /DockerProjects/walkCAM/tug/temporalCNN_wLabel
 Current workaing path [dataset]  /DockerProjects/Dataset/TUG/trainSet/HMMpaper/LSTM2HMM_saveResults_illness_KYH/0_sideView


In [2]:
def _fill_missing_with_spline(frames, values):
    """Return *values* with NaN samples replaced by a spline fitted over *frames*."""
    missing = np.isnan(values)
    values[missing] = 0
    # Zero weight on the placeholder zeros keeps them out of the fit.
    spline = UnivariateSpline(frames, values, w=~missing)
    return spline(frames)


def _hold_previous_over_jumps(values, max_lag, threshold=100):
    """Apply the garbage-value pass in place, one lag at a time.

    For every lag in 1..max_lag, each index ``k`` with
    ``|values[k + lag] - values[k]| > threshold`` is overwritten with
    ``values[k - 1]`` (all replacements of one lag use the pre-update array).
    NOTE(review): for ``k == 0`` the source index is -1, i.e. the last sample;
    this is kept as in the original code -- confirm it is intended.
    """
    for lag in range(1, max_lag + 1):
        jump_idx = np.where(np.abs(values[lag:] - values[:-lag]) > threshold)[0]
        values[jump_idx] = values[jump_idx - 1]


def get_rawData(csvFile):
    """Load frame numbers, elapsed time and pelvis x/y/z from a skeleton CSV.

    The first two lines of the file are skipped. Column 0 is read as the frame
    number, columns 1-3 as pelvis x/y/z and columns 97-100 as the wall-clock
    hour / minute / second / millisecond of each frame.

    Rows whose column-1 cell is blank are treated as missing on all three
    axes and filled by a weighted ``UnivariateSpline`` over the frame numbers.
    If the largest z value is >= 5000 the garbage-value pass
    (``_hold_previous_over_jumps``) is run on every axis.

    Args:
        csvFile: path of the CSV file.

    Returns:
        ``(frameNum, eachTimestamp_sec, pelvis_x, pelvis_y, pelvis_z)`` where
        ``frameNum`` is an int array, ``eachTimestamp_sec`` is the elapsed time
        in seconds since the first loaded row (float64, first entry 0) and the
        pelvis arrays are float32 when nothing was interpolated, otherwise the
        spline output.
    """
    continuous_errorVal_Check = 30  # largest frame lag checked by the garbage-value pass

    # ndmin=2 keeps the column indexing valid when the file holds a single data row.
    csv_data_ = np.loadtxt(csvFile, delimiter=',', dtype=str, skiprows=2,
                           usecols=(0, 1, 2, 3), ndmin=2)
    frameNum = csv_data_[:, 0].astype(int)
    max_frame = len(frameNum)

    # A blank x cell marks the whole row as missing. The floats are built
    # directly instead of writing "nan" back into the fixed-width string array.
    missing_rows = np.array([not cell.strip() for cell in csv_data_[:, 1]], dtype=bool)
    present_rows = ~missing_rows
    pelvis = np.full((max_frame, 3), np.nan, dtype=np.float32)
    pelvis[present_rows] = csv_data_[present_rows, 1:4].astype(np.float32)
    newPelvis_x = pelvis[:, 0].copy()
    newPelvis_y = pelvis[:, 1].copy()
    newPelvis_z = pelvis[:, 2].copy()

    if missing_rows.any():
        newPelvis_x = _fill_missing_with_spline(frameNum, newPelvis_x)
        newPelvis_y = _fill_missing_with_spline(frameNum, newPelvis_y)
        newPelvis_z = _fill_missing_with_spline(frameNum, newPelvis_z)

    if np.max(newPelvis_z) >= 5000:
        # Garbage (non-empty but implausible) values are present.
        _hold_previous_over_jumps(newPelvis_x, continuous_errorVal_Check)
        _hold_previous_over_jumps(newPelvis_y, continuous_errorVal_Check)
        _hold_previous_over_jumps(newPelvis_z, continuous_errorVal_Check)

    # float64 is required here: a millisecond-of-day value (~5e7) exceeds the
    # 24-bit float32 mantissa and would be rounded to 4 ms steps.
    timeData = np.loadtxt(csvFile, delimiter=',', skiprows=2, dtype=np.float64,
                          usecols=[97, 98, 99, 100], ndmin=2)
    clock_msec = (timeData[:, 0] * 60 * 60 * 1000 + timeData[:, 1] * 60 * 1000
                  + timeData[:, 2] * 1000 + timeData[:, 3])

    eachTimestamp_sec = np.zeros(max_frame, dtype=float)
    if max_frame > 1:
        # Per-frame steps are truncated to whole milliseconds before summing.
        step_msec = np.trunc(np.diff(clock_msec))
        eachTimestamp_sec[1:] = np.cumsum(step_msec) / 1000

    return frameNum, eachTimestamp_sec, newPelvis_x, newPelvis_y, newPelvis_z


    
def _min_max_scale(values):
    """Scale *values* linearly so its minimum maps to 0 and its maximum to 1."""
    values = np.asarray(values)
    low = np.min(values)
    span = np.max(values) - low
    if span == 0:
        # A constant signal has no range; return zeros instead of 0/0 = NaN.
        return np.zeros(values.shape, dtype=float)
    return (values - low) / span


def _z_score_scale(values):
    """Shift *values* to zero mean and scale to unit (population) standard deviation."""
    values = np.asarray(values)
    spread = np.std(values)
    if spread == 0:
        # A constant signal has no spread; return zeros instead of 0/0 = NaN.
        return np.zeros(values.shape, dtype=float)
    return (values - np.mean(values)) / spread


def get_normData(pelvis_x, pelvis_y, pelvis_z, normMethod = 'min_max'):
    """Normalise each pelvis axis independently.

    Args:
        pelvis_x, pelvis_y, pelvis_z: 1-D sequences of samples, one per axis.
        normMethod: ``"min_max"`` (scale to [0, 1]) or ``"z_score"``
            (zero mean, unit standard deviation).

    Returns:
        Tuple ``(normPelvis_x, normPelvis_y, normPelvis_z)``. An axis whose
        samples are all equal is returned as zeros.

    Raises:
        ValueError: if ``normMethod`` is not one of the two supported names.
    """
    if normMethod == "min_max":
        scale = _min_max_scale
    elif normMethod == "z_score":
        scale = _z_score_scale
    else:
        raise ValueError(
            "normMethod must be 'min_max' or 'z_score', got %r" % (normMethod,))

    return scale(pelvis_x), scale(pelvis_y), scale(pelvis_z)

def get_LPFData(pelvis_x, pelvis_y, pelvis_z, cutoff_freq = 0.6, order=2, sampling_freq=30):
    """Low-pass filter the three pelvis axes with a zero-phase Butterworth filter.

    Args:
        pelvis_x, pelvis_y, pelvis_z: 1-D sample sequences, one per axis.
        cutoff_freq: cut-off frequency, in the same unit as ``sampling_freq``.
        order: Butterworth filter order.
        sampling_freq: sampling rate of the signals (defaults to 30, the value
            that used to be hard-coded).

    Returns:
        Tuple ``(Filtered_x, Filtered_y, Filtered_z)``.
    """
    # butter() expects the cut-off as a fraction of the Nyquist frequency.
    normalized_cutoff_freq = 2 * cutoff_freq / sampling_freq
    numerator_coeffs, denominator_coeffs = butter(order, normalized_cutoff_freq)

    # filtfilt runs the filter forward and backward, so there is no phase shift.
    Filtered_x, Filtered_y, Filtered_z = (
        filtfilt(numerator_coeffs, denominator_coeffs, axis_samples)
        for axis_samples in (pelvis_x, pelvis_y, pelvis_z)
    )
    return Filtered_x, Filtered_y, Filtered_z

  
def pltSkeleton(timeSec, pelvis_x, pelvis_y, pelvis_z, saveDir, saveFile,  actLabel, addFolder="raw", actSplit=False, save=True):
    """Plot the three pelvis axes against time, optionally with action markers.

    Args:
        timeSec: x-axis values, indexable by the frame indices in ``actLabel``.
        pelvis_x, pelvis_y, pelvis_z: samples drawn on the 1st/2nd/3rd subplot.
        saveDir: directory the figure is written to.
        saveFile: suffix of the figure title / file name.
        actLabel: six frame indices (start move, start walk, start turn,
            end turn, start sit, end sit); only read when ``actSplit`` is true.
        addFolder: prefix of the figure title / file name.
        actSplit: draw a vertical line at each of the six action boundaries.
        save: write the figure to ``saveDir/<addFolder>_<saveFile>``.
    """
    # (legend text, colour, line style) for actLabel[0] .. actLabel[5].
    # The same style is used on every subplot so the single figure legend is
    # valid for all three axes.
    boundary_styles = (
        ("start move", 'r', "--"),
        ("start walk", 'r', ":"),
        ("start turn", 'k', "--"),
        ("end turn", 'k', ":"),
        ("start sit", 'g', "--"),
        ("end sit", 'g', ":"),
    )

    fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(8, 7))
    title = addFolder + "_" + saveFile
    ax[0].set_title(title)
    ax[0].set_ylabel('Side axis(X) of Pelvis [mm]')
    ax[1].set_ylabel('Vertical axis(Y) of Pelvis [mm]')
    ax[2].set_ylabel('Depth Axis of Pelvis [mm]')
    ax[2].set_xlabel('Time [Sec]')
    ax[0].plot(timeSec, pelvis_x, "r--", lw=1, label="pelvis_x")
    ax[1].plot(timeSec, pelvis_y, "g--", lw=1, label="pelvis_y")
    ax[2].plot(timeSec, pelvis_z, "b--", lw=1, label="pelvis_z")

    if actSplit:
        for axis_idx in range(3):
            for boundary_idx, (text, colour, style) in enumerate(boundary_styles):
                line_kwargs = dict(color=colour, linestyle=style, linewidth=3)
                if axis_idx == 0:
                    # Label only once so the legend has no duplicate entries.
                    line_kwargs["label"] = text
                ax[axis_idx].axvline(x=timeSec[actLabel[boundary_idx]], **line_kwargs)

    fig.legend()
    plt.show()

    if save:
        target = saveDir + "/" + title
        if os.path.isfile(target):
            os.remove(target)
        fig.savefig(target)
    # Close this specific figure rather than whichever one is current.
    plt.close(fig)
           

        

In [3]:
def get_actFrameNumber(csvFile): 
    """Read the action-boundary frame numbers from a label CSV.

    The second line of the file (the one after the header) is split on
    commas; every cell is expected to look like ``<prefix>_<number>[.<rest>]``
    and the integer between the first underscore and the following dot is
    extracted.

    Args:
        csvFile: path of the label CSV (read as UTF-8).

    Returns:
        List of ints, one per cell of the label line, in file order.
    """
    with open(csvFile, 'r', encoding='utf-8') as label_file:
        rows = label_file.read().split("\n")  # rows[0] = header, rows[1] = action labels

    label_cells = rows[1].split(",")
    return [int(cell.split("_")[1].split(".")[0]) for cell in label_cells]
    
def sliding_window(timestamp, data_x, data_y, lookback_window=3):
    """Cut a sequence into overlapping look-back windows.

    Window ``j`` covers ``data_x[j : j + lookback_window]`` and is paired with
    the timestamp and target of its last sample.

    Args:
        timestamp: per-sample timestamps, indexable by sample position.
        data_x: per-sample inputs; the feature count is taken from its shape
            (everything after the first axis, or 1 for a 1-D sequence).
        data_y: per-sample targets. It is passed to ``MinMaxScaler.fit``, so
            it presumably has to be 2-D (samples x outputs) -- verify against
            the caller.
        lookback_window: number of consecutive samples per window.

    Returns:
        ``(time, x, y, enc)``: ``time`` and ``y`` hold one entry per window,
        ``x`` has shape ``(windows, lookback_window, features, 1)`` and ``enc``
        is the ``MinMaxScaler`` fitted on ``data_y``. ``y`` itself is returned
        unscaled.
    """
    # The scaler is only fitted and handed back; the targets are not transformed.
    enc = MinMaxScaler(feature_range=(0, 1))
    enc.fit(data_y)

    data_x = np.asarray(data_x)
    # Derive the feature count from the data instead of an undefined global.
    num_features = int(np.prod(data_x.shape[1:]))

    window_ends = range(lookback_window, len(data_x) + 1)
    time = np.array([timestamp[end - 1] for end in window_ends])
    x = np.array([data_x[end - lookback_window:end] for end in window_ends])
    y = np.array([data_y[end - 1] for end in window_ends])

    x = x.reshape(-1, lookback_window, num_features, 1)
    return time, x, y, enc


#------------------ Origin... sit, sit-to-stand, walk, turn, stand-to-sit (5 action) ... Acc = 88 % ~ 90%   "Act5_lpf_"
def convert_act2oneHot(actFrame, frameNum):
    """Build a per-frame one-hot action label for one trial.

    Frames are addressed by position (``0 .. len(frameNum) - 1``); the values
    stored in ``frameNum`` are not used. The six boundaries in ``actFrame``
    split the trial into seven segments:

        index <  actFrame[0]                -> [1, 0, 0, 0, 0]  (sit)
        actFrame[0] <= index < actFrame[1]  -> [0, 1, 0, 0, 0]  (sit-to-stand)
        actFrame[1] <= index < actFrame[2]  -> [0, 0, 1, 0, 0]  (walk)
        actFrame[2] <= index < actFrame[3]  -> [0, 0, 0, 1, 0]  (turn)
        actFrame[3] <= index < actFrame[4]  -> [0, 0, 1, 0, 0]  (walk)
        actFrame[4] <= index < actFrame[5]  -> [0, 0, 0, 0, 1]  (stand-to-sit)
        index >= actFrame[5]                -> [1, 0, 0, 0, 0]  (sit)

    Args:
        actFrame: sequence with at least six boundary frame indices.
        frameNum: sequence whose length is the number of frames in the trial.

    Returns:
        Integer array of shape ``(len(frameNum), 5)``.

    Raises:
        ValueError: if ``actFrame`` has fewer than six entries.
    """
    if len(actFrame) < 6:
        raise ValueError(
            "actFrame needs 6 boundary frames, got %d" % len(actFrame))

    # Class index of the segment that ends at each of the six boundaries.
    segment_class = (0, 1, 2, 3, 2, 4)

    frame_idx = np.arange(len(frameNum))
    # Frames at or after the last boundary keep class 0 (sit).
    class_idx = np.zeros(len(frameNum), dtype=int)
    # Apply the boundaries from last to first so that the earliest boundary a
    # frame lies before wins, exactly like a first-match if/elif chain.
    for boundary, action in reversed(list(zip(actFrame[:6], segment_class))):
        class_idx[frame_idx < boundary] = action

    return np.eye(5, dtype=int)[class_idx]



In [6]:
# Column layout of the saved per-trial CSV: feature columns followed by the one-hot action columns.
numFeatures = 4  # time, pelvis x, pelvis y, pelvis z
numActions = 5  # width of the one-hot action label
dataset_columns = numFeatures + numActions


def preprocess_forSaving(actSplit=False, save = True):
    """Plot and export every labelled trial found under ``datasetDir``.

    Only directories whose path contains ``"2020_11_03 TUG"`` are processed
    (``.ipynb_checkpoints`` directories are skipped). Within a directory the
    CSV files are visited in sorted name order; a file whose name contains
    ``"label_"`` supplies the action boundaries and each file whose name
    contains ``"skeleton_"`` is one trial. A trial is plotted and exported
    only when a label file from the same directory has already been read.

    For each such trial the raw, min-max normalised and low-pass filtered
    trajectories are plotted, and two CSVs are written next to the input
    (``Originact5_raw_labeled_*.csv`` and ``Originact5_lpf_labeled_*.csv``)
    holding ``time, x, y, z`` followed by the one-hot action columns.

    Args:
        actSplit: forwarded to ``pltSkeleton`` (draw action boundaries).
        save: save the figures and write the CSVs.

    Returns:
        Number of skeleton files encountered, labelled or not.
    """
    total_Trials = 0
    for dirpath, _subdirs, files in sorted(os.walk(datasetDir)):
        clear_output(wait=True)
        if ".ipynb_checkpoints" in dirpath or "2020_11_03 TUG" not in dirpath:
            continue

        # Reset per directory so a skeleton file is never compared against an
        # unset (or stale) label location.
        labelPath = None
        actFrame = []

        for filename in sorted(files):
            if ".csv" not in filename:
                continue

            if "label_" in filename:
                labelPath = dirpath
                actFrame = get_actFrameNumber(os.path.join(dirpath, filename))

            if "skeleton_" not in filename:
                continue

            total_Trials += 1
            pelvisPath = dirpath
            if labelPath != pelvisPath:
                # No label for this trial: nothing to plot or export.
                continue

            subname = dirpath.split("/")[-2]
            trial = dirpath.split("/")[-1]
            trial_tag = subname + "_" + str(trial)
            pelvis_csv = os.path.join(dirpath, filename)

            frameNum, timstamp_ms, pelvis_x, pelvis_y, pelvis_z = get_rawData(pelvis_csv)  # raw
            normPelvis_x, normPelvis_y, normPelvis_z = get_normData(
                pelvis_x, pelvis_y, pelvis_z, normMethod='min_max')  # norm
            lpfPelvis_x, lpfPelvis_y, lpfPelvis_z = get_LPFData(
                normPelvis_x, normPelvis_y, normPelvis_z, cutoff_freq=0.5, order=1)  # lpf

            pltSkeleton(timstamp_ms, pelvis_x, pelvis_y, pelvis_z, pelvisPath, trial_tag,
                        actFrame, addFolder="raw", actSplit=actSplit, save=save)
            pltSkeleton(timstamp_ms, normPelvis_x, normPelvis_y, normPelvis_z, pelvisPath, trial_tag,
                        actFrame, addFolder="norm", actSplit=actSplit, save=save)
            pltSkeleton(timstamp_ms, lpfPelvis_x, lpfPelvis_y, lpfPelvis_z, pelvisPath, trial_tag,
                        actFrame, addFolder="lpf" + "_norm", actSplit=actSplit, save=save)

            oneHot_actionList = convert_act2oneHot(actFrame, frameNum)

            # (frames, 4) feature tables: time, x, y, z.
            unLabel_rawData = np.array([timstamp_ms, pelvis_x, pelvis_y, pelvis_z]).T
            # All three axes come from the filtered signal (y previously used
            # the unfiltered normalised axis).
            unLabel_lpfData = np.array([timstamp_ms, lpfPelvis_x, lpfPelvis_y, lpfPelvis_z]).T

            # Features first, one-hot action columns last.
            raw_mergeData = np.hstack([unLabel_rawData, oneHot_actionList]).astype(float)
            lpf_mergeData = np.hstack([unLabel_lpfData, oneHot_actionList]).astype(float)

            if save:
                raw_csv = os.path.join(pelvisPath, "Originact5_raw_labeled_" + trial_tag + ".csv")
                lpf_csv = os.path.join(pelvisPath, "Originact5_lpf_labeled_" + trial_tag + ".csv")
                # np.savetxt overwrites existing files, so no prior removal is needed.
                np.savetxt(raw_csv, raw_mergeData, fmt='%s', delimiter=',')
                np.savetxt(lpf_csv, lpf_mergeData, fmt='%s', delimiter=',')

    return total_Trials
    
    
# Run the whole preprocessing pass: draw the action boundaries on the plots
# and write figures + CSVs to disk.
actSplit=True
save=True


total_Trials = preprocess_forSaving(actSplit=actSplit, save=save)
print("Total Trials: ", total_Trials)

Total Trials:  27
