In [2]:
import pandas as pd 
import numpy as np
import h5py
import tables
import os
import cv2
from scipy.ndimage import gaussian_filter1d
from persistence1d import RunPersistence
import sys
import time
import matplotlib.pyplot as plt 

In [3]:
# Convert each HDF5 (.h5) file into a raw CSV file
def change(h5dir, rawdir):
    """Convert every not-yet-converted ``.h5`` file in ``h5dir`` into a CSV in ``rawdir``.

    Each top-level key of the h5 file becomes one CSV column. The CSV is
    written as ``{rawdir}/{MMDD_HHMM}_{stem}.csv`` where ``stem`` is the h5
    file name without its extension.

    Parameters
    ----------
    h5dir : str
        Directory that is scanned for ``*.h5`` files.
    rawdir : str
        Directory that receives the CSV files; it is also scanned to decide
        which h5 files were converted before.

    Returns
    -------
    None

    Notes
    -----
    An h5 file counts as converted when ``rawdir`` holds a CSV whose name,
    with or without the ``MMDD_HHMM_`` prefix this function adds, equals the
    h5 stem. Comparing only the un-stripped name (as before) never matched,
    so every run converted every file again.
    """
    # Only real .h5 files are candidates; other directory entries are ignored
    # instead of being mangled by a blind ``[:-3]`` slice.
    h5_stems = [
        os.path.splitext(name)[0]
        for name in os.listdir(h5dir)
        if name.endswith('.h5')
    ]

    converted = set()
    for name in os.listdir(rawdir):
        stem, ext = os.path.splitext(name)
        if ext == '.csv':
            converted.add(stem)  # CSV stored without a timestamp prefix
            converted.add(_strip_timestamp_prefix(stem))

    tochangefiles = [stem for stem in h5_stems if stem not in converted]
    print('Tochange', tochangefiles)

    for filename in tochangefiles:
        print(filename)
        # The context manager closes the file even if a read fails.
        with h5py.File(os.path.join(h5dir, filename + '.h5'), 'r') as h5f:
            # ``[()]`` reads the whole dataset in one call; iterating the
            # h5py object directly fetches it one element at a time.
            data = {key: list(h5f[key][()]) for key in h5f.keys()}

        now = time.strftime('%m%d_%H%M', time.localtime(time.time()))

        df = pd.DataFrame(data)
        df.to_csv(f'{rawdir}/{now}_{filename}.csv', index=None)


def _strip_timestamp_prefix(stem):
    """Return ``stem`` without a leading ``MMDD_HHMM_`` prefix, if it has one."""
    prefix = stem[:10]
    has_prefix = (
        len(prefix) == 10
        and prefix[4] == '_'
        and prefix[9] == '_'
        and prefix[:4].isdigit()
        and prefix[5:9].isdigit()
    )
    return stem[10:] if has_prefix else stem

In [4]:
# Split the speed trace into submovements and label every row
def labeling(rawdir, labeleddir, sigma=1, min_persistence=1):
    """Label every row of each raw CSV with the submovement it belongs to.

    For each ``*.csv`` in ``rawdir`` that has no ``{stem}_labeled.csv`` in
    ``labeleddir``, the ``speed`` column is smoothed, persistent minima of
    the smoothed signal are used as submovement boundaries, and the frame is
    written to ``{labeleddir}/{stem}_labeled.csv`` with two extra columns:
    ``filtered_speed`` and ``submovementlabel``.

    Parameters
    ----------
    rawdir : str
        Directory containing the raw CSV files (must have a ``speed`` column).
    labeleddir : str
        Directory that receives the labeled CSV files.
    sigma : float, optional
        Standard deviation passed to ``gaussian_filter1d`` (default 1).
    min_persistence : float, optional
        Extrema are kept only when their persistence is strictly greater than
        this value (default 1).

    Returns
    -------
    None

    Notes
    -----
    The first and last rows always get ``filtered_speed == -1`` and
    ``submovementlabel == -1``. A row at or after the k-th kept boundary
    gets label ``k``. Rows after the last boundary keep the last label
    instead of raising ``IndexError``, which also covers the case where no
    persistent minimum is found.
    """
    raw_suffix = '.csv'
    labeled_suffix = '_labeled.csv'

    # Match on the full suffix so stray entries (e.g. ``.ipynb_checkpoints``)
    # are skipped instead of being sliced into a bogus file name.
    already_labeled = {
        name[:-len(labeled_suffix)]
        for name in os.listdir(labeleddir)
        if name.endswith(labeled_suffix)
    }
    tochangefiles = [
        name[:-len(raw_suffix)]
        for name in os.listdir(rawdir)
        if name.endswith(raw_suffix)
        and name[:-len(raw_suffix)] not in already_labeled
    ]
    print("Tochange",  tochangefiles)

    for filename in tochangefiles:
        print(filename)
        df = pd.read_csv(os.path.join(rawdir, filename + ".csv"), sep=',')

        # Smooth everything except the first and last sample, then put a -1
        # sentinel at both ends so the result has one value per row again.
        speed = df['speed'].tolist()
        filtered_speed = gaussian_filter1d(speed[1:len(speed) - 1], sigma)
        filtered_speed = np.insert(filtered_speed, 0, -1)
        filtered_speed = np.append(filtered_speed, -1)
        df["filtered_speed"] = filtered_speed

        extrema = RunPersistence(np.array(filtered_speed))
        persistent = [pair for pair in extrema if pair[1] > min_persistence]
        # NOTE(review): assumes RunPersistence yields (index, persistence)
        # pairs that, after this stable sort by persistence, have minima at
        # the even positions, as in the persistence1d usage example; confirm
        # against the installed version.
        persistent.sort(key=lambda pair: pair[1])
        minindices = sorted(pair[0] for pair in persistent[::2])
        # The lowest-index minimum is dropped, presumably the -1 sentinel at
        # row 0 (TODO confirm).
        minindices = minindices[1:]

        # label(i) = number of boundaries <= i. This matches the former
        # counting loop where that loop succeeded, but cannot run out of
        # boundaries.
        boundaries = np.asarray(minindices, dtype=np.intp)
        submovementlabel = np.searchsorted(
            boundaries, np.arange(len(filtered_speed)), side='right'
        )
        submovementlabel[0] = -1
        submovementlabel[-1] = -1

        df['submovementlabel'] = submovementlabel
        df.to_csv(f'{labeleddir}/{filename}_labeled.csv', sep=',', index=None)
    

In [5]:
# Pad every submovement to a fixed number of rows
def Padding(labeleddir, paddingdir, maxRow=30):
    """Pad every submovement of each labeled CSV to exactly ``maxRow`` rows.

    For each ``{stem}_labeled.csv`` in ``labeleddir`` without a matching
    ``{stem}_padding.csv`` in ``paddingdir``:

    1. the first and last rows are discarded;
    2. a ``count`` column is added holding the number of rows that share the
       row's ``submovementlabel``;
    3. submovements with more than ``maxRow`` rows are dropped;
    4. every remaining submovement is followed by ``maxRow - count`` padding
       rows in which every column is 0 except ``cursor_x`` / ``cursor_y``,
       which repeat the values of the submovement's last row;
    5. the result is written to ``{paddingdir}/{stem}_padding.csv``.

    Parameters
    ----------
    labeleddir : str
        Directory containing the ``*_labeled.csv`` files. They must have the
        columns ``submovementlabel``, ``cursor_x`` and ``cursor_y``.
    paddingdir : str
        Directory that receives the padded CSV files.
    maxRow : int, optional
        Target number of rows per submovement (default 30).

    Returns
    -------
    None
    """
    labeled_suffix = '_labeled.csv'
    padding_suffix = '_padding.csv'

    # Match on the full suffix so stray entries (e.g. ``.ipynb_checkpoints``)
    # are skipped instead of being sliced into a bogus file name.
    already_padded = {
        name[:-len(padding_suffix)]
        for name in os.listdir(paddingdir)
        if name.endswith(padding_suffix)
    }
    tochangefiles = [
        name[:-len(labeled_suffix)]
        for name in os.listdir(labeleddir)
        if name.endswith(labeled_suffix)
        and name[:-len(labeled_suffix)] not in already_padded
    ]
    print("Tochange",  tochangefiles)

    for labeledfile in tochangefiles:
        print(labeledfile)

        raw_data = pd.read_csv(
            os.path.join(labeleddir, labeledfile + "_labeled.csv"), sep=','
        )

        # Discard the first and last rows of the file.
        trimmed = raw_data.iloc[1:-1].copy()

        # Store the size of each row's submovement in its ``count`` column.
        # Looking the size up by label keeps rows and counts aligned without
        # relying on the labels being sorted in row order.
        label_sizes = trimmed['submovementlabel'].value_counts()
        trimmed['count'] = trimmed['submovementlabel'].map(label_sizes)

        # Drop submovements that are longer than maxRow.
        filtered_data = trimmed[trimmed['count'] <= maxRow]

        # Collect all pieces and concatenate once instead of growing the
        # result frame inside the loop.
        pieces = []
        for _, segment in filtered_data.groupby('submovementlabel', sort=True):
            pieces.append(segment)

            remainRowNum = maxRow - len(segment)  # number of rows to pad
            if remainRowNum > 0:
                padding_df = pd.DataFrame(
                    0, index=range(remainRowNum), columns=trimmed.columns
                )
                # Reference row: the last row of this submovement.
                padding_df['cursor_x'] = segment['cursor_x'].iloc[-1]
                padding_df['cursor_y'] = segment['cursor_y'].iloc[-1]
                pieces.append(padding_df)

        if pieces:
            result_data = pd.concat(pieces, ignore_index=True)
        else:
            # Nothing survived the filter: still emit a header-only file.
            result_data = pd.DataFrame(columns=trimmed.columns)

        result_data.to_csv(
            f'{paddingdir}/{labeledfile}_padding.csv', sep=',', index=None
        )


In [9]:
# Folder layout for the "pro" recordings.
DATA_H5_FOLDER_pro = "./data_h5/pro"
DATA_RAW_FOLDER_pro = "./data_raw/pro"
DATA_LABELED_FOLDER_pro = "./data_labeled/pro"
DATA_PADDING_FOLDER_pro = "./data_padding/pro"

# Folder layout for the "general" recordings.
DATA_H5_FOLDER_general = "./data_h5/general"
DATA_RAW_FOLDER_general = "./data_raw/general"
DATA_LABELED_FOLDER_general = "./data_labeled/general"
DATA_PADDING_FOLDER_general = "./data_padding/general"

# Run the three stages (h5 -> raw CSV -> labeled CSV -> padded CSV) for one
# group after the other: first "pro", then "general".
for _h5_dir, _raw_dir, _labeled_dir, _padding_dir in (
    (DATA_H5_FOLDER_pro, DATA_RAW_FOLDER_pro,
     DATA_LABELED_FOLDER_pro, DATA_PADDING_FOLDER_pro),
    (DATA_H5_FOLDER_general, DATA_RAW_FOLDER_general,
     DATA_LABELED_FOLDER_general, DATA_PADDING_FOLDER_general),
):
    change(_h5_dir, _raw_dir)
    labeling(_raw_dir, _labeled_dir)
    Padding(_labeled_dir, _padding_dir)


Tochange ['유백진입니다만_다리우스_1', '유백진입니다만_벡스_2', '유백진입니다만_아크샨_1']
유백진입니다만_다리우스_1


KeyboardInterrupt: 