In [28]:
import pandas as pd
import numpy as np
import copy
# pandas.__version__

Set up the columns to be transferred
- adds general x, y, z directional columns
- adds actuator specific columns
- adds gripper columns

In [30]:
# Columns to read from the base feedback CSV; '%time' is the timestamp column.
bf_cols = ['%time']

# add x, y, z data column information
# Each prefix below is expanded into three columns, one per axis.
axis_field_prefixes = [
    'field.base.imu_acceleration_',
    'field.base.imu_angular_velocity_',
    'field.base.tool_pose_',
    'field.base.tool_pose_theta_',
    'field.base.tool_twist_linear_',
    'field.base.tool_twist_angular_',
    'field.base.tool_external_wrench_force_',
    'field.base.tool_external_wrench_torque_',
    'field.base.commanded_tool_pose_',
    'field.base.commanded_tool_pose_theta_',
    'field.interconnect.imu_acceleration_',
    'field.interconnect.imu_angular_velocity_',
]
for prefix in axis_field_prefixes:
    for axis in ['x', 'y', 'z']:
        bf_cols.append(prefix + axis)

# add actuator column information
for actuator in ['field.actuators0', 'field.actuators1', 'field.actuators2', 'field.actuators3', 'field.actuators4', 'field.actuators5']:
    for quantity in ['.position', '.velocity', '.torque']:
        bf_cols.append(actuator + quantity)

# add gripper column information
# This loop must sit at top level: nested inside the actuator loop it would
# append both gripper columns once per actuator and create duplicates.
for gripper_col in ['field.interconnect.oneof_tool_feedback.gripper_feedback0.motor0.position', 'field.interconnect.oneof_tool_feedback.gripper_feedback0.motor0.velocity']:
    bf_cols.append(gripper_col)

# Columns to read from each camera CSV.
img_cols = ['%time', 'field.header.seq']

# print(bf_cols)
print("done")


done


Set up dataframes
- read data from basefeedback and camera data files into pandas dataframes

In [31]:
dataset = "flarp_folding_4"
print("running")

# Every CSV path starts with the same directory and dataset-name prefix;
# only the topic-specific suffix differs between files.
csv_prefix = '/media/imero/kinova_experimen/experimentData/' + dataset + '/csvFiles/' + dataset

# Base feedback: only the columns listed in bf_cols are read.
base_feedback = pd.read_csv(csv_prefix + '-_my_gen3_base_feedback.csv', usecols=bf_cols)
print("created base_feedback dataframe")

# Camera streams: only the columns listed in img_cols are read.
cam_1_color = pd.read_csv(csv_prefix + '-_cam_1_color_image_raw.csv', usecols=img_cols)
print("created cam_1_color dataframe")
cam_1_depth = pd.read_csv(csv_prefix + '-_cam_1_depth_image_raw.csv', usecols=img_cols)
print("created cam_1_depth dataframe")
cam_2_color = pd.read_csv(csv_prefix + '-_cam_2_color_image_raw.csv', usecols=img_cols)
print("created cam_2_color dataframe")
# Note: this file name uses "image_rect_raw", unlike the cam_1 depth file.
cam_2_depth = pd.read_csv(csv_prefix + '-_cam_2_depth_image_rect_raw.csv', usecols=img_cols)
print("created cam_2_depth dataframe")

running
created base_feedback dataframe
created cam_1_color dataframe
created cam_1_depth dataframe
created cam_2_color dataframe
created cam_2_depth dataframe


Data clean up

Main phases:
1. Divide data into two groups: basefeedback and image data:
    - Assuming all the image data sets have the same number of images:
        - find the dataset with the earliest start time
        - overwrite the timestamps of all other image data sets to match
    Why?: The difference between recorded times should be tiny, so standardizing the times
    makes handling the image data easier
    
2. Trim datasets to synchronize start and end times
a)
  - Find which group of data sets (basefeedback versus image data sets) starts last
b)
    - Set the start time to the latest start time from the sets
    - Drop data entries from the data set that started first that were recorded before the other data set, i.e. drop entries before the set start time
c)
    - Find which group of data sets ends first
    - Set the end time to the earliest end time among the data sets
    - Drop data entries from the data set(s) that recorded entries after the end time

3. Compare the lengths of the data set groups
    - If they are the same, skip down to a special case in 4
    - If they are different (most likely case) continue to main phase 4
4. Mesh the data sets
    - Create final dataframe
    - For every timestamp value in the smaller data set group:
        - find the timestamp value from the corresponding row in the larger data set group that is most similar (smallest difference) and pair those data rows
        - add paired data to the final dataframe
5. Convert the data to a CSV file
    - Create a CSV file using the final dataframe

Phase 1

In [32]:
print("phase 1 running")
# find the start of every image data set
# The timestamp is read from the '%time' column by name rather than with
# iloc[0][0], which depends on deprecated positional Series indexing and on
# '%time' being the first column of the frame.
cam_1_c_start = cam_1_color['%time'].iloc[0]
cam_1_d_start = cam_1_depth['%time'].iloc[0]
cam_2_c_start = cam_2_color['%time'].iloc[0]
cam_2_d_start = cam_2_depth['%time'].iloc[0]

# find earliest start time
image_set = [cam_1_color, cam_1_depth, cam_2_color, cam_2_depth]
start_times = [cam_1_c_start, cam_1_d_start, cam_2_c_start, cam_2_d_start]
# np.argmin returns the first index holding the minimum, so on a tie the
# earliest frame in image_set becomes the reference.
image_ref = int(np.argmin(start_times))
image_start = start_times[image_ref]
times = image_set[image_ref]['%time']
print("Start time is: ", image_start)
print("Image ref is ", image_ref)

# standardize image data set timestamps
# Column assignment aligns on the row index, so frames are expected to have
# the same number of rows as the reference frame.
for i in range(0, len(image_set)):
    if i != image_ref:
        image_set[i]['%time'] = times

print("Image start is : ", image_start)

phase 1 running
Start time is:  1625595589500829895
Image ref is  2
Image start is :  1625595589500829895


Phase 2a

In [33]:
# Find whether bf or image data sets begins last, create common start time
bf_start = base_feedback.iloc[0][0]
to_shorten = ""
if (bf_start - image_start) > 1:
    print("bf begins last with difference of: ", bf_start - image_start)
    start = bf_start
    to_shorten = "images"
elif bf_start - image_start < 1:
    print("Images begins last with difference of: ", bf_start - image_start)
    start = image_start
    to_shorten = "bf"
else:
    print(synchonized_start)
    start = bf_start
    
# Remove data before start time
if (to_shorten == "images"):
    remove = []
    for x in range(0, len(cam_1_color)):
        if cam_1_color.iloc[x, 0] < start:
            print("Remove: ", x)
            remove.append(x);
    print("Length of cam_1_color = ", len(cam_1_color))     
    cam_1_color = cam_1_color.drop(remove).reset_index().drop(columns = "index")
    print("Length of cam_1_color after drop= ", len(cam_1_color))
elif (to_shorten == "bf"):
    remove = []
    for x in range(0, len(base_feedback)):
        if base_feedback.iloc[x, 0] < start:
            print("Remove: ", x)
            remove.append(x);
    print("Length of basefeedback = ", len(base_feedback))
    if len(remove) > 0: 
        base_feedback = base_feedback.drop(remove).reset_index().drop(columns = "index")
    print("Length of basefeedback after drop= ", len(base_feedback))



Images begins last with difference of:  -1347584.0
Remove:  0
Length of basefeedback =  1229
Length of basefeedback after drop=  1228


Phase 2b

In [36]:
# Need to find common end time
bf_end = base_feedback['%time'][len(base_feedback) - 1]
image_end = image_set[image_ref]['%time'][len(image_set[image_ref]) - 1]

if bf_end - image_end > 1:
    print("images ends first, with difference of: ", bf_end - image_end)
    end = image_end
    to_shorten = "bf"
elif bf_end - image_end < 1:
    print("bf ends first, with difference of: ", bf_end - image_end)
    end = bf_end
    to_shorten = "images"
else:
    print("synchonized end")
    end = bf_end
    to_shorten = "none"
    
# Need to remove extra data at ends
remove = []
    
if (to_shorten == "bf"):
    print("Need to add code to shorten bf at end")
    for x in range(0, len(base_feedback)):
        if base_feedback.iloc[x, 0] > end:
            print(x)
            remove.append(x);
    print("Length of basefeedback = ", len(base_feedback))     
    if (len(remove) > 0):
        base_feedback = base_feedback.drop(remove).reset_index().drop(columns = "index")
    print("Length of base_feedback after drop = ", len(base_feedback))

if (to_shorten == "images"):
    for x in range(0, len(image_set[image_ref])):
        if image_set[image_ref].iloc[x, 0] > end:
            print("Remove: ", x)
            print(image_set[i])
            remove.append(x);
    print("Length of image ref = ", len(image_set[image_ref]))
    if (len(remove) > 0):
        for i in range (0, 4):
            image_set[i] = image_set[i].drop(remove).reset_index().drop(columns = "index")
            print("Length of image_set[" + str(i) + "] after drop= ", len(image_set[i]))
            
        


bf ends first, with difference of:  -10145400
Remove:  921
                   %time  field.header.seq
0    1625595589500829895              4219
1    1625595589528456368              4220
2    1625595589562393771              4221
3    1625595589595483746              4222
4    1625595589633193689              4223
..                   ...               ...
916  1625595620043202530              5135
917  1625595620075346030              5136
918  1625595620109562076              5137
919  1625595620144537229              5138
920  1625595620177313842              5139

[921 rows x 2 columns]
Length of image ref =  922


KeyError: '[921] not found in axis'

Phase 3

In [17]:
# Need to then compare data lengths
print("Length of image ref: ", len(image_set[image_ref]))
print("Length of base_feedback: ", len(base_feedback))
print("Ratio of base feedback images to image ref: ", len(base_feedback)/len(image_set[image_ref]))

test_Var = 10

Length of image ref:  921
Length of base_feedback:  1228
Ratio of base feedback images to image ref:  1.3333333333333333


In [21]:
# Row count of the frame stored at index 3 of image_set.
image_set_3_rows = len(image_set[3])
print(image_set_3_rows)

922


Phase 4

In [18]:
#Let the meshing begin
i = 0 #index of the next bf data to be sorted
j = 0 #index of the next image to be sorted
final_cols = copy.copy(bf_cols) #need to make this a copy
final_cols.extend(['cam_1_color_filename', 'cam_1_depth_filename', 'cam_2_color_filename', 'cam_2_depth_filename'])

final = pd.DataFrame(columns = final_cols)
# add time stamps
final['%time'] = image_set[image_ref]['%time'].values
# add cam_1 data
final['cam_1_color_filename'] = ("myframe" + image_set[0]['field.header.seq'].astype(str).str.zfill(6) + ".png").values
final['cam_1_depth_filename'] = ("myframe" + image_set[1]['field.header.seq'].astype(str).str.zfill(6) + ".png").values
#add cam_2 data
final['cam_2_color_filename'] = ("myframe" + image_set[2]['field.header.seq'].astype(str).str.zfill(6) + ".png").values
final['cam_2_depth_filename'] = ("myframe" + image_set[3]['field.header.seq'].astype(str).str.zfill(6) + ".png").values

while j < len(image_set[image_ref]): 
    # find a match
    match = i
    img_time = image_set[image_ref].iloc[j][0]
    match_diff = abs(img_time - base_feedback.iloc[i][0])
    for offset in range(1,5):
        #check that offset value is in range
        if (i + offset < len(image_set[image_ref])):     
            bf_time = base_feedback.iloc[i + offset][0]
            # if the difference between the timestamps for img_time and bf_time_i is smaller than the match difference
            if (abs(img_time-bf_time) < match_diff) :
                match = i + offset
                match_diff = abs(img_time-bf_time)
        
    # add new data to the dataframe
    transfer_cols = copy.copy(bf_cols)
    transfer_cols.remove('%time')
    #temp
    print("Match: " + j + "to" + match)
    #temp
    for column in transfer_cols:
#         print("column from transfer column: ", column, "row: ", j)
        final.at[j, column] = base_feedback[column][match]
            
    #increment j and i
    j = j + 1
    i = match + 1
        
#may find final.at[index, column] useful
        
        
    

ValueError: Length of values (922) does not match length of index (921)

Phase 5

In [12]:
# Output file on the Desktop: the dataset name followed by "final.csv".
final_name = ''.join((r'~/Desktop/', dataset, 'final.csv'))
# Write the frame without its row index.
final.to_csv(path_or_buf=final_name, index=False)
print("done")

done
