This is a refactored version (by Sulagna) of [this code](https://github.com/MHC-Dataset-Research/MHC-Data-Sync/blob/main/picke23a/SynchonizeData.ipynb) (contributed by Anna). For a detailed description of what is being done, see the original notebook.


In [None]:
# working with google drive
# Mount Google Drive at /content/drive so files stored there can be read by path.
# This import is only available inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

# Name of the recording to synchronize; it is used both as the folder name and
# as the prefix of every CSV file name.
dataset = 'flarp_push_3'

# Shared beginning of every CSV path: <research root>/<dataset>/csvFiles/<dataset>
_csv_stem = '/content/drive/MyDrive/MelodyResearch/' + dataset + '/csvFiles/' + dataset

# Camera 1 streams
cam_1_color_path = _csv_stem + '-_cam_1_color_image_raw.csv'
cam_1_depth_path = _csv_stem + '-_cam_1_depth_image_raw.csv'
cam_1_depth_points_path = _csv_stem + '-_cam_1_depth_points.csv'

# Camera 2 streams (note: the depth file here is named "image_rect_raw",
# unlike camera 1's "image_raw")
cam_2_color_path = _csv_stem + '-_cam_2_color_image_raw.csv'
cam_2_depth_path = _csv_stem + '-_cam_2_depth_image_rect_raw.csv'

# Robot base feedback stream
base_feedback_path = _csv_stem + '-_my_gen3_base_feedback.csv'

In [None]:
# Columns to load from the base feedback CSV (bf_cols) and from each image
# CSV (img_cols). Both lists include '%time', the timestamp column.
bf_cols = ['%time']

# Three-axis signals: every prefix below contributes an x, a y and a z column.
xyz_prefixes = [
    'field.base.imu_acceleration_',
    'field.base.imu_angular_velocity_',
    'field.base.tool_pose_',
    'field.base.tool_pose_theta_',
    'field.base.tool_twist_linear_',
    'field.base.tool_twist_angular_',
    'field.base.tool_external_wrench_force_',
    'field.base.tool_external_wrench_torque_',
    'field.base.commanded_tool_pose_',
    'field.base.commanded_tool_pose_theta_',
    'field.interconnect.imu_acceleration_',
    'field.interconnect.imu_angular_velocity_',
]
for prefix in xyz_prefixes:
    # `axis` rather than `dir`, so the builtin dir() is not shadowed.
    for axis in ['x', 'y', 'z']:
        bf_cols.append(prefix + axis)

# Actuator signals: position, velocity and torque for actuators 0 through 5.
actuators = ['field.actuators0', 'field.actuators1', 'field.actuators2',
             'field.actuators3', 'field.actuators4', 'field.actuators5']
for actuator in actuators:
    for signal in ['.position', '.velocity', '.torque']:
        bf_cols.append(actuator + signal)

# Gripper signals. These are added exactly once, outside the actuator loop;
# nesting them inside it would append each gripper column once per actuator
# and leave duplicate names in bf_cols.
bf_cols.extend([
    'field.interconnect.oneof_tool_feedback.gripper_feedback0.motor0.position',
    'field.interconnect.oneof_tool_feedback.gripper_feedback0.motor0.velocity',
])

# Columns read from every image CSV: timestamp and frame sequence number.
img_cols = ['%time', 'field.header.seq']

In [None]:
# Load the image index tables, keeping only the columns listed in img_cols.
def _load_image_index(csv_path):
    """Read one image CSV into a DataFrame restricted to img_cols."""
    return pd.read_csv(csv_path, usecols=img_cols)


cam_1_color = _load_image_index(cam_1_color_path)
cam_1_depth = _load_image_index(cam_1_depth_path)
cam_1_depth_points = _load_image_index(cam_1_depth_points_path)
cam_2_color = _load_image_index(cam_2_color_path)
cam_2_depth = _load_image_index(cam_2_depth_path)

In [None]:
# Load the base feedback table, keeping only the columns listed in bf_cols.
base_feedback = pd.read_csv(
    base_feedback_path,
    usecols=bf_cols,
)

In [None]:
# Merge the image tables on the frame number ('field.header.seq') and keep a
# single timestamp column: the one whose minimum timestamp is the earliest.
image_set = [cam_1_color, cam_1_depth, cam_1_depth_points, cam_2_color, cam_2_depth]

# Seed the result with the first table and inner-join every other table onto
# it. Seeding explicitly (rather than testing `merged_data.empty` inside the
# loop) matters: an inner join that legitimately produces zero rows must stay
# empty instead of being silently replaced by the next table.
merged_data = image_set[0][['%time', 'field.header.seq']]
for i, image_table in enumerate(image_set[1:], start=1):
    # The left-hand '%time' keeps its name; the incoming one becomes
    # '%time_2', '%time_3', ... according to its position in image_set.
    merged_data = pd.merge(
        merged_data,
        image_table[['%time', 'field.header.seq']],
        on='field.header.seq',
        suffixes=('', f'_{i+1}'),
    )

# Fail here with a clear message instead of later on a missing '%time' column.
if merged_data.empty:
    raise ValueError(
        "No frame number ('field.header.seq') is shared by all image datasets; "
        "nothing to synchronize."
    )

# Get all the time columns
time_columns = [col for col in merged_data.columns if col.startswith('%time')]

# Pick the time column with the earliest start time. idxmin returns the first
# column that reaches the overall minimum, so a tie between columns still
# yields exactly one '%time' column (no duplicate column names).
earliest_time_column = merged_data[time_columns].min().idxmin()
earliest_start_time = merged_data[earliest_time_column].min()

# Drop the other time columns, rename the kept one to '%time', and reset the index.
merged_data = (
    merged_data
    .drop(columns=[col for col in time_columns if col != earliest_time_column])
    .rename(columns={earliest_time_column: '%time'})
    .reset_index(drop=True)
)

# Print the merged dataset
print(merged_data)



                   %time  field.header.seq
0    1625582778393631832              5294
1    1625582778426759549              5295
2    1625582778460205001              5296
3    1625582778493472124              5297
4    1625582778527016824              5298
..                   ...               ...
494  1625582799298959801              5895
495  1625582799299725994              5897
496  1625582799300882905              5898
497  1625582799325999298              5899
498  1625582799359269925              5900

[499 rows x 2 columns]


In [None]:
# Merge all datasets based on nearest timestamps: each image frame receives the
# base feedback row whose '%time' is closest to the frame's '%time'.
# https://pandas.pydata.org/pandas-docs/version/0.25.0/reference/api/pandas.merge_asof.html
# merge_asof requires BOTH inputs to be sorted on the key, so the image table
# is sorted as well (stable sort: rows with equal timestamps keep their order).
final = pd.merge_asof(
    merged_data.sort_values('%time', kind='stable'),
    base_feedback.sort_values('%time'),
    on='%time',
    direction='nearest',
)

final = (
    final
    # Discard rows where any selected column is missing.
    .dropna()
    # Image file name for the frame: "myframe" + 6-digit zero-padded frame
    # number + ".png". assign() builds the column on a new frame rather than
    # writing into the result of dropna(), which can raise SettingWithCopyWarning.
    .assign(
        image_frames=lambda frame: "myframe"
        + frame['field.header.seq'].astype(str).str.zfill(6)
        + ".png"
    )
    # Keep one row per image file.
    .drop_duplicates(subset=['image_frames'])
)

In [None]:
# Show the synchronized table; pandas abbreviates long/wide frames when printed.
print(final)

                   %time  field.header.seq  field.base.imu_acceleration_x  \
0    1625582778393631832              5294                       0.237740   
1    1625582778426759549              5295                       0.237740   
2    1625582778460205001              5296                       0.237975   
3    1625582778493472124              5297                       0.237975   
4    1625582778527016824              5298                       0.237975   
..                   ...               ...                            ...   
494  1625582799298959801              5895                       0.234765   
495  1625582799299725994              5897                       0.234765   
496  1625582799300882905              5898                       0.234765   
497  1625582799325999298              5899                       0.234765   
498  1625582799359269925              5900                       0.236085   

     field.base.imu_acceleration_y  field.base.imu_acceleration_z  \
0     

In [None]:

# Report the row counts of the inputs and of the synchronized result.
for label, table in (
    ("Length of image dataset: ", merged_data),
    ("Length of base_feedback: ", base_feedback),
    ("Length of final: ", final),
):
    print(label, len(table))

Length of image dataset:  499
Length of base_feedback:  924
Length of final:  499


In [None]:
# Write the synchronized table to "<dataset>_final.csv" (a relative path, so it
# lands in the current working directory) without the index column.
final_name = ''.join([dataset, '_final.csv'])
final.to_csv(path_or_buf=final_name, index=False)
print("Merging and saving the dataset is complete.")

Merging and saving the dataset is complete.


In [None]:
from google.colab import files

# Download the CSV file
# Hands the file named by final_name to Colab's download helper, which offers
# it to the browser as a download.
files.download(final_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>