path_to_data/raw_data/

    ├── class_0/
    │   ├── trial1
    │   │   ├── all_data.txt
    │   │   ├── true_label.csv
    │   ├── trial2
    │   │   ├── all_data.txt
    │   │   ├── true_label.csv
    │   └── ...
    ├── class_1/
    │   ├── trial1
    │   │   ├── all_data.txt
    │   │   ├── true_label.csv
    │   ├── trial2
    │   │   ├── all_data.txt
    │   │   ├── true_label.csv
    │   └── ...
    └── ...

# Franka Robot
## 1. Collect data from robots (script: frankaRobot/save_data.py) → outputs raw data
## 2. Convert raw data into labeled data

In [138]:
# class rawData2LabeledData:
import numpy as np
import pandas as pd
import os 

class rawData2LabeledData:   #make_folder_dataset:
    """Convert one raw Franka trial folder into a labeled CSV file.

    The trial folder must contain ``all_data.txt`` (robot state dump) and
    ``true_label.csv`` (contact timestamps).  Call ``extract_robot_data()``
    first to populate ``self.df``, then ``get_labels()`` to add the ``label``
    column and write ``<labeled_data_path>/<labeled_data_name>.csv``.
    """

    def __init__(self, raw_data_path: str, labeled_data_path: str, labeled_data_name: str) -> None:
        """Store the paths, create the output folder and build the column names.

        Args:
            raw_data_path: Folder holding ``all_data.txt`` and ``true_label.csv``
                (with or without a trailing path separator).
            labeled_data_path: Folder the labeled CSV is written to; created
                if it does not exist.
            labeled_data_name: File name of the labeled CSV, without ``.csv``.
        """
        self.path = raw_data_path
        self.save_path = labeled_data_path
        self.save_name = labeled_data_name

        os.makedirs(self.save_path, exist_ok=True)
        # Number of text lines that make up one message in all_data.txt.
        self.num_lines_per_message = 130
        # Number of values read from each array line (one per joint).
        self.dof = 7
        self.df = pd.DataFrame()

        joints = range(self.dof)
        self.tau = [f'tau_J{i}' for i in joints]
        self.tau_d = [f'tau_J_d{i}' for i in joints]
        self.tau_ext = [f'tau_ext{i}' for i in joints]

        self.q = [f'q{i}' for i in joints]
        self.q_d = [f'q_d{i}' for i in joints]

        self.dq = [f'dq{i}' for i in joints]
        self.dq_d = [f'dq_d{i}' for i in joints]

        # Derived error columns, filled in by extract_robot_data().
        self.e = [f'e{i}' for i in joints]
        self.de = [f'de{i}' for i in joints]
        self.etau = [f'etau_J{i}' for i in joints]

    def _extract_array(self, data_dict: dict, data_frame: list, header: list, n: int):
        """Parse line ``n`` of one message and append its values to ``data_dict``.

        The line must look like ``name: [v0, v1, ...]``.  The first
        ``self.dof`` values are converted to float and appended to the lists
        stored under ``header[0] .. header[dof - 1]``.

        Args:
            data_dict: Mapping of column name to list of samples (mutated).
            data_frame: The text lines of a single message.
            header: Column names receiving the parsed values, in order.
            n: Index of the line inside ``data_frame`` to parse.
        """
        _, payload = data_frame[n].split(':')
        payload = payload.replace('[', '').replace(']', '').replace('\n', '')
        values = payload.split(',')
        for i in range(self.dof):
            data_dict[header[i]].append(float(values[i]))

    def extract_robot_data(self):
        """Read ``all_data.txt`` into ``self.df`` and add the error columns.

        The file is consumed in chunks of ``num_lines_per_message`` lines;
        trailing lines that do not fill a whole chunk are ignored.  Besides
        the parsed columns, ``e*`` (q_d - q), ``de*`` (dq_d - dq) and
        ``etau_J*`` (tau_J_d - tau_J) are computed.
        """
        # A context manager guarantees the file handle is released.
        with open(os.path.join(self.path, 'all_data.txt'), 'r') as f:
            lines = f.readlines()

        keywords = ['time'] + self.tau + self.tau_d + self.tau_ext + self.q + self.q_d + self.dq + self.dq_d

        # Every column starts with a placeholder 0 that is dropped again below.
        data_dict = {key: [0] for key in keywords}

        # (column names, line offset inside one message) for each array field.
        array_fields = (
            (self.tau, 25),
            (self.tau_d, 26),
            (self.tau_ext, 37),
            (self.q, 28),
            (self.q_d, 29),
            (self.dq, 30),
            (self.dq_d, 31),
        )

        chunk = self.num_lines_per_message
        for i in range(len(lines) // chunk):
            data_frame = lines[i * chunk:(i + 1) * chunk]

            # Line 3 carries the integer part of the time stamp; line 4 is
            # scaled by 1e-9 and added (presumably ROS secs/nsecs - confirm).
            _, whole = data_frame[3].split(':')
            _, fraction = data_frame[4].split(':')
            data_dict['time'].append(int(whole) + int(fraction) / 1e9)

            for header, offset in array_fields:
                self._extract_array(data_dict, data_frame, header, offset)

        self.df = pd.DataFrame.from_dict(data_dict)
        # Remove the placeholder row.  NOTE: reset_index() keeps the previous
        # index as an 'index' column, which therefore also ends up in the CSV.
        self.df = self.df.drop(index=0).reset_index()

        for err, desired, measured in zip(self.e, self.q_d, self.q):
            self.df[err] = self.df[desired] - self.df[measured]
        for err, desired, measured in zip(self.de, self.dq_d, self.dq):
            self.df[err] = self.df[desired] - self.df[measured]
        for err, desired, measured in zip(self.etau, self.tau_d, self.tau):
            self.df[err] = self.df[desired] - self.df[measured]

    def get_labels(self):
        """Add a binary ``label`` column from ``true_label.csv`` and save the CSV.

        Consecutive rows of ``true_label.csv`` whose time stamps are at most
        ``time_dev_parameter`` seconds apart form one contact event.  Robot
        samples with ``first <= time < last`` of an event get ``label = 1``,
        all others 0.  ``self.df['time']`` is shifted to start at 0, the label
        table is kept in ``self.true_label`` and ``self.df`` is written to
        ``<save_path>/<save_name>.csv``.
        """
        time_dev_parameter = 0.2

        true_label = pd.read_csv(os.path.join(self.path, 'true_label.csv'))

        # Truncate the robot time to as many integer digits as the label time
        # so that both clocks are compared on the same scale.
        true_label_time_digits = len(str(int(true_label['time_sec'][0])))
        self.df['time'] = self.df['time'] % np.power(10, true_label_time_digits)

        # NOTE(review): time_nsec is added as-is, so it is presumably already
        # expressed in seconds - confirm against the recording script.
        true_label['time'] = true_label['time_sec'] + true_label['time_nsec'] - self.df['time'].iloc[0]

        # A gap larger than the threshold marks the first row of a new event.
        time_dev = true_label['time'].diff()
        event_breaks = true_label['time'][time_dev > time_dev_parameter].index.values

        # Row indices (into true_label) of the first and last sample of each event.
        start_times = np.append([0], event_breaks)
        end_times = np.append(event_breaks - 1, true_label['time'].shape[0] - 1)

        # Normalize 'time' in df so that it starts at 0.
        self.df['time'] -= self.df['time'].iloc[0]

        self.df['label'] = 0  # Initialize the 'label' column

        # Assign labels for contact events
        for start_time, end_time in zip(start_times, end_times):
            mask = (self.df['time'] >= true_label.time[start_time]) & (self.df['time'] < true_label.time[end_time])
            self.df.loc[mask, 'label'] = 1

        self.true_label = true_label
        self.df.to_csv(os.path.join(self.save_path, self.save_name + '.csv'), index=False)


In [143]:
# Convert every trial found under raw_data_path (layout: raw_data/<class>/<trial>/)
# into one labeled CSV per trial under labeled_data_path/<class>/.
import os
# NOTE: str.replace drops every occurrence of 'AIModels' from the working
# directory string, so this expects the notebook to run from <project>/AIModels.
raw_data_path = os.getcwd().replace('AIModels','') + 'dataset/franka_mindlab/raw_data/'
labeled_data_path = os.getcwd().replace('AIModels','') + 'dataset/franka_mindlab/labeled_data/'
os.makedirs(labeled_data_path, exist_ok=True)
# sorted() makes the processing order (and the trial left in `instance`) reproducible.
for class_name in sorted(os.listdir(raw_data_path)):
    class_dir = raw_data_path + class_name
    if not os.path.isdir(class_dir):
        continue
    for trial in sorted(os.listdir(class_dir)):
        trial_dir = class_dir + '/' + trial
        # Skip stray files inside a class folder; only folders are trials.
        if not os.path.isdir(trial_dir):
            continue
        instance = rawData2LabeledData(raw_data_path=trial_dir + '/',
                                       labeled_data_path=labeled_data_path + class_name + '/',
                                       labeled_data_name=trial)
        instance.extract_robot_data()
        instance.get_labels()

In [144]:
# plot a sample data
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

# Signals of the last converted trial to plot against the contact label.
target = ['e3', 'tau_J3']

for signal in target:
    series = instance.df[signal]
    # Stretch the 0/1 label to the signal's value range so both share one axis.
    span = series.max() - series.min()
    instance.df['label_scaled'] = instance.df['label'] * span + series[0] - span / 2
    instance.df.iplot(x='time', y=[signal, 'label_scaled'], xTitle='time (sec)', yTitle=signal)

    # Static alternative: plt.plot(instance.df['time'], instance.df['label_scaled'])

# UR Robot
## 1. Collect data from robots (script: urRobot/save_data.py) → outputs raw data
## 2. Convert raw data into labeled data

In [9]:
import numpy as np
import pandas as pd
import os 
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
import matplotlib.pyplot as plt
import seaborn as sns

# Put cufflinks and plotly into offline notebook mode before any plotting is done.
# NOTE(review): connected=True presumably loads plotly.js from the CDN rather
# than embedding it in the notebook - confirm against the plotly/cufflinks docs.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

class rawData2LabeledData: #make_folder_dataset:
    """Convert one raw UR trial folder into a labeled CSV file.

    The trial folder must contain a ``<start_time>.txt`` CSV log (its file
    name is parsed as a float time offset) and ``true_label.csv`` (contact
    timestamps).  Call ``extract_robot_data()`` first to populate
    ``self.df``, then ``get_labels()`` to add the ``label`` column and write
    ``<labeled_data_path>/<labeled_data_name>.csv``.
    """

    # Per-joint factor applied to the current error to obtain ``etau*``.
    # NOTE(review): presumably the motor torque constants of the robot - confirm.
    CURRENT_TO_TORQUE = (1.35, 1.361, 1.355, 0.957, 0.865, 0.893)

    def __init__(self, raw_data_path: str, labeled_data_path: str, labeled_data_name: str) -> None:
        """Store the paths, create the output folder and build the column names.

        Args:
            raw_data_path: Folder holding the ``.txt`` log and ``true_label.csv``
                (with or without a trailing path separator).
            labeled_data_path: Folder the labeled CSV is written to; created
                if it does not exist.
            labeled_data_name: File name of the labeled CSV, without ``.csv``.
        """
        self.path = raw_data_path
        self.save_path = labeled_data_path
        self.save_name = labeled_data_name
        self.dof = 6
        os.makedirs(self.save_path, exist_ok=True)

        self.df = pd.DataFrame()
        self.df_dataset = pd.DataFrame()
        self._create_dummy_data(self.dof)

    def _create_dummy_data(self, dof):
        """Build the per-joint column-name lists used by the other methods.

        Despite its name this method creates no data; it only stores lists of
        column names such as ``target_q_0 .. target_q_<dof-1>`` on ``self``.
        """
        joints = range(dof)

        self.target_q = [f'target_q_{i}' for i in joints]
        self.actual_q = [f'actual_q_{i}' for i in joints]

        self.target_qd = [f'target_qd_{i}' for i in joints]
        self.actual_qd = [f'actual_qd_{i}' for i in joints]

        self.target_current = [f'target_current_{i}' for i in joints]
        self.actual_current = [f'actual_current_{i}' for i in joints]

        self.actual_TCP_pose = [f'actual_TCP_pose_{i}' for i in joints]
        self.target_TCP_pose = [f'target_TCP_pose_{i}' for i in joints]

        self.actual_TCP_speed = [f'actual_TCP_speed_{i}' for i in joints]
        self.target_TCP_speed = [f'target_TCP_speed_{i}' for i in joints]

        self.actual_TCP_force = [f'actual_TCP_force_{i}' for i in joints]
        self.joint_control_output = [f'joint_control_output_{i}' for i in joints]
        self.target_moment = [f'target_moment_{i}' for i in joints]

        self.joint_temperatures = [f'joint_temperatures_{i}' for i in joints]
        self.actual_execution_time = ['actual_execution_time']
        self.joint_mode = [f'joint_mode_{i}' for i in joints]
        # The accelerometer always has three components, independent of dof.
        self.actual_tool_accelerometer = [f'actual_tool_accelerometer_{i}' for i in range(3)]

        self.actual_joint_voltage = [f'actual_joint_voltage_{i}' for i in joints]

        # Derived columns, filled in by extract_robot_data().
        self.e = [f'e{i}' for i in joints]
        self.de = [f'de{i}' for i in joints]
        self.etau = [f'etau{i}' for i in joints]
        self.tau_ext = [f'tau_ext{i}' for i in joints]
        self.e_i = [f'e_i{i}' for i in joints]

    def extract_robot_data(self):
        """Load the ``.txt`` log of the trial into ``self.df`` with error columns.

        For each joint the columns ``e_i*`` (target - actual current),
        ``etau*`` (``e_i*`` times ``CURRENT_TO_TORQUE``), ``e*`` (target -
        actual position) and ``de*`` (target - actual velocity) are added.
        The ``timestamp`` column is renamed to ``time`` and shifted so that it
        starts at the float encoded in the file name (``self.ros_time``).

        If the folder holds several ``.txt`` files, the one that sorts last by
        name ends up in ``self.df``; if it holds none, ``self.df`` is left
        unchanged.  A ``.txt`` file whose base name is not a number raises
        ``ValueError``.
        """
        # sorted() makes the result independent of the directory listing order.
        for file in sorted(os.listdir(self.path)):
            if not file.endswith(".txt"):
                continue

            df = pd.read_csv(os.path.join(self.path, file))

            # The file name without its extension is the start time of the log.
            self.ros_time = float(os.path.splitext(file)[0])

            for i in range(self.dof):
                df[self.e_i[i]] = df[self.target_current[i]] - df[self.actual_current[i]]
                df[self.etau[i]] = df[self.e_i[i]] * self.CURRENT_TO_TORQUE[i]
                df[self.e[i]] = df[self.target_q[i]] - df[self.actual_q[i]]
                df[self.de[i]] = df[self.target_qd[i]] - df[self.actual_qd[i]]

            df.rename(columns={'timestamp': 'time'}, inplace=True)
            df['time'] = df['time'] - df['time'][0] + self.ros_time
            self.df = df

    def get_labels(self):
        """Add a binary ``label`` column from ``true_label.csv`` and save the CSV.

        Consecutive rows of ``true_label.csv`` whose time stamps are at most
        ``time_dev_parameter`` seconds apart form one contact event.  Robot
        samples with ``first <= time < last`` of an event get ``label = 1``,
        all others 0.  ``self.df['time']`` is shifted to start at 0, the label
        table is kept in ``self.true_label`` and ``self.df`` is written to
        ``<save_path>/<save_name>.csv``.
        """
        time_dev_parameter = 0.2

        true_label = pd.read_csv(os.path.join(self.path, 'true_label.csv'))

        # Truncate the robot time to as many integer digits as the label time
        # so that both clocks are compared on the same scale.
        true_label_time_digits = len(str(int(true_label['time_sec'][0])))
        self.df['time'] = self.df['time'] % np.power(10, true_label_time_digits)

        # NOTE(review): time_nsec is added as-is, so it is presumably already
        # expressed in seconds - confirm against the recording script.
        true_label['time'] = true_label['time_sec'] + true_label['time_nsec'] - self.df['time'].iloc[0]

        # A gap larger than the threshold marks the first row of a new event.
        time_dev = true_label['time'].diff()
        event_breaks = true_label['time'][time_dev > time_dev_parameter].index.values

        # Row indices (into true_label) of the first and last sample of each event.
        start_times = np.append([0], event_breaks)
        end_times = np.append(event_breaks - 1, true_label['time'].shape[0] - 1)

        # Normalize 'time' in df so that it starts at 0.
        self.df['time'] -= self.df['time'].iloc[0]

        self.df['label'] = 0  # Initialize the 'label' column

        # Assign labels for contact events
        for start_time, end_time in zip(start_times, end_times):
            mask = (self.df['time'] >= true_label.time[start_time]) & (self.df['time'] < true_label.time[end_time])
            self.df.loc[mask, 'label'] = 1

        self.true_label = true_label
        self.df.to_csv(os.path.join(self.save_path, self.save_name + '.csv'), index=False)


In [10]:
# Convert every trial found under raw_data_path (layout: raw_data/<class>/<trial>/)
# into one labeled CSV per trial under labeled_data_path/<class>/.
import os
# NOTE: str.replace drops every occurrence of 'AIModels' from the working
# directory string, so this expects the notebook to run from <project>/AIModels.
raw_data_path = os.getcwd().replace('AIModels','') + 'dataset/ur5/raw_data/'
labeled_data_path = os.getcwd().replace('AIModels','') + 'dataset/ur5/labeled_data/'
os.makedirs(labeled_data_path, exist_ok=True)
# sorted() makes the processing order (and the trial left in `instance`) reproducible.
for class_name in sorted(os.listdir(raw_data_path)):
    class_dir = raw_data_path + class_name
    if not os.path.isdir(class_dir):
        continue
    for trial in sorted(os.listdir(class_dir)):
        trial_dir = class_dir + '/' + trial
        # Skip stray files inside a class folder; only folders are trials.
        if not os.path.isdir(trial_dir):
            continue
        instance = rawData2LabeledData(raw_data_path=trial_dir + '/',
                                       labeled_data_path=labeled_data_path + class_name + '/',
                                       labeled_data_name=trial)
        instance.extract_robot_data()
        instance.get_labels()

In [18]:
# plot a sample data
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

# Signals of the last converted trial to plot against the contact label.
target = ['e1', 'e_i1']

for signal in target:
    series = instance.df[signal]
    # Stretch the 0/1 label to the signal's value range so both share one axis.
    span = series.max() - series.min()
    instance.df['label_scaled'] = instance.df['label'] * span + series[0] - span / 2
    instance.df.iplot(x='time', y=[signal, 'label_scaled'], xTitle='time (sec)', yTitle=signal)

    # Static alternative: plt.plot(instance.df['time'], instance.df['label_scaled'])