# 1. Collect data from robots (script: frankaRobot/save_data.py) → outputs raw data
# 2. Convert raw data into labeled data

In [29]:
# class rawData2LabeledData:
import numpy as np
import pandas as pd
import os 

class rawData2LabeledData:   #make_folder_dataset:
    """Convert one raw recording folder into a labeled CSV file.

    The raw folder must contain ``all_data.txt`` (robot state messages) and
    ``true_label.csv`` (contact time stamps).  ``extract_robot_data`` parses
    the text file into ``self.df`` and ``get_labels`` adds a binary ``label``
    column and writes ``labeled_data.csv`` into the output folder.
    """

    def __init__(self, raw_data_path:str, labeled_data_path:str) -> None:
        """Store the input/output folders and the column names per signal group.

        Args:
            raw_data_path: Folder holding ``all_data.txt`` and ``true_label.csv``.
            labeled_data_path: Folder that receives ``labeled_data.csv``; it is
                created if it does not exist.
        """
        self.path = raw_data_path
        self.save_path = labeled_data_path

        os.makedirs(self.save_path, exist_ok=True)
        # Every message in all_data.txt is assumed to occupy exactly this many lines.
        self.num_lines_per_message = 130
        self.df = pd.DataFrame()
        self.tau = ['tau_J0','tau_J1', 'tau_J2', 'tau_J3', 'tau_J4', 'tau_J5', 'tau_J6']
        self.tau_d = ['tau_J_d0','tau_J_d1', 'tau_J_d2', 'tau_J_d3', 'tau_J_d4', 'tau_J_d5', 'tau_J_d6']
        self.tau_ext =['tau_ext0','tau_ext1','tau_ext2','tau_ext3','tau_ext4','tau_ext5','tau_ext6']

        self.q = ['q0','q1','q2','q3','q4','q5','q6']
        self.q_d = ['q_d0','q_d1','q_d2','q_d3','q_d4','q_d5','q_d6']

        self.dq = ['dq0','dq1','dq2','dq3','dq4','dq5','dq6']
        self.dq_d = ['dq_d0','dq_d1','dq_d2','dq_d3','dq_d4','dq_d5','dq_d6']

        # Derived columns: desired minus measured value (see extract_robot_data).
        self.e = ['e0','e1','e2','e3','e4','e5','e6']
        self.de = ['de0','de1','de2','de3','de4','de5','de6']
        self.etau = ['etau_J0','etau_J1', 'etau_J2', 'etau_J3', 'etau_J4', 'etau_J5', 'etau_J6']

    def _extract_array(self, data_dict:dict, data_frame:list, header:list,  n:int):
        """Parse line ``n`` of one message and append its values to ``data_dict``.

        The line must have the form ``name: [v0, v1, ..., v6]``; the first
        seven comma-separated values are appended to the lists stored under
        the seven column names in ``header``.

        Args:
            data_dict: Mapping from column name to the list being filled.
            data_frame: The lines of a single message.
            header: Seven column names, one per value.
            n: Index of the line inside ``data_frame`` to parse.
        """
        dof = 7
        _, values = data_frame[n].split(':')
        values = values.replace('[','').replace(']','').replace('\n','').split(',')
        for i in range(dof):
            data_dict[header[i]].append(float(values[i]))

    def extract_robot_data(self):
        """Parse ``all_data.txt`` into ``self.df`` and add the error columns.

        The resulting frame has an ``index`` column (created by
        ``reset_index``), ``time``, the seven-element signal groups and the
        derived ``e``, ``de`` and ``etau_J`` columns.
        """
        # The context manager closes the file even if parsing fails later on.
        with open(os.path.join(self.path, 'all_data.txt'), 'r') as f:
            lines = f.readlines()

        keywords = ['time'] + self.tau + self.tau_d + self.tau_ext + self.q + self.q_d + self.dq + self.dq_d

        # Each column starts with a placeholder 0 that is dropped again below.
        data_dict = {key: [0] for key in keywords}

        # Trailing lines that do not fill a complete message are ignored.
        for i in range(len(lines) // self.num_lines_per_message):
            data_frame = lines[i*self.num_lines_per_message:(i+1)*self.num_lines_per_message]

            # Lines 3 and 4 presumably hold the stamp's seconds and nanoseconds
            # (TODO confirm against the writer script).  Only the last six
            # digits of the seconds are kept.
            _, secs = data_frame[3].split(':')
            _, nsecs = data_frame[4].split(':')
            time_ = int(secs) % 1000000 + int(nsecs) / 1e9

            data_dict['time'].append(time_)

            self._extract_array(data_dict, data_frame, self.tau, 25)
            self._extract_array(data_dict, data_frame, self.tau_d, 26)
            self._extract_array(data_dict, data_frame, self.tau_ext, 37)

            self._extract_array(data_dict, data_frame, self.q, 28)

            self._extract_array(data_dict, data_frame, self.q_d, 29)
            self._extract_array(data_dict, data_frame, self.dq, 30)
            self._extract_array(data_dict, data_frame, self.dq_d, 31)

        self.df = pd.DataFrame.from_dict(data_dict)
        # Drop the placeholder row; reset_index keeps the old index as an 'index' column.
        self.df = self.df.drop(index=0).reset_index()

        # Error signals: desired value minus measured value, per joint.
        for err, desired, measured in zip(self.e, self.q_d, self.q):
            self.df[err] = self.df[desired] - self.df[measured]
        for err, desired, measured in zip(self.de, self.dq_d, self.dq):
            self.df[err] = self.df[desired] - self.df[measured]
        for err, desired, measured in zip(self.etau, self.tau_d, self.tau):
            self.df[err] = self.df[desired] - self.df[measured]

        #self.df.to_csv(self.save_path +'robot_data.csv',index=False)

    def get_labels(self):
        """Add a binary ``label`` column to ``self.df`` and save it as CSV.

        ``true_label.csv`` must provide ``time_sec`` and ``time_nsec`` columns.
        Consecutive label stamps more than 0.1 apart start a new contact
        event.  Robot samples from the first one after an event's first stamp
        up to the first one after the stamp preceding the next event start
        are labeled 1, all others 0.  Both time axes are shifted so that the
        first robot sample is at time 0.  Requires ``extract_robot_data`` to
        have filled ``self.df`` first.
        """
        time_dev_parameter = 0.1

        true_label = pd.read_csv(os.path.join(self.path, 'true_label.csv'))
        t0 = self.df['time'][0]
        # NOTE(review): time_nsec is added without scaling - confirm the units
        # written into true_label.csv.
        true_label['time'] = true_label['time_sec'] + true_label['time_nsec'] - t0
        time_dev = true_label['time'].diff()

        # Row numbers in true_label at which a contact event starts, plus the last row.
        contact_events_index = np.append([0], true_label['time'][time_dev > time_dev_parameter].index.values)
        contact_events_index = np.append(contact_events_index, true_label['time'].shape[0] - 1)

        self.df['time'] = self.df['time'] - t0

        # Work on plain arrays: per-element .loc writes are very slow on long recordings.
        times = self.df['time'].to_numpy()
        label_times = true_label['time'].to_numpy()
        labels = np.zeros(len(times), dtype=np.int64)

        contact_count = 0
        for i in range(len(times)):
            if times[i] - label_times[contact_events_index[contact_count]] > 0:
                contact_count += 1
                if contact_count == len(contact_events_index):
                    break
                # Last label stamp that belongs to the contact that just started.
                contact_end = label_times[contact_events_index[contact_count] - 1]
                for j in range(i, len(times)):
                    labels[j] = 1
                    if times[j] - contact_end > 0:
                        break

        self.df['label'] = labels
        self.df.to_csv(os.path.join(self.save_path, 'labeled_data.csv'), index=False)


In [16]:
# run on all folders within the raw_data_path

import os
# Both folders are located relative to the working directory with 'AIModels'
# stripped from its path.
raw_data_path = os.getcwd().replace('AIModels','') + 'frankaRobot/DATA/source_robot/'
labeled_data_path = os.getcwd().replace('AIModels','') + 'data/labeled_data/'
os.makedirs(labeled_data_path, exist_ok=True)
for folder in os.listdir(raw_data_path):
    # os.listdir also returns plain files; the converter reads all_data.txt
    # from inside each entry, so only sub-folders can be processed.
    if not os.path.isdir(raw_data_path + folder):
        continue

    instance = rawData2LabeledData(raw_data_path = raw_data_path+folder+'/', labeled_data_path = labeled_data_path+folder+'/')
    instance.extract_robot_data()
    instance.get_labels()

In [28]:
# Plot a sample of the labeled data
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

target = ['e3','tau_J3']

# One figure per signal, with the label overlaid on the same axis.
for column in target:
    signal = instance.df[column]
    amplitude = signal.max() - signal.min()
    # Stretch the label to the signal's value range and centre it on the
    # signal's first sample so both traces are readable together.
    instance.df['label_scaled'] = instance.df['label']*amplitude + signal[0] - amplitude/2
    instance.df.iplot(x='time', y=[column, 'label_scaled'], xTitle='time (sec)', yTitle=column)
    
    #plt.plot(instance.df['time'],instance.df['labeled_scaled'])


# 3. Convert labeled data into sequential datasets

In [30]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
import torch
from torchvision import transforms
from torchinfo import summary 


class LoadDataset(Dataset):
    """Tabular dataset whose last column is the target and the rest are features."""

    def __init__(self, data_path:str, extract_from_labeled_data=False, transform=None, target_transform=None):
        """
        Initialize the dataset.

        Args:
            data_path (str): Path to a ``.csv`` or ``.pkl`` file holding the samples.
            extract_from_labeled_data (bool): If True, call the (not yet
                implemented) labeled-data conversion instead of reading
                ``data_path``; the dataset then stays empty.
            transform (callable, optional): Applied to the feature array of
                each sample before it is converted to a tensor.
            target_transform (callable, optional): Applied to the target of
                each sample before it is converted to a tensor.
        """
        self.data_path = data_path
        self.data = pd.DataFrame()
        # __getitem__ reads both attributes, so they must always exist.
        self.transform = transform
        self.target_transform = target_transform

        if extract_from_labeled_data:
            self.__labeled_data_to_sequence__()
        else:
            self.__read_dataset__()


    def __labeled_data_to_sequence__(self):
        """Placeholder: building sequences from labeled data is not implemented."""
        pass

    def __read_dataset__(self):
        """
        Load data from csv or pkl file

        Raises:
            ValueError: If ``data_path`` contains neither ``.csv`` nor ``.pkl``.
        """
        if '.csv' in self.data_path:
            self.data = pd.read_csv(self.data_path)
        elif '.pkl' in self.data_path:
            # NOTE: unpickling can execute arbitrary code; only load trusted files.
            self.data = pd.read_pickle(self.data_path)
        else:
            # Fail loudly instead of silently returning an empty dataset.
            raise ValueError("Unsupported file format. Please use .csv or .pkl")



    def save_dataset(self, save_path):
        """Write the loaded table to ``save_path`` as a pickle file."""
        self.data.to_pickle(save_path)

    def __len__(self):
        """Return the total number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Retrieve a single sample at the specified index.

        Args:
            idx (int): Index of the sample to retrieve.

        Returns:
            (tuple): (features, target) where features is a float32 tensor and
            target is an int64 tensor holding the label for classification.
        """
        # Extract data
        features = self.data.iloc[idx, :-1].values  # Assuming features are all columns except the last
        target = self.data.iloc[idx, -1]            # Assuming target is in the last column

        # Apply transformations if specified
        if self.transform is not None:
            features = self.transform(features)
        if self.target_transform is not None:
            target = self.target_transform(target)

        # Convert to tensors
        features = torch.tensor(features, dtype=torch.float32)
        target = torch.tensor(target, dtype=torch.long)  # Assuming a classification task

        return features, target
    


In [None]:
# Alternative implementation of the make_sequence function
import numpy as np
import pandas as pd
import os

robot_dof = 7
seq_num = 28  # number of consecutive samples per window
gap = 5 # 25 ms
train_split_rate = 0.75
dict_label = {'a': 7, 'b':6, 'c':5, 'd':4, 'e':3, 'f':2, 'g':1}

#save_path = main_path+'/dataset/test_dataset_target_robot/'
#main_path = main_path + 'frankaRobot/DATA/dataset/target_robot/'

# main_path has to exist before save_path is derived from it; previously this
# raised NameError on a fresh run.
# NOTE(review): the project root (working directory without 'AIModels') is
# assumed to be the intended base of save_path - confirm.
main_path = os.getcwd().replace('AIModels','')
save_path = main_path+'dataset/e_edot/localization_gap_5/'
# From here on main_path points at the folder with the extracted recordings.
main_path = os.path.expanduser('~/myProjects/contact_localization_dataset/dataset/extracted_data/')



# Function to generate sequence names
def generate_sequence(base_name, indices):
    """Return one name per index, formed as ``base_name`` followed by the index."""
    names = []
    for index in indices:
        names.append(f'{base_name}{index}')
    return names

def split_data(dataset, train_split_rate = 0.75):
    """Randomly split the rows of ``dataset`` into a train and a test part.

    Each row is assigned to the train part when an independent uniform draw
    from ``np.random.rand`` falls below ``train_split_rate``, otherwise to the
    test part.  Returns ``(train, test)``.
    """
    in_train = np.random.rand(len(dataset)) < train_split_rate
    return dataset.loc[in_train, :], dataset.loc[~in_train, :]

def make_sequence(df, selected_features,seq_num, deltaMin, gap):
    """Cut fixed-length windows around every contact onset in ``df``.

    A contact starts where ``df.label`` rises and ends where it falls; rising
    and falling edges are paired in order and contacts shorter than
    ``seq_num`` samples are dropped.  For each remaining contact, windows of
    ``seq_num`` rows ending just before ``step`` are taken for
    ``step = onset, onset + gap, ...`` while ``step < onset + seq_num``.

    Args:
        df: Labeled data with a ``label`` column and the feature columns.
            Assumes a default 0..n-1 row index - TODO confirm for other callers.
        selected_features: Names of the feature columns to put in each window.
        seq_num: Number of rows per window.
        deltaMin: Unused; kept so existing calls keep working.
        gap: Step, in rows, between consecutive windows of one contact.

    Returns:
        DataFrame with one row per window: column 0 holds the label of the
        window's last row, followed by the window values flattened row by
        row.  Empty DataFrame if no window was produced.
    """
    contact_indexs_ = df.loc[df.label.diff()>0.1,:].index
    contact_indexs2 = df.loc[df.label.diff()<-0.1,:].index
    contact_indexs = [idx for idx, idx2 in zip(contact_indexs_, contact_indexs2) if idx2 - idx >= seq_num]

    features = df[selected_features]
    rows = []
    for contact_index in contact_indexs:
        for step in range(contact_index, contact_index + seq_num, gap):
            window_start = step - seq_num
            # A contact closer than seq_num rows to the start of the recording
            # has no full window; a negative slice start would wrap around and
            # yield a malformed row, so skip it.
            if window_start < 0:
                continue
            label = df.label[step-1]
            window = features[window_start:step]
            rows.append(np.insert(window.values.flatten(), 0, label))

    # Build the frame once instead of concatenating inside the loop.
    if not rows:
        return pd.DataFrame()
    return pd.DataFrame(np.vstack(rows))


os.makedirs(save_path, exist_ok=True)

# Only the position error (e) and velocity error (de) of every joint are used
# as features; joint indices run from 0 to robot_dof - 1.
indices = range(robot_dof)

selected_features = generate_sequence('e', indices) + generate_sequence('de', indices)
# One label column followed by seq_num samples of every selected feature.
columns = range(seq_num*len(selected_features)+1)

frames = []
for i in os.listdir(main_path):
    # Entries whose name contains a dot are treated as files and skipped.
    if '.' not in i:
        file_path = os.path.join(main_path, i, 'labeled_data.csv')
        df = pd.read_csv(file_path)
        df.drop(columns='index', inplace=True)
        # The fourth argument (deltaMin) is not used by make_sequence; the
        # name was never defined in this cell, so None is passed explicitly.
        df = make_sequence(df, selected_features, seq_num, None, gap)
        if df.empty:
            # No usable contact in this recording; df[0] would not exist.
            continue
        #labeling data
        # Column 0 holds the 0/1 contact flag; scale it to the class id that
        # is derived from the folder name.
        if 'link5' in i:
            df[0] = df[0]*5
        elif 'link6' in i:
            df[0] = df[0]*6
        elif 'f' in i:
            df[0] = df[0]*2
        elif 'e' in i:
            df[0] = df[0]*3
        #print(df.loc[:,0].min())
        frames.append(df)

# DataFrame.append was removed in pandas 2.0; concatenate all parts once.
if frames:
    df_master = pd.concat(frames, ignore_index=True)
else:
    df_master = pd.DataFrame(columns=columns)

df_master.to_pickle(save_path+'dataset_test.pkl')
print(df_master.shape)

# 4. Load sequential dataset

path/to/data/ 

    ├── class_0/
    │   ├── sample1.pkl
    │   ├── sample2.csv
    │   └── ...
    ├── class_1/
    │   ├── sample1.csv
    │   ├── sample2.pkl
    │   └── ...
    └── ...

dict_label={'class_0':0, 'class_1':1, ... }

In [40]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
import torch
from torchinfo import summary 


class LoadSeqDataset(Dataset):
    """Sequential samples read from one file: column 0 is the target, the rest are features."""

    def __init__(self, data_path:str, label=1):
        """
        Read the sample table from ``data_path``.

        Args:
            data_path (str): Path to the CSV or Pickle file containing the sequential data.
            label (int): Factor the stored target is multiplied with (default 1).

        Raises:
            ValueError: If ``data_path`` contains neither ``.csv`` nor ``.pkl``.
        """
        self.label = label
        # Checked in this order, so a path containing both markers is read as CSV.
        readers = (('.csv', pd.read_csv), ('.pkl', pd.read_pickle))
        for marker, reader in readers:
            if marker in data_path:
                self.data = reader(data_path)
                break
        else:
            raise ValueError("Unsupported file format. Please use .csv or .pkl")


    def __len__(self):
        """Number of rows (samples) in the loaded table."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Return the sample stored in row ``idx``.

        Args:
            idx (int): Row position of the sample.

        Returns:
            (tuple): (features, target) - a float32 tensor built from every
            column but the first, and an int64 tensor holding the first
            column's value multiplied by ``self.label``.
        """
        raw_features = self.data.iloc[idx, 1:].values
        scaled_target = self.data.iloc[idx, 0] * self.label

        return (
            torch.tensor(raw_features, dtype=torch.float32),
            torch.tensor(scaled_target, dtype=torch.long),
        )
    

class LoadDataset(Dataset):
    """Index of sample files stored in one sub-folder per class."""

    def __init__(self, data_path:str, dict_label = None):
        """
        Load sequential dataset from a directory structure with labeled subdirectories.

            Expected directory structure:
            
            path/to/data/
                ├── class_0/
                │   ├── sample1.pkl 
                │   ├── sample2.csv
                │   └── ...
                ├── class_1/
                │   ├── sample1.csv
                │   ├── sample2.pkl
                │   └── ...
                └── ...

            Label mapping example:
            
            dict_label = {'class_0': 0, 'class_1': 1, ... }
        
        Sub-folders whose name is not a key of ``dict_label`` are ignored, as
        are nested folders inside a class folder.

        Args:
            data_path (str): Path to the data directory.
            dict_label (dict, optional): Dictionary mapping class folder names to labels.
        """
        if dict_label is None:
            dict_label = {'a': 7, 'b': 6, 'c': 5, 'd': 4, 'e': 3, 'f': 2, 'g': 1}
        
        self.samples = []       # (file_path, label) pairs in index order
        self.class_to_idx = {}  # class folder name -> label, for folders actually found

        # Scan data_path for subdirectories        
        for class_name in sorted(os.listdir(data_path)):
            class_dir = os.path.join(data_path, class_name)
            if os.path.isdir(class_dir) and class_name in dict_label:
                label = dict_label[class_name]  # Look up label
                self.class_to_idx[class_name] = label
                # Sort the files as well: os.listdir order is arbitrary, and an
                # unsorted listing would make sample indices differ between
                # machines and runs.
                for file_name in sorted(os.listdir(class_dir)):
                    file_path = os.path.join(class_dir, file_name)
                    if os.path.isfile(file_path):
                        self.samples.append((file_path, label))
                        
    def __len__(self):
        """Return the total number of samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """
        Retrieve a single sample at the specified index.
        
        Args:
            idx (int): Index of the sample to retrieve.
            
        Returns:
            tuple: (file_path, label)
        """
        seq_path, label = self.samples[idx]
        return seq_path, label

In [50]:
# Pick one sequence file from the folder index, open it, and compare the
# scaled target of a row with the folder label and the stored value.
data_path = '/home/rzma/myProjects/contactInterpretation/dataset/4features'
dict_label = {
    'test_dataset_source_robot': 0,
    'test_dataset_target_robot': 1,
    'test_dataset_target_robot_ur5': 2,
}
test_data = LoadDataset(data_path, dict_label)
seq_path, label = test_data[2]
data = LoadSeqDataset(seq_path, label)
print(data[3][1])
print(label)
data.data.iloc[3,0]

tensor(10)
2


5.0