In [4]:
import pandas as pd

# Let pandas use up to 140 characters per line when printing frames
pd.set_option('display.width', 140)

# Folder of the shape being processed: the cells below read all.csv from it
# and write the per-sample point-cloud files back into it
DATA_PATH = 'data/igus/lead_screw_nut'

In [None]:
# Location of the CSV export, resolved relative to DATA_PATH
import os

csv_file_path = os.path.join(DATA_PATH, 'all.csv')

# Read the CSV into a DataFrame; the point_cloud_5000_x/_y/_z columns are
# parsed from strings further down in this cell
df = pd.read_csv(csv_file_path)

# Accumulator for the parsed rows; each entry appended below is a
# [x_values, y_values, z_values] list
data = []

def parse_string(s):
  """Convert a bracketed, comma-separated string such as '[1.0, 2.5]' to floats.

  Args:
    s: Text holding comma-separated numbers, optionally wrapped in square
      brackets and surrounded by whitespace.

  Returns:
    A list of floats in the order they appear in `s`; an empty list when
    `s` holds no values (for example '[]' or '').

  Raises:
    ValueError: If any comma-separated token is not a valid float. Stray
      empty tokens ('[1,,2]') are rejected rather than dropped, so a
      malformed row cannot silently shift the coordinates.
  """
  # Trim whitespace first; otherwise a leading space or trailing newline
  # would shield the brackets from strip('[]')
  body = s.strip().strip('[]').strip()
  if not body:
    # ''.split(',') yields [''], and float('') would raise
    return []
  return [float(token) for token in body.split(',')]

# Step through the three coordinate columns in lockstep, one CSV row at a
# time, and store each row's parsed axes as [x_values, y_values, z_values]
coordinate_columns = zip(
  df['point_cloud_5000_x'],
  df['point_cloud_5000_y'],
  df['point_cloud_5000_z'],
)
for raw_x, raw_y, raw_z in coordinate_columns:
  data.append([parse_string(raw_x), parse_string(raw_y), parse_string(raw_z)])


In [None]:
# Report the nesting of the parsed data, outermost level first:
# number of rows, axes per row, points per axis
sample_count = len(data)
print(sample_count)
first_sample = data[0]
print(len(first_sample))
first_axis = first_sample[0]
print(len(first_axis))

31
3
5000


In [None]:
import os

# The last component of DATA_PATH becomes the file-name prefix
parent_folder = os.path.basename(DATA_PATH)

# The files are written into DATA_PATH itself; create it when missing
output_dir = DATA_PATH
os.makedirs(output_dir, exist_ok=True)

# One text file per parsed row, one "x,y,z" line per point
for sample_idx, sample in enumerate(data):
    target_path = os.path.join(output_dir, f'{parent_folder}_{sample_idx}.txt')
    with open(target_path, 'w') as handle:
        handle.writelines(
            ','.join(str(coord) for coord in point) + '\n'
            for point in zip(sample[0], sample[1], sample[2])
        )

In [8]:
import os

# Parent directory of DATA_PATH; every sub-folder of it is written out as a
# shape name
data_path_name = os.path.dirname(DATA_PATH)

# os.listdir returns entries in arbitrary order, so sort them to keep the
# line order of the names file stable between runs and machines. Hidden
# folders (e.g. .ipynb_checkpoints) are not shapes and are left out.
folders = sorted(
  f for f in os.listdir(data_path_name)
  if not f.startswith('.') and os.path.isdir(os.path.join(data_path_name, f))
)

# Write the folder names to a file, one per line
with open(os.path.join(data_path_name, 'igus_shape_names.txt'), 'w') as file:
  for folder in folders:
    file.write(folder + '\n')





In [49]:

from sklearn.model_selection import train_test_split
from natsort import natsorted
import re

# NOTE: DATA_PATH is rebound here from the single-shape folder used by the
# earlier cells to the dataset root. Re-run the cell that first defines
# DATA_PATH before re-running any of those earlier cells.
DATA_PATH = 'data/igus'

# Only per-sample point-cloud files belong in the lists. Filtering on the
# extension keeps the raw export (all.csv) stored next to the samples out of
# both the file list and the train/test split.
SAMPLE_EXTENSION = '.txt'

# Define the output file paths
output_file = os.path.join(DATA_PATH, 'filelist.txt')
train_file = os.path.join(DATA_PATH, 'igus_train.txt')
test_file = os.path.join(DATA_PATH, 'igus_test.txt')

# Mode 'w' truncates whatever a previous run left behind, so no separate
# clean-up step is needed and each output is opened exactly once.
with open(output_file, 'w') as f_filelist, \
        open(train_file, 'w') as f_train, \
        open(test_file, 'w') as f_test:
    for root, dirs, files in os.walk(DATA_PATH):
        # Editing `dirs` in place steers a top-down os.walk: drop hidden
        # folders (e.g. .ipynb_checkpoints) and fix the visiting order so the
        # generated files do not depend on the file system's listing order.
        dirs[:] = natsorted([d for d in dirs if not d.startswith('.')])

        if root == DATA_PATH:
            # Skip the first level of the directory tree
            print('Skipping', root)
            continue

        sample_files = natsorted([f for f in files if f.endswith(SAMPLE_EXTENSION)])
        if not sample_files:
            # Nothing to list here, and train_test_split rejects empty input
            continue

        # Folder path relative to DATA_PATH, always written with forward
        # slashes so the file list looks the same on every platform
        shape_name = os.path.relpath(root, DATA_PATH).replace(os.sep, '/')

        X = []
        y = []
        for file_name in sample_files:
            # Write the file path relative to DATA_PATH to the file list
            f_filelist.write(f'{shape_name}/{file_name}\n')
            X.append(file_name)
            y.append(shape_name)

        # Split this folder's samples 80/20. Every label in `y` is the same
        # folder name, so stratification cannot change the class balance; it
        # is kept so the seeded split procedure stays the one used so far.
        # A folder holding a single sample cannot be split and still raises
        # ValueError inside train_test_split.
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y)

        # Write the training file names, one per line
        for x in natsorted(X_train):
            f_train.write(x + '\n')

        # Write the held-out file names, one per line
        for x in natsorted(X_val):
            f_test.write(x + '\n')

Skipping data/igus
