In [6]:
from OCC.Core.STEPControl import STEPControl_Reader
from OCC.Core.IFSelect import IFSelect_RetDone, IFSelect_ItemsByEntity
from OCC.Display.SimpleGui import init_display

# Create a STEP reader
step_reader = STEPControl_Reader()

# Read the STEP file
status = step_reader.ReadFile(r'.\samples_abc\13GZzcN19TySIR0_1.step')

if status == IFSelect_RetDone:  # Check if the file was loaded successfully
    # Load all entities in the STEP file
    step_reader.TransferRoot()
    shape = step_reader.Shape()
    
    # Display the shape
    display, start_display, add_menu, add_function_to_menu = init_display()
    display.DisplayShape(shape, update=True)
    start_display()
else:
    print("Error: could not read STEP file.")

In [2]:
!mkdir step

A subdirectory or file step already exists.


In [3]:
!wget https://deep-geometry.github.io/abc-dataset/data/step_v00.txt


--2025-05-21 22:38:01--  https://deep-geometry.github.io/abc-dataset/data/step_v00.txt
Resolving deep-geometry.github.io (deep-geometry.github.io)... 2606:50c0:8003::153, 2606:50c0:8000::153, 2606:50c0:8001::153, ...
Connecting to deep-geometry.github.io (deep-geometry.github.io)|2606:50c0:8003::153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7600 (7.4K) [text/plain]
Saving to: 'step_v00.txt'

     0K .......                                               100% 1.32M=0.005s

2025-05-21 22:38:01 (1.32 MB/s) - 'step_v00.txt' saved [7600/7600]



In [5]:
!cat step_v00.txt | xargs -n 2 -P 8 sh -c 'wget --no-check-certificate $0 -O step/$1'


'cat' is not recognized as an internal or external command,
operable program or batch file.


In [None]:
cat step_v00.txt | xargs -n 2 -P 8 sh -c 'curl --insecure -o step/$1 $0'

In [1]:
#!/usr/bin/env python3
import os
import shutil
import sys

# Base directory where your data_process/abc_parsed folder is located
# Replace with your actual path - this should point to the abc_parsed folder
base_dir = "./data_process/abc_parsed"

# Output directory where renamed folders and files will be saved
output_dir = "./data_process/abc_parsed_updated"

def rename_folders_sequentially():
    """
    Create renamed copies of folders in sequential order (0002 -> 0000, 0003 -> 0001, etc.)
    """
    try:
        # Create output directory if it doesn't exist
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
            print(f"Created output directory: {output_dir}")
        
        # Get all folders and sort them numerically
        folders = [f for f in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, f))]
        folders.sort(key=lambda x: int(x) if x.isdigit() else float('inf'))
        
        print(f"Found folders: {folders}")
        
        # Create mapping of old names to new names
        folder_mapping = {}
        for i, folder in enumerate(folders):
            new_name = f"{i:04d}"  # Format as 4 digits with leading zeros
            folder_mapping[folder] = new_name
            
            # Create the new folder in the output directory
            new_folder_path = os.path.join(output_dir, new_name)
            os.makedirs(new_folder_path, exist_ok=True)
            print(f"Created folder mapping: {folder} -> {new_name}")
        
        return folder_mapping
    
    except Exception as e:
        print(f"Error creating renamed folders: {e}")
        raise

def rename_files_sequentially(folder_mapping):
    """
    Copy and rename files in each folder to follow a sequential pattern (00000000.pkl, etc).
    """
    try:
        # Initialize a global file counter for sequential file naming across all folders
        global_file_counter = 0
        
        # For each folder, copy and rename the files inside
        for old_folder_name, new_folder_name in folder_mapping.items():
            old_folder_path = os.path.join(base_dir, old_folder_name)
            new_folder_path = os.path.join(output_dir, new_folder_name)
            
            # Get all .pkl files in the source folder
            files = [f for f in os.listdir(old_folder_path) if f.endswith('.pkl')]
            files.sort(key=lambda x: int(x.split('.')[0]) if x.split('.')[0].isdigit() else float('inf'))
            
            print(f"Found files in {old_folder_name}: {files}")
            
            # Copy and rename each file using the global counter
            for file in files:
                new_file_name = f"{global_file_counter:08d}.pkl"  # Format as 8 digits with leading zeros
                old_file_path = os.path.join(old_folder_path, file)
                new_file_path = os.path.join(new_folder_path, new_file_name)
                
                print(f"Copying and renaming file: {old_folder_name}/{file} -> {new_folder_name}/{new_file_name}")
                shutil.copy2(old_file_path, new_file_path)  # copy2 preserves metadata
                
                global_file_counter += 1
    
    except Exception as e:
        print(f"Error copying and renaming files: {e}")
        raise

def main():
    """
    Main function to execute the renaming process
    """
    try:
        print("Starting sequential renaming process...")
        
        # Step 1: Create new folders with sequential numbering
        folder_mapping = rename_folders_sequentially()
        
        # Step 2: Copy and rename files with sequential numbering
        rename_files_sequentially(folder_mapping)
        
        print(f"Process completed successfully!")
        print(f"Original folders and files preserved in: {base_dir}")
        print(f"Renamed folders and files saved to: {output_dir}")
        
        # Print the complete mapping for reference
        print("\nFolder mapping summary:")
        for old, new in folder_mapping.items():
            print(f"  {old} → {new}")
    
    except Exception as e:
        print(f"Process failed: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()

Starting sequential renaming process...
Found folders: ['0002', '0003', '0004', '0006', '0007', '0008', '0009', '0021', '0022', '0024', '0031', '0039', '0057', '0058', '0061', '0063', '0064', '0065', '0066', '0067', '0068', '0070', '0072', '0077', '0078', '0079', '0081', '0087', '0088', '0090', '0093', '0102', '0103', '0104', '0105', '0106', '0111', '0112', '0119', '0120', '0123', '0125', '0127', '0128', '0129', '0130', '0131', '0132', '0133', '0134', '0136', '0139', '0140', '0142', '0143', '0144', '0145', '0146', '0153', '0154', '0157', '0159', '0161', '0162', '0163', '0164', '0165', '0166', '0168', '0170', '0171', '0172', '0173', '0174', '0175', '0176', '0177', '0178', '0180', '0181', '0183', '0187', '0188', '0190', '0196', '0206', '0224', '0230', '0237', '0238', '0241', '0243', '0248', '0249', '0250', '0251', '0257', '0258', '0259', '0260', '0261', '0262', '0265', '0266', '0267', '0268', '0269', '0272', '0273', '0274', '0275', '0276', '0280', '0282', '0283', '0284', '0285', '0287', 

In [1]:
import pickle

file = r'C:\Users\prart\CAD-GEN\BrepGen\data_process\abc_data_split_6bit.pkl'

with open(file, 'rb') as f:
    data = pickle.load(f)
data


{'train': ['00000002.pkl',
  '00000004.pkl',
  '00000006.pkl',
  '00000007.pkl',
  '00000008.pkl',
  '00000009.pkl',
  '00000021.pkl',
  '00000022.pkl',
  '00000031.pkl',
  '00000039.pkl',
  '00000057.pkl',
  '00000058.pkl',
  '00000061.pkl',
  '00000063.pkl',
  '00000064.pkl',
  '00000065.pkl',
  '00000066.pkl',
  '00000068.pkl',
  '00000072.pkl',
  '00000077.pkl',
  '00000078.pkl',
  '00000079.pkl',
  '00000081.pkl',
  '00000087.pkl',
  '00000088.pkl',
  '00000090.pkl',
  '00000093.pkl',
  '00000102.pkl',
  '00000103.pkl',
  '00000104.pkl',
  '00000105.pkl',
  '00000106.pkl',
  '00000111.pkl',
  '00000112.pkl',
  '00000119.pkl',
  '00000120.pkl',
  '00000123.pkl',
  '00000125.pkl',
  '00000127.pkl',
  '00000129.pkl',
  '00000131.pkl',
  '00000133.pkl',
  '00000134.pkl',
  '00000136.pkl',
  '00000139.pkl',
  '00000140.pkl',
  '00000142.pkl',
  '00000143.pkl',
  '00000144.pkl',
  '00000145.pkl',
  '00000146.pkl',
  '00000153.pkl',
  '00000154.pkl',
  '00000157.pkl',
  '00000159.pkl',
 

In [6]:
import os

root = r'C:\Users\prart\CAD-GEN\BrepGen\data_process\abc_parsed'
dirs = os.listdir(root)

new_root = r'C:\Users\prart\CAD-GEN\BrepGen\data_process\abc_parsed_updated2\0000'
os.mkdir(new_root)


['0002',
 '0003',
 '0004',
 '0006',
 '0007',
 '0008',
 '0009',
 '0021',
 '0022',
 '0024',
 '0031',
 '0039',
 '0057',
 '0058',
 '0061',
 '0063',
 '0064',
 '0065',
 '0066',
 '0067',
 '0068',
 '0070',
 '0072',
 '0077',
 '0078',
 '0079',
 '0081',
 '0087',
 '0088',
 '0090',
 '0093',
 '0102',
 '0103',
 '0104',
 '0105',
 '0106',
 '0111',
 '0112',
 '0119',
 '0120',
 '0123',
 '0125',
 '0127',
 '0128',
 '0129',
 '0130',
 '0131',
 '0132',
 '0133',
 '0134',
 '0136',
 '0139',
 '0140',
 '0142',
 '0143',
 '0144',
 '0145',
 '0146',
 '0153',
 '0154',
 '0157',
 '0159',
 '0161',
 '0162',
 '0163',
 '0164',
 '0165',
 '0166',
 '0168',
 '0170',
 '0171',
 '0172',
 '0173',
 '0174',
 '0175',
 '0176',
 '0177',
 '0178',
 '0180',
 '0181',
 '0183',
 '0187',
 '0188',
 '0190',
 '0196',
 '0206',
 '0224',
 '0230',
 '0237',
 '0238',
 '0241',
 '0243',
 '0248',
 '0249',
 '0250',
 '0251',
 '0257',
 '0258',
 '0259',
 '0260',
 '0261',
 '0262',
 '0265',
 '0266',
 '0267',
 '0268',
 '0269',
 '0272',
 '0273',
 '0274',
 '0275',
 

In [2]:
import os
import json
import random
import math

def list_pkl_files(parent_folder):
    # List to store all .pkl files in the format "child_folder/file" (without .pkl extension)
    all_pkl_files = []
    
    # Check if the parent folder exists
    if not os.path.exists(parent_folder):
        print(f"Error: Parent folder '{parent_folder}' does not exist")
        return []
    
    # Get all items in the parent folder
    items = os.listdir(parent_folder)
    
    # Filter for directories (child folders)
    child_folders = [item for item in items if os.path.isdir(os.path.join(parent_folder, item))]
    
    # For each child folder, list pkl files
    for folder in child_folders:
        folder_path = os.path.join(parent_folder, folder)
        
        try:
            files = os.listdir(folder_path)
            # Filter for .pkl files and format as requested
            for file in files:
                if file.endswith('.pkl'):
                    # Remove the .pkl extension
                    file_without_extension = file[:-4]
                    all_pkl_files.append(f"{folder}/{file_without_extension}")
        except Exception as e:
            print(f"Error processing folder {folder}: {str(e)}")
    
    return all_pkl_files

def split_dataset(all_files, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15, seed=42):
    """
    Split the dataset into train, validation, and test sets.
    
    Parameters:
    - all_files: List of all files
    - train_ratio: Proportion for training set (default 70%)
    - val_ratio: Proportion for validation set (default 15%)
    - test_ratio: Proportion for test set (default 15%)
    - seed: Random seed for reproducibility
    
    Returns:
    - Dictionary with train, val, and test splits as lists
    """
    # Validate ratios
    if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-10:
        raise ValueError("Ratios must sum to 1")
    
    # Set random seed for reproducibility
    random.seed(seed)
    
    # Shuffle the file list
    shuffled_files = all_files.copy()
    random.shuffle(shuffled_files)
    
    # Calculate split indices
    n_files = len(shuffled_files)
    n_train = math.floor(n_files * train_ratio)
    n_val = math.floor(n_files * val_ratio)
    
    # Split the dataset
    train_files = shuffled_files[:n_train]
    val_files = shuffled_files[n_train:n_train + n_val]
    test_files = shuffled_files[n_train + n_val:]
    
    # Create result dictionary with lists instead of dictionaries
    split_result = {
        "train": train_files,
        "val": val_files,
        "test": test_files
    }
    
    return split_result

def main():
    # Use the specific parent folder path
    parent_folder = "data_process/abc_parsed_updated"
    
    # Get all .pkl files (without .pkl extension)
    all_files = list_pkl_files(parent_folder)
    print(f"Found {len(all_files)} .pkl files in total")
    
    # Split the dataset (70% train, 15% val, 15% test)
    splits = split_dataset(all_files, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)
    
    # Convert to JSON and print
    json_output = json.dumps(splits, indent=4)
    print(json_output)
    
    # Save to a file
    output_file = "dataset_split.json"
    with open(output_file, 'w') as f:
        f.write(json_output)
    print(f"Output saved to {output_file}")

if __name__ == "__main__":
    main()

Found 4776 .pkl files in total
{
    "train": [
        "1678/00001678",
        "4426/00004426",
        "2217/00002217",
        "0750/00000750",
        "2315/00002315",
        "4382/00004382",
        "1290/00001290",
        "3707/00003707",
        "1911/00001911",
        "3338/00003338",
        "2945/00002945",
        "1471/00001471",
        "0963/00000963",
        "3918/00003918",
        "3709/00003709",
        "0417/00000417",
        "3759/00003759",
        "0351/00000351",
        "4521/00004521",
        "2204/00002204",
        "3494/00003494",
        "1778/00001778",
        "2093/00002093",
        "2678/00002678",
        "0657/00000657",
        "3118/00003118",
        "0425/00000425",
        "3939/00003939",
        "0695/00000695",
        "4590/00004590",
        "0438/00000438",
        "4700/00004700",
        "1057/00001057",
        "0478/00000478",
        "2003/00002003",
        "0551/00000551",
        "0126/00000126",
        "0167/00000167",
  

In [None]:
!mkdir step

A subdirectory or file step already exists.


In [None]:
!mkdir step

A subdirectory or file step already exists.


In [None]:
!mkdir step

A subdirectory or file step already exists.


In [None]:
!mkdir step

A subdirectory or file step already exists.


In [None]:
!mkdir step

A subdirectory or file step already exists.
