In [1]:
import pandas as pd

In [9]:
# Load the DCAT table, report how many values are missing per column,
# then preview the first rows as the cell's displayed result.
dcat = pd.read_csv('../assets/dcat3.csv')
missing_per_column = dcat.isna().sum()
print(missing_per_column)
dcat.head()

Class       0
Property    0
Datatype    0
dtype: int64


Unnamed: 0,Class,Property,Datatype
0,dcat:Catalog,dcat:dataset,dcat:Dataset
1,dcat:Catalog,dcat:record,dcat:CatalogRecord
2,dcat:Catalog,dcat:service,dcat:DataService
3,dcat:Catalog,dcat:catalog,dcat:Catalog
4,dcat:Catalog,dcat:themeTaxonomy,skos:ConceptScheme


In [30]:
import pandas as pd
import requests
import io
import numpy as np

# Download the CSV file from GitHub
url = "https://raw.githubusercontent.com/OPCFoundation/UA-Nodeset/CNC-1.00-2017-06-19/Robotics/NodeIds.csv"
# Bound the wait so a stalled connection cannot hang the kernel indefinitely.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of parsing an error body as CSV.
response.raise_for_status()
data = response.content.decode('utf-8')
# Column names are supplied explicitly, so the first line is read as data
# (assumes the remote file has no header row -- TODO confirm).
df = pd.read_csv(io.StringIO(data), names=['name', 'id', 'type'])

# Function to split names and create hierarchical structure
def process_opcua_nodes(df):
    """Expand underscore-separated node names into one column per level.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``name``, ``id`` and ``type``. Every
        ``name`` is split on ``'_'``; a non-string name (e.g. a missing
        value) raises ``AttributeError``.

    Returns
    -------
    pandas.DataFrame
        One row per input row, indexed 0..n-1, with the columns
        ``level_0 .. level_{max_depth-1}`` (missing for rows shallower than
        the deepest one), followed by ``id``, ``type``, ``depth`` (number of
        name parts) and ``full_name`` (the unsplit name). For an input with
        no rows, an empty frame holding only the four non-level columns is
        returned.
    """
    other_cols = ['id', 'type', 'depth', 'full_name']

    # Without rows there is no 'depth' column to take a maximum of, so
    # return a well-formed empty table instead of failing with a KeyError.
    if len(df) == 0:
        return pd.DataFrame(columns=other_cols)

    # Iterate the three needed columns directly; this avoids building a
    # Series for every row as iterrows() does.
    records = []
    for full_name, node_id, node_type in zip(df['name'], df['id'], df['type']):
        name_parts = full_name.split('_')

        # One key per hierarchy level, then the node's own attributes.
        record = {f'level_{i}': part for i, part in enumerate(name_parts)}
        record['id'] = node_id
        record['type'] = node_type
        record['depth'] = len(name_parts)
        record['full_name'] = full_name
        records.append(record)

    # Building the frame from dicts leaves levels a row does not have as
    # missing values, so every level column up to the maximum depth exists.
    result_df = pd.DataFrame(records)
    max_depth = int(result_df['depth'].max())

    # Reorder columns for better readability: levels first, attributes last.
    level_cols = [f'level_{i}' for i in range(max_depth)]
    return result_df[level_cols + other_cols]

# Process the data: expand every node name in `df` into level_* columns
processed_df = process_opcua_nodes(df)

# Save to CSV (currently disabled; uncomment to write the full table to disk)
# processed_df.to_csv('../assets/opcua_robotics_hierarchical.csv', index=False)

# Create a more compact representation focusing on ObjectTypes and their children
def create_object_type_hierarchy(df):
    """Group nodes under the ObjectType named by their first name level.

    ``df`` must provide the columns ``level_0``, ``id``, ``type``,
    ``full_name`` and ``depth``; further ``level_i`` columns are optional.
    Each distinct ``level_0`` value found on a row whose ``type`` is
    ``'ObjectType'`` becomes a root. Every row sharing that ``level_0`` is
    emitted with the root in an ``ObjectType`` column, its own attributes,
    and the non-missing ``level_1 .. level_{depth-1}`` values it carries.
    Rows whose ``level_0`` is not such a root are left out.
    """
    object_type_mask = df['type'] == 'ObjectType'
    roots = df.loc[object_type_mask, 'level_0'].unique()

    records = []
    for root in roots:
        members = df.loc[df['level_0'] == root]

        for _, member in members.iterrows():
            record = {
                'ObjectType': root,
                'id': member['id'],
                'type': member['type'],
                'full_name': member['full_name'],
                'depth': member['depth'],
            }

            # level_0 is already represented by 'ObjectType', so start at 1;
            # keep only levels that exist as columns and hold a value.
            level_keys = [f'level_{k}' for k in range(1, int(member['depth']))]
            record.update({
                key: member[key]
                for key in level_keys
                if key in member and not pd.isna(member[key])
            })

            records.append(record)

    return pd.DataFrame(records)

# Build the per-ObjectType table and write it out as CSV (no index column)
hierarchy_df = create_object_type_hierarchy(processed_df)
hierarchy_df.to_csv('../assets/opcua_robotics_object_hierarchy.csv', index=False)

# Summarise the processed data: row count, deepest name, ObjectType names
is_object_type = processed_df['type'] == 'ObjectType'
object_type_names = processed_df[is_object_type]['level_0'].unique()
print(f"Total nodes: {len(processed_df)}")
print(f"Maximum hierarchy depth: {processed_df['depth'].max()}")
print(f"Object types: {', '.join(object_type_names)}")

Total nodes: 2133
Maximum hierarchy depth: 9
Object types: MotionDeviceSystemType, ControllerType, MotionDeviceType, TaskControlType, SafetyStateType, LoadType, MotorType, GearType, AxisType, PowerTrainType, EmergencyStopFunctionType, ProtectiveStopFunctionType, AuxiliaryComponentType, DriveType, UserType


In [31]:
# Preview the first rows of the per-ObjectType hierarchy table
hierarchy_df.head()

Unnamed: 0,ObjectType,id,type,full_name,depth,level_1,level_2,level_3,level_4,level_5,level_6,level_7,level_8
0,MotionDeviceSystemType,1002,ObjectType,MotionDeviceSystemType,1,,,,,,,,
1,MotionDeviceSystemType,5001,Object,MotionDeviceSystemType_Controllers,2,Controllers,,,,,,,
2,MotionDeviceSystemType,5002,Object,MotionDeviceSystemType_MotionDevices,2,MotionDevices,,,,,,,
3,MotionDeviceSystemType,5010,Object,MotionDeviceSystemType_SafetyStates,2,SafetyStates,,,,,,,
4,MotionDeviceSystemType,15001,Object,MotionDeviceSystemType_Identification,2,Identification,,,,,,,
