In [1]:
# -*- coding: utf-8 -*-
"""
Analysis and Visualization of Dymola Simulation Results
=================================================

This module processes and visualizes simulation results from Dymola for UESGraphs-generated 
district heating network models. It demonstrates the analysis workflow from loading 
simulation data to creating network visualizations with key performance indicators.

Prerequisites:
------------
1. Network Definition:
   - Requires a network definition file (nodes.json) generated by example e11
   - Contains network topology and component parameters

2. Simulation Data:
   - Dymola simulation results in either .mat or .parquet.gzip format
   - Example data provided in uesgraphs/data/
   - Results must be from AixLib v2.1.0 models


Features:
--------
- Loads network topology from JSON
- Processes Dymola simulation results
- Assigns time series data to network components
- Generates network visualizations with:
  * Mass flow distribution
  * Pressure distribution
  * Time-averaged performance indicators

Data Processing:
-------------
1. Data Format Handling:
   - Checks for compressed data in .parquet.gzip format
   - If only .mat file exists:
     * Converts data using mat_handler.py
     * Stores intermediate results in compressed .parquet.gzip format
     * Uses compressed format for further processing
   - Subsequent runs use cached .parquet.gzip file for improved performance

2. Data Assignment:
   - Maps time series data to network components
   - Processes both extensive and intensive properties
   - Calculates time averages for visualization

Required Input Files:
------------------
1. nodes.json: Network topology and parameters
2. simulation_results.mat or parquet with .gzip compression: Dymola simulation output
    (If only .mat is available, will be converted to .gzip with mat_handler.py)

Output:
------
1. Network visualizations:
    - Mean Mass flow distribution (m_flow.png)
    - Mean Pressure distribution (press_flow.png)

Notes:
-----
- Simulation period can be specified (default: one week)
- Visualization uses time-averaged values for certain properties
- AixLib version must be specified for correct data mapping
- All paths are relative to the workspace directory
"""




In [2]:

import uesgraphs as ug

import uesgraphs.analyze as analyze
from uesgraphs.examples import e1_example_readme as e1

import os

from datetime import datetime


workspace = e1.workspace_example("e14")

dir_ues = os.path.dirname(os.path.dirname(workspace))
pinola_json = os.path.join(dir_ues, "workspace", "e11", "inputs","test_modelgen", "Pinola", "nodes.json")
pinola_sim_data = os.path.join(dir_ues,"uesgraphs","data","Pinola_low_temp_network_inputs.mat")

if not os.path.exists(pinola_json):
    raise FileNotFoundError(f"File {pinola_json} not found."
                            "Please run example e11 to generate network topology.")

graph = ug.UESGraph()
graph.from_json(path = pinola_json, network_type="heating")
graph.graph["name"] = "pinola"
graph.graph["supply_type"] = "supply"

start_date=datetime(2024, 1, 1) 
end_date=datetime(2024, 1, 7)

graph = analyze.assign_data_to_uesgraphs(graph,sim_data = pinola_sim_data,
                                            start_date=start_date,
                                            end_date=end_date,
                                            aixlib_version="2.1.0") #aixlib version is needed to assign data properly



read nodes...
******
 input_ids were {'buildings': None, 'nodes': '24f91801-215e-4b3d-9426-4ec51de13368', 'pipes': None, 'supplies': None}
...finished
Processing: D:\rka-lko\git\uesgraphs\uesgraphs\data\Pinola_low_temp_network_inputs.gzip
Assignment of pressure to nodes completed


In [3]:
import os

target_dir = r"D:\rka-lko\work\2025_04_analysis"

# Check if directory exists
if not os.path.exists(target_dir):
    print(f"Directory does not exist: {target_dir}")
    
# Check if we can write to the directory
test_file = os.path.join(target_dir, "test_permissions.txt")
try:
    with open(test_file, 'w') as f:
        f.write("Test")
    os.remove(test_file)  # Clean up
    print("Directory is writable")
except PermissionError:
    print("Permission denied - cannot write to directory")
except Exception as e:
    print(f"Other error: {e}")

Directory is writable


In [4]:
from pathlib import Path

output_dir = Path(r"D:\rka-lko\work\2025_04_analysis")
output_file = output_dir / "report.txt"

# Versuche ein alternatives Verzeichnis, falls notwendig
if not output_dir.exists() or not os.access(output_dir, os.W_OK):
    output_file = Path.home() / "Documents" / "report.txt"
    
# Sicherstellen, dass das übergeordnete Verzeichnis existiert
output_file.parent.mkdir(parents=True, exist_ok=True)

try:
    output_file.write_text(report_content)
    print(f"Report erfolgreich gespeichert unter: {output_file}")
except Exception as e:
    print(f"Fehler beim Schreiben: {e}")

Fehler beim Schreiben: name 'report_content' is not defined


In [5]:
target_f = r"D:\rka-lko\work\2025_04_analysis"

analyze.generate_graph_data_html_report(graph,target_f)

Report successfully saved to: D:\rka-lko\work\2025_04_analysis\report.md


In [6]:
edge1 = list(graph.edges)[0]
graph.get_edge_data(edge1[0],edge1[1])

{'diameter': 0.0825,
 'length': 155,
 'pipeID': '1',
 'name': '1',
 'node_0': 'S1',
 'node_1': 'N1',
 'm_flow_nom': 0.3,
 'fac': 1,
 'dIns': 0.045,
 'kIns': 0.024,
 'm_flow': DateTime
 2024-01-01 00:00:00    0.196914
 2024-01-01 01:00:00    0.136173
 2024-01-01 02:00:00    0.132721
 2024-01-01 03:00:00    0.138158
 2024-01-01 04:00:00    0.207043
                          ...   
 2024-01-06 20:00:00    0.211615
 2024-01-06 21:00:00    0.206302
 2024-01-06 22:00:00    0.145928
 2024-01-06 23:00:00    0.155562
 2024-01-07 00:00:00    0.093189
 Freq: h, Name: networkModel.pipe1.port_a.m_flow, Length: 145, dtype: float32,
 'press_drop': DateTime
 2024-01-01 00:00:00    5732.5625
 2024-01-01 01:00:00    2741.4375
 2024-01-01 02:00:00    2604.1875
 2024-01-01 03:00:00    2821.9375
 2024-01-01 04:00:00    6337.5000
                          ...    
 2024-01-06 20:00:00    6620.4375
 2024-01-06 21:00:00    6292.2500
 2024-01-06 22:00:00    3148.3125
 2024-01-06 23:00:00    3577.6875
 2024-01-0

In [7]:
def analyze_node_types(graph):
    """
    Analyze and categorize different node types in the graph with their attributes.
    
    This function identifies different node types and their specific attributes,
    which is helpful for understanding the structure of the graph.
    
    Parameters:
        graph: NetworkX graph with simulation data
        
    Returns:
        dict: Dictionary with node types as keys and their specific attributes as values
    """
    node_types = {}
    
    # Iterate through all nodes
    for _, data in graph.nodes(data=True):
        node_type = data.get('node_type', 'unknown')
        
        # Initialize node type if not seen before
        if node_type not in node_types:
            node_types[node_type] = {
                'count': 0,
                'attributes': set(),
                'timeseries_attributes': set()
            }
        
        # Count this node
        node_types[node_type]['count'] += 1
        
        # Add all attributes
        for key, value in data.items():
            node_types[node_type]['attributes'].add(key)
            
            # Try to detect time series data
            if hasattr(value, '__len__') and not isinstance(value, (str, dict)):
                try:
                    if len(value) > 1:
                        node_types[node_type]['timeseries_attributes'].add(key)
                except TypeError:
                    # Not a sequence with length
                    pass
    
    return node_types

In [8]:
noide = analyze_node_types(graph)
noide.keys()

dict_keys(['building', 'network_heating'])

In [9]:

for _, data in graph.nodes(data=True):
    print(data.get('node_type', 'unknown') in noide.keys())

True
True
True
True
True
True
True
True


In [10]:
sim_dat = os.path.join(dir_ues,"uesgraphs","data","Pinola_low_temp_network_inputs.gzip")
sim_dat

'D:\\rka-lko\\git\\uesgraphs\\uesgraphs\\data\\Pinola_low_temp_network_inputs.gzip'

In [11]:
extract_units_from_parquet(sim_dat)

NameError: name 'extract_units_from_parquet' is not defined

In [None]:
parquet_file = pq.ParquetFile(sim_dat)
units_dict = {}

In [None]:
for key, value in parquet_file.metadata.metadata.items():
    i=5


b'ARROW:schema' b'/////zhfJAAQAAAAAAAKAA4ABgAFAAgACgAAAAABBAAQAAAAAAAKAAwAAAAEAAgACgAAAHTgGAAEAAAAAQAAAAwAAAAIAAwABAAIAAgAAABM4BgABAAAAD/gGAB7ImluZGV4X2NvbHVtbnMiOiBbIlRpbWUiXSwgImNvbHVtbl9pbmRleGVzIjogW3sibmFtZSI6IG51bGwsICJmaWVsZF9uYW1lIjogbnVsbCwgInBhbmRhc190eXBlIjogInVuaWNvZGUiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiB7ImVuY29kaW5nIjogIlVURi04In19XSwgImNvbHVtbnMiOiBbeyJuYW1lIjogIm5ldHdvcmtNb2RlbC5zdXBwbHlTMS5hbGxvd0Zsb3dSZXZlcnNhbCIsICJmaWVsZF9uYW1lIjogIm5ldHdvcmtNb2RlbC5zdXBwbHlTMS5hbGxvd0Zsb3dSZXZlcnNhbCIsICJwYW5kYXNfdHlwZSI6ICJmbG9hdDMyIiwgIm51bXB5X3R5cGUiOiAiZmxvYXQzMiIsICJtZXRhZGF0YSI6IG51bGx9LCB7Im5hbWUiOiAibmV0d29ya01vZGVsLnN1cHBseVMxLnBvcnRfYS5wIiwgImZpZWxkX25hbWUiOiAibmV0d29ya01vZGVsLnN1cHBseVMxLnBvcnRfYS5wIiwgInBhbmRhc190eXBlIjogImZsb2F0MzIiLCAibnVtcHlfdHlwZSI6ICJmbG9hdDMyIiwgIm1ldGFkYXRhIjogbnVsbH0sIHsibmFtZSI6ICJuZXR3b3JrTW9kZWwuc3VwcGx5UzEucG9ydF9hLmhfb3V0ZmxvdyIsICJmaWVsZF9uYW1lIjogIm5ldHdvcmtNb2RlbC5zdXBwbHlTMS5wb3J0X2EuaF9vdXRmbG93IiwgInBhbmRhc190eXBlIjogIm

In [None]:
EDGE_ATTRIBUTES = {
    # Static attributes
    "diameter": {"unit": None, "description": "Pipe inner diameter", "is_timeseries": False},
    "length": {"unit": None, "description": "Pipe length", "is_timeseries": False},
    "pipeID": {"unit": None, "description": "Pipe identifier", "is_timeseries": False},
    "name": {"unit": None, "description": "Pipe name", "is_timeseries": False},
    "node_0": {"unit": None, "description": "Start node identifier", "is_timeseries": False},
    "node_1": {"unit": None, "description": "End node identifier", "is_timeseries": False},
    "dIns": {"unit": "m", "description": "Insulation diameter", "is_timeseries": False},
    "kIns": {"unit": "W/(m·K)", "description": "Insulation thermal conductivity", "is_timeseries": False},
    "m_flow_nom": {"unit": "kg/s", "description": "Nominal mass flow rate", "is_timeseries": False},
    "fac": {"unit": None, "description": "Flow factor", "is_timeseries": False},
    
    # Time series attributes
    "m_flow": {"unit": None, "description": "Mass flow rate", "is_timeseries": True},
    "press_drop": {"unit": None, "description": "Pressure drop", "is_timeseries": True},
    "press_drop_length": {"unit": None, "description": "Pressure drop per length", "is_timeseries": True},
    "temp_diff": {"unit": None, "description": "Temperature difference", "is_timeseries": True},
    #"asd": {"unit": None, "description": "asd", "is_timeseries": True}
}

In [None]:
NODE_ATTRIBUTES = {
    # Static attributes
    "node_type": {"unit": None, "description": "Type of the network node", "is_timeseries": False},
    "network_id": {"unit": None, "description": "Network identifier", "is_timeseries": False},
    "position": {"unit": None, "description": "Geographical position coordinates", "is_timeseries": False},
    "name": {"unit": None, "description": "Node name", "is_timeseries": False},
    "press_name": {"unit": None, "description": "Reference name for pressure data", "is_timeseries": False},
    "temp_name": {"unit": None, "description": "Reference name for temperature data", "is_timeseries": False},
    
    # Time series attributes
    "press_flow": {"unit": "Pa", "description": "Pressure flow time series", "is_timeseries": True},
    "temperature_supply": {"unit": "K", "description": "Supply temperature time series", "is_timeseries": True}
}

In [None]:
NODE_ATTRIBUTES = {
    # Common static attributes
    "name": {"unit": None, "description": "Node name identifier", "is_timeseries": False},
    "node_type": {"unit": None, "description": "Type of the node (network_heating, building, etc.)", "is_timeseries": False},
    "position": {"unit": None, "description": "Geographical position coordinates", "is_timeseries": False},
    
    # Network nodes specific attributes
    "network_id": {"unit": None, "description": "Network identifier", "is_timeseries": False},
    "press_name": {"unit": None, "description": "Reference name for pressure data", "is_timeseries": False},
    "temp_name": {"unit": None, "description": "Reference name for temperature data", "is_timeseries": False},
    "press_flow": {"unit": "Pa", "description": "Pressure flow time series", "is_timeseries": True},
    "temperature_supply": {"unit": "K", "description": "Supply temperature time series", "is_timeseries": True},
    
    # Supply nodes specific attributes
    "is_supply_heating": {"unit": None, "description": "Flag indicating if node is a heating supply", "is_timeseries": False},
    "is_supply_cooling": {"unit": None, "description": "Flag indicating if node is a cooling supply", "is_timeseries": False},
    "is_supply_electricity": {"unit": None, "description": "Flag indicating if node is an electricity supply", "is_timeseries": False},
    "is_supply_gas": {"unit": None, "description": "Flag indicating if node is a gas supply", "is_timeseries": False},
    "is_supply_other": {"unit": None, "description": "Flag indicating if node is another type of supply", "is_timeseries": False},
    "p_supply": {"unit": "Pa", "description": "Supply pressure values", "is_timeseries": True},
    "T_supply": {"unit": "K", "description": "Supply temperature values", "is_timeseries": True}
}

def print_available_data(graph, element_type="edges") -> None:
    """
    Print available data attributes in the graph with their descriptions and highlight discrepancies.
    
    This function compares the actual attributes present in the graph elements (edges or nodes)
    with those defined in the schema, identifying missing and undocumented attributes.
    
    Parameters:
        graph: NetworkX graph with simulation data
        element_type: Type of graph elements to analyze ("edges" or "nodes")
    """
    if element_type.lower() not in ["edges", "nodes"]:
        raise ValueError("element_type must be 'edges' or 'nodes'")
    
    # Select the appropriate attributes schema and graph elements
    if element_type.lower() == "edges":
        attributes_schema = EDGE_ATTRIBUTES
        element_name = "EDGE"
        # Get actual edge attributes present in the graph
        actual_attrs = set()
        for _, _, data in graph.edges(data=True):
            actual_attrs.update(data.keys())
    else:  # nodes
        attributes_schema = NODE_ATTRIBUTES
        element_name = "NODE"
        # Get actual node attributes present in the graph
        actual_attrs = set()
        for _, data in graph.nodes(data=True):
            actual_attrs.update(data.keys())
    
    # Get expected attributes from schema
    expected_attrs = set(attributes_schema.keys())
    
    # Calculate missing and unexpected attributes
    missing_attrs = expected_attrs - actual_attrs
    unexpected_attrs = actual_attrs - expected_attrs
    
    print(f"\n{element_name} ATTRIBUTES:")
    print("-" * 80)
    print(f"{'Attribute':<20} | {'Unit':<10} | {'Type':<15} | Description")
    print("-" * 80)
    
    # Print attributes that are both in the graph and in the schema
    for attr in sorted(actual_attrs & expected_attrs):
        info = attributes_schema[attr]
        data_type = "Time series" if info["is_timeseries"] else "Static value"
        unit = info["unit"] if info["unit"] else "-"
        print(f"{attr:<20} | {unit:<10} | {data_type:<15} | {info['description']}")
    
    # Print attributes found in graph but not in schema
    if unexpected_attrs:
        print("\nUNDOCUMENTED ATTRIBUTES (found in graph but not in schema):")
        print("-" * 80)
        for attr in sorted(unexpected_attrs):
            print(f"{attr:<20} | {'?':<10} | {'Unknown':<15} | Not documented in schema")
    
    # Print attributes in schema but not found in graph
    if missing_attrs:
        print("\nMISSING ATTRIBUTES (defined in schema but not found in graph):")
        print("-" * 80)
        for attr in sorted(missing_attrs):
            info = attributes_schema[attr]
            data_type = "Time series" if info["is_timeseries"] else "Static value"
            unit = info["unit"] if info["unit"] else "-"
            print(f"{attr:<20} | {unit:<10} | {data_type:<15} | {info['description']}")
    
    print("\n")


def analyze_node_types(graph):
    """
    Analyze and categorize different node types in the graph with their attributes.
    
    This function identifies different node types and their specific attributes,
    which can be helpful for building a more comprehensive attribute schema.
    
    Parameters:
        graph: uesgraphs graph with simulation data
        
    Returns:
        dict: Dictionary with node types as keys and their specific attributes as values
    """
    node_types = {}
    
    # Iterate through all nodes
    for _, data in graph.nodes(data=True):
        node_type = data.get('node_type', 'unknown')
        
        # Initialize node type if not seen before
        if node_type not in node_types:
            node_types[node_type] = {
                'count': 0,
                'attributes': set(),
                'timeseries_attributes': set()
            }
        
        # Count this node
        node_types[node_type]['count'] += 1
        
        # Add all attributes
        for key, value in data.items():
            node_types[node_type]['attributes'].add(key)
            
            # Try to detect time series data
            if hasattr(value, '__len__') and not isinstance(value, (str, dict)):
                try:
                    if len(value) > 1:
                        node_types[node_type]['timeseries_attributes'].add(key)
                except TypeError:
                    # Not a sequence with length
                    pass
    
    # Print analysis results
    print("\nNODE TYPE ANALYSIS:")
    print("-" * 80)
    for node_type, info in sorted(node_types.items()):
        print(f"Node Type: {node_type} (Count: {info['count']})")
        print(f"  Static Attributes: {', '.join(sorted(info['attributes'] - info['timeseries_attributes']))}")
        print(f"  Time Series Attributes: {', '.join(sorted(info['timeseries_attributes']))}")
        print("-" * 80)
    
    return node_types


def create_node_attributes_schema(graph):
    """
    Create a comprehensive NODE_ATTRIBUTES schema based on actual graph data.
    
    This function analyzes the graph and creates a detailed schema of all node attributes
    found, attempting to detect time series data and provide meaningful descriptions.
    
    Parameters:
        graph: NetworkX graph with simulation data
        
    Returns:
        dict: Dictionary with attribute names as keys and their properties as values
    """
    # Analyze node types to get a better understanding of attributes
    node_types = analyze_node_types(graph)
    
    # Create a comprehensive schema
    schema = {}
    
    # Collect all attributes across all node types
    all_attrs = set()
    all_timeseries_attrs = set()
    
    for node_type_info in node_types.values():
        all_attrs.update(node_type_info['attributes'])
        all_timeseries_attrs.update(node_type_info['timeseries_attributes'])
    
    # Sample nodes to extract units if possible
    sample_nodes = {}
    for node, data in graph.nodes(data=True):
        node_type = data.get('node_type', 'unknown')
        if node_type not in sample_nodes:
            sample_nodes[node_type] = data
    
    # Build the schema
    for attr in sorted(all_attrs):
        is_timeseries = attr in all_timeseries_attrs
        
        # Try to infer unit based on attribute name
        unit = None
        if any(keyword in attr.lower() for keyword in ['temp', 'temperature', 't_']):
            unit = "K"
        elif any(keyword in attr.lower() for keyword in ['press', 'pressure', 'p_']):
            unit = "Pa"
        elif any(keyword in attr.lower() for keyword in ['flow', 'mass']):
            unit = "kg/s"
        
        # Try to create a meaningful description
        description = " ".join(word.capitalize() for word in attr.split('_'))
        
        # Add to schema
        schema[attr] = {
            "unit": unit,
            "description": description,
            "is_timeseries": is_timeseries
        }
    
    return schema

# Example usage:
# print_available_data(graph, "nodes")  # For analyzing node attributes
# print_available_data(graph, "edges")  # For analyzing edge attributes
# node_types = analyze_node_types(graph)  # Analyze different node types and their attributes
# auto_schema = create_node_attributes_schema(graph)  # Create a comprehensive schema automatically

In [None]:
def generate_graph_data_report(graph, output_path=None, node_types=None):
    """
    Generate a comprehensive report on the graph data, attributes, and validation results.
    Includes both edge and node analysis.
    
    Parameters:
        graph: UESGraph with simulation data
        output_path: Path to save the report (if None, will print to console)
        node_types: List of node types to include in the report (default: ["heating"])
    """
    from datetime import datetime
    import os
    
    # Use default node types if none specified
    if node_types is None:
        node_types = ["heating"]
    
    # Build report content
    report_lines = []
    report_lines.append("# Graph Data Report")
    report_lines.append(f"\nGenerated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    
    # Graph summary section
    report_lines.append(f"\n## Graph Summary")
    
    # Custom handling for UESGraph node counting
    for node_type in node_types:
        try:
            num_nodes = graph.number_of_nodes(node_type=node_type)
            report_lines.append(f"- Number of {node_type} nodes: {num_nodes}")
        except Exception as e:
            report_lines.append(f"- Error counting {node_type} nodes: {str(e)}")
    
    # Handle edge counting - may need to adapt based on your class implementation
    try:
        # First try without parameters
        num_edges = len(list(graph.edges()))
        report_lines.append(f"- Number of edges: {num_edges}")
    except Exception as e:
        report_lines.append(f"- Error counting edges: {str(e)}")
    
    if hasattr(graph, "graph") and isinstance(graph.graph, dict) and "supply_type" in graph.graph:
        report_lines.append(f"- Supply type: {graph.graph['supply_type']}")
    
    # Analyze node types
    node_type_analysis = analyze_node_types(graph)
    report_lines.append(f"- Node types identified: {len(node_type_analysis)}")
    for nt, info in node_type_analysis.items():
        report_lines.append(f"  - {nt}: {info['count']} nodes")
    
    #------------------------#
    # EDGE ATTRIBUTES ANALYSIS
    #------------------------#
    report_lines.append(f"\n## EDGE ATTRIBUTES ANALYSIS")
    
    # Get actual edge attributes
    actual_edge_attrs = set()
    for u, v, data in graph.edges(data=True):
        actual_edge_attrs.update(data.keys())
    
    # Get expected attributes from schema
    expected_edge_attrs = set(EDGE_ATTRIBUTES.keys())
    
    # Calculate missing and unexpected attributes
    missing_edge_attrs = expected_edge_attrs - actual_edge_attrs
    unexpected_edge_attrs = actual_edge_attrs - expected_edge_attrs
    
    # Edge attribute summary section
    report_lines.append(f"\n### Edge Attribute Summary")
    report_lines.append(f"- Total attributes in schema: {len(expected_edge_attrs)}")
    report_lines.append(f"- Total attributes in graph: {len(actual_edge_attrs)}")
    report_lines.append(f"- Attributes in both: {len(actual_edge_attrs & expected_edge_attrs)}")
    report_lines.append(f"- Undocumented attributes: {len(unexpected_edge_attrs)}")
    report_lines.append(f"- Missing attributes: {len(missing_edge_attrs)}")
    
    # Documented edge attributes section
    report_lines.append(f"\n### Documented Edge Attributes")
    report_lines.append(f"Attributes that are known and expected by this script.")
    report_lines.append(f"| Attribute | Unit | Type | Description |")
    report_lines.append(f"|-----------|------|------|-------------|")
    
    for attr in sorted(actual_edge_attrs & expected_edge_attrs):
        info = EDGE_ATTRIBUTES[attr]
        data_type = "Time series" if info["is_timeseries"] else "Static value"
        unit = info["unit"] if info["unit"] else "-"
        report_lines.append(f"| {attr} | {unit} | {data_type} | {info['description']} |")
    
    # Undocumented edge attributes section
    if unexpected_edge_attrs:
        report_lines.append(f"\n### Undocumented Edge Attributes")
        report_lines.append(f"| Attribute | Count | Example Value |")
        report_lines.append(f"|-----------|-------|---------------|")
        
        # Get example values and counts
        attr_examples = {}
        attr_counts = {attr: 0 for attr in unexpected_edge_attrs}
        
        for u, v, data in graph.edges(data=True):
            for attr in unexpected_edge_attrs:
                if attr in data:
                    attr_counts[attr] += 1
                    if attr not in attr_examples and data[attr] is not None:
                        # Safely convert to string and truncate long values
                        try:
                            example_value = str(data[attr])[:50]
                            if len(str(data[attr])) > 50:
                                example_value += "..."
                            attr_examples[attr] = example_value
                        except:
                            attr_examples[attr] = "[Complex data]"
        
        for attr in sorted(unexpected_edge_attrs):
            example = attr_examples.get(attr, "None")
            report_lines.append(f"| {attr} | {attr_counts[attr]} | {example} |")
    
    # Missing edge attributes section
    if missing_edge_attrs:
        report_lines.append(f"\n### Missing Edge Attributes")
        report_lines.append(f"| Attribute | Unit | Type | Description |")
        report_lines.append(f"|-----------|------|------|-------------|")
        for attr in sorted(missing_edge_attrs):
            info = EDGE_ATTRIBUTES[attr]
            data_type = "Time series" if info["is_timeseries"] else "Static value"
            unit = info["unit"] if info["unit"] else "-"
            report_lines.append(f"| {attr} | {unit} | {data_type} | {info['description']} |")
    
    #------------------------#
    # NODE ATTRIBUTES ANALYSIS
    #------------------------#
    report_lines.append(f"\n## NODE ATTRIBUTES ANALYSIS")
    
    # Get all nodes of the specified types
    analyzed_nodes = []
    for _, data in graph.nodes(data=True):
        node_type_value = data.get('node_type', 'unknown')
        if node_type_value in node_types or not node_types:  # Include if in specified types or if no types specified
            analyzed_nodes.append(data)
    
    if not analyzed_nodes:
        report_lines.append(f"\nNo nodes of specified types {node_types} found.")
    else:
        # Get actual node attributes from nodes of specified types
        actual_node_attrs = set()
        for data in analyzed_nodes:
            actual_node_attrs.update(data.keys())
        
        # Get expected attributes from schema
        expected_node_attrs = set(NODE_ATTRIBUTES.keys())
        
        # Calculate missing and unexpected attributes
        missing_node_attrs = expected_node_attrs - actual_node_attrs
        unexpected_node_attrs = actual_node_attrs - expected_node_attrs
        
        # Node attribute summary section
        report_lines.append(f"\n### Node Attribute Summary")
        report_lines.append(f"- Total attributes in schema: {len(expected_node_attrs)}")
        report_lines.append(f"- Total attributes in graph nodes: {len(actual_node_attrs)}")
        report_lines.append(f"- Attributes in both: {len(actual_node_attrs & expected_node_attrs)}")
        report_lines.append(f"- Undocumented attributes: {len(unexpected_node_attrs)}")
        report_lines.append(f"- Missing attributes: {len(missing_node_attrs)}")
        
        # Documented node attributes section
        report_lines.append(f"\n### Documented Node Attributes")
        report_lines.append(f"Attributes that are known and expected by this script.")
        report_lines.append(f"| Attribute | Unit | Type | Description |")
        report_lines.append(f"|-----------|------|------|-------------|")
        
        for attr in sorted(actual_node_attrs & expected_node_attrs):
            info = NODE_ATTRIBUTES[attr]
            data_type = "Time series" if info["is_timeseries"] else "Static value"
            unit = info["unit"] if info["unit"] else "-"
            report_lines.append(f"| {attr} | {unit} | {data_type} | {info['description']} |")
        
        # Undocumented node attributes section
        if unexpected_node_attrs:
            report_lines.append(f"\n### Undocumented Node Attributes")
            report_lines.append(f"| Attribute | Count | Example Value |")
            report_lines.append(f"|-----------|-------|---------------|")
            
            # Get example values and counts
            attr_examples = {}
            attr_counts = {attr: 0 for attr in unexpected_node_attrs}
            
            for data in analyzed_nodes:
                for attr in unexpected_node_attrs:
                    if attr in data:
                        attr_counts[attr] += 1
                        if attr not in attr_examples and data[attr] is not None:
                            # Safely convert to string and truncate long values
                            try:
                                example_value = str(data[attr])[:50]
                                if len(str(data[attr])) > 50:
                                    example_value += "..."
                                attr_examples[attr] = example_value
                            except:
                                attr_examples[attr] = "[Complex data]"
            
            for attr in sorted(unexpected_node_attrs):
                example = attr_examples.get(attr, "None")
                report_lines.append(f"| {attr} | {attr_counts[attr]} | {example} |")
        
        # Missing node attributes section
        if missing_node_attrs:
            report_lines.append(f"\n### Missing Node Attributes")
            report_lines.append(f"| Attribute | Unit | Type | Description |")
            report_lines.append(f"|-----------|------|------|-------------|")
            for attr in sorted(missing_node_attrs):
                info = NODE_ATTRIBUTES[attr]
                data_type = "Time series" if info["is_timeseries"] else "Static value"
                unit = info["unit"] if info["unit"] else "-"
                report_lines.append(f"| {attr} | {unit} | {data_type} | {info['description']} |")
        
        # Node type specific attribute analysis
        report_lines.append(f"\n### Node Type Specific Attributes")
        report_lines.append(f"Analysis of attributes by node type.")
        
        for node_type, info in sorted(node_type_analysis.items()):
            if node_type in node_types or not node_types:  # Only include specified types
                report_lines.append(f"\n#### {node_type} ({info['count']} nodes)")
                
                static_attrs = info['attributes'] - info['timeseries_attributes']
                report_lines.append(f"- Static attributes: {len(static_attrs)}")
                report_lines.append(f"- Time series attributes: {len(info['timeseries_attributes'])}")
                
                # List static attributes
                if static_attrs:
                    report_lines.append(f"\n**Static Attributes:**")
                    report_lines.append(f"| Attribute | In Schema |")
                    report_lines.append(f"|-----------|-----------|")
                    for attr in sorted(static_attrs):
                        in_schema = "Yes" if attr in NODE_ATTRIBUTES else "No"
                        report_lines.append(f"| {attr} | {in_schema} |")
                
                # List time series attributes
                if info['timeseries_attributes']:
                    report_lines.append(f"\n**Time Series Attributes:**")
                    report_lines.append(f"| Attribute | In Schema |")
                    report_lines.append(f"|-----------|-----------|")
                    for attr in sorted(info['timeseries_attributes']):
                        in_schema = "Yes" if attr in NODE_ATTRIBUTES else "No"
                        report_lines.append(f"| {attr} | {in_schema} |")
    
    #------------------------#
    # DATA QUALITY VALIDATION
    #------------------------#
    report_lines.append(f"\n## Data Quality Summary")
    
    # Edge validation
    report_lines.append(f"\n### Edge Data Quality")
    report_lines.append("- Basic validation of edge attributes (checking for missing values and anomalies)")
    
    # Count edges with missing required attributes
    required_edge_attrs = ["diameter", "length"]  # Customize based on your requirements
    missing_required = 0
    edge_issues = {}
    
    for u, v, data in graph.edges(data=True):
        edge_id = f"{u}-{v}"
        issues = []
        
        for attr in required_edge_attrs:
            if attr not in data or data[attr] is None:
                issues.append(f"Missing required attribute: {attr}")
                
        if issues:
            edge_issues[edge_id] = issues
            missing_required += 1
    
    report_lines.append(f"- Edges missing required attributes: {missing_required} of {len(list(graph.edges()))}")
    
    # Node validation
    report_lines.append(f"\n### Node Data Quality")
    report_lines.append("- Basic validation of node attributes (checking for missing values and anomalies)")
    
    # Define required node attributes based on node type
    required_node_attrs = {
        "network_heating": ["position", "name"],
        "building": ["position", "name"],
        # Add other node types as needed
    }
    
    # Count nodes with issues
    nodes_with_issues = 0
    node_issues = {}
    
    for node, data in graph.nodes(data=True):
        node_type_value = data.get('node_type', 'unknown')
        
        # Skip if not in specified types
        if node_types and node_type_value not in node_types:
            continue
            
        issues = []
        
        # Check for required attributes based on node type
        if node_type_value in required_node_attrs:
            for attr in required_node_attrs[node_type_value]:
                if attr not in data or data[attr] is None:
                    issues.append(f"Missing required attribute: {attr}")
        
        # Check for time series data consistency
        for attr in data:
            if attr in NODE_ATTRIBUTES and NODE_ATTRIBUTES[attr]["is_timeseries"]:
                value = data[attr]
                # Check if this is a time series but empty or very short
                if hasattr(value, '__len__') and not isinstance(value, (str, dict)):
                    try:
                        if len(value) < 2:  # A time series should have multiple values
                            issues.append(f"Time series attribute '{attr}' has insufficient data points: {len(value)}")
                    except TypeError:
                        # Not a sequence with length
                        pass
        
        if issues:
            node_issues[node] = issues
            nodes_with_issues += 1
    
    analyzed_node_count = len(analyzed_nodes)
    report_lines.append(f"- Nodes with issues: {nodes_with_issues} of {analyzed_node_count} analyzed nodes")
    
    # Join report lines
    report_content = "\n".join(report_lines)
    
    if output_path:
        try:
            from pathlib import Path
            import os
            
            # Debug information
            print(f"Original output_path: {output_path}")
            
            # Explicitly check if output_path is a directory or file path
            output_path_obj = Path(output_path)
            
            # If output_path ends with a directory separator or exists as directory,
            # append a default filename
            if output_path.endswith(('/', '\\')) or (os.path.exists(output_path) and os.path.isdir(output_path)):
                filename = "report.md"
                output_file = output_path_obj / filename
            else:
                output_file = output_path_obj
            
            # Create parent directory if it doesn't exist
            output_file.parent.mkdir(parents=True, exist_ok=True)
            
            # Check write permissions explicitly
            if not os.access(output_file.parent, os.W_OK):
                print(f"WARNING: No write permission for {output_file.parent}")
                # Fall back to user's home directory
                backup_file = Path.home() / "report.md"
                print(f"Falling back to: {backup_file}")
                output_file = backup_file
            
            # Write report to file
            with open(output_file, 'w') as f:
                f.write(report_content)
                
            print(f"Report successfully saved to: {output_file}")
            
        except Exception as e:
            print(f"ERROR: Could not write report: {type(e).__name__}: {str(e)}")
            print(f"Using alternative approach to display report")
            print("=" * 80)
            print(report_content)
            print("=" * 80)
    else:
        print(report_content)

In [None]:
generate_graph_data_report(graph,output_path=target_f)


NODE TYPE ANALYSIS:
--------------------------------------------------------------------------------
Node Type: building (Count: 5)
  Static Attributes: dT_design, is_supply_cooling, is_supply_electricity, is_supply_gas, is_supply_heating, is_supply_other, name, node_type, p_supply, position, press_name, temp_name
  Time Series Attributes: T_supply, input_heat, press_flow, temperature_supply
--------------------------------------------------------------------------------
Node Type: network_heating (Count: 3)
  Static Attributes: name, network_id, node_type, position, press_name, temp_name
  Time Series Attributes: press_flow, temperature_supply
--------------------------------------------------------------------------------
Original output_path: D:\rka-lko\work\2025_04_analysis
Output path was a directory, appending filename: D:\rka-lko\work\2025_04_analysis\report.md
Target file path: D:\rka-lko\work\2025_04_analysis\report.md
Parent directory: D:\rka-lko\work\2025_04_analysis
Report

In [None]:
def print_available_data(graph) -> None:
    """
    Print available data attributes in the graph with their descriptions and highlight discrepancies.
    
    This function compares the actual attributes present in the graph edges with those
    defined in the schema, identifying missing and undocumented attributes.
    
    Parameters:
        graph: NetworkX graph with simulation data
    """
    # Get actual edge attributes present in the graph
    actual_edge_attrs = set()
    for _, _, data in graph.edges(data=True):
        actual_edge_attrs.update(data.keys())
    
    # Get expected attributes from schema
    expected_attrs = set(EDGE_ATTRIBUTES.keys())
    
    # Calculate missing and unexpected attributes
    missing_attrs = expected_attrs - actual_edge_attrs
    unexpected_attrs = actual_edge_attrs - expected_attrs
    
    #logger.info("Available edge attributes in graph:")
    print("\nEDGE ATTRIBUTES:")
    print("-" * 80)
    print(f"{'Attribute':<20} | {'Unit':<10} | {'Type':<15} | Description")
    print("-" * 80)
    
    # Print attributes that are both in the graph and in the schema
    for attr in sorted(actual_edge_attrs & expected_attrs):
        info = EDGE_ATTRIBUTES[attr]
        data_type = "Time series" if info["is_timeseries"] else "Static value"
        unit = info["unit"] if info["unit"] else "-"
        print(f"{attr:<20} | {unit:<10} | {data_type:<15} | {info['description']}")
    
    # Print attributes found in graph but not in schema
    if unexpected_attrs:
        print("\nUNDOCUMENTED ATTRIBUTES (found in graph but not in schema):")
        print("-" * 80)
        for attr in sorted(unexpected_attrs):
            print(f"{attr:<20} | {'?':<10} | {'Unknown':<15} | Not documented in schema")
    
    # Print attributes in schema but not found in graph
    if missing_attrs:
        print("\nMISSING ATTRIBUTES (defined in schema but not found in graph):")
        print("-" * 80)
        for attr in sorted(missing_attrs):
            info = EDGE_ATTRIBUTES[attr]
            data_type = "Time series" if info["is_timeseries"] else "Static value"
            unit = info["unit"] if info["unit"] else "-"
            print(f"{attr:<20} | {unit:<10} | {data_type:<15} | {info['description']}")
    
    print("\n")


In [None]:
def validate_edge_data(graph):
    """
    Validate data for each edge in the graph and report errors or inconsistencies.
    
    This function performs detailed validation on edge data, checking for missing
    required attributes, data type issues, and abnormal values in time series.
    
    Parameters:
        graph: NetworkX graph with simulation data
        
    Returns:
        Dictionary containing validation results with the following structure:
        {
            "total_edges": int,
            "valid_edges": int,
            "invalid_edges": int,
            "edge_issues": {edge_id: [list of issues]},
            "missing_attributes": set of missing attribute names,
            "summary": {issue_type: count}
        }
    """
    validation_results = {
        "total_edges": graph.number_of_edges(),
        "valid_edges": 0,
        "invalid_edges": 0,
        "edge_issues": {},
        "missing_attributes": set(),
        "summary": {}
    }
    
    # Define required attributes (those that should be present on every edge)
    # You can customize this list based on your specific requirements
    required_attrs = ["diameter", "length", "m_flow"] 
    
    # Check each edge
    for u, v in graph.edges():
        edge_id = f"{u}-{v}"
        edge_data = graph.get_edge_data(u, v)
        edge_issues = []
        
        # Check for required attributes
        for attr in required_attrs:
            if attr not in edge_data:
                edge_issues.append(f"Missing required attribute: {attr}")
                validation_results["missing_attributes"].add(attr)
        
        # Check each attribute's data integrity
        for attr, value in edge_data.items():
            # Skip checking if attribute is not in schema
            if attr not in EDGE_ATTRIBUTES:
                continue
                
            expected_type = "time_series" if EDGE_ATTRIBUTES[attr]["is_timeseries"] else "static"
            
            # Check time series data integrity
            if EDGE_ATTRIBUTES[attr]["is_timeseries"]:
                if isinstance(value, pd.Series):
                    # Check for NaN values
                    if hasattr(value, 'isna') and value.isna().any():
                        nan_count = value.isna().sum()
                        edge_issues.append(f"{attr}: Contains {nan_count} NaN values")
            
                    if attr == "temperature" and hasattr(value, 'lt') and (value < 273.15).any():
                        freezing_count = (value < 273.15).sum()
                        edge_issues.append(f"{attr}: Contains {freezing_count} values below freezing point")
                else:
                    edge_issues.append(f"{attr}: Expected time series but got {type(value).__name__}")
            
            # For non-time series, check if the value is None or NaN
            elif value is None or (hasattr(value, 'isna') and value.isna().all()):
                edge_issues.append(f"{attr}: Missing value")
        
        # Store issues for this edge if any
        if edge_issues:
            validation_results["invalid_edges"] += 1
            validation_results["edge_issues"][edge_id] = edge_issues
        else:
            validation_results["valid_edges"] += 1
    
    # Generate summary counts for each type of issue
    issue_counts = {}
    for issues in validation_results["edge_issues"].values():
        for issue in issues:
            issue_type = issue.split(":")[0]
            issue_counts[issue_type] = issue_counts.get(issue_type, 0) + 1
    
    validation_results["summary"] = issue_counts
    
    return validation_results


In [None]:
target_f = r"D:\rka-lko\work\2025_04_analysis"
generate_graph_data_report(graph, target_f,node_types=["building","street","heating","cooling"])

NameError: name 'EDGE_ATTRIBUTES' is not defined

In [None]:
def generate_graph_data_report(graph, output_path=None, node_types=None):
    """
    Generate a comprehensive report on the graph data, attributes, and validation results.
    
    Parameters:
        graph: UESGraph with simulation data
        output_path: Path to save the report (if None, will print to console)
        node_types: List of node types to include in the report (default: ["heating"])
    """
    from datetime import datetime
    import os
    
    # Use default node types if none specified
    if node_types is None:
        node_types = ["heating"]
    
    # Build report content
    report_lines = []
    report_lines.append("# Graph Data Report")
    report_lines.append(f"\nGenerated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    
    # Graph summary section
    report_lines.append(f"\n## Graph Summary")
    
    # Custom handling for UESGraph node counting
    for node_type in node_types:
        try:
            num_nodes = graph.number_of_nodes(node_type=node_type)
            report_lines.append(f"- Number of {node_type} nodes: {num_nodes}")
        except Exception as e:
            report_lines.append(f"- Error counting {node_type} nodes: {str(e)}")
    
    # Handle edge counting - may need to adapt based on your class implementation
    try:
        # First try without parameters
        num_edges = len(list(graph.edges()))
        report_lines.append(f"- Number of edges: {num_edges}")
    except Exception as e:
        report_lines.append(f"- Error counting edges: {str(e)}")
    
    if hasattr(graph, "graph") and isinstance(graph.graph, dict) and "supply_type" in graph.graph:
        report_lines.append(f"- Supply type: {graph.graph['supply_type']}")
    
    # Get actual edge attributes
    actual_edge_attrs = set()
    for u, v, data in graph.edges(data=True):
        actual_edge_attrs.update(data.keys())
    
    # Get expected attributes from schema
    expected_attrs = set(EDGE_ATTRIBUTES.keys())
    
    # Calculate missing and unexpected attributes
    missing_attrs = expected_attrs - actual_edge_attrs
    unexpected_attrs = actual_edge_attrs - expected_attrs
    
    # Attribute summary section
    report_lines.append(f"\n## Attribute Summary")
    report_lines.append(f"- Total attributes in schema: {len(expected_attrs)}")
    report_lines.append(f"- Total attributes in graph: {len(actual_edge_attrs)}")
    report_lines.append(f"- Attributes in both: {len(actual_edge_attrs & expected_attrs)}")
    report_lines.append(f"- Undocumented attributes: {len(unexpected_attrs)}")
    report_lines.append(f"- Missing attributes: {len(missing_attrs)}")
    
    # Documented attributes section
    report_lines.append(f"\n## Documented Attributes in edges")
    report_lines.append(f"Means those attributes that are known and expected by this script.
                        Acessible by calling.")
    report_lines.append(f"| Attribute | Unit | Type | Description |")
    report_lines.append(f"|-----------|------|------|-------------|")
    
    for attr in sorted(actual_edge_attrs & expected_attrs):
        info = EDGE_ATTRIBUTES[attr]
        data_type = "Time series" if info["is_timeseries"] else "Static value"
        unit = info["unit"] if info["unit"] else "-"
        report_lines.append(f"| {attr} | {unit} | {data_type} | {info['description']} |")
    
    # Undocumented attributes section
    if unexpected_attrs:
        report_lines.append(f"\n## Undocumented Attributes")
        report_lines.append(f"| Attribute | Count | Example Value |")
        report_lines.append(f"|-----------|-------|---------------|")
        
        # Get example values and counts
        attr_examples = {}
        attr_counts = {attr: 0 for attr in unexpected_attrs}
        
        for u, v, data in graph.edges(data=True):
            for attr in unexpected_attrs:
                if attr in data:
                    attr_counts[attr] += 1
                    if attr not in attr_examples and data[attr] is not None:
                        # Safely convert to string and truncate long values
                        try:
                            example_value = str(data[attr])[:50]
                            if len(str(data[attr])) > 50:
                                example_value += "..."
                            attr_examples[attr] = example_value
                        except:
                            attr_examples[attr] = "[Complex data]"
        
        for attr in sorted(unexpected_attrs):
            example = attr_examples.get(attr, "None")
            report_lines.append(f"| {attr} | {attr_counts[attr]} | {example} |")
    
    # Missing attributes section
    if missing_attrs:
        report_lines.append(f"\n## Missing Attributes")
        report_lines.append(f"| Attribute | Unit | Type | Description |")
        report_lines.append(f"|-----------|------|------|-------------|")
        for attr in sorted(missing_attrs):
            info = EDGE_ATTRIBUTES[attr]
            data_type = "Time series" if info["is_timeseries"] else "Static value"
            unit = info["unit"] if info["unit"] else "-"
            report_lines.append(f"| {attr} | {unit} | {data_type} | {info['description']} |")
    
    # Validation results section - simplified to avoid further errors
    report_lines.append(f"\n## Data Quality Summary")
    report_lines.append("- Basic validation of edge attributes (checking for missing values and anomalies)")
    
    # Count edges with missing required attributes
    required_attrs = ["diameter", "length"]  # Customize based on your requirements
    missing_required = 0
    edge_issues = {}
    
    for u, v, data in graph.edges(data=True):
        edge_id = f"{u}-{v}"
        issues = []
        
        for attr in required_attrs:
            if attr not in data or data[attr] is None:
                issues.append(f"Missing required attribute: {attr}")
                
        if issues:
            edge_issues[edge_id] = issues
            missing_required += 1
    
    report_lines.append(f"- Edges missing required attributes: {missing_required} of {len(list(graph.edges()))}")
    
    # Join report lines
    report_content = "\n".join(report_lines)
    
    # Output report
    if output_path:
        try:
            # Make sure the directory exists
            os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
            
            # Write the report
            with open(output_path, 'w') as f:
                f.write(report_content)
            print(f"Report generated and saved to: {output_path}")
        except PermissionError:
            print(f"ERROR: Permission denied when writing to {output_path}")
            print("Please check that you have write permissions for this location.")
            print("The report will be printed to console instead:\n")
            print(report_content)
        except Exception as e:
            print(f"ERROR: Could not write to {output_path}: {str(e)}")
            print("The report will be printed to console instead:\n")
            print(report_content)
    else:
        print(report_content)

SyntaxError: unterminated f-string literal (detected at line 66) (1930816951.py, line 66)

In [None]:
def print_available_data(graph) -> None:
    """
    Print available data attributes in the graph with their descriptions.
    
    Parameters:
        graph: NetworkX graph with simulation data
    """
    # Get actual edge attributes present in the graph
    actual_edge_attrs = set()
    for _, _, data in graph.edges(data=True):
        actual_edge_attrs.update(data.keys())
    
    #logger.info("Available edge attributes in graph:")
    print("\nEDGE ATTRIBUTES:")
    print("-" * 80)
    print(f"{'Attribute':<20} | {'Unit':<10} | {'Type':<15} | Description")
    print("-" * 80)
    
    for attr in sorted(actual_edge_attrs):
        if attr in EDGE_ATTRIBUTES:
            info = EDGE_ATTRIBUTES[attr]
            data_type = "Time series" if info["is_timeseries"] else "Static value"
            unit = info["unit"] if info["unit"] else "-"
            print(f"{attr:<20} | {unit:<10} | {data_type:<15} | {info['description']}")
        else:
            print(f"{attr:<20} | {'?':<10} | {'Unknown':<15} | Not documented in schema")
    
    print("\n")

In [None]:
summarize_edge_data(graph)

Unnamed: 0,Unnamed: 1,diameter,length,pipeID,name,node_0,node_1,m_flow_nom,fac,dIns,kIns,...,m_flow_min,press_drop_mean,press_drop_max,press_drop_min,press_drop_length_mean,press_drop_length_max,press_drop_length_min,temp_diff_mean,temp_diff_max,temp_diff_min
1001,1006,0.0825,155,1,1,S1,N1,0.3,1,0.045,0.024,...,0.093189,3778.412109,11141.875,1284.6875,24.376852,71.883064,8.288306,1.059535,5.120117,0.419495
1002,1008,0.0825,175,5,5,S2,N3,0.3,1,0.045,0.024,...,0.084941,3612.293945,10991.875,1208.75,20.64168,62.810715,6.907143,1.276687,5.251984,0.569702
1003,1006,0.0273,15,6,6,B1,N1,0.3,1,0.045,0.024,...,-0.322474,597.662903,1487.8125,157.375,39.844196,99.1875,10.491667,0.336957,2.277344,0.0
1004,1007,0.0273,15,7,7,B2,N2,0.3,1,0.045,0.024,...,-0.201622,88.286209,581.625,1.125,5.885747,38.775002,0.075,0.422453,2.43457,0.0
1005,1008,0.0273,15,8,8,B3,N3,0.3,1,0.045,0.024,...,-0.134179,29.77931,257.5625,0.0,1.985287,17.170834,0.0,2.451359,13.180115,0.0
1006,1007,0.0825,100,2,2,N1,N2,0.3,1,0.045,0.024,...,-0.044124,98.228447,735.125,0.125,0.982284,7.35125,0.00125,0.267857,3.022766,0.0
1006,1008,0.05,45,4,4,N1,N3,0.3,1,0.045,0.024,...,-0.107846,166.121552,499.1875,0.25,3.69159,11.093056,0.005556,0.267857,3.022766,0.0
1007,1008,0.0825,70,3,3,N2,N3,0.3,1,0.045,0.024,...,-0.11393,240.525864,866.625,111.25,3.436084,12.380357,1.589286,0.0,0.0,0.0


In [None]:
def summarize_edge_data(graph, edge_list= None) -> pd.DataFrame:
    """
    Create a summary DataFrame of static edge attributes.
    
    Parameters:
        graph: NetworkX graph with simulation data
        edge_list: Optional list of edges to include (default: all edges)
    
    Returns:
        DataFrame with static edge attributes
    """
    if edge_list is None:
        edge_list = list(graph.edges())
    
    # Create a dictionary to store the data
    data_dict = {}
    
    for u, v in edge_list:
        edge_data = graph.get_edge_data(u, v)
        row_data = {}
        
        # Add static attributes
        for attr, value in edge_data.items():
            if attr in EDGE_ATTRIBUTES and not EDGE_ATTRIBUTES[attr]["is_timeseries"]:
                row_data[attr] = value
            elif attr in EDGE_ATTRIBUTES and EDGE_ATTRIBUTES[attr]["is_timeseries"]:
                # For time series, add some summary statistics
                if isinstance(value, pd.Series):
                    row_data[f"{attr}_mean"] = value.mean()
                    row_data[f"{attr}_max"] = value.max()
                    row_data[f"{attr}_min"] = value.min()
        
        data_dict[(u, v)] = row_data
    
    return pd.DataFrame.from_dict(data_dict, orient='index')

In [None]:
import pandas as pd
import pyarrow.parquet as pq
def extract_units_from_parquet(file_path: str):
    """
    Extract units from parquet file metadata if available.
    
    Args:
        file_path: Path to the parquet file
        
    Returns:
        Dictionary mapping column names to their units
    """
    parquet_file = pq.ParquetFile(file_path)
    units_dict = {}
    
    # Try to access file-level metadata first
    if parquet_file.metadata.metadata is not None:
        for key, value in parquet_file.metadata.metadata.items():
            try:
                # Look for unit-related metadata keys
                key_str = key.decode('utf-8') if isinstance(key, bytes) else key
                value_str = value.decode('utf-8') if isinstance(value, bytes) else value
                
                if 'unit' in key_str.lower():
                    # This might contain column-to-unit mappings in some format
                    # Parse accordingly if needed
                    pass
            except:
                # Skip keys that can't be decoded
                continue
    
    # Check schema-level metadata for each column
    schema = parquet_file.schema
    for i in range(schema.num_columns):
        col = schema.column(i)
        col_name = col.name
        
        # Check if the column path contains information about units in brackets like "temperature [K]"
        if '[' in col_name and ']' in col_name:
            unit_start = col_name.rfind('[')
            unit_end = col_name.rfind(']')
            if unit_start < unit_end:
                unit = col_name[unit_start+1:unit_end].strip()
                clean_name = col_name[:unit_start].strip()
                units_dict[col_name] = unit
                # Also store the mapping for the clean name without unit
                units_dict[clean_name] = unit
    
    return units_dict

In [None]:
    ### Plotting
    #Visuals cant handle series data, so we just take the mean values, but single time points are possible
    vis = ug.Visuals(graph)
    for edge in graph.edges:
        graph.edges[edge]["m_flow_mean"] = graph.edges[edge]["m_flow"].mean()
    vis.show_network(show_plot=False,
                           scaling_factor=1,
                           scaling_factor_diameter=50,
                           label_size=15,
                           ylabel="Mean mass flow [kg/s]",
                           generic_extensive_size="m_flow_mean",
                           save_as=os.path.join(workspace, "m_flow.png"),
                           timestamp=f"{graph.graph["name"]}: Mean mass flow"
                           )
    
    for node in graph.nodes:
        graph.nodes[node]["press_flow_mean"] = graph.nodes[node]["press_flow"].mean()
    vis.show_network(show_plot=False,
                           scaling_factor=1,
                           scaling_factor_diameter=50,
                           ylabel="Mean pressure [Pa]",
                           label_size=15,
                           generic_intensive_size="press_flow_mean",
                           save_as=os.path.join(workspace, "press_flow.png"),
                           timestamp=f"{graph.graph["name"]}: Mean pressure flow"
                           )
if __name__ == "__main__":
    main()