# Metadata Comments Example

**Example: Extracting metadata from SQL inline comments.**


Demonstrates how inline SQL comments can automatically populate
column metadata (descriptions, PII flags, ownership, tags).

### Imports and Metadata Extraction

In [1]:
from clgraph.pipeline import Pipeline

# SQL with inline metadata comments.
# Each select-list item is followed by a comment of the form
#   <description> [key: value, key2: value2]
# Both `--` line comments and a `/* ... */` block comment are used below.
sql = """
SELECT
  user_id,  -- User identifier [pii: false]
  email,    -- Email address [pii: true, owner: data-team]

  UPPER(email) as email_upper,  -- Uppercased email [pii: true]

  COUNT(*) as login_count,  -- Number of logins [tags: metric engagement]

  SUM(revenue) as total_revenue  /* Total revenue [pii: false, owner: finance-team, tags: metric revenue] */

FROM user_activity
GROUP BY user_id, email
"""

# Create a pipeline containing the single query, registered as "user_metrics"
pipeline = Pipeline([("user_metrics", sql)], dialect="bigquery")

# Horizontal rule reused for every banner line of the report
rule = "=" * 70

# Display extracted metadata
print(rule)
print("SQL COMMENT METADATA EXTRACTION")
print(rule)

# Iterate in sorted key order so the report is stable between runs.
for _full_name, col in sorted(pipeline.columns.items()):
    # Only show output columns (those starting with query name).
    # NOTE(review): the output saved with this notebook contains no per-column
    # lines, so this prefix may not match the names clgraph assigns to the
    # query's output columns -- confirm against the installed clgraph version.
    if col.full_name.startswith("user_metrics."):
        print(f"\n📊 {col.column_name}")
        print(f"   Full name: {col.full_name}")

        # Each attribute is printed only when it is truthy, so columns
        # without a given piece of metadata simply omit that line.
        if col.description:
            print(f"   📝 Description: {col.description}")

        if col.pii:
            print("   🔒 PII: Yes")

        if col.owner:
            print(f"   👤 Owner: {col.owner}")

        if col.tags:
            # Sort the tags so their order is deterministic
            print(f"   🏷️  Tags: {', '.join(sorted(col.tags))}")

        if col.custom_metadata:
            print(f"   ⚙️  Custom: {col.custom_metadata}")

# Closing summary: comment format and the recognised metadata keys
print("\n" + rule)
print("\n✅ Metadata automatically extracted from SQL comments!")
print("   Format: -- Description [key: value, key2: value2]")
print("\nSupported metadata:")
print("   • description - Natural language description")
print("   • pii - PII flag (true/false)")
print("   • owner - Data owner/team")
print("   • tags - Space-separated tags")
print("   • custom fields - Any other key-value pairs")
print(rule)

SQL COMMENT METADATA EXTRACTION


✅ Metadata automatically extracted from SQL comments!
   Format: -- Description [key: value, key2: value2]

Supported metadata:
   • description - Natural language description
   • pii - PII flag (true/false)
   • owner - Data owner/team
   • tags - Space-separated tags
   • custom fields - Any other key-value pairs


### Code (without banner lines)

In [2]:
from clgraph.pipeline import Pipeline

# SQL with inline metadata comments.
# Each select-list item is followed by a comment of the form
#   <description> [key: value, key2: value2]
# Both `--` line comments and a `/* ... */` block comment are used below.
sql = """
SELECT
  user_id,  -- User identifier [pii: false]
  email,    -- Email address [pii: true, owner: data-team]

  UPPER(email) as email_upper,  -- Uppercased email [pii: true]

  COUNT(*) as login_count,  -- Number of logins [tags: metric engagement]

  SUM(revenue) as total_revenue  /* Total revenue [pii: false, owner: finance-team, tags: metric revenue] */

FROM user_activity
GROUP BY user_id, email
"""

# Create a pipeline containing the single query, registered as "user_metrics"
pipeline = Pipeline([("user_metrics", sql)], dialect="bigquery")

# Display extracted metadata
print("SQL COMMENT METADATA EXTRACTION")

# Iterate in sorted key order so the report is stable between runs.
for _full_name, col in sorted(pipeline.columns.items()):
    # Only show output columns (those starting with query name).
    # NOTE(review): the output saved with this notebook contains no per-column
    # lines, so this prefix may not match the names clgraph assigns to the
    # query's output columns -- confirm against the installed clgraph version.
    if col.full_name.startswith("user_metrics."):
        print(f"\n📊 {col.column_name}")
        print(f"   Full name: {col.full_name}")

        # Each attribute is printed only when it is truthy, so columns
        # without a given piece of metadata simply omit that line.
        if col.description:
            print(f"   📝 Description: {col.description}")

        if col.pii:
            print("   🔒 PII: Yes")

        if col.owner:
            print(f"   👤 Owner: {col.owner}")

        if col.tags:
            # Sort the tags so their order is deterministic
            print(f"   🏷️  Tags: {', '.join(sorted(col.tags))}")

        if col.custom_metadata:
            print(f"   ⚙️  Custom: {col.custom_metadata}")

# Closing summary: comment format and the recognised metadata keys
print("\n✅ Metadata automatically extracted from SQL comments!")
print("   Format: -- Description [key: value, key2: value2]")
print("\nSupported metadata:")
print("   • description - Natural language description")
print("   • pii - PII flag (true/false)")
print("   • owner - Data owner/team")
print("   • tags - Space-separated tags")
print("   • custom fields - Any other key-value pairs")

SQL COMMENT METADATA EXTRACTION

✅ Metadata automatically extracted from SQL comments!
   Format: -- Description [key: value, key2: value2]

Supported metadata:
   • description - Natural language description
   • pii - PII flag (true/false)
   • owner - Data owner/team
   • tags - Space-separated tags
   • custom fields - Any other key-value pairs


### Visualize Pipeline Lineage

Display the simplified column lineage for the metadata comments pipeline.

In [None]:
import shutil

from clgraph import visualize_pipeline_lineage

# Rendering is skipped when the Graphviz `dot` executable is not on PATH;
# a hint is printed instead so the cell never raises for that reason.
if shutil.which("dot") is None:
    print("⚠️  Graphviz not installed. Install with: brew install graphviz")
else:
    print("Metadata Comments Pipeline - Simplified Lineage:")
    # `pipeline` comes from the earlier example cell; the column graph is
    # reduced with to_simplified() before being handed to the visualizer.
    display(visualize_pipeline_lineage(pipeline.column_graph.to_simplified()))