# Values Clause Lineage

**Example: VALUES Clause (Inline Table Literal) Support for Column Lineage**


Demonstrates how clgraph tracks column lineage through VALUES clauses:
- Inline literal data detection
- Column alias extraction
- Type inference from sample values
- Lineage through literal columns

### Imports

In [1]:
from clgraph import JSONExporter, Pipeline, RecursiveLineageBuilder
from clgraph.query_parser import RecursiveQueryParser

### Example 1: Simple VALUES clause

In [2]:
# Example 1: parse a query whose only source is an inline VALUES list and
# print what the parser recorded for each VALUES source.
print("Example 1: Simple VALUES Clause")

# The derived table is aliased as t with explicit column names (id, name).
sql_simple = """
SELECT id, name
FROM (VALUES (1, 'Alice'), (2, 'Bob'), (3, 'Charlie')) AS t(id, name)
"""

parser = RecursiveQueryParser(sql_simple, dialect="postgres")
unit_graph = parser.parse()

# Heading and query text, one per line.
print("\nSQL:", sql_simple, sep="\n")

# Only the unit registered under "main" is inspected here.
unit = unit_graph.units["main"]
print("\nVALUES Sources:")
for alias, values_info in unit.values_sources.items():
    # One report per VALUES alias: column names, types, row count, samples.
    print(
        f"  {alias}:",
        f"    Columns: {values_info.column_names}",
        f"    Types: {values_info.column_types}",
        f"    Rows: {values_info.row_count}",
        f"    Sample: {values_info.sample_values}",
        sep="\n",
    )

Example 1: Simple VALUES Clause

SQL:

SELECT id, name
FROM (VALUES (1, 'Alice'), (2, 'Bob'), (3, 'Charlie')) AS t(id, name)


VALUES Sources:
  t:
    Columns: ['id', 'name']
    Types: ['integer', 'string']
    Rows: 3
    Sample: [[1, 'Alice'], [2, 'Bob'], [3, 'Charlie']]


### Example 2: Column lineage

In [3]:
# Example 2: build the column lineage graph for a VALUES-backed query and
# list its nodes and edges.
print("Example 2: Column Lineage Through VALUES")

sql_lineage = """
SELECT id, name FROM (VALUES (1, 'Alice'), (2, 'Bob')) AS t(id, name)
"""

builder = RecursiveLineageBuilder(sql_lineage, dialect="postgres")
graph = builder.build()

# Heading and query text, one per line.
print("\nSQL:", sql_lineage, sep="\n")

print("\nColumn Nodes:")
for name, node in graph.nodes.items():
    if not node.is_literal:
        # Non-literal nodes are listed by name only.
        print(f"  {name}")
        continue
    # Literal nodes get a marker plus their literal type and values.
    print(f"  {name} [LITERAL]")
    print(f"    Type: {node.literal_type}")
    print(f"    Values: {node.literal_values}")

print("\nEdges:")
for edge in graph.edges:
    source_name = edge.from_node.full_name
    target_name = edge.to_node.full_name
    print(f"  {source_name} -> {target_name} [{edge.edge_type}]")

Example 2: Column Lineage Through VALUES

SQL:

SELECT id, name FROM (VALUES (1, 'Alice'), (2, 'Bob')) AS t(id, name)


Column Nodes:
  output.id
  t.id [LITERAL]
    Type: integer
    Values: [1, 2]
  output.name
  t.name [LITERAL]
    Type: string
    Values: ['Alice', 'Bob']

Edges:
  t.id -> output.id [literal_source]
  t.name -> output.name [literal_source]


### Example 3: VALUES with JOIN

In [4]:
# Example 3: a regular table joined against an inline VALUES list; show
# which sources the parser files under tables and which under VALUES.
print("Example 3: VALUES with JOIN")

# users is a named table; l(id, label) is the inline VALUES list.
sql_join = """
SELECT u.id, u.name, l.label
FROM users u
JOIN (VALUES (1, 'admin'), (2, 'user')) AS l(id, label)
ON u.role_id = l.id
"""

parser2 = RecursiveQueryParser(sql_join, dialect="postgres")
unit_graph2 = parser2.parse()

# Heading and query text, one per line.
print("\nSQL:", sql_join, sep="\n")

unit2 = unit_graph2.units["main"]
print(
    "\nTables and VALUES sources:",
    f"  Tables: {unit2.depends_on_tables}",
    f"  VALUES sources: {list(unit2.values_sources.keys())}",
    sep="\n",
)

Example 3: VALUES with JOIN

SQL:

SELECT u.id, u.name, l.label
FROM users u
JOIN (VALUES (1, 'admin'), (2, 'user')) AS l(id, label)
ON u.role_id = l.id


Tables and VALUES sources:
  Tables: ['users']
  VALUES sources: ['l']


### Example 4: VALUES in Pipeline

In [5]:
# Example 4: run a CREATE TABLE ... AS SELECT over a VALUES list through a
# one-query Pipeline and list the columns of its column graph.
print("Example 4: VALUES in Pipeline")

sql_pipeline = """
CREATE TABLE enriched AS
SELECT id, name FROM (VALUES (1, 'A'), (2, 'B')) AS t(id, name)
"""

# The pipeline holds a single (query_id, sql) pair.
pipeline = Pipeline([("create_lookup", sql_pipeline)], dialect="postgres")

# Heading and query text, one per line.
print("\nSQL:", sql_pipeline, sep="\n")

print("\nPipeline columns:")
for name, col in pipeline.column_graph.columns.items():
    # Literal columns carry a marker with their literal type; others do not.
    literal_suffix = f" [LITERAL, type={col.literal_type}]" if col.is_literal else ""
    print(f"  {name}{literal_suffix}")

Example 4: VALUES in Pipeline

SQL:

CREATE TABLE enriched AS
SELECT id, name FROM (VALUES (1, 'A'), (2, 'B')) AS t(id, name)


Pipeline columns:
  enriched.id
  create_lookup:unknown.id [LITERAL, type=integer]
  enriched.name
  create_lookup:unknown.name [LITERAL, type=string]


### Example 5: Type inference

In [6]:
# Example 5: a VALUES list mixing integer, string, decimal and boolean
# literals; print the type recorded for each column.
print("Example 5: Type Inference")

sql_types = """
SELECT *
FROM (VALUES
    (1, 'text', 3.14, true),
    (2, 'more', 2.71, false)
) AS t(int_col, str_col, float_col, bool_col)
"""

parser3 = RecursiveQueryParser(sql_types, dialect="postgres")
unit_graph3 = parser3.parse()

# Heading and query text, one per line.
print("\nSQL:", sql_types, sep="\n")

# Take the first VALUES source of the "main" unit.
main_values_sources = unit_graph3.units["main"].values_sources
values_info = list(main_values_sources.values())[0]
print("\nInferred Types:")
# Pair each column name with the type at the same position.
for col_name, col_type in zip(
    values_info.column_names,
    values_info.column_types,
    strict=False,
):
    print(f"  {col_name}: {col_type}")

Example 5: Type Inference

SQL:

SELECT *
FROM (VALUES
    (1, 'text', 3.14, true),
    (2, 'more', 2.71, false)
) AS t(int_col, str_col, float_col, bool_col)


Inferred Types:
  int_col: integer
  str_col: string
  float_col: numeric
  bool_col: boolean


### Example 6: JSON Export

In [7]:
import json

# Example 6: export a one-query pipeline with JSONExporter, print every
# exported column entry flagged as literal, then print a closing summary.
print("Example 6: JSON Export with Literal Metadata")

sql_export = """
SELECT id, name FROM (VALUES (1, 'Alice')) AS t(id, name)
"""

# The pipeline holds a single (query_id, sql) pair.
export_pipeline = Pipeline([("values_query", sql_export)], dialect="postgres")
exporter = JSONExporter()
export_data = exporter.export(export_pipeline)

print("\nSQL:")
print(sql_export)

print("\nExported literal columns:")
# .get() with a default skips a missing "columns" list or "is_literal" key
# instead of raising.
for col in export_data.get("columns", []):
    if col.get("is_literal"):
        print(json.dumps(col, indent=2))

# Summary
# NOTE(review): the start of every line in this text had been cut off
# ("ES clause ...", "cursiveQueryParser ..."); the words are restored here.
# The "- " bullet marker is a reconstruction; confirm against the original.
print("Summary")
print(
    """
VALUES clause support captures:
- Inline literal data detection (table literals)
- Column aliases from AS t(col1, col2) syntax
- Type inference from values (integer, string, numeric, boolean)
- Sample values stored for reference
- Literal column nodes in lineage graph

This metadata is preserved through:
- RecursiveQueryParser (query structure analysis)
- RecursiveLineageBuilder (column lineage analysis)
- Pipeline (multi-query analysis)
- JSON export

VALUES clauses are commonly used for:
- Test data and examples
- Lookup/mapping tables
- Static configuration data
- Small inline reference tables
"""
)

Example 6: JSON Export with Literal Metadata

SQL:

SELECT id, name FROM (VALUES (1, 'Alice')) AS t(id, name)


Exported literal columns:
{
  "full_name": "values_query:unknown.id",
  "column_name": "id",
  "table_name": "t",
  "query_id": "values_query",
  "node_type": "literal",
  "expression": "VALUES(...)",
  "operation": "literal",
  "is_literal": true,
  "literal_type": "integer",
  "literal_values": [
    1
  ],
  "description": null,
  "description_source": null,
  "owner": null,
  "pii": false,
  "tags": [],
  "custom_metadata": {}
}
{
  "full_name": "values_query:unknown.name",
  "column_name": "name",
  "table_name": "t",
  "query_id": "values_query",
  "node_type": "literal",
  "expression": "VALUES(...)",
  "operation": "literal",
  "is_literal": true,
  "literal_type": "string",
  "literal_values": [
    "Alice"
  ],
  "description": null,
  "description_source": null,
  "owner": null,
  "pii": false,
  "tags": [],
  "custom_metadata": {}
}
Summary

ES clause support capt

### Visualize Pipeline Lineage

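Display the simplified column lineage for the two pipelines built above (`pipeline` from Example 4 and `export_pipeline` from Example 6). Rendering requires the Graphviz `dot` executable on the PATH.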

In [None]:
import shutil

from clgraph import visualize_pipeline_lineage

# Rendering is attempted only when the Graphviz "dot" executable is found on
# the PATH; otherwise an install hint is printed.
if shutil.which("dot") is not None:
    # Each heading is followed by the simplified lineage of one pipeline
    # built in an earlier cell.
    for heading, source_pipeline in (
        ("VALUES Pipeline - Simplified Lineage:", pipeline),
        ("\nExport Pipeline - Simplified Lineage:", export_pipeline),
    ):
        print(heading)
        simplified_graph = source_pipeline.column_graph.to_simplified()
        # display is not imported here; presumably the notebook runtime provides it.
        display(visualize_pipeline_lineage(simplified_graph))
else:
    print("⚠️  Graphviz not installed. Install with: brew install graphviz")