# Interactive graph visualization with yFiles and Kuzu

In [1]:
import shutil
from pathlib import Path

import kuzu
import polars as pl
from yfiles_jupyter_graphs_for_kuzu import KuzuGraphWidget

In [2]:
DB_NAME = "ex_kuzu_db"

# Start from a clean slate so that re-running the notebook does not try to
# COPY the same primary keys into tables left over from a previous run.
# NOTE(review): depending on the Kuzu version, the database path may be a
# directory or a single file; `shutil.rmtree(..., ignore_errors=True)` silently
# leaves a plain file in place, so both cases are handled explicitly here.
db_path = Path(DB_NAME)
if db_path.is_dir():
    shutil.rmtree(db_path, ignore_errors=True)
else:
    db_path.unlink(missing_ok=True)

# Open (and thereby create) the database, then a connection used by every
# query and by the graph widget further down.
db = kuzu.Database(DB_NAME)
conn = kuzu.Connection(db)

## Nodes
We first scan the nodes JSON file and extract the contents relevant to our graph schema.

In [3]:
# --- Nodes ---

# Load the raw node records, then drop the `position` and `selected` fields
# and flatten the nested `data` struct into top-level columns
nodes_raw = pl.read_json("data/nodes.json")
nodes_df = nodes_raw.drop("position", "selected").unnest("data")
nodes_df.head()

id,Strength,selected,cytoscape_alias_list,canonicalName,Milk,Synonym,Quality,Type,SUID,NodeType,name,Country,shared_name
str,i64,bool,list[str],str,str,str,i64,str,i64,str,str,str,str
"""430""",5.0,False,"[""Aarauer Bierdeckel""]","""Aarauer Bierdeckel""","""Raw cow's milk""","""Kuentener""",90.0,"""Semi-soft""",430,"""Cheese""","""Aarauer Bierdeckel""","""Switzerland""","""Aarauer Bierdeckel"""
"""429""",,False,"[""Bergues""]","""Bergues""",,,,,429,"""Cheese""","""Bergues""",,"""Bergues"""
"""428""",,False,"[""Chimay""]","""Chimay""",,,,,428,"""Cheese""","""Chimay""",,"""Chimay"""
"""427""",,False,"[""Fiumorbu""]","""Fiumorbu""",,,,,427,"""Cheese""","""Fiumorbu""",,"""Fiumorbu"""
"""426""",,False,"[""Beaujolais""]","""Beaujolais""",,,,,426,"""RedWine""","""Beaujolais""",,"""Beaujolais"""


The relevant columns `id` and `name` are defined as properties for the `RedWine` and `WhiteWine` nodes. For the `Cheese` nodes, we include additional metadata, such as `strength`, `quality` and `milk` (indicating what milk the cheese is made from).

In [4]:
# Create node tables: one DDL statement per node label, executed in order.
# Wines only carry an id and a name; cheeses carry three extra properties.
node_table_statements = [
    "CREATE NODE TABLE IF NOT EXISTS RedWine(id INT64 PRIMARY KEY, name STRING)",
    "CREATE NODE TABLE IF NOT EXISTS WhiteWine(id INT64 PRIMARY KEY, name STRING)",
    """
    CREATE NODE TABLE IF NOT EXISTS Cheese(
        id INT64 PRIMARY KEY,
        name STRING,
        strength INT64,
        quality INT64,
        milk STRING
    )
""",
]
for statement in node_table_statements:
    conn.execute(statement)

# Copy node data: each table is filled from the rows of `nodes_df` whose
# NodeType matches the table, with the SUID column used as the `id` key
copy_red_wines = """
    COPY RedWine FROM (
        LOAD FROM nodes_df
        WHERE NodeType = "RedWine"
        RETURN SUID AS id, name
    )
    """
copy_white_wines = """
    COPY WhiteWine FROM (
        LOAD FROM nodes_df
        WHERE NodeType = "WhiteWine"
        RETURN SUID AS id, name
    )
    """
# Cheese rows additionally map the capitalised source columns onto the
# lower-case property names declared on the Cheese table
copy_cheeses = """
    COPY Cheese FROM (
        LOAD FROM nodes_df
        WHERE NodeType = "Cheese"
        RETURN
          SUID AS id, name,
          Strength AS strength,
          Quality AS quality,
          Milk AS milk
    )
    """

conn.execute(copy_red_wines)
conn.execute(copy_white_wines)
conn.execute(copy_cheeses)

<kuzu.query_result.QueryResult at 0x10f527530>

## Edges

The edges consist of pairings of red wines, white wines and cheeses. Cheeses can pair with each other as they are together considered to "go with" a particular wine. The contents of the edges JSON file are scanned and the relevant columns are isolated for the relationship tables in Kuzu.

In [5]:
# --- Edges ---

# Load the edge records and keep only the two endpoint columns, cast to
# 64-bit integers so they can be compared against the INT64 node ids
edges_df = (
    pl.read_json("data/edges.json")
    .drop("selected")
    .unnest("data")
    .select(pl.col("source", "target").cast(pl.Int64))
)

# Create edge tables: a single PAIRS_WITH relationship that always starts at
# a Cheese and may end at another Cheese, a RedWine or a WhiteWine
create_pairs_with = """
    CREATE REL TABLE IF NOT EXISTS PAIRS_WITH(
        FROM Cheese TO Cheese,
        FROM Cheese TO RedWine,
        FROM Cheese TO WhiteWine
    )
    """

# Each query below scans every row of `edges_df`, but the MATCH only succeeds
# when the source is a Cheese and the target has the stated label, so each
# query creates just the edges for its own endpoint combination.

# Cheese -> RedWine
merge_cheese_red_wine = """
    LOAD FROM edges_df
    MATCH (s1:Cheese {id: source}), (t1:RedWine {id: target})
    MERGE (s1)-[:PAIRS_WITH]->(t1)
    """

# Cheese -> WhiteWine
merge_cheese_white_wine = """
    LOAD FROM edges_df
    MATCH (s2:Cheese {id: source}), (t2:WhiteWine {id: target})
    MERGE (s2)-[:PAIRS_WITH]->(t2)
    """

# Cheese -> Cheese
merge_cheese_cheese = """
    LOAD FROM edges_df
    MATCH (s3:Cheese {id: source}), (t3:Cheese {id: target})
    MERGE (s3)-[:PAIRS_WITH]->(t3)
    """

conn.execute(create_pairs_with)
conn.execute(merge_cheese_red_wine)
conn.execute(merge_cheese_white_wine)
conn.execute(merge_cheese_cheese)

<kuzu.query_result.QueryResult at 0x11a9eeb50>

## Visualization
We're now ready to interactively visualize our graph! Using the same underlying connection object, we can instantiate a `KuzuGraphWidget` using the yFiles Jupyter Graph library that connects to a Kuzu database.

In [6]:
def name_label(node: dict) -> dict:
    """Return the label styling for a node.

    The label shows the node's `name` property below the node ("south"),
    in white text on a semi-transparent black background.
    """
    return {
        "text": node["properties"]["name"],
        "position": "south",
        "color": "#FFFFFF",
        "backgroundColor": "rgba(0,0,0,0.7)",
    }


# Make the colours of each node more intuitive - red wine nodes are burgundy in colour!
NODE_COLORS = {
    "Cheese": "yellow",
    "RedWine": "#800020",
    "WhiteWine": "white",
}

# The widget reuses the connection that was used to build the graph
g = KuzuGraphWidget(conn)

# Every node label shares the same text styling; only the fill colour differs
for node_label, node_color in NODE_COLORS.items():
    g.add_node_configuration(node_label, color=node_color, text=name_label)  # type: ignore

In [7]:
# Cheeses that go with Chianti Classico (Static)
# No layout is passed, so the widget's default layout is used.
chianti_pairings_query = """
    MATCH (cheese:Cheese)-[r:PAIRS_WITH]->(w:RedWine)
    WHERE w.name = "Chianti Classico"
    RETURN * LIMIT 50;
    """
g.show_cypher(chianti_pairings_query)

GraphWidget(layout=Layout(height='770px', width='100%'))

In [8]:
# Are there any paths between Gruyere and Cheshire cheeses? (Interactive)
# The pattern is undirected and allows between one and three hops of any
# relationship type between the two named cheeses.
gruyere_cheshire_query = """
    MATCH (c1:Cheese)-[r *1..3]-(c2:Cheese)
    WHERE c1.name = "Cheshire" AND c2.name = "Gruyere"
    RETURN DISTINCT * LIMIT 50;
    """
g.show_cypher(gruyere_cheshire_query, layout="interactive_organic")

GraphWidget(layout=Layout(height='500px', width='100%'))

In [9]:
# How are Brie cheeses connected to Munster cheeses? (Tree)
# The pattern follows one to four undirected PAIRS_WITH hops; the filter inside
# the relationship pattern keeps only paths whose intermediate nodes are Cheese
# nodes, so the connection never passes through a wine.
# The layout name is written in lower case ("tree") to match the other layout
# identifiers used in this notebook.
g.show_cypher(
    """
    MATCH (c1:Cheese)-[r:PAIRS_WITH *1..4 (_, n | WHERE label(n) = "Cheese") ]-(c2:Cheese)
    WHERE c1.name CONTAINS "Brie" AND c2.name CONTAINS "Munster"
    RETURN DISTINCT * LIMIT 50;
    """,
    layout="tree"
)

GraphWidget(layout=Layout(height='500px', width='100%'))

In [10]:
# Starting from Gruyere, what wines can we pair with it? (Radial)
# Follows outgoing relationships only, up to three hops away from the cheese.
gruyere_pairings_query = """
    MATCH (c1:Cheese)-[r *1..3]->(x)
    WHERE c1.name = "Gruyere"
    RETURN * LIMIT 50;
    """
g.show_cypher(gruyere_pairings_query, layout="radial")

GraphWidget(layout=Layout(height='790px', width='100%'))

In [None]:
# What cheeses go with Californian Reds and Tuscan Reds? (Circular)
# A cheese qualifies when it has one edge to a red wine whose name contains
# "California" and another edge to a red wine whose name contains "Tuscan".
california_tuscan_query = """
    MATCH (w1:RedWine)<-[r1]-(c:Cheese)-[r2]->(w2:RedWine)
    WHERE w1.name CONTAINS "California" AND w2.name CONTAINS "Tuscan"
    RETURN * LIMIT 50;
    """
g.show_cypher(california_tuscan_query, layout="circular")