1. Data preparation for Iceberg tables (skip this step if you are using another data source).

This step creates Iceberg tables that match the graph schema and then inserts data into those tables.

In [None]:
# Account table: create it, load five sample accounts, then display the rows.
create_account_sql = """
    CREATE TABLE demo.financialGraph.Account (
        name STRING,
        isBlocked BOOLEAN
    ) USING iceberg
"""
insert_account_sql = """
    INSERT INTO demo.financialGraph.Account (name, isBlocked) VALUES
    ('Scott', FALSE),
    ('Jenny', FALSE),
    ('Steven', TRUE),
    ('Paul', FALSE),
    ('Ed', FALSE)
"""
# The DDL runs first so the table exists before the rows are inserted.
for account_statement in (create_account_sql, insert_account_sql):
    spark.sql(account_statement)

print("Displaying data from Account table:")
account_rows = spark.sql("SELECT * FROM demo.financialGraph.Account")
account_rows.show()
# Expected output:
# +------+---------+
# |  name|isBlocked|
# +------+---------+
# | Scott|    false|
# | Jenny|    false|
# |Steven|     true|
# |  Paul|    false|
# |    Ed|    false|
# +------+---------+

# City table: create it, load three sample cities, then display the rows.
create_city_sql = """
    CREATE TABLE demo.financialGraph.City (
        name STRING
    ) USING iceberg
"""
insert_city_sql = """
    INSERT INTO demo.financialGraph.City (name) VALUES
    ('New York'),
    ('Gainesville'),
    ('San Francisco')
"""
# The DDL runs first so the table exists before the rows are inserted.
for city_statement in (create_city_sql, insert_city_sql):
    spark.sql(city_statement)

print("Displaying data from City table:")
city_rows = spark.sql("SELECT * FROM demo.financialGraph.City")
city_rows.show()
# Expected output:
# +-------------+
# |         name|
# +-------------+
# |     New York|
# |  Gainesville|
# |San Francisco|
# +-------------+

# Phone table: create it, load three sample phone numbers, then display the rows.
create_phone_sql = """
    CREATE TABLE demo.financialGraph.Phone (
        number STRING,
        isBlocked BOOLEAN
    ) USING iceberg
"""
insert_phone_sql = """
    INSERT INTO demo.financialGraph.Phone (number, isBlocked) VALUES
    ('718-245-5888', FALSE),
    ('650-658-9867', TRUE),
    ('352-871-8978', FALSE)
"""
# The DDL runs first so the table exists before the rows are inserted.
for phone_statement in (create_phone_sql, insert_phone_sql):
    spark.sql(phone_statement)

print("Displaying data from Phone table:")
phone_rows = spark.sql("SELECT * FROM demo.financialGraph.Phone")
phone_rows.show()
# Expected output:
# +------------+---------+
# |      number|isBlocked|
# +------------+---------+
# |718-245-5888|    false|
# |650-658-9867|     true|
# |352-871-8978|    false|
# +------------+---------+

# Transfer table: create it, load eight sample transfers (from_account,
# to_account, date, amount), then display the rows.
create_transfer_sql = """
    CREATE TABLE demo.financialGraph.transfer (
        from_account STRING,
        to_account STRING,
        date DATE,
        amount INT
    ) USING iceberg
"""
insert_transfer_sql = """
    INSERT INTO demo.financialGraph.transfer (from_account, to_account, date, amount) VALUES
    ('Scott', 'Ed', CAST('2024-01-04' AS DATE), 20000),
    ('Scott', 'Ed', CAST('2024-02-01' AS DATE), 800),
    ('Scott', 'Ed', CAST('2024-02-14' AS DATE), 500),
    ('Jenny', 'Scott', CAST('2024-04-04' AS DATE), 1000),
    ('Paul', 'Jenny', CAST('2024-02-01' AS DATE), 653),
    ('Steven', 'Jenny', CAST('2024-05-01' AS DATE), 8560),
    ('Ed', 'Paul', CAST('2024-01-04' AS DATE), 1500),
    ('Paul', 'Steven', CAST('2023-05-09' AS DATE), 20000)
"""
# The DDL runs first so the table exists before the rows are inserted.
for transfer_statement in (create_transfer_sql, insert_transfer_sql):
    spark.sql(transfer_statement)

print("Displaying data from Transfer table:")
transfer_rows = spark.sql("SELECT * FROM demo.financialGraph.transfer")
transfer_rows.show()
# Expected output:
# +------------+----------+----------+------+
# |from_account|to_account|      date|amount|
# +------------+----------+----------+------+
# |       Scott|        Ed|2024-01-04| 20000|
# |       Scott|        Ed|2024-02-01|   800|
# |       Scott|        Ed|2024-02-14|   500|
# |       Jenny|     Scott|2024-04-04|  1000|
# |        Paul|     Jenny|2024-02-01|   653|
# |      Steven|     Jenny|2024-05-01|  8560|
# |          Ed|      Paul|2024-01-04|  1500|
# |        Paul|    Steven|2023-05-09| 20000|
# +------------+----------+----------+------+

# hasPhone table: create it, load five sample (account, phone) pairs, then
# display the rows.
create_has_phone_sql = """
    CREATE TABLE demo.financialGraph.hasPhone (
        account STRING,
        phone STRING
    ) USING iceberg
"""
insert_has_phone_sql = """
    INSERT INTO demo.financialGraph.hasPhone (account, phone) VALUES
    ('Scott', '718-245-5888'),
    ('Jenny', '718-245-5888'),
    ('Jenny', '650-658-9867'),
    ('Paul', '650-658-9867'),
    ('Ed', '352-871-8978')
"""
# The DDL runs first so the table exists before the rows are inserted.
for has_phone_statement in (create_has_phone_sql, insert_has_phone_sql):
    spark.sql(has_phone_statement)

print("Displaying data from hasPhone table:")
has_phone_rows = spark.sql("SELECT * FROM demo.financialGraph.hasPhone")
has_phone_rows.show()
# Expected output:
# +-------+------------+
# |account|       phone|
# +-------+------------+
# |  Scott|718-245-5888|
# |  Jenny|718-245-5888|
# |  Jenny|650-658-9867|
# |   Paul|650-658-9867|
# |     Ed|352-871-8978|
# +-------+------------+

# isLocatedIn table: create it, load five sample (account, city) pairs, then
# display the rows.
create_is_located_in_sql = """
    CREATE TABLE demo.financialGraph.isLocatedIn (
        account STRING,
        city STRING
    ) USING iceberg
"""
insert_is_located_in_sql = """
    INSERT INTO demo.financialGraph.isLocatedIn (account, city) VALUES
    ('Scott', 'New York'),
    ('Jenny', 'San Francisco'),
    ('Steven', 'San Francisco'),
    ('Paul', 'Gainesville'),
    ('Ed', 'Gainesville')
"""
# The DDL runs first so the table exists before the rows are inserted.
for is_located_in_statement in (create_is_located_in_sql, insert_is_located_in_sql):
    spark.sql(is_located_in_statement)

print("Displaying data from isLocatedIn table:")
is_located_in_rows = spark.sql("SELECT * FROM demo.financialGraph.isLocatedIn")
is_located_in_rows.show()
# Expected output:
# +-------+-------------+
# |account|         city|
# +-------+-------------+
# |  Scott|     New York|
# |  Jenny|San Francisco|
# | Steven|San Francisco|
# |   Paul|  Gainesville|
# |     Ed|  Gainesville|
# +-------+-------------+