# Exploring the Legislation Corpus with Neo4j

In [1]:
# Load dotenv
from dotenv import load_dotenv
import os

# Populate the process environment from a local .env file, if one is present.
load_dotenv()

# Neo4j connection settings. Only the database name has a fallback ("neo4j");
# the other three are None when the corresponding variable is not set.
NEO4J_URI = os.environ.get("NEO4J_URI")
NEO4J_USER = os.environ.get("NEO4J_USER")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

In [2]:
from neo4j_analysis import Neo4jAnalysis

# Build the shared helper that every query and rendering cell below goes through.
analysis = Neo4jAnalysis(
    NEO4J_URI,
    NEO4J_USER,
    NEO4J_PASSWORD,
    NEO4J_DATABASE,
)

In [3]:
# Fixed node-label -> hex colour mapping shared by every graph rendering in this notebook.
colors = dict(
    # Core document hierarchy
    Legislation="#1f77b4",  # blue
    Part="#ff7f0e",  # orange
    Chapter="#2ca02c",  # green
    Section="#d62728",  # red
    Paragraph="#9467bd",  # purple
    # Schedules
    Schedule="#8c564b",  # brown
    ScheduleParagraph="#e377c2",  # pink
    ScheduleSubparagraph="#7f7f7f",  # gray
    # Commentary and citations
    Commentary="#bcbd22",  # olive
    Citation="#17becf",  # cyan
    CitationSubRef="#aec7e8",  # light blue
    # Explanatory notes
    ExplanatoryNotes="#ffbb78",  # light orange
    ExplanatoryNotesParagraph="#98df8a",  # light green
)

## The complete graph schema

In [4]:
# Show the graph schema
from neo4j_viz.neo4j import from_neo4j, ColorSpace

query = """
CALL db.schema.visualization()
"""
results = analysis.run_query_viz(query)

VG = from_neo4j(results)
VG.color_nodes(
    field="caption",  # Using the internal labels property
    color_space=ColorSpace.DISCRETE,
    colors=colors,
)

generated_html = VG.render(layout="forcedirected")
await analysis.capture_graph_to_png(
    generated_html, "renderings/schema_graph.png", width=1080, height=1080
)

![Graph Schema](renderings/schema_graph.png)

## The corpus

In [5]:
# One row per Legislation node with its headline metadata, ordered by enactment date.
# (The original query bound an unused path variable `p`; a plain node match is enough.)
query = """
MATCH (l:Legislation)
RETURN l.category AS Category, l.status AS Status, l.title AS Title, l.uri AS URI, l.enactment_date AS Enactment
ORDER BY Enactment
"""

corpus_df = analysis.run_query_df(query)

# Display the ten most recently enacted rows whose Status is "final".
# corpus_df itself is left unfiltered.
corpus_df[corpus_df["Status"] == "final"].sort_values(
    "Enactment", ascending=False
).head(10)

Unnamed: 0,Category,Status,Title,URI,Enactment
1367,primary,final,Biodiversity Beyond National Jurisdiction Act ...,http://www.legislation.gov.uk/ukpga/2026/6/ena...,2026-02-12
1360,primary,final,General Cemetery Act 2025,http://www.legislation.gov.uk/ukla/2025/2/enacted,2025-10-27
1294,primary,final,Housing (Amendment) Act (Northern Ireland) 2020,http://www.legislation.gov.uk/nia/2020/5/enacted,2020-08-28
1259,primary,final,City of London Corporation (Open Spaces) Act 2018,http://www.legislation.gov.uk/ukla/2018/1/enacted,2018-03-15
1153,primary,final,Humber Bridge Act 2013,http://www.legislation.gov.uk/ukla/2013/6/enacted,2013-12-18
938,primary,final,London Local Authorities and Transport for Lon...,http://www.legislation.gov.uk/ukla/2003/3/enacted,2003-10-30
886,primary,final,Colchester Borough Council Act 2001,http://www.legislation.gov.uk/ukla/2001/2/enacted,2001-03-22
715,primary,final,London Underground (Jubilee) Act 1993,http://www.legislation.gov.uk/ukla/1993/9/enacted,1993-07-01
709,primary,final,London Docklands Railway (Lewisham) Act 1993,http://www.legislation.gov.uk/ukla/1993/7/enacted,1993-05-27
705,primary,final,British Railways Act 1993,http://www.legislation.gov.uk/ukla/1993/4/enacted,1993-03-29


## A piece of legislation down to the section level

With the graph in place, we can start exploring the legislation corpus. Let's start with a piece of legislation, the Corporation Tax Act 2010, and explore its structure down to the section level.

In [6]:
query = """
MATCH p=(l:Legislation)-[:HAS_PART]->(:Part)-[:HAS_CHAPTER]->(:Chapter)-[:HAS_SECTION]->(:Section)
WHERE l.uri CONTAINS "ukpga/2010/4"
RETURN p
"""

results = analysis.run_query_viz(query)

VG = from_neo4j(results)
VG.color_nodes(
    field="caption",
    color_space=ColorSpace.DISCRETE,
    colors=colors,
)

generated_html = VG.render(layout="forcedirected")
await analysis.capture_graph_to_png(
    generated_html, "renderings/legislation_example.png", width=1080, height=1080
)

![Legislation Example](renderings/legislation_example.png)

## Focusing on a single part down to paragraphs and commentaries

Let us focus on a single part of the Corporation Tax Act 2010, and explore the network of paragraphs and commentaries in that specific part.

In [7]:
query = """
MATCH p=(l:Legislation)-[:HAS_PART]->(part:Part)-[:HAS_CHAPTER]->(:Chapter)-[:HAS_SECTION]->(section:Section)-[:HAS_PARAGRAPH]->(para:Paragraph)-[:HAS_COMMENTARY]->(comm:Commentary)
WHERE l.uri CONTAINS "ukpga/2010/4" AND part.order=2
RETURN p
"""

results = analysis.run_query_viz(query)

VG = from_neo4j(results)
VG.color_nodes(
    field="caption",
    color_space=ColorSpace.DISCRETE,
    colors=colors,
)

generated_html = VG.render(layout="forcedirected", initial_zoom=1.0)
await analysis.capture_graph_to_png(
    generated_html, "renderings/legislation_example_detail.png", width=1080, height=1080
)

![Legislation Example Detail](renderings/legislation_example_detail.png)

## Commentaries

Let us run a quick query that retrieves the network of commentaries which cite a specific piece of legislation (in this case, the Data Protection Act 2018).

In [8]:
query = """
MATCH p=(:Commentary)-[:HAS_CITATION]->(:Citation)-[:CITES_ACT]->(l:Legislation)
WHERE l.uri CONTAINS "ukpga/2018/12"
RETURN p
"""

results = analysis.run_query_viz(query)

VG = from_neo4j(results)
VG.color_nodes(
    field="caption",
    color_space=ColorSpace.DISCRETE,
    colors=colors,
)

generated_html = VG.render(layout="forcedirected", initial_zoom=1.0)
await analysis.capture_graph_to_png(
    generated_html, "renderings/commentary_network.png", width=1080, height=1080
)

![Commentary Network](renderings/commentary_network.png)