# Citizen Graph notebook

A notebook for experimenting with citizen graph structure, generation, and visualization.


In [1]:
import os
from dotenv import load_dotenv
# Populate the process environment (presumably from a local .env file, per the
# error message below) before the key is looked up.
load_dotenv()
# Fail fast with an actionable message if the key is missing, instead of
# failing later at the first Gemini request.
GEMINI_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_KEY:
    raise RuntimeError("GEMINI_API_KEY not found. Add it to a .env file in the notebook root.")
import pathlib
from pathlib import Path

from pydantic import BaseModel

from google import genai
from typing import Optional, List

from rdflib import Graph, RDF, RDFS, Namespace, Literal, URIRef

from pyvis.network import Network
import webbrowser

import json

from pyshacl import validate
import time
import random



### Gemini initialization


In [2]:
# Initialize the Gemini client with the key that was validated in the first
# cell. Passing it explicitly keeps the client on GEMINI_API_KEY even when
# another key variable (e.g. GOOGLE_API_KEY) is also set in the environment.
client = genai.Client(api_key=GEMINI_KEY)
gemini_model = "gemini-2.5-pro"

# Name and path of the precondition PDF used by the single-document cells
# (nothing is loaded here; the file is uploaded on demand by call_gemini_pdf)
document_name = "foititiko"
file_name = f"Precondition documents/{document_name}.pdf"

Auxiliary functions wrapping the Gemini API's basic functionality, along with other parsing and utility helpers.

In [4]:
# Text generation in general
def call_gemini(content):
    """Send `content` to the configured Gemini model and return the reply text.

    Args:
        content: Prompt payload, forwarded unchanged as `contents`.

    Returns:
        The `text` attribute of the response from `generate_content`.
    """
    request = {"model": gemini_model, "contents": content}
    reply = client.models.generate_content(**request)
    return reply.text

# Document understanding 
def call_gemini_pdf(content, file_name):
    """Upload a document and ask the configured Gemini model about it.

    Args:
        content: Prompt sent together with the uploaded document.
        file_name: Path of the file to upload.

    Returns:
        The `text` attribute of the response from `generate_content`.
    """
    # Upload the document through the File API; the returned handle is passed
    # to the model ahead of the prompt.
    uploaded = client.files.upload(file=pathlib.Path(file_name))

    reply = client.models.generate_content(
        model=gemini_model,
        contents=[uploaded, content],
    )
    return reply.text

# JSON structured output
def call_gemini_json(content, schema):
    """Request a JSON-formatted reply constrained by `schema`.

    Args:
        content: Prompt payload, forwarded unchanged as `contents`.
        schema: Value passed as `response_schema` in the request config.

    Returns:
        The `text` attribute of the response (not parsed here).
    """
    json_config = {
        "response_mime_type": "application/json",
        "response_schema": schema,
    }
    reply = client.models.generate_content(
        model=gemini_model,
        contents=content,
        config=json_config,
    )
    return reply.text

# Read txt file into a string (used for prompts)
def read_txt(path):
    """Return the entire contents of the UTF-8 text file at `path` as a string."""
    with open(path, mode="r", encoding="utf-8") as source:
        text = source.read()
    return text
    
# Read a JSON file and return it as string or raw JSON
def read_json(path, raw = False):
    """Load the JSON file at `path`.

    Args:
        path: Location of the JSON file.
        raw: When truthy, return the parsed object as-is; otherwise return it
            re-serialized as a string indented by two spaces.

    Returns:
        The parsed JSON value, or its pretty-printed string form.
    """
    # Decode as UTF-8 explicitly (as read_txt does): the platform default
    # encoding is not UTF-8 everywhere and would corrupt non-ASCII text.
    with open(path, "r", encoding="utf-8") as f:
        parsed = json.load(f)
    if raw:
        return parsed
    # Else, convert JSON to string
    return json.dumps(parsed, indent=2)

# Retry wrapper to combat model overload errors
def with_retries(func, *args, base_delay=4.0, max_delay=300.0, max_retries=None, **kwargs):
    """Call `func(*args, **kwargs)`, retrying on Gemini overload/quota errors.

    An exception whose message contains "overloaded" (case-insensitive) is
    retried after `base_delay * 2**n` seconds, where n counts overloads since
    the last quota wait; the wait is capped at `max_delay`. A message
    containing "exhausted" is retried after a fixed 60 seconds and resets the
    overload counter. Any other exception propagates immediately.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to `func`.
        base_delay: Base of the exponential backoff, in seconds.
        max_delay: Upper bound for a single backoff wait, in seconds.
        max_retries: Maximum number of retries before the last error is
            re-raised; None (the default) retries indefinitely.
        **kwargs: Keyword arguments forwarded to `func`.

    Returns:
        Whatever `func` returns on its first successful call.

    Raises:
        Exception: The error raised by `func` when it is not retryable, or
            when `max_retries` retries have already been spent.
    """
    overloads = 0
    exhaustions = 0
    retries = 0
    while True:
        try:
            return func(*args, **kwargs)
        except Exception as e:
            msg = str(e).lower()
            overloaded = "overloaded" in msg
            exhausted = "exhausted" in msg
            # Anything that is neither an overload nor a quota error is a real failure
            if not (overloaded or exhausted):
                raise
            # Retry budget spent: surface the last error instead of looping forever
            if max_retries is not None and retries >= max_retries:
                raise
            retries += 1
            if overloaded:
                overloads += 1
                # Cap the wait so repeated overloads cannot grow it without bound
                wait = min(base_delay * (2 ** overloads), max_delay)
                print(f"Gemini overloaded {overloads} times, retrying in {wait:.1f}s...")
                time.sleep(wait)
            else:
                exhaustions +=1
                print(f"Gemini exhausted {exhaustions} times, waiting 1 minute and rerunning the code...")
                time.sleep(60)
                # A quota wait starts a fresh backoff sequence
                overloads = 0
                
# Pyvis graph visualization
def visualize_graph(ttl_file):
    """Render a Turtle file as an interactive pyvis network and open it.

    Every triple except `rdf:type` statements becomes an edge. Subjects and
    non-literal objects are colored by their rdf:type (public service,
    constraint, information concept, or grey otherwise); literal objects are
    drawn as beige boxes. The HTML is written next to the input file, with the
    file extension replaced by ".html", and then opened in the web browser.

    Args:
        ttl_file: Path of the Turtle file to visualize.
    """
    ttl_path = os.fspath(ttl_file)

    # Load TTL file
    g = Graph()
    g.parse(ttl_path, format="turtle")

    # Namespaces used to classify nodes
    CCCEV = Namespace("http://data.europa.eu/m8g/")
    CPSV = Namespace("http://purl.org/vocab/cpsv#")

    net = Network(height="1440px", width="100%", notebook=True, directed=True)
    net.force_atlas_2based()

    # Just visual effects
    def node_color(uri):
        if (uri, RDF.type, CPSV.PublicService) in g:
            return "gold"
        if (uri, RDF.type, CCCEV.Constraint) in g:
            return "maroon"
        if (uri, RDF.type, CCCEV.InformationConcept) in g:
            return "darkturquoise"
        return "lightgrey"

    # Just a way to make the graph more readable
    def node_label(uri):
        if isinstance(uri, Literal):
            return str(uri)

        # Prefer a human-readable label, checking rdfs:label before cccev:name
        for predicate in (RDFS.label, CCCEV.name):
            found = next(g.objects(uri, predicate), None)
            if found is not None:
                return str(found)

        # Fall back to the local part of the URI
        uri_str = str(uri)
        if "#" in uri_str:
            return uri_str.split("#")[-1]
        return uri_str.split("/")[-1]

    # Add nodes and edges, skipping rdf:type for extra readability
    for s, p, o in g:
        if p == RDF.type:
            continue

        # Subject
        net.add_node(str(s), label=node_label(s), color=node_color(s))

        # Object
        if isinstance(o, Literal):
            # if it's a literal put it in a text box instead of a circular node
            net.add_node(str(o), label=node_label(o), color="beige", shape="box")
        else:
            net.add_node(str(o), label=node_label(o), color=node_color(o))

        # Edge
        net.add_edge(str(s), str(o), label=node_label(p), arrows="to")

    # Swap only the file extension; a plain str.replace("ttl", "html") would
    # also rewrite any other "ttl" occurring in the directory or file name.
    html_file = os.path.splitext(ttl_path)[0] + ".html"
    # Render and show
    net.show(html_file)
    # Hand the browser an absolute file:// URL; opening a bare relative
    # filename through webbrowser is not portable.
    webbrowser.open(Path(html_file).resolve().as_uri())



### Generate a citizen instance from the ontology

First, a citizen who is eligible.

In [None]:
# Inputs for the generation prompt: the service's precondition summary and the
# citizen ontology the instance must conform to.
preconditions_summary = read_txt(f"{document_name} preconditions summary.txt")
citizen_ontology = read_txt("Citizens/Citizen Ontology.ttl")

prompt = """
You are given a list of preconditions for a public service and an ontology in RDFS format that describes a citizen. 
Generate an instance of this ontology, that represents a citizen with just enough nodes from the ontology to be able to get verified for the public service.
After the nodes are created, make sure that they are connected with all possible edges (so bidirectional paths do exist if the ontology supports them).
Populate the node properties with appropriate data so the citizen DOES meet the criteria for ALL the preconditions listed.
Make sure the citizen being checked against the preconditions is the first node mentioned in the output and is named :Applicant
Return ONLY the output ttl. Return nothing else, including commentary.
"""

content = [prompt, preconditions_summary, citizen_ontology]

eligible_citizen = with_retries(call_gemini, content)

# Save to a file too. Write UTF-8 explicitly so non-ASCII characters in the
# generated Turtle are stored the same way on every platform.
with open(f"{document_name} eligible.ttl", "w", encoding="utf-8") as f:
    f.write(eligible_citizen)

Then, a citizen who is not eligible.

In [None]:
# Inputs for the generation prompt: the service's precondition summary and the
# citizen ontology the instance must conform to.
preconditions_summary = read_txt(f"{document_name} preconditions summary.txt")
citizen_ontology = read_txt("Citizens/Citizen Ontology.ttl")

prompt = """
You are given a list of preconditions for a public service and an ontology in RDFS format that describes a citizen. 
Generate an instance of this ontology, that represents a citizen with just enough nodes from the ontology to be able to get verified for the public service.
After the nodes are created, make sure that they are connected with all possible edges (so bidirectional paths do exist if the ontology supports them).
Populate the node properties with appropriate data so the citizen DOES NOT meet the criteria for ANY of the preconditions listed.
Make sure the citizen being checked against the preconditions is the first node mentioned in the output and is named :Applicant
Return ONLY the output ttl. Return nothing else, including commentary.
"""

content = [prompt, preconditions_summary, citizen_ontology]

not_eligible_citizen = with_retries(call_gemini, content)

# Save to a file too. Write UTF-8 explicitly so non-ASCII characters in the
# generated Turtle are stored the same way on every platform.
with open(f"{document_name} not eligible.ttl", "w", encoding="utf-8") as f:
    f.write(not_eligible_citizen)

NameError: name 'eligible_citizen' is not defined

### Graph Visualization

In [9]:
# Render the graph of the generated not-eligible citizen
visualize_graph(f"{document_name} not eligible.ttl")


foititiko not eligible.html


### Get a precondition megalist

from a directory of precondition documents

In [None]:
precondition_megalist = ""
# Only PDFs are sent to the model, so stray directory entries are skipped.
# Sorting makes the processing order, and therefore the megalist order,
# reproducible: os.listdir returns entries in arbitrary order.
documents = sorted(
    entry for entry in os.listdir('Precondition documents')
    if entry.lower().endswith(".pdf")
)

prompt = read_txt('Prompts/summarization.txt')

# Summarize each document in turn and append it to the running megalist,
# separating consecutive summaries with a blank line.
for document in documents:
    preconditions_summary = with_retries(call_gemini_pdf, prompt, f"Precondition documents/{document}")
    preconditions_summary += "\n\n"
    precondition_megalist += preconditions_summary
    print(f"Completed file: {document}")

print(precondition_megalist)

Gemini overloaded 1 times, retrying in 8.1s...
Gemini overloaded 2 times, retrying in 16.2s...
Gemini overloaded 3 times, retrying in 32.0s...
Completed file: foititiko.pdf
Completed file: genisi.pdf
Completed file: paidiou.pdf
Completed file: sitisi aen.pdf
Based on the document provided, here are the eligibility preconditions for the Student Housing Allowance:

*   The annual family income from the previous year must not exceed €30,000.
*   The student must rent a residence in a different city from their family's primary residence due to their studies.
*   The student or their parents must not have full ownership or usufruct of a residence in the city of study.
*   The rental lease must be valid for a minimum of six months.
*   The total area of properties owned or held in usufruct by the student or their parents must not exceed 200 square meters.
*   The student must have successfully passed exams in at least half of the courses of the previous academic year.
*   The student must 