# Building a Big Ball of Mud

1. Start up a Neo4j graph database
1. Add services to the graph database
1. Add a bunch of random JSON schemas to the database.  Ask ChatGPT to write these for you?
1. Connect all of the services to a random set of other services as dependencies, and attach a random JSON schema to that dependency
1. Generate small scripts: when a service's endpoint is called, it makes 0-3 calls to other services
1. Validate there are no cycles in the graph?



In [None]:
# Connect to Neo4j

import os

from neo4j import GraphDatabase
from py2neo import Graph, Node, Relationship

import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt

import pycorpora
import random
import re

import pandas as pd
import numpy as np

In [None]:
# Number of Service nodes to generate.
APPLICATION_COUNT = 20
# Number of HttpEndpoint nodes created for each generated service.
APIS_PER_APPLICATION = 4

In [None]:
# driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "neo4j"))

# def neo4j_query(query, parameters=None, db=None):
#     assert driver is not None, "Driver not initialized!"
#     session = None
#     response = None
#     try:
#         session = driver.session(database=db) if db is not None else driver.session() 
#         response = list(session.run(query, parameters))
#     except Exception as e:
#         print("Query failed:", e)
#     finally:
#         if session is not None:
#             session.close()
#     return response

In [None]:
# Shared py2neo handle used by the cells below. No connection details are
# passed, so this presumably targets py2neo's default local instance — confirm.
graph = Graph()
def neo4j_query(query, parameters=None):
    """Run a Cypher query and return its result as a DataFrame.

    Args:
        query: Cypher statement to execute.
        parameters: Optional mapping of query parameters.

    Returns:
        The query result converted with ``to_data_frame()``, or ``None`` when
        the query failed (the error is printed instead of being raised).
    """
    try:
        response = graph.run(query, parameters)
    except Exception as e:
        # Best-effort helper for interactive use: report the failure and
        # return early instead of dereferencing a missing result.
        print("Query failed:", e)
        return None
    return response.to_data_frame()

In [None]:
# Start from an empty database so that re-running the notebook does not pile
# up stale data.
# NOTE(review): delete_all() presumably removes every node and relationship in
# the connected database, not only what this notebook creates — confirm before
# pointing this at a shared instance.
graph.delete_all()
# neo4j_query("MATCH (n1)-[r]->(n2) DELETE r, n1, n2")
# neo4j_query("MATCH (n:Service) DELETE n")
# neo4j_query("MATCH (n:HttpEndpoint) DELETE n")
# neo4j_query("MATCH (n:MessagingEndpoint) DELETE n")
# neo4j_query("MATCH (n:Schema) DELETE n")

Our goal is to create a database that looks something like the following:

In [None]:
def mm(graph):
  """Render a Mermaid diagram inline using the mermaid.ink image service.

  Args:
    graph: Mermaid diagram source text. The parameter name shadows the
      module-level ``graph`` handle inside this function only; it is kept
      unchanged for caller compatibility.
  """
  # UTF-8 so that labels containing non-ASCII characters do not raise
  # UnicodeEncodeError.
  graphbytes = graph.encode("utf8")
  # URL-safe alphabet: plain base64 can emit "+" and "/", which are not safe
  # inside a URL path segment.
  base64_bytes = base64.urlsafe_b64encode(graphbytes)
  base64_string = base64_bytes.decode("ascii")
  display(Image(url="https://mermaid.ink/img/" + base64_string))

# Render the target graph shape: one service exposes an HTTP endpoint and a
# message endpoint, and a second service depends on both.
# NOTE(review): the diagram draws CONSUMES from the endpoint to the service,
# while the dependency query later in this file merges
# (consumer)-[:CONSUMES]->(endpoint) — confirm which direction is intended.
mm("""
flowchart LR
    service1((Service A)) -- :PRODUCES --> httpEndpoint((HTTP Endpoint))
    service1((Service A)) -- :PUBLISHES --> messageEndpoint((Message Endpoint))
    httpEndpoint((HTTP Endpoint)) -- :CONSUMES --> service2((Service B))
    messageEndpoint((Message Endpoint)) -- :SUBSCRIBES --> service2((Service B))
""")

## Add Services to the database

In [None]:
# print(random.choice(pycorpora.words["infinitive_verbs"]))

# Maps schema file name -> raw JSON text for every schema found on disk.
available_schemas = {}

# Load each *.json file from ./schemas and mirror it into the graph as a
# :Schema node.
for file in os.listdir("schemas"):
  if not file.endswith(".json"):
    continue
  # Explicit encoding: do not depend on the platform default when reading JSON.
  with open(os.path.join("schemas", file), "r", encoding="utf-8") as f:
    schema = f.read()
  available_schemas[file] = schema
  # MERGE on the file name so that re-running this cell refreshes the node
  # instead of creating a duplicate :Schema for the same file.
  graph.run("MERGE (s:Schema {file: $file}) SET s.schema = $schema", {"file": file, "schema": schema})

In [None]:
# Uniqueness constraints for the node keys used by the MATCH clauses below.
# Each constraint needs its own name: with IF NOT EXISTS, reusing an existing
# name turns the statement into a silent no-op.
graph.run('CREATE CONSTRAINT services IF NOT EXISTS FOR (s:Service) REQUIRE s.name IS UNIQUE')
graph.run('CREATE CONSTRAINT endpoints IF NOT EXISTS FOR (e:HttpEndpoint) REQUIRE e.name IS UNIQUE')
graph.run('CREATE CONSTRAINT schemas IF NOT EXISTS FOR (d:Schema) REQUIRE d.file IS UNIQUE')
# neo4j_query('CREATE CONSTRAINT authors IF NOT EXISTS ON (a:Author) ASSERT a.name IS UNIQUE')
# neo4j_query('CREATE CONSTRAINT categories IF NOT EXISTS ON (c:Category) ASSERT c.category IS UNIQUE')

def create_service(name):
  """Create a :Service node together with its randomly named HTTP endpoints.

  For each of APIS_PER_APPLICATION randomly sampled verbs, an :HttpEndpoint
  node named "<service>:/<verb>" is created, linked to the service with
  PRODUCES and to a randomly chosen :Schema node with VALIDATES.

  Args:
    name: Unique service name, stored in the node's ``name`` property.

  Raises:
    IndexError: If ``available_schemas`` is empty (raised by random.choice).
  """
  # Values are passed as query parameters rather than interpolated into the
  # Cypher text, so quotes in a name or verb cannot break or alter the query.
  graph.run("CREATE (s:Service) "
                "SET s.name = $name "
                "RETURN s", {"name": name})
  endpoints = random.sample(pycorpora.words["infinitive_verbs"], k=APIS_PER_APPLICATION)
  schema_files = list(available_schemas)
  for endpoint in endpoints:
    path = "/" + endpoint
    endpoint_name = name + ":" + path
    schema_file = random.choice(schema_files)
    graph.run("CREATE (e:HttpEndpoint) "
                  "SET e.name = $endpoint_name "
                  "SET e.path = $path "
                  "RETURN e", {"endpoint_name": endpoint_name, "path": path})
    graph.run("MATCH (s:Service), (e:HttpEndpoint), (d:Schema) "
                  "WHERE s.name = $name AND e.name = $endpoint_name AND d.file = $schema_file "
                  "MERGE (s)-[se:PRODUCES]->(e) "
                  "MERGE (e)-[ed:VALIDATES]->(d) "
                  "RETURN se, ed",
              {"name": name, "endpoint_name": endpoint_name, "schema_file": schema_file})

In [None]:
# Build APPLICATION_COUNT service names by pairing a random knot name with a
# random technology name, then create one service per pair.
knots = random.sample(pycorpora.technology.knots['knots'], k=APPLICATION_COUNT)
techs = random.sample(pycorpora.technology.new_technologies['technologies'], k=APPLICATION_COUNT)
for knot, tech in zip(knots, techs):
  # Lower-case, hyphenate spaces, and join the two halves with a hyphen.
  slug = "-".join(part.lower().replace(" ", "-") for part in (knot, tech))
  # Drop anything that is not alphanumeric or a hyphen.
  randomServiceName = re.sub("[^a-zA-Z0-9-]", "", slug)
  create_service(randomServiceName)

# Create random service dependencies.
# Each run of the query collects produced endpoints and their producing
# services, asks APOC for up to 4 random endpoints and 1 random service, and
# merges a CONSUMES relationship from that service to each picked endpoint it
# does not itself produce.
# NOTE(review): the `(s2:Service)` pattern is matched but never referenced, so
# it only multiplies the matched rows — confirm whether consumers were meant
# to be drawn from s2.
dependency_query = (
    "MATCH (s:Service)-[:PRODUCES]->(e:HttpEndpoint), (s2:Service) "
    "WITH apoc.coll.randomItems(COLLECT(e), 4) AS endpoints, apoc.coll.randomItems(COLLECT(s), 1) AS consumers "
    "WHERE SIZE(endpoints) > 1 AND SIZE(consumers) > 0 "
    "UNWIND RANGE(0, SIZE(endpoints)/2*2-1) AS i "
    "WITH endpoints[i] AS endpoint, consumers[0] AS consumer "
    "WHERE NOT (consumer)-[:PRODUCES]->(endpoint) "
    "MERGE (consumer)-[:CONSUMES]->(endpoint)"
)
# One query run per application.
for _round in range(APPLICATION_COUNT):
  graph.run(dependency_query)

In [None]:
# List every producer / endpoint / consumer triple.
# Endpoints are created with only `name` and `path` properties, so the
# endpoint column reports `e.path`; there is no `id` property to return.
graph.query("MATCH (p:Service)-[:PRODUCES]->(e:HttpEndpoint)<-[:CONSUMES]-(c:Service) RETURN p.name, e.path, c.name").to_data_frame()

In [None]:
# Rank endpoints by the number of services consuming them, highest first.
consumer_counts_query = (
    "MATCH (e:HttpEndpoint)<-[:CONSUMES]-(c:Service) "
    "RETURN e.name, COUNT(c) AS deps "
    "ORDER BY deps DESC"
)
graph.query(consumer_counts_query).to_data_frame()