First, we download the webpage of the Practical Gremlin book

In [1]:
from bs4 import BeautifulSoup
import requests
import re
# Download the book page. The timeout keeps a stalled connection from hanging
# the kernel forever (requests applies no timeout by default), and
# raise_for_status() stops the notebook on a 4xx/5xx answer instead of letting
# later cells parse an error page as if it were the book.
response = requests.get("https://kelvinlawrence.net/book/Gremlin-Graph-Guide.html", timeout=30)
response.raise_for_status()

Then we extract the Gremlin queries from the code listings in chapters 3, 4 and 5 of this page

In [17]:
# Earlier attempt; no code visible in this notebook section uses it.
# Starts at "g.V", "g.E" or "g.add" and, because ".*" is greedy, extends as far
# along the line as possible while still ending at a ";" or at the end of the string.
pattern = re.compile(r"g\.(?:V|E|add).*(?:\;|$)")
# Pattern used for the extraction below.
# Starts at "g.V", "g.E" or "g.add", then consumes runs of "<characters other
# than ')'> followed by one or more ')'". A run that is followed by "." and a
# letter continues the chain; the match ends right after the closing
# parenthesis(es) of the last run, so trailing text after the final call is dropped.
newpattern = re.compile(r"g\.(?:V|E|add)(?:[^\)]+\)+\.[a-zA-Z])*[^\)]+\)+")

In [18]:
# Parse the HTML held by `response` (the result of the GET request made earlier).
soup = BeautifulSoup(response.text, 'html.parser')


def _chapter_prefix(section):
    """Return the part of the section's <h2> text before the first ".".

    Returns None when the section has no <h2> at all, so that such a section
    is simply skipped instead of raising AttributeError on `None.get_text()`.
    """
    heading = section.find('h2')
    if heading is None:
        return None
    return heading.get_text().split(".")[0]


# Find all top-level sections and keep those whose heading starts with "3.", "4." or "5."
sections = soup.find_all('div', {"class": "sect1"})
section_filter = [section for section in sections if _chapter_prefix(section) in ["3", "4", "5"]]

# Rebuilt from scratch on every run, so re-executing the cell does not duplicate entries.
queries = []

for section in section_filter:
    # Find all <code> elements inside this section
    code_blocks = section.find_all('code')

    # Split the text of each <code> element into its non-empty lines
    code_texts = (line for code in code_blocks for line in code.get_text(strip=True).split("\n") if len(line) > 0)

    # Collect every match of the query pattern, line by line
    for code_text in code_texts:
        queries.extend(newpattern.findall(code_text))

In [20]:
# Number of matches collected above. Duplicates are included, because every
# findall() result is appended to the list as-is.
len(queries)

908

In [None]:
# Print every extracted query on its own line, prefixed with its 1-based position.
for i, query in enumerate(queries):
	print(f"{i+1}: {query}")

Now, we'd like to find all steps used in these queries (a step is taken to be any name directly followed by an opening parenthesis)

In [34]:
# A "step" is any run of word characters that is immediately followed by "(".
steps_pattern = re.compile(r"(\w+)\(")

# Occurrence count per step name, in order of first appearance.
steps = {}
for query in queries:
    for step in steps_pattern.findall(query):
        if step in steps:
            steps[step] += 1
        else:
            steps[step] = 1

In [35]:
# First line: number of distinct step names; second line: the name -> count mapping.
print(len(steps), steps, sep="\n")

131
{'V': 885, 'hasLabel': 239, 'groupCount': 43, 'by': 577, 'has': 703, 'out': 383, 'path': 135, 'repeat': 68, 'times': 32, 'next': 72, 'getClass': 3, 'values': 244, 'E': 28, 'hasNot': 1, 'not': 21, 'count': 209, 'outE': 88, 'label': 20, 'group': 79, 'select': 161, 'fold': 83, 'in': 22, 'inV': 58, 'limit': 151, 'as': 207, 'from': 28, 'to': 34, 'dedup': 32, 'hasNext': 2, 'project': 30, 'unfold': 66, 'inE': 5, 'outV': 7, 'tail': 7, 'range': 7, 'timeLimit': 2, 'until': 30, 'skip': 2, 'both': 13, 'valueMap': 76, 'with': 9, 'elementMap': 3, 'union': 30, 'id': 19, 'toList': 19, 'join': 3, 'toSet': 1, 'order': 80, 'toBulkSet': 2, 'fill': 4, 'hasId': 32, 'between': 18, 'lt': 18, 'where': 72, 'is': 71, 'eq': 15, 'neq': 32, 'mean': 12, 'local': 27, 'sum': 12, 'max': 8, 'min': 2, 'gte': 8, 'inside': 3, 'outside': 2, 'within': 33, 'without': 14, 'gt': 55, 'lte': 8, 'and': 13, 'coin': 4, 'sample': 12, 'startingWith': 5, 'or': 10, 'endingWith': 1, 'containing': 2, 'notStartingWith': 1, 'notEndingWi

In [36]:
#steps in PyMogwai
# The names are kept in their original listing order; the split into several
# small sets is only there to keep the lines short and carries no meaning.
steps_in_pymogwai = set().union(
    {"filter", "has", "hasId", "hasName", "hasLabel", "is"},
    {"contains", "within", "simplePath", "limit", "dedup"},
    {"identity", "name", "value", "label", "properties"},
    {"select", "order", "count", "path"},
    {"max", "min", "sum", "mean"},
    {"out", "outE", "outV", "inE", "inV", "in", "both", "bothE", "bothV"},
    {"repeat", "branch", "option", "until", "times", "emit"},
    {"as", "by", "sideEffect", "property"},
    {"toList", "next", "iter", "asPath"},
    {"V", "E"},
    {"gt", "gte", "lt", "lte", "or", "not", "and"},
)
len(steps_in_pymogwai)

57

In [38]:
# Map every distinct query to the list of step names found in it.
query_step_map = {query: steps_pattern.findall(query) for query in queries}
counter = 0
# The loop variable is deliberately NOT named `steps`: at cell level that would
# overwrite the step-count dictionary built earlier, so re-running the cell
# that prints those counts would show the steps of the last query instead.
for query, query_steps in query_step_map.items():
    # Steps used by this query that PyMogwai does not provide
    unsupported = set(query_steps) - steps_in_pymogwai
    if unsupported:
        counter += 1
        print(f"{query}: {unsupported}")
print(f"{counter} queries contain steps not in PyMogwai")

g.V().hasLabel('airport').groupCount().by('country'): {'groupCount'}
g.V().has('airport','code','DFW').next().getClass(): {'getClass'}
g.V().has('airport','code','DFW').values(): {'values'}
g.V().has('airport','code','DFW').values('city'): {'values'}
g.V().has('airport','code','DFW').values('runways','icao'): {'values'}
g.V().hasNot('region'): {'hasNot'}
g.V().groupCount().by(label): {'groupCount'}
g.V().label().groupCount(): {'groupCount'}
g.E().groupCount().by(label): {'groupCount'}
g.E().label().groupCount(): {'groupCount'}
g.V().group().by(label).by(count()): {'group'}
g.V().hasLabel('country').group().by('code').by(out().count()): {'group'}
g.V().hasLabel('continent').group().by('code').by(out().count()): {'group'}
g.V().hasLabel('airport').groupCount().by('country').select('FR'): {'groupCount'}
g.V().hasLabel('airport').groupCount().by('country').select('FR','GR','BE'): {'groupCount'}
g.V().has('airport','code','AUS').out().values('code').fold(): {'values', 'fold'}
g.V().has('air

In [39]:
# Write every query that only uses PyMogwai steps to queries.txt, one per line.
# The encoding is fixed to UTF-8 because the text was scraped from a web page:
# with the platform default encoding a non-ASCII character could make write()
# fail, and the resulting file would differ between machines.
with open("queries.txt", "w", encoding="utf-8") as f:
    counter = 0
    # `query_steps` rather than `steps`, so the step-count dictionary built
    # earlier in the notebook is not overwritten by this loop.
    for query, query_steps in query_step_map.items():
        # Keep the query only if every one of its steps is available in PyMogwai
        if set(query_steps) <= steps_in_pymogwai:
            counter += 1
            f.write(f"{query}\n")
    print(f"Saved {counter} queries")

Saved 204 queries


## Testing wrappers

In [1]:
from typing import Any, Callable
def parameterizable_step(func: Callable):
    """Decorate a method so that calls containing 'hello' are rerouted.

    The returned wrapper inspects the positional and keyword arguments. If the
    value 'hello' occurs anywhere in them (directly, or nested inside lists,
    tuples or dict values), the call is forwarded to ``instance._sayhello``;
    otherwise the decorated method itself is called.

    Args:
        func: the method to wrap. It is called as ``func(instance, *args, **kwargs)``.

    Returns:
        The wrapper, to be used in place of ``func`` on the class.
    """

    def check_is_hello(x: Any) -> bool:
        """Return True if ``x`` is 'hello' or contains it at any nesting depth."""
        if x == 'hello':
            return True
        elif isinstance(x, (list, tuple)):
            # Recurse into each element. The generator has to wrap the call:
            # check_is_hello(i for i in x) would hand the generator itself to
            # the checker and then call any() on a bool, raising TypeError.
            return any(check_is_hello(i) for i in x)
        elif isinstance(x, dict):
            # Only the values are inspected, never the keys
            return any(check_is_hello(i) for i in x.values())
        return False

    def wrapper(instance, *args, **kwargs):
        if check_is_hello(args) or check_is_hello(kwargs):
            #use a parameter
            return instance._sayhello(*args, **kwargs)
        # `instance` must be passed on explicitly: `func` is the plain
        # function taken from the class body, not a bound method.
        return func(instance, *args, **kwargs)
    return wrapper


class Test:
    """Minimal class used to try out ``parameterizable_step``."""

    def _sayhello(self, *args, **kwargs):
        """Target of the rerouted calls: print the arguments prefixed with "Hello".

        This method is intentionally left undecorated. The wrapper dispatches
        to ``instance._sayhello``, so decorating it would make every matching
        call re-enter the wrapper and recurse without end.
        """
        print("Hello", args, kwargs)

    @parameterizable_step
    def sayhello(self, *args, **kwargs):
        """Print the arguments as-is; calls containing 'hello' go to ``_sayhello``."""
        print(args, kwargs)