In [40]:
class Answer:
    content: str
    model: str

    def __init__(self, content: str = "", model: str = ""):
        self.content = content
        self.model = model


class Question:
    content: str
    prompt: str = ""
    answers: list[Answer]

    def __init__(self, q: str):
        self.content = q
        self.answers = []


questions = [
    # automaton:directionality
    Question("Would you say that the automaton is directed?"),
    Question("Could you tell me whether the automaton is directed?"),
    # grammar:symbols
    Question("Can you describe the automaton's input and output symbols?"),
    Question("Could you identify the grammar or set of symbols that the automaton recognizes?"),
    # state:details
    Question("Could you highlight the primary aspects of state q0?"),
    Question("Can you provide more details about state q0?"),
    # state:final_list
    Question("Which state or states are considered final?"),
    Question("What are the final states in the automaton?"),
    # transition:existence_directed
    Question("Does a directed arc exist from q1 to q0?"),
    Question("Is q0 directly connected to q1 by a transition?"),
    # transition:self_loop
    Question("Can you spot any transition where the start and end state are identical?"),
    Question("Is there a looping transition from a state back to the same state?"),
    # transition:state_connections
    Question("What are the incoming and outgoing transition paths for q1?"),
    Question("What links are present for state q2 in the automaton?"),
    Question("What are the entry and exit transitions for state q2?"),
]

models = [
    ("ollama", "deepseek-r1:8b"), ("ollama", "gemma2:9b"), ("ollama", "llama3.1:8b"),
    ("openai", "gpt-4o"), ("openai", "o3-mini")
]
# models = ["gemma2:9b", "llama3.1:8b"]

dot_fsa = """
digraph FSA {
    rankdir=LR;
    node [shape = circle];
    q0 [shape = doublecircle];
    q1; q2; q3; q4;

    start [shape=none, label=""];  // Invisible start indicator
    start -> q0;                  // Start arrow pointing to starting state

    q0 -> q1 [label = "1"];
    q1 -> q2 [label = "1"];
    q2 -> q3 [label = "1"];
    q3 -> q4 [label = "0"];
    q4 -> q0 [label = "0"];
}
"""

len(questions)

15

In [41]:
import json
import networkx as nx
from networkx.drawing.nx_pydot import read_dot
import io

fsa = nx.DiGraph(read_dot(io.StringIO(dot_fsa)))


# Preprocessing: Add attributes to nodes and edges
def preprocess_fsa(graph: nx.DiGraph):
    # Initialize all nodes with attributes
    for node in graph.nodes():
        graph.nodes[node]['is_start'] = False
        graph.nodes[node]['is_final'] = False
        graph.nodes[node]['transitions_in'] = []
        graph.nodes[node]['transitions_out'] = []

    # Identify and mark the starting node
    for source, target, attrs in graph.edges(data=True):
        if source == "start":  # Artificial start node
            graph.nodes[target]['is_start'] = True

    # Remove the artificial "start" node after marking the actual start node
    if "start" in graph:
        graph.remove_node("start")

    # Mark final states and collect state transitions
    for node, data in graph.nodes(data=True):
        if 'doublecircle' in data.get('shape', ''):
            graph.nodes[node]['is_final'] = True

    # Add edge attributes and collect transitions
    for source, target, attrs in graph.edges(data=True):
        edge_label = attrs.get('label', '').strip('"')  # Extract the label
        graph.edges[source, target]['label'] = edge_label
        graph.nodes[source]['transitions_out'].append((target, edge_label))
        graph.nodes[target]['transitions_in'].append((source, edge_label))


# Apply preprocessing
preprocess_fsa(fsa)

# Example of accessing data
print("Start Nodes:")
print([n for n, d in fsa.nodes(data=True) if d.get('is_start')])

print("Final Nodes:")
print([n for n, d in fsa.nodes(data=True) if d.get('is_final')])

print("Transitions for q0:")
print(f"Incoming: {fsa.nodes['q0']['transitions_in']}")
print(f"Outgoing: {fsa.nodes['q0']['transitions_out']}")

Start Nodes:
['q0']
Final Nodes:
['q0']
Transitions for q0:
Incoming: [('q4', '0')]
Outgoing: [('q1', '1')]


In [42]:
from pathlib import Path
import ollama
import re
from labelling.chat_helper import Chat
import openai
from dotenv import load_dotenv

base_prompt = """
You are a helpful assistant expert in finite state automata.
Answer the question given by the user using the retrieved data, using plain text only.
Avoid referring to the data directly; there is no need to provide any additional information.
Keep the answer concise and short, and avoid using any additional information not provided.

The system has retrieved the following data:
```
{data}
```

The user has asked the following question:
```
{question}
```
"""

load_dotenv()

# noinspection PyArgumentList
graph_data = nx.node_link_data(fsa, edges="edges")
json_graph = json.dumps(graph_data, indent=2)

for question in questions:
    print(f"Question: {question.content}")
    for service, model in models:
        prepared_question = base_prompt.format(question=question.content, data=json_graph)
        question.prompt = prepared_question

        if service == "openai":
            answer = openai.chat.completions.create(
                model=model,
                messages=[
                    {"role": "user", "content": prepared_question}
                ],

            ).choices[0].message.content

            print(answer)
        elif service == "ollama":
            answer = ollama.chat(model=model, messages=[Chat.new_message(prepared_question)])['message']['content']
        else:
            # raise error
            raise ValueError("Invalid service name")

        cleaned_text = re.sub(r"<think>.*?</think>", "", answer, flags=re.DOTALL).strip()

        question.answers.append(Answer(cleaned_text, model))
        print(f"{model}: {cleaned_text}")

    print("")

Question: Would you say that the automaton is directed?
deepseek-r1:8b: Yes, the automaton is directed because each node has at most one outgoing transition for any given input symbol.
gemma2:9b: Yes.
llama3.1:8b: Yes.
Yes, the automaton is directed.
gpt-4o: Yes, the automaton is directed.
Yes, the automaton is directed.
o3-mini: Yes, the automaton is directed.

Question: Could you tell me whether the automaton is directed?
deepseek-r1:8b: Yes, the automaton is directed, as indicated by the "directed": true property in the data.
gemma2:9b: Yes.
llama3.1:8b: Yes.
Yes, the automaton is directed.
gpt-4o: Yes, the automaton is directed.
Yes, the automaton is directed.
o3-mini: Yes, the automaton is directed.

Question: Can you describe the automaton's input and output symbols?
deepseek-r1:8b: The automaton processes two symbols: **0** and **1**, as these are the only symbols present in transitions_in and transitions_out.

Answer: The automaton accepts inputs consisting of symbols **0** and

In [43]:
class CustomEncoder(json.JSONEncoder):
    def default(self, obj):
        # Check if the object is an instance of Answer or Question
        if isinstance(obj, Answer):
            return {
                "content": obj.content,
                "model": obj.model,
            }
        elif isinstance(obj, Question):
            return {
                "content": obj.content,
                "prompt": obj.prompt,
                "answers": obj.answers,  # Let JSON handle the list of Answer objects
            }
        # For other object types, fall back to the default serialization
        return super().default(obj)


# save all questions and answers to json file.
with Path("questions_answers.json").open("w") as file:
    json.dump(questions, file, cls=CustomEncoder, indent=2)  # Use indent for readability
