In [2]:
class State:
    """A single automaton state with symbol-labelled outgoing transitions.

    Attributes:
        name: Identifier given to the state by the caller.
        transitions: Dict mapping a symbol to the one target state reached
            on that symbol.
        accepting: Whether this state is an accepting state.
    """

    def __init__(self, name, accepting=False):
        """Create a state.

        Args:
            name: Identifier for the state.
            accepting: True if the state is accepting. Defaults to False.
        """
        self.name = name
        self.transitions = {}
        # Store the caller-supplied flag rather than a hard-coded False, so
        # State(name, accepting=True) really produces an accepting state.
        self.accepting = accepting

    def add_transition(self, symbol, target_state):
        """Add a transition from this state to ``target_state`` on ``symbol``.

        If a transition on ``symbol`` already exists, it is replaced.

        Args:
            symbol: The symbol labelling the transition.
            target_state: The state reached on ``symbol``.

        Returns:
            ``target_state``, unchanged.
        """
        self.transitions[symbol] = target_state
        return target_state

In [38]:
import json


class NFA:
    """Builder for an epsilon-NFA kept in a JSON-serialisable dict.

    Attributes:
        states: Dict mapping each state name ("S0", "S1", ...) to a dict that
            holds the key "isTerminatingState" plus one key per symbol; each
            symbol key maps to the list of target state names.
        starting_state: Name of the start state, or None until one is set.
        state_count: Number of states created so far; used to name new states.
    """

    def __init__(self):
        self.states = {}
        self.starting_state = None
        self.state_count = 0

    def new_state(self):
        """Generate a new unique, non-terminating state and return its name."""
        state = f"S{self.state_count}"
        self.state_count += 1
        self.states[state] = {"isTerminatingState": False}
        return state

    def add_transition(self, from_state, symbol, to_state):
        """Add a transition for a given state and symbol.

        Several targets may be registered for the same (state, symbol) pair;
        they are kept in insertion order.
        """
        self.states[from_state].setdefault(symbol, []).append(to_state)

    def parse_or(self, expression):
        """Split ``expression`` on every ``|`` that is outside parentheses.

        Parentheses are kept in the returned parts. Nesting depth is only
        counted, not validated, so unbalanced input is not reported.

        Args:
            expression: The regular-expression string to split.

        Returns:
            List of the alternatives as strings (a single-element list when
            there is no top-level ``|``).
        """
        parts = []
        current = []
        depth = 0

        for char in expression:
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
            elif char == "|" and depth == 0:
                # Split only if outside parentheses
                parts.append("".join(current))
                current = []
                continue

            current.append(char)

        parts.append("".join(current))

        print("Parsed parts:", parts)
        return parts

    def build_sub_nfa(self, expr):
        """Build a linear chain of states that consumes ``expr`` char by char.

        Every character of ``expr`` is used verbatim as a transition symbol.

        Args:
            expr: The string of symbols to chain together.

        Returns:
            Tuple ``(start, end)`` of state names. For an empty ``expr`` both
            names refer to the same freshly created state.
        """
        start = self.new_state()
        current = start
        for char in expr:
            next_state = self.new_state()
            self.add_transition(current, char, next_state)
            current = next_state

        # Return only after the whole expression has been consumed; returning
        # inside the loop would stop after the first character.
        return start, current

    def construct_or(self, expression):
        """Construct an NFA for the OR operation (a|b|c).

        A new start state gets an epsilon transition into each alternative,
        and the end of each alternative gets an epsilon transition into one
        shared terminating state.

        Args:
            expression: The regular-expression string containing ``|``.

        Returns:
            The NFA serialised as a JSON string (see ``to_json``).
        """
        parts = self.parse_or(expression)

        start = self.new_state()  # New start state for the OR
        end = self.new_state()  # New final state for the OR

        self.starting_state = start
        self.states[end]["isTerminatingState"] = True

        for part in parts:
            sub_start, sub_end = self.build_sub_nfa(part)
            self.add_transition(start, "epsilon", sub_start)
            self.add_transition(sub_end, "epsilon", end)

        return self.to_json()

    def to_json(self):
        """Convert NFA to the desired JSON format.

        Returns:
            A JSON string with a "startingState" key followed by one key per
            state, indented by four spaces.
        """
        output = {"startingState": self.starting_state}
        output.update(self.states)
        return json.dumps(output, indent=4)

In [39]:
# Demo: build the NFA for a two-way alternation and print its JSON form.
regex = "a|b"
nfa = NFA()
# construct_or returns the JSON string; it also prints the parsed parts first.
print(nfa.construct_or(regex))

Parsed parts: ['a', 'b']
{
    "startingState": "S0",
    "S0": {
        "isTerminatingState": false,
        "epsilon": [
            "S2",
            "S4"
        ]
    },
    "S1": {
        "isTerminatingState": true
    },
    "S2": {
        "isTerminatingState": false,
        "a": [
            "S3"
        ]
    },
    "S3": {
        "isTerminatingState": false,
        "epsilon": [
            "S1"
        ]
    },
    "S4": {
        "isTerminatingState": false,
        "b": [
            "S5"
        ]
    },
    "S5": {
        "isTerminatingState": false,
        "epsilon": [
            "S1"
        ]
    }
}


In [None]:
def parse_or(expression):
    """Break ``expression`` into its top-level ``|`` alternatives.

    A ``|`` only acts as a separator while the parenthesis nesting level is
    zero; parentheses themselves stay in the returned pieces. The resulting
    list is also printed before being returned.

    Args:
        expression: The regular-expression string to split.

    Returns:
        List of alternative strings, in the order they appear.
    """
    alternatives = []
    buffer = ""
    nesting = 0

    for ch in expression:
        if ch == '|' and nesting == 0:
            # Top-level separator: close the current alternative.
            alternatives.append(buffer)
            buffer = ""
            continue

        if ch == '(':
            nesting += 1
        elif ch == ')':
            nesting -= 1

        buffer += ch

    # Whatever follows the last separator is the final alternative.
    alternatives.append(buffer)

    print("Parsed parts:", alternatives)
    return alternatives

# ✅ Test cases: the trailing comment on each line is the expected return value
print(parse_or("a|b"))           # ['a', 'b']
print(parse_or("abc|de"))        # ['abc', 'de']
print(parse_or("a|(bc)|d"))      # ['a', '(bc)', 'd']
print(parse_or("a|b|c"))         # ['a', 'b', 'c']
print(parse_or("a(b|c)d"))       # ['a(b|c)d']
print(parse_or("ab"))     # ['ab']  (no '|' present, so the input is one part)


Parsed parts: ['a', 'b']
['a', 'b']
Parsed parts: ['abc', 'de']
['abc', 'de']
Parsed parts: ['a', '(bc)', 'd']
['a', '(bc)', 'd']
Parsed parts: ['a', 'b', 'c']
['a', 'b', 'c']
Parsed parts: ['a(b|c)d']
['a(b|c)d']
Parsed parts: ['ab']
['ab']
