In [2]:
class State:
    """A named automaton state holding at most one transition per symbol.

    Attributes are plain instance fields:
      name        -- identifier given by the caller.
      transitions -- dict mapping a symbol to a single target state.
      accepting   -- whether this state is an accepting state.
    """

    def __init__(self, name, accepting=False):
        self.name = name
        self.transitions = {}
        # Store the caller's flag; it used to be hard-coded to False, which
        # silently discarded ``accepting=True``.
        self.accepting = accepting

    def add_transition(self, symbol, target_state):
        """Set the transition on ``symbol`` to ``target_state``.

        An existing transition on the same symbol is replaced.
        Returns ``target_state`` so calls can be chained.
        """
        self.transitions[symbol] = target_state
        return target_state

In [None]:
import json
class NFA:
    """Container for an NFA: its states, start state and accepting states."""

    def __init__(self, states, starting_state=None, accepting_states=None):
        """Store the automaton's components.

        states           -- collection of state objects.
        starting_state   -- the start state, or None if not yet known.
        accepting_states -- list of accepting states; when omitted, a fresh
                            empty list is created for this instance.
        """
        self.states = states
        self.starting_state = starting_state
        # A literal ``[]`` default would be one list shared by every NFA
        # built without this argument, so allocate per instance instead.
        self.accepting_states = [] if accepting_states is None else accepting_states

In [79]:
import json

class NFA:
    """An NFA fragment: a list of states, one start state, accepting states."""

    def __init__(self, states, starting_state=None, accepting_states=None):
        """Record the fragment's states, start state and accepting states.

        When ``accepting_states`` is omitted, each instance gets its own new
        empty list. A ``[]`` default in the signature would be shared across
        all instances and mutated by all of them.
        """
        self.states = states
        self.starting_state = starting_state
        if accepting_states is None:
            accepting_states = []
        self.accepting_states = accepting_states

class State:
    """A named NFA state whose symbols may each lead to several targets."""

    def __init__(self, name, accepting=False):
        self.name = name
        self.accepting = accepting
        # symbol -> list of target states, in insertion order
        self.transitions = {}

    def add_transition(self, symbol, target_state):
        """Append ``target_state`` to the targets reachable on ``symbol``."""
        self.transitions.setdefault(symbol, []).append(target_state)

def new_state(state_counter, accepting=False):
    """Build a State named ``S<state_counter>`` with the given accepting flag."""
    return State(f"S{state_counter}", accepting)

def handle_character(stack, state_counter, symbol):
    """Push a two-state NFA that recognises the single ``symbol``.

    Two states are created, numbered ``state_counter`` and
    ``state_counter + 1``; the first moves to the second (accepting) on
    ``symbol``. Returns the counter advanced by two.
    """
    source = new_state(state_counter)
    sink = new_state(state_counter + 1, accepting=True)
    source.add_transition(symbol, sink)

    fragment = NFA([source, sink], source, [sink])
    stack.append(fragment)
    return state_counter + 2

def handle_concatenation(stack, state_counter):
    """Pop two NFAs and push their concatenation (first, then second).

    Each accepting state of the first operand loses its accepting flag and
    gains an ε-transition to the second operand's start state. No new states
    are created, so ``state_counter`` is returned unchanged.

    The return value is the bare counter, like every other ``handle_*``
    function: ``construct_nfa`` assigns the result straight back to its
    counter, so returning a ``(nfa, counter)`` tuple corrupted the numbering
    of every state created after a concatenation.

    Raises:
        ValueError: if fewer than two NFAs are on the stack.
    """
    if len(stack) < 2:
        raise ValueError("Invalid postfix expression for concatenation")
    # Postfix order: the right-hand operand is on top of the stack.
    nfa2 = stack.pop()
    nfa1 = stack.pop()
    for accepting_state in nfa1.accepting_states:
        accepting_state.accepting = False
        accepting_state.add_transition('ε', nfa2.starting_state)
    combined_nfa = NFA(nfa1.states + nfa2.states, nfa1.starting_state, nfa2.accepting_states)
    stack.append(combined_nfa)
    return state_counter

def handle_or(stack, state_counter):
    """Pop two NFAs and push their alternation.

    A new start state gets ε-transitions to both operands' start states
    (first-pushed operand first). Every former accepting state is demoted and
    gets an ε-transition to a new accepting end state. Returns the counter
    advanced by two.
    """
    right = stack.pop()
    left = stack.pop()

    entry = new_state(state_counter)
    exit_state = new_state(state_counter + 1, accepting=True)

    for branch in (left, right):
        entry.add_transition('ε', branch.starting_state)

    for old_accept in left.accepting_states + right.accepting_states:
        old_accept.accepting = False
        old_accept.add_transition('ε', exit_state)

    all_states = left.states + right.states + [entry, exit_state]
    stack.append(NFA(all_states, entry, [exit_state]))
    return state_counter + 2

def handle_kleene_star(stack, state_counter):
    """Pop one NFA and push its zero-or-more repetition.

    The new start state can enter the operand or skip straight to the new
    accepting end state. Each former accepting state is demoted and can
    either leave to the end state or loop back to the operand's start.
    Returns the counter advanced by two.
    """
    inner = stack.pop()
    entry = new_state(state_counter)
    exit_state = new_state(state_counter + 1, accepting=True)

    # Order matters for the serialised output: enter first, then bypass.
    for target in (inner.starting_state, exit_state):
        entry.add_transition('ε', target)

    for old_accept in inner.accepting_states:
        old_accept.accepting = False
        # Leave first, then loop back.
        for target in (exit_state, inner.starting_state):
            old_accept.add_transition('ε', target)

    stack.append(NFA(inner.states + [entry, exit_state], entry, [exit_state]))
    return state_counter + 2

def handle_one_or_more(stack, state_counter):
    """Pop one NFA and push its one-or-more repetition.

    Same wiring as the Kleene star, except the new start state has no
    ε-transition to the end state, so the operand must be traversed at least
    once. Returns the counter advanced by two.
    """
    inner = stack.pop()
    entry = new_state(state_counter)
    exit_state = new_state(state_counter + 1, accepting=True)

    entry.add_transition('ε', inner.starting_state)

    for old_accept in inner.accepting_states:
        old_accept.accepting = False
        # Leave first, then loop back for another repetition.
        for target in (exit_state, inner.starting_state):
            old_accept.add_transition('ε', target)

    stack.append(NFA(inner.states + [entry, exit_state], entry, [exit_state]))
    return state_counter + 2

def handle_zero_or_one(stack, state_counter):
    """Pop one NFA and push its optional form.

    The new start state can enter the operand or skip to the new accepting
    end state. Each former accepting state is demoted and gets an
    ε-transition to the end state; there is no loop back. Returns the counter
    advanced by two.
    """
    inner = stack.pop()
    entry = new_state(state_counter)
    exit_state = new_state(state_counter + 1, accepting=True)

    # Order matters for the serialised output: enter first, then bypass.
    for target in (inner.starting_state, exit_state):
        entry.add_transition('ε', target)

    for old_accept in inner.accepting_states:
        old_accept.accepting = False
        old_accept.add_transition('ε', exit_state)

    stack.append(NFA(inner.states + [entry, exit_state], entry, [exit_state]))
    return state_counter + 2

def construct_nfa(postfix_regex):
    """Build an NFA from a postfix regular expression.

    Alphanumeric symbols become single-character fragments. The operators
    ``.`` ``|`` ``*`` ``+`` ``?`` are dispatched to their ``handle_*``
    function, and whatever that function returns becomes the next state
    counter. Returns the fragment left on top of the stack.

    Raises:
        ValueError: if a symbol is neither alphanumeric nor a known operator.
    """
    operator_handlers = {
        '.': handle_concatenation,
        '|': handle_or,
        '*': handle_kleene_star,
        '+': handle_one_or_more,
        '?': handle_zero_or_one,
    }

    fragments = []
    next_id = 0

    for symbol in postfix_regex:
        if symbol.isalnum():
            next_id = handle_character(fragments, next_id, symbol)
            continue

        handler = operator_handlers.get(symbol)
        if handler is None:
            raise ValueError(f"Invalid symbol in postfix regex: {symbol}")
        next_id = handler(fragments, next_id)

    return fragments.pop()

def nfa_to_json(nfa):
    """Serialise ``nfa`` to a JSON string.

    The top-level object has a ``startingState`` entry plus one entry per
    state, keyed by state name. Each state entry maps a symbol to the list of
    target state names; accepting states also carry
    ``"isTerminatingState": true``. Non-ASCII symbols such as ε are written
    unescaped.
    """
    serialised = {"startingState": nfa.starting_state.name}

    for state in nfa.states:
        entry = {}
        for symbol, targets in state.transitions.items():
            entry[symbol] = [target.name for target in targets]
        if state.accepting:
            entry["isTerminatingState"] = True
        serialised[state.name] = entry

    return json.dumps(serialised, indent=6, ensure_ascii=False)

# Example usage
# NOTE: this first assignment is dead code; it is overwritten on the next
# line before being used.
postfix_regex = "ab|c|d|e|f|g|?o+."
# Postfix for: 'a' concatenated with one-or-more 'b'.
postfix_regex="ab+."
nfa = construct_nfa(postfix_regex)
print(nfa_to_json(nfa))

{
      "startingState": "S0",
      "S0": {
            "a": [
                  "S1"
            ]
      },
      "S1": {
            "ε": [
                  "S4"
            ]
      },
      "S2": {
            "b": [
                  "S3"
            ]
      },
      "S3": {
            "ε": [
                  "S5",
                  "S2"
            ]
      },
      "S4": {
            "ε": [
                  "S2"
            ]
      },
      "S5": {
            "isTerminatingState": true
      }
}


In [39]:
# NOTE(review): this cell calls NFA() with no arguments and an
# NFA.construct_or method. Neither matches the NFA class definitions visible
# in this file (their constructor requires `states` and they define no
# construct_or). Presumably it was run against an earlier version of the
# class; confirm before re-running.
regex = "a|b"
nfa = NFA()
print(nfa.construct_or(regex))

Parsed parts: ['a', 'b']
{
    "startingState": "S0",
    "S0": {
        "isTerminatingState": false,
        "epsilon": [
            "S2",
            "S4"
        ]
    },
    "S1": {
        "isTerminatingState": true
    },
    "S2": {
        "isTerminatingState": false,
        "a": [
            "S3"
        ]
    },
    "S3": {
        "isTerminatingState": false,
        "epsilon": [
            "S1"
        ]
    },
    "S4": {
        "isTerminatingState": false,
        "b": [
            "S5"
        ]
    },
    "S5": {
        "isTerminatingState": false,
        "epsilon": [
            "S1"
        ]
    }
}


In [None]:
def parse_or(expression):
    """Split ``expression`` on every ``|`` that is not inside parentheses.

    Parentheses are kept in the returned pieces. The result is printed and
    then returned as a list of strings; an expression with no top-level ``|``
    yields a single-element list.
    """
    pieces = []
    buffer = []
    nesting = 0

    for ch in expression:
        if ch == '|' and nesting == 0:
            # Top-level separator: close the current piece and drop the '|'.
            pieces.append(''.join(buffer))
            buffer = []
            continue

        if ch == '(':
            nesting += 1
        elif ch == ')':
            nesting -= 1
        buffer.append(ch)

    pieces.append(''.join(buffer))

    print("Parsed parts:", pieces)
    return pieces

# ✅ Test cases
print(parse_or("a|b"))           # ['a', 'b']
print(parse_or("abc|de"))        # ['abc', 'de']
print(parse_or("a|(bc)|d"))      # ['a', '(bc)', 'd']
print(parse_or("a|b|c"))         # ['a', 'b', 'c']
print(parse_or("a(b|c)d"))       # ['a(b|c)d']
print(parse_or("ab"))     # ['ab']


Parsed parts: ['a', 'b']
['a', 'b']
Parsed parts: ['abc', 'de']
['abc', 'de']
Parsed parts: ['a', '(bc)', 'd']
['a', '(bc)', 'd']
Parsed parts: ['a', 'b', 'c']
['a', 'b', 'c']
Parsed parts: ['a(b|c)d']
['a(b|c)d']
Parsed parts: ['ab']
['ab']


# Shunting Yard Algorithm

Convert an infix regular expression to postfix notation.


In [77]:
import re

def _expand_class(body):
    """Return the individual characters named by a bracket-class body.

    ``x-y`` contributes ``x`` followed by every character after it up to and
    including ``y`` (just ``x`` when the range is reversed). A ``-`` that is
    not between two characters is kept as a literal. A literal ``|`` is
    skipped, because the caller uses ``|`` as the separator.
    """
    members = []
    k = 0
    while k < len(body):
        if body[k] == '|':
            k += 1
        elif k + 2 < len(body) and body[k + 1] == '-':
            low, high = body[k], body[k + 2]
            members.append(low)
            members.extend(chr(code) for code in range(ord(low) + 1, ord(high) + 1))
            k += 3
        else:
            members.append(body[k])
            k += 1
    return members


def replace_square(regex):
    """Rewrite every ``[...]`` character class as a parenthesised alternation.

    For example ``[a-c]x[01]`` becomes ``(a|b|c)x(0|1)``. Text outside
    brackets is copied through unchanged. ``regex`` may be a string or any
    iterable of string fragments.

    The class is scanned and expanded rather than spliced in place by index.
    This avoids shadowing the builtin ``str`` and removes the ``IndexError``
    on a trailing ``-`` or a missing ``]``.

    Raises:
        ValueError: if a ``[`` has no matching ``]``.
    """
    pattern = ''.join(regex)
    pieces = []
    pos = 0
    while pos < len(pattern):
        ch = pattern[pos]
        if ch != '[':
            pieces.append(ch)
            pos += 1
            continue

        closing = pattern.find(']', pos + 1)
        if closing == -1:
            raise ValueError("Unterminated character class: missing ']'")
        members = _expand_class(pattern[pos + 1:closing])
        pieces.append('(' + '|'.join(members) + ')')
        pos = closing + 1
    return ''.join(pieces)


def replace_dot(regex):
    """Replace every ``.`` in ``regex`` with the class ``[a-zA-Z0-9]``.

    ``regex`` may be a string or an iterable of string fragments; the result
    is always a single string.
    """
    joined = ''.join(regex)
    return joined.replace('.', "[a-zA-Z0-9]")


def add_concat(regex):
    """Insert an explicit ``·`` between adjacent operands.

    A ``·`` is emitted before a character when the previous character can end
    an operand (alphanumeric, ``*``, ``+``, ``)`` or ``?``) and the current
    one can start an operand (alphanumeric or ``(``).
    """
    operand_enders = ('*', '+', ')', '?')
    pieces = []
    last = None

    for ch in regex:
        ends_operand = bool(last) and (last.isalnum() or last in operand_enders)
        starts_operand = ch.isalnum() or ch == '('
        if ends_operand and starts_operand:
            pieces.append('·')
        pieces.append(ch)
        last = ch

    return ''.join(pieces)

def preprocessing(regex):
    """Run the preprocessing passes on ``regex``, printing it after each one.

    Passes, in order: ``replace_dot``, then ``replace_square``. The
    ``add_concat`` pass is currently disabled, but its progress message is
    still printed, so the third printout repeats the second. Returns the
    preprocessed regex.
    """
    regex = replace_dot(regex)
    print("\nRegex after replacing dot  : ", regex, sep="\n")

    regex = replace_square(regex)
    print("\nRegex after replacing square brackets  : ", regex, sep="\n")

    # regex = add_concat(regex)
    print("\nRegex after adding concatination  : ", regex, sep="\n")

    return regex



def infix_to_postfix(infix):
    """Convert an infix regex to postfix with the shunting-yard algorithm.

    The input is first validated with ``re.compile``. An explicit ``.``
    concatenation operator is then inserted between adjacent operands before
    the conversion. Recognised operators, highest precedence first, are
    ``*`` ``+`` ``?`` ``.`` ``|``.

    Returns the postfix string. On an invalid regex or an unclosed ``(`` it
    prints a message and returns ``None``.
    """
    try:
        re.compile(infix)
    except re.error:
        print("Invalid regex")
        return None

    # Pass 1: make concatenation explicit. A '.' goes after a token unless the
    # token cannot end an operand or the next token cannot start one.
    last = len(infix) - 1
    tokens = []
    for pos, token in enumerate(infix):
        tokens.append(token)
        if pos == last:
            continue
        if token in "(|." or infix[pos + 1] in ")*+?|)":
            continue
        tokens.append('.')

    rank = {'*': 5, '+': 4, '?': 3, '.': 2, '|': 1, '(': 0}

    # Pass 2: shunting yard.
    operators = []
    postfix = []
    for token in tokens:
        if token == '(':
            operators.append(token)
        elif token == ")":
            # Flush operators down to the matching '(' and discard it.
            postfix.extend(iter(operators.pop, "("))
        elif token in rank:
            # Emit stacked operators of equal or higher precedence first;
            # '(' has rank 0, so it always stops the unwinding.
            while operators and rank[token] <= rank[operators[-1]]:
                postfix.append(operators.pop())
            operators.append(token)
        else:
            postfix.append(token)

    # Drain what is left, top of stack first.
    for leftover in reversed(operators):
        if leftover == '(':
            print("Unbalanced parentheses")
            return None
        postfix.append(leftover)

    return ''.join(postfix)


# Demo: preprocess a regex containing a bracket range, then print its
# postfix form.
infix = "[a-g]?o+"
# infix="a+b"
preprocessed_infix = preprocessing(infix)
print("\nPOSTFIX :")
print(infix_to_postfix(preprocessed_infix))



Regex after replacing dot  : 
[a-g]?o+

Regex after replacing square brackets  : 
(a|b|c|d|e|f|g)?o+

Regex after adding concatination  : 
(a|b|c|d|e|f|g)?o+

POSTFIX :
ab|c|d|e|f|g|?o+.


a+b.
