In [1]:
import re  # Import the regular expression module
import sys  # Import the sys module for system-related functionality


In [2]:
class Node:
    def __init__(self, value, children=None):
        # Initialize a Node with a value and children (default to an empty list if not provided)
        self.value = value
        self.children = children or []

    def to_dict(self):
        # Convert the Node and its children to a dictionary representation
        if not self.children:
            return {"type": "NumericLiteral", "value": self.value}
        elif len(self.children) == 1:
            return {"type": "UnaryExpression", "op": self.value, "argument": self.children[0].to_dict()}
        elif len(self.children) == 2:
            return {"type": "BinaryExpression", "op": self.value, "left": self.children[0].to_dict(), "right": self.children[1].to_dict()}

def tokenization(input_str):
    # Split the input string into a list of tokens
    return input_str.split()

def is_parenthesis(ch):
    # Check if the character is a parenthesis
    return ch == '(' or ch == ')'

def is_operator(ch):
    # Check if the character is an operator
    return ch in ['+', '-', '*', '/']

def is_number(s):
    # Check if the string represents a number
    try:
        float(s)
        return True
    except ValueError:
        return False


In [3]:
def parse_expr(tokens):
    # expr -> - expr
    if tokens[0] == '-' and is_operator(tokens[0]):
        tokens.pop(0)  # Consume the -
        if tokens[0] == '-':
            return None
        return Node('-', [parse_expr(tokens)])

    # expr -> ( expr )
    if tokens[0] == '(':
        tokens.pop(0)  # Consume the (
        inner_expr = parse_expr(tokens)
        if not inner_expr or not tokens or tokens[0] != ')':
            return None  # Missing )
        tokens.pop(0)  # Consume the )
        if tokens and is_operator(tokens[0]):
            # expr -> expr op expr
            operator = tokens.pop(0)
            return Node(operator, [inner_expr, parse_expr(tokens)])
        return inner_expr

    # expr -> id
    if is_number(tokens[0]):
        if tokens[0] == '-' and is_operator(tokens[0]):
            tokens.pop(0)  # Consume the -
            if tokens[0] == '-':
                return None
            return Node('-', [parse_expr(tokens)])

        value = tokens.pop(0)  # Consume the id
        if tokens and is_operator(tokens[0]):
            # expr -> expr op expr
            operator = tokens.pop(0)
            return Node(operator, [Node(value), parse_expr(tokens)])
        else:
            return Node(value)

    return None  # Token is not an identifier or a valid expression

def syntax_analyzer(tokens):
    # Check if the tokens form a valid expression and there are no leftover tokens
    return parse_expr(tokens) and not tokens

def print_ast_dict(parsed_dict, indent=0):
    # Print the abstract syntax tree (AST) represented by the parsed dictionary
    for key, value in parsed_dict.items():
        if isinstance(value, dict):
            print(" " * indent + f"{key}:")
            print_ast_dict(value, indent + 2)
        else:
            print(" " * indent + f"{key}: {value}")


In [4]:
def main():
    # Main program loop
    while True:
        # Get user input for a string
        value = input("Enter a String (Type 'exit' to terminate): ")

        # Check if the user wants to exit
        if value.lower() == "exit":
            break

        # Tokenize the input string
        tokens = tokenization(value)

        # Print header for Lexical Analysis
        print("------------------------")
        print("Lexical Analysis : ")
        print("------------------------")

        # Print each token and its type
        for token in tokens:
            print(token, end=' ')
            if is_number(token):
                print("Is a Value")
            elif is_operator(token):
                print("Is an Operator")
            elif is_parenthesis(token):
                print("Is a Parenthesis")
            else:
                print("Not accepted")

        # Parse the expression and check if it is accepted or rejected
        ast = parse_expr(tokens)
        print("String Accepted" if ast else "String Rejected")

        # Print header for Syntax Analysis
        print("------------------------")
        print("Syntax Analysis : ")
        print("------------------------")

        # If the string is accepted, print the parsed abstract syntax tree (AST)
        if ast:
            print("Parsed value:")
            parsed_dict = ast.to_dict()
            print_ast_dict(parsed_dict)

if __name__ == "__main__":
    # Run the main function if the script is executed directly
    main()


------------------------
Lexical Analysis : 
------------------------
( Is a Parenthesis
2 Is a Value
+ Is an Operator
3 Is a Value
) Is a Parenthesis
- Is an Operator
4 Is a Value
* Is an Operator
7 Is a Value
/ Is an Operator
5 Is a Value
String Accepted
------------------------
Syntax Analysis : 
------------------------
Parsed value:
type: BinaryExpression
op: -
left:
  type: BinaryExpression
  op: +
  left:
    type: NumericLiteral
    value: 2
  right:
    type: NumericLiteral
    value: 3
right:
  type: BinaryExpression
  op: *
  left:
    type: NumericLiteral
    value: 4
  right:
    type: BinaryExpression
    op: /
    left:
      type: NumericLiteral
      value: 7
    right:
      type: NumericLiteral
      value: 5
------------------------
Lexical Analysis : 
------------------------


IndexError: list index out of range