# Milestone 1 & 2

# In [8]:
import re

class Tokenizer:
    """Split a simple arithmetic expression into a flat list of tokens.

    Recognised tokens are the operators ``+ - * /``, unsigned integer
    literals (emitted as ``int``), the single-letter variables ``x y z``,
    the function names ``sin cos tan sqrt`` and parentheses. Every token
    other than an integer literal is emitted as a ``str``.
    """

    def __init__(self):
        self.operators = {'+', '-', '*', '/'}
        self.functions = {'sin', 'cos', 'tan', 'sqrt'}
        self.variables = {'x', 'y', 'z'}
        # Tokens produced by the most recent tokenize() call; holds the
        # partial result when that call stopped on an error.
        self.tokens = []

    def tokenize(self, expression):
        """Tokenize *expression* and return the list of tokens.

        ``#`` comments (to the end of each line) and all whitespace are
        removed before scanning. Because whitespace is dropped up front,
        ``"1 2"`` is read as the single number ``12``.

        Args:
            expression: The text to tokenize.

        Returns:
            A list of tokens (``int`` for numbers, ``str`` otherwise), or
            ``None`` after printing an error message when the input holds
            an invalid character or unbalanced parentheses.
        """
        # Bind a fresh list on every call: lists returned by earlier calls
        # must not be emptied or overwritten by later ones.
        tokens = self.tokens = []

        # MULTILINE makes `$` match at every line end, so comments are
        # stripped from each line rather than only from the last one.
        expression = re.sub(r'#.*$', '', expression, flags=re.MULTILINE)
        expression = re.sub(r'\s+', '', expression)

        # Longest names first so a function name can never be shadowed by
        # another name that happens to be its prefix.
        function_names = sorted(self.functions, key=len, reverse=True)

        length = len(expression)
        index = 0
        open_brackets = 0
        while index < length:
            char = expression[index]

            if char in self.operators:
                tokens.append(char)
                index += 1
                continue

            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscript digits that int() rejects.
            if char.isdecimal():
                start = index
                while index < length and expression[index].isdecimal():
                    index += 1
                tokens.append(int(expression[start:index]))
                continue

            if char in self.variables:
                tokens.append(char)
                index += 1
                continue

            func = next(
                (name for name in function_names
                 if expression.startswith(name, index)),
                None,
            )
            if func is not None:
                tokens.append(func)
                index += len(func)
                continue

            if char == '(':
                open_brackets += 1
                tokens.append(char)
                index += 1
                continue

            if char == ')':
                # A closing bracket is only valid if one is still open.
                if open_brackets == 0:
                    print("Error: Closing bracket ')' without corresponding open bracket.")
                    return None
                open_brackets -= 1
                tokens.append(char)
                index += 1
                continue

            print("Error: Invalid character '{}'".format(char))
            return None

        # Anything still open at the end of input was never closed.
        if open_brackets > 0:
            print("Error: {} open bracket(s) '(' without corresponding closing bracket ')'.".format(open_brackets))
            return None
        return tokens

# Test the Tokenizer
def test_tokenizer():
    """Prompt for one expression and print its tokens.

    Nothing is printed here when tokenization fails; the tokenizer has
    already reported the error in that case.
    """
    result = Tokenizer().tokenize(input("Enter an arithmetic expression: "))
    if result is None:
        return
    print("Tokens:", result)

# Run the interactive check as soon as this cell/script executes; the call
# blocks on input() until an expression is entered.
test_tokenizer()


# Output: Error: Closing bracket ')' without corresponding open bracket.
