In [1]:
import re

# Token patterns for a small C-like language, as (token name, regex source)
# pairs.  ORDER MATTERS: lexical_analyzer tries the patterns top to bottom and
# takes the first one that matches at the current offset, so KEYWORD must come
# before IDENTIFIER and the catch-all UNKNOWN must stay last.
TOKEN_PATTERNS = [
    ("KEYWORD", r"\b(if|else|while|for|return|int|float|char|void)\b"),
    ("IDENTIFIER", r"\b[a-zA-Z_][a-zA-Z0-9_]*\b"),
    ("NUMBER", r"\b\d+(\.\d+)?\b"),
    # Operators are spelled out explicitly, longest alternatives first, rather
    # than matched as "any run of operator characters".  A greedy character
    # run fuses adjacent but distinct operators ("x=-1" would give "=-",
    # "a=!b" would give "=!"); the alternation keeps "==", "<=", "+=", "++",
    # "->", "<<" etc. as single tokens while splitting everything else.
    (
        "OPERATOR",
        r"<<=|>>=|\+\+|--|->|<<|>>|&&|\|\||"
        r"[+\-*/%&|^=<>!]=|"
        r"[+\-*/%&|^~=<>!]",
    ),
    ("SEPARATOR", r"[(),;{}\[\]]"),
    ("WHITESPACE", r"\s+"),
    # Catch-all: any single character (other than a newline, which WHITESPACE
    # already consumes) that no earlier pattern recognised.
    ("UNKNOWN", r"."),
]

# Compile each pattern once at import time, preserving the priority order.
TOKEN_REGEX = [(name, re.compile(pattern)) for name, pattern in TOKEN_PATTERNS]

def lexical_analyzer(code):
    """Split ``code`` into a list of ``(token_name, lexeme)`` tuples.

    Scanning starts at offset 0.  At each offset the compiled patterns in
    ``TOKEN_REGEX`` are tried in list order; the first one that matches there
    wins, its text is consumed, and scanning resumes immediately after it.
    Matches named ``"WHITESPACE"`` are consumed but not reported.

    Raises:
        ValueError: if no pattern in ``TOKEN_REGEX`` matches at some offset.
    """
    found = []
    cursor = 0
    end = len(code)

    while cursor < end:
        for kind, pattern in TOKEN_REGEX:
            hit = pattern.match(code, cursor)
            if not hit:
                continue  # this pattern does not start here; try the next one
            if kind != "WHITESPACE":
                found.append((kind, hit.group(0)))
            cursor = hit.end()
            break
        else:
            # The loop ran out of patterns without a single match.
            raise ValueError(f"Unexpected token at position {cursor}: {code[cursor]}")

    return found

# Demo run: tokenize a short C-like snippet and print one token per line.
if __name__ == "__main__":
    sample_code = "int x = 10; if (x > 5) x = x + 1;"
    # One call emits the heading, the snippet, and the blank-line-prefixed
    # "Tokens:" heading, each on its own line.
    print("Source Code:", sample_code, "\nTokens:", sep="\n")
    for name_and_lexeme in lexical_analyzer(sample_code):
        print(name_and_lexeme)


Source Code:
int x = 10; if (x > 5) x = x + 1;

Tokens:
('KEYWORD', 'int')
('IDENTIFIER', 'x')
('OPERATOR', '=')
('NUMBER', '10')
('SEPARATOR', ';')
('KEYWORD', 'if')
('SEPARATOR', '(')
('IDENTIFIER', 'x')
('OPERATOR', '>')
('NUMBER', '5')
('SEPARATOR', ')')
('IDENTIFIER', 'x')
('OPERATOR', '=')
('IDENTIFIER', 'x')
('OPERATOR', '+')
('NUMBER', '1')
('SEPARATOR', ';')
