In [1]:
import re

class TokenType:
    KEYWORD = 'KEYWORD'
    IDENTIFIER = 'IDENTIFIER'
    LITERAL_INT = 'LITERAL_INT'
    LITERAL_BOOL = 'LITERAL_BOOL'
    ADD_OP = 'ADD_OP'
    SUB_OP = 'SUB_OP'
    MUL_OP = 'MUL_OP'
    DIV_OP = 'DIV_OP'
    EQ_OP = 'EQ_OP'
    ASSIGN_OP = 'ASSIGN_OP'
    NEQ_OP = 'NEQ_OP'
    ERROR = 'ERROR'
    NEWLINE = 'NEWLINE'

class Token:
    def __init__(self, token_type, lexeme):
        self.token_type = token_type
        self.lexeme = lexeme

class Scanner:
    def __init__(self, source_code):
        self.source_code = source_code
        self.current_index = 0
        self.keywords = {'if', 'else', 'print', 'true', 'false'}

    def scan_tokens(self):
        tokens = []

        while self.current_index < len(self.source_code):
            char = self.source_code[self.current_index]

            if char.isdigit():
                tokens.append(self.scan_literal_int())
            elif char.isalpha():
                tokens.append(self.scan_identifier_or_keyword())
            elif char == '+':
                tokens.append(Token(TokenType.ADD_OP, char))
                self.current_index += 1
            elif char == '-':
                tokens.append(Token(TokenType.SUB_OP, char))
                self.current_index += 1
            elif char == '*':
                tokens.append(Token(TokenType.MUL_OP, char))
                self.current_index += 1
            elif char == '/':
                tokens.append(Token(TokenType.DIV_OP, char))
                self.current_index += 1
            elif char == '=':
                if self.peek_next() == '=':
                    tokens.append(Token(TokenType.EQ_OP, '=='))
                    self.current_index += 2
                else:
                    tokens.append(Token(TokenType.ASSIGN_OP, char))
                    self.current_index += 1
            elif char == '!':
                if self.peek_next() == '=':
                    tokens.append(Token(TokenType.NEQ_OP, '!='))
                    self.current_index += 2
                else:
                    self.report_error("Unexpected character '!'")
                    self.current_index += 1
            elif char.isspace():
                self.current_index += 1
            elif char == '\n':
                tokens.append(Token(TokenType.NEWLINE, char))
                self.current_index += 1
            elif char == '/':
                if self.peek_next() == '/':
                    self.skip_comment()
                else:
                    self.report_error("Unexpected character '/'")
                    self.current_index += 1
            else:
                self.report_error(f"Invalid character '{char}'")
                self.current_index += 1

        return tokens

    def scan_literal_int(self):
        start_index = self.current_index
        while self.current_index < len(self.source_code) and self.source_code[self.current_index].isdigit():
            self.current_index += 1
        lexeme = self.source_code[start_index:self.current_index]
        return Token(TokenType.LITERAL_INT, lexeme)

    def scan_identifier_or_keyword(self):
        start_index = self.current_index
        while self.current_index < len(self.source_code) and (self.source_code[self.current_index].isalnum() or self.source_code[self.current_index] == '_'):
            self.current_index += 1
        lexeme = self.source_code[start_index:self.current_index]
        token_type = TokenType.KEYWORD if lexeme in self.keywords else TokenType.IDENTIFIER
        return Token(token_type, lexeme)

    def peek_next(self):
        if self.current_index + 1 < len(self.source_code):
            return self.source_code[self.current_index + 1]
        else:
            return ''

    def skip_comment(self):
        while self.current_index < len(self.source_code) and self.source_code[self.current_index] != '\n':
            self.current_index += 1

    def report_error(self, message):
        print(f"Lexical error: {message} at position {self.current_index}")

# Example usage:
source_code = """
// Simple addition
a = 5 + 3
print a
"""
scanner = Scanner(source_code)
tokens = scanner.scan_tokens()

for token in tokens:
    print(f"{token.token_type}: {token.lexeme}")


DIV_OP: /
DIV_OP: /
IDENTIFIER: Simple
IDENTIFIER: addition
IDENTIFIER: a
ASSIGN_OP: =
LITERAL_INT: 5
ADD_OP: +
LITERAL_INT: 3
KEYWORD: print
IDENTIFIER: a
