In [7]:
import re

class TokenType:
    INTEGER = 'INTEGER'
    BOOLEAN = 'BOOLEAN'
    PLUS = 'PLUS'
    MINUS = 'MINUS'
    MULTIPLY = 'MULTIPLY'
    DIVIDE = 'DIVIDE'
    ASSIGN = 'ASSIGN'
    EQUALITY = 'EQUALITY'
    INEQUALITY = 'INEQUALITY'
    IF = 'IF'
    ELSE = 'ELSE'
    PRINT = 'PRINT'
    TRUE = 'TRUE'
    FALSE = 'FALSE'
    IDENTIFIER = 'IDENTIFIER'
    COMMENT = 'COMMENT'
    ERROR = 'ERROR'

class Token:
    def __init__(self, type, lexeme):
        self.type = type
        self.lexeme = lexeme

class Scanner:
    def __init__(self, filename):
        self.filename = filename
        self.tokens = []

    def scan(self):
        with open(self.filename, 'r') as file:
            lines = file.readlines()
            for line_num, line in enumerate(lines):
                line = line.strip()
                tokens = re.findall(r'([a-zA-Z][a-zA-Z0-9]*|==|!=|//|[\+\-\*\/=])|\d+|true|false', line)
                for token in tokens:
                    if token.isdigit():
                        self.tokens.append(Token(TokenType.INTEGER, token))
                    elif token == 'true':
                        self.tokens.append(Token(TokenType.BOOLEAN, token))
                    elif token == 'false':
                        self.tokens.append(Token(TokenType.BOOLEAN, token))
                    elif token == '//':
                        self.tokens.append(Token(TokenType.COMMENT, line[line.index(token):]))
                        break
                    elif token in ('+', '-', '*', '/', '=', '==', '!='):
                        self.tokens.append(Token(token.upper(), token))
                    elif token == 'if':
                        self.tokens.append(Token(TokenType.IF, token))
                    elif token == 'else':
                        self.tokens.append(Token(TokenType.ELSE, token))
                    elif token == 'print':
                        self.tokens.append(Token(TokenType.PRINT, token))
                    elif re.match(r'^[a-zA-Z][a-zA-Z0-9]*$', token):
                        self.tokens.append(Token(TokenType.IDENTIFIER, token))
                    else:
                        self.tokens.append(Token(TokenType.ERROR, f'Lexical error at line {line_num+1}: Invalid token "{token}"'))
                        break

    def get_tokens(self):
        return self.tokens

# Test the Scanner
if __name__ == "__main__":
    scanner = Scanner("minilang_code.mini")
    scanner.scan()
    tokens = scanner.get_tokens()
    for token in tokens:
        print(f'Type: {token.type}, Lexeme: {token.lexeme}')


Type: COMMENT, Lexeme: // MiniLang source code example
Type: COMMENT, Lexeme: // Variable assignments
Type: IDENTIFIER, Lexeme: x
Type: =, Lexeme: =
Type: ERROR, Lexeme: Lexical error at line 4: Invalid token ""
Type: IDENTIFIER, Lexeme: y
Type: =, Lexeme: =
Type: ERROR, Lexeme: Lexical error at line 5: Invalid token ""
Type: COMMENT, Lexeme: // Basic arithmetic operations
Type: IDENTIFIER, Lexeme: sum
Type: =, Lexeme: =
Type: IDENTIFIER, Lexeme: x
Type: +, Lexeme: +
Type: IDENTIFIER, Lexeme: y
Type: IDENTIFIER, Lexeme: difference
Type: =, Lexeme: =
Type: IDENTIFIER, Lexeme: x
Type: -, Lexeme: -
Type: IDENTIFIER, Lexeme: y
Type: IDENTIFIER, Lexeme: product
Type: =, Lexeme: =
Type: IDENTIFIER, Lexeme: x
Type: *, Lexeme: *
Type: IDENTIFIER, Lexeme: y
Type: IDENTIFIER, Lexeme: quotient
Type: =, Lexeme: =
Type: IDENTIFIER, Lexeme: x
Type: /, Lexeme: /
Type: IDENTIFIER, Lexeme: y
Type: COMMENT, Lexeme: // Boolean variables
Type: IDENTIFIER, Lexeme: is
Type: IDENTIFIER, Lexeme: greater
Type: