In [2]:
import re

class TokenType:
    INTEGER = 'INTEGER'
    BOOLEAN = 'BOOLEAN'
    OPERATOR = 'OPERATOR'
    ASSIGNMENT = 'ASSIGNMENT'
    EQUALITY = 'EQUALITY'
    INEQUALITY = 'INEQUALITY'
    KEYWORD = 'KEYWORD'
    IDENTIFIER = 'IDENTIFIER'
    PRINT = 'PRINT'
    TRUE = 'TRUE'
    FALSE = 'FALSE'
    COMMENT = 'COMMENT'
    ERROR = 'ERROR'

class Token:
    def __init__(self, token_type, lexeme):
        self.token_type = token_type
        self.lexeme = lexeme

def scan(filename):
    tokens = []
    keywords = {'if', 'else', 'print', 'true', 'false'}
    operators = {'+', '-', '*', '/', '=', '==', '!='}
    with open(filename, 'r') as file:
        lines = file.readlines()
        for line_num, line in enumerate(lines):
            line = line.strip()
            if line.startswith('//'):
                tokens.append(Token(TokenType.COMMENT, line))
                continue
            tokens.extend(scan_line(line, line_num + 1, keywords, operators))
    return tokens

def scan_line(line, line_num, keywords, operators):
    tokens = []
    i = 0
    while i < len(line):
        if line[i].isspace():
            i += 1
            continue
        elif line[i].isdigit():
            token = scan_integer(line, i, line_num)
            tokens.append(token)
            i = token_end_position(token, i)
        elif line[i].isalpha() or line[i] == '_':
            token = scan_identifier_or_keyword(line, i, line_num, keywords)
            tokens.append(token)
            i = token_end_position(token, i)
        elif line[i] in operators:
            tokens.append(Token(TokenType.OPERATOR, line[i]))
            i += 1
        elif line[i] == '=':
            if i + 1 < len(line) and line[i + 1] == '=':
                tokens.append(Token(TokenType.EQUALITY, '=='))
                i += 2
            else:
                tokens.append(Token(TokenType.ASSIGNMENT, line[i]))
                i += 1
        elif line[i] == '!':
            if i + 1 < len(line) and line[i + 1] == '=':
                tokens.append(Token(TokenType.INEQUALITY, '!='))
                i += 2
            else:
                tokens.append(Token(TokenType.ERROR, f"Invalid symbol '!' at line {line_num}"))
                i += 1
        else:
            tokens.append(Token(TokenType.ERROR, f"Invalid symbol '{line[i]}' at line {line_num}"))
            i += 1
    return tokens

def scan_integer(line, start, line_num):
    integer_pattern = re.compile(r'\d+')
    match = integer_pattern.match(line, start)
    lexeme = match.group()
    return Token(TokenType.INTEGER, lexeme)

def scan_identifier_or_keyword(line, start, line_num, keywords):
    identifier_pattern = re.compile(r'[a-zA-Z_]\w*')
    match = identifier_pattern.match(line, start)
    lexeme = match.group()
    if lexeme in keywords:
        return Token(TokenType.KEYWORD, lexeme)
    elif lexeme == 'print':
        return Token(TokenType.PRINT, lexeme)
    elif lexeme == 'true':
        return Token(TokenType.TRUE, lexeme)
    elif lexeme == 'false':
        return Token(TokenType.FALSE, lexeme)
    else:
        return Token(TokenType.IDENTIFIER, lexeme)

def token_end_position(token, start):
    return start + len(token.lexeme)

# Test the scanner
filename = 'test.minilang'
tokens = scan(filename)
for token in tokens:
    print(f"{token.token_type}: {token.lexeme}")

IDENTIFIER: a
OPERATOR: =
INTEGER: 10
IDENTIFIER: b
OPERATOR: =
INTEGER: 20
KEYWORD: if
IDENTIFIER: a
OPERATOR: =
OPERATOR: =
IDENTIFIER: b
KEYWORD: print
KEYWORD: true
KEYWORD: else
KEYWORD: print
KEYWORD: false
