Writing `jlox` from [Crafting Interpreters](https://craftinginterpreters.com/) in Python.

In [2]:
import sys
from enum import Enum

In [3]:
# Module-wide state
# Flag that records whether an error has been reported.
hadError = False

class TokenType(Enum):
  """Every kind of token the scanner can emit, numbered from 1 in declaration order."""
  # Single-character tokens.
  LEFT_PAREN = 1
  RIGHT_PAREN = 2
  LEFT_BRACE = 3
  RIGHT_BRACE = 4
  COMMA = 5
  DOT = 6
  MINUS = 7
  PLUS = 8
  SEMICOLON = 9
  SLASH = 10
  STAR = 11

  # One or two character tokens.
  BANG = 12
  BANG_EQUAL = 13
  EQUAL = 14
  EQUAL_EQUAL = 15
  GREATER = 16
  GREATER_EQUAL = 17
  LESS = 18
  LESS_EQUAL = 19

  # Literals.
  IDENTIFIER = 20
  STRING = 21
  NUMBER = 22

  # Keywords.
  AND = 23
  CLASS = 24
  ELSE = 25
  FALSE = 26
  FUN = 27
  FOR = 28
  IF = 29
  NIL = 30
  OR = 31
  PRINT = 32
  RETURN = 33
  SUPER = 34
  THIS = 35
  TRUE = 36
  VAR = 37
  WHILE = 38

  # End-of-input marker.
  EOF = 39

In [4]:
# Reserved words mapped to their token types. Each keyword's spelling is the
# lower-cased name of its TokenType member, so the table is derived from names.
keywords = {
  name.lower(): TokenType[name]
  for name in (
    "AND", "CLASS", "ELSE", "FALSE", "FOR", "FUN", "IF", "NIL",
    "OR", "PRINT", "RETURN", "SUPER", "THIS", "TRUE", "VAR", "WHILE",
  )
}

In [5]:
def main():
  """Entry point: run a script file if one is given, otherwise start the prompt.

  Reads the command line from `sys.argv`. `sys.argv[0]` is the program itself,
  so "one user argument" means `len(sys.argv) == 2`. With more than one user
  argument a usage message is printed and the process exits with status 64.
  """
  if len(sys.argv) > 2:
    print("Usage: pylox [script]")
    sys.exit(64)
  elif len(sys.argv) == 2:
    runFile(sys.argv[1])
  else:
    runPrompt()

In [6]:
def runPrompt():
  """Interactive loop: read a line, run it, repeat.

  The loop ends on an empty line or on end-of-input (Ctrl-D / closed stdin).
  The module-level `hadError` flag is cleared after every line so that one
  bad line does not affect the rest of the session.
  """
  # Without this declaration the assignment below would create a local.
  global hadError
  while True:
    try:
      line = input("pylox> ")
    except EOFError:
      # input() signals end-of-input by raising rather than returning "".
      break
    if len(line) == 0: break
    run(line)
    hadError = False

In [7]:
def runFile(fname):
  """Run the script stored in the file `fname`.

  The whole file is handed to `run` in one piece so that the scanner's line
  counter and multi-line string literals span the entire script. The process
  exits with status 65 if `hadError` is set afterwards.
  """
  # The context manager closes the file even if reading fails.
  with open(fname,'r') as handle:
    source = handle.read()
  run(source)
  if hadError: sys.exit(65)

In [8]:
def run(source):
  """Scan `source` and print each resulting token on its own line."""
  for tok in Scanner(source).scanTokens():
    print(tok)

In [9]:
def error(line,message):
  """Report `message` for source line `line`, with an empty location string."""
  report(line,"",message)
def report(line,where,message):
  """Print an error message and record that an error happened.

  line:    source line number the error refers to.
  where:   extra location text inserted directly after "Error"; pass "" for
           none (include a leading space if text is given).
  message: description of the problem.

  Sets the module-level `hadError` flag to True.
  """
  # Without this declaration the assignment below would create a local.
  global hadError
  # A single formatted string avoids the extra spaces print() puts between
  # comma-separated arguments.
  print(f"[line {line}] Error{where}: {message}")
  hadError = True

In [10]:
class Token: # A dataclass or a named tuple would do the same job
  """A scanned lexeme: its type, source text, literal value and line number."""

  def __init__(self,type,lexeme,literal,line):
    self.type, self.lexeme = type, lexeme
    self.literal, self.line = literal, line

  def toString(self):
    """Return "<type> <lexeme> <literal>"; the line number is not included."""
    return "{} {} {}".format(self.type, self.lexeme, self.literal)

  def __repr__(self):
    # Delegate so that the notebook display matches toString().
    return self.toString()

In [11]:
# Smoke test: the repr shows "<type> <lexeme> <literal>" and leaves out the line number.
Token(1,2,3,4)

1 2 3

In [12]:
class Scanner:
  def __init__(self,source):
    self.source = source
    self.start = 0
    self.current = 0
    self.line = 1
    self.tokens = []
    return

  def scanTokens(self):
    while not self.isAtEnd():
      self.start = self.current
      self.scanToken()
    self.tokens.append(Token(TokenType.EOF,"",None,self.line)) # Add EOF character
    return self.tokens

  def scanToken(self):
    # Use 3.10's match/case statement
    c = self.advance()
    match c:
      case '(': self.addToken(TokenType.LEFT_PAREN)
      case ')': self.addToken(TokenType.RIGHT_PAREN)
      case '{': self.addToken(TokenType.LEFT_BRACE)
      case '}': self.addToken(TokenType.RIGHT_BRACE)
      case ',': self.addToken(TokenType.COMMA)
      case '.': self.addToken(TokenType.DOT)
      case '-': self.addToken(TokenType.MINUS)
      case '+': self.addToken(TokenType.PLUS)
      case ';': self.addToken(TokenType.SEMICOLON)
      case '*': self.addToken(TokenType.STAR)
      case '!':
        token = TokenType.BANG_EQUAL if self.matchNext('=') else TokenType.BANG
        self.addToken(token)
      case '=':
        token = TokenType.EQUAL_EQUAL if self.matchNext('=') else TokenType.EQUAL
        self.addToken(token)
      case '<':
        token = TokenType.LESS_EQUAL if self.matchNext('=') else TokenType.LESS
        self.addToken(token)
      case '>':
        token = TokenType.GREATER_EQUAL if self.matchNext('=') else TokenType.GREATER
        self.addToken(token)
      case '/':
        if self.matchNext('/'):
          while (self.peek() != '\n') and (not self.isAtEnd()): self.advance()
        else:
          self.addToken(TokenType.SLASH)
      case ' ':  pass
      case '\r': pass
      case '\t': pass
      case '\n': self.line += 1
      case '"': self.string()
      case _:
        if self.isDigit(c):
          self.number()
        elif self.isAlpha(c):
          self.identifier()
        else:
          error(self.line,"Unexpected character.")
    return

  def advance(self):
    c = self.source[self.current]
    self.current += 1
    return c

  def peek(self):
    if self.isAtEnd(): return '\0'
    return self.source[self.current]

  def peekNext(self):
    if self.current+1 > len(self.source): return '\0'
    return self.source[self.current+1]

  def matchNext(self,expected):
    if self.isAtEnd(): return False
    if self.source[self.current] != expected: return False
    self.current += 1
    return True

  def addToken(self,atype,literal=None):
    text = self.source[self.start:self.current] # +1?
    self.tokens.append(Token(atype,text,literal,self.line))
    return

  def isAtEnd(self): return self.current >= len(self.source)
  def isDigit(self,c): return '0' <= c <= '9'

  def string(self):
    while (self.peek() != '"') and (not self.isAtEnd()):
      if self.peek() == '\n': self.line += 1
      self.advance()
    if self.isAtEnd():
      error(self.line,"Unterminated string.")
      return

    self.advance()

    # Trim surrounding quotes:
    value = self.source[self.start+1:self.current-1]
    self.addToken(TokenType.STRING,value)
    return

  def number(self):
    while self.isDigit(self.peek()): self.advance()

    # Look for a fractional part
    if self.peek() == '.' and self.isDigit(self.peekNext()):
      self.advance() # consume the "."
      while self.isDigit(self.peek()): self.advance()


    self.addToken(TokenType.NUMBER,float(self.source[self.start:self.current]))
    return

  def identifier(self):
    while self.isAlphaNumeric(self.peek()): self.advance()
    text = self.source[self.start:self.current]
    atype = keywords.get(text,TokenType.IDENTIFIER)
    self.addToken(atype,text)
    return

  def isAlpha(self,c): return ('a' <= c <= 'z') or ('A' <= c <= 'Z') or c=='_'
  def isAlphaNumeric(self,c): return self.isAlpha(c) or self.isDigit(c)





In [13]:
# Exercise an identifier, a string literal, an integer, a decimal number and a trailing '!'.
s = Scanner('Hello "World !" 14 14.1!')
s.scanTokens()

[TokenType.IDENTIFIER Hello Hello,
 TokenType.STRING "World !" World !,
 TokenType.NUMBER 14 14.0,
 TokenType.NUMBER 14.1 14.1,
 TokenType.BANG ! None,
 TokenType.EOF  None]

In [14]:
# Exercise keywords, punctuation and operators. Note that `++` scans as two PLUS tokens.
s = Scanner("for(i=1, i<10, i++) print(i)")
s.scanTokens()

[TokenType.FOR for for,
 TokenType.LEFT_PAREN ( None,
 TokenType.IDENTIFIER i i,
 TokenType.EQUAL = None,
 TokenType.NUMBER 1 1.0,
 TokenType.COMMA , None,
 TokenType.IDENTIFIER i i,
 TokenType.LESS < None,
 TokenType.NUMBER 10 10.0,
 TokenType.COMMA , None,
 TokenType.IDENTIFIER i i,
 TokenType.PLUS + None,
 TokenType.PLUS + None,
 TokenType.RIGHT_PAREN ) None,
 TokenType.PRINT print print,
 TokenType.LEFT_PAREN ( None,
 TokenType.IDENTIFIER i i,
 TokenType.RIGHT_PAREN ) None,
 TokenType.EOF  None]