Skip to content

Commit

Permalink
Add Clean lexer (#1305)
Browse files Browse the repository at this point in the history
This commit adds a lexer for the Clean language.
  • Loading branch information
camilstaps authored and pyrmont committed Aug 22, 2019
1 parent 8533fea commit 6ad8346
Show file tree
Hide file tree
Showing 4 changed files with 308 additions and 0 deletions.
6 changes: 6 additions & 0 deletions lib/rouge/demos/clean
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
delete :: !a !.(Set a) -> Set a | < a
delete x Tip = Tip
delete x (Bin _ y l r)
| x < y = balanceR y (delete x l) r
| x > y = balanceL y l (delete x r)
| otherwise = glue l r
156 changes: 156 additions & 0 deletions lib/rouge/lexers/clean.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
module Lexers
class Clean < RegexLexer
title "Clean"
desc "The Clean programming language (clean.cs.ru.nl)"

tag 'clean'
filenames '*.dcl', '*.icl'

def self.keywords
@keywords ||= Set.new %w(
if otherwise
let in
with where
case of
infix infixl infixr
class instance
generic derive
special
implementation definition system module
from import qualified as
dynamic
code inline foreign export ccall stdcall
)
end

# These are literal patterns common to the ABC intermediate language and
# Clean. Clean has more extensive literal patterns (see :basic below).
state :common_literals do
rule %r/'(?:[^'\\]|\\(?:x[0-9a-fA-F]+|\d+|.))'/, Str::Char

rule %r/[+~-]?\d+\.\d+(?:E[+-]?\d+)?\b/, Num::Float
rule %r/[+~-]?\d+E[+-]?\d+\b/, Num::Float
rule %r/[+~-]?\d+/, Num::Integer

rule %r/"/, Str::Double, :string
end

state :basic do
rule %r/\s+/m, Text::Whitespace

rule %r/\/\/\*.*/, Comment::Doc
rule %r/\/\/.*/, Comment::Single
rule %r/\/\*\*/, Comment::Doc, :comment_doc
rule %r/\/\*/, Comment::Multiline, :comment

rule %r/[+~-]?0[0-7]+/, Num::Oct
rule %r/[+~-]?0x[0-9a-fA-F]+/, Num::Hex
mixin :common_literals
rule %r/(\[)(\s*)(')(?=.*?'\])/ do
groups Punctuation, Text::Whitespace, Str::Single, Punctuation
push :charlist
end
end

# nested commenting
state :comment_doc do
rule %r/\*\//, Comment::Doc, :pop!
rule %r/\/\/.*/, Comment::Doc # Singleline comments in multiline comments are skipped
rule %r/\/\*/, Comment::Doc, :comment
rule %r/[^*\/]+/, Comment::Doc
rule %r/[*\/]/, Comment::Doc
end

# This is the same as the above, but with Multiline instead of Doc
state :comment do
rule %r/\*\//, Comment::Multiline, :pop!
rule %r/\/\/.*/, Comment::Multiline # Singleline comments in multiline comments are skipped
rule %r/\/\*/, Comment::Multiline, :comment
rule %r/[^*\/]+/, Comment::Multiline
rule %r/[*\/]/, Comment::Multiline
end

state :root do
mixin :basic

rule %r/code(\s+inline)?\s*{/, Comment::Preproc, :abc

rule %r/_*[a-z][\w_`]*/ do |m|
if self.class.keywords.include?(m[0])
token Keyword
else
token Name
end
end

rule %r/_*[A-Z][\w_`]*/ do |m|
if m[0]=='True' || m[0]=='False'
token Keyword::Constant
else
token Keyword::Type
end
end

rule %r/[^\w_\s`]/, Punctuation
rule %r/_\b/, Punctuation
end

state :escapes do
rule %r/\\x[0-9a-fA-F]{1,2}/i, Str::Escape
rule %r/\\d\d{0,3}/i, Str::Escape
rule %r/\\0[0-7]{0,3}/, Str::Escape
rule %r/\\[0-7]{1,3}/, Str::Escape
rule %r/\\[nrfbtv\\"']/, Str::Escape
end

state :string do
rule %r/"/, Str::Double, :pop!
mixin :escapes
rule %r/[^\\"]+/, Str::Double
end

state :charlist do
rule %r/(')(\])/ do
groups Str::Single, Punctuation
pop!
end
mixin :escapes
rule %r/[^\\']/, Str::Single
end

state :abc_basic do
rule %r/\s+/, Text::Whitespace
rule %r/\|.*/, Comment::Single
mixin :common_literals
end

# The ABC intermediate language can be included, similar to C's inline
# assembly. For some information about ABC, see:
# https://en.wikipedia.org/wiki/Clean_(programming_language)#The_ABC-Machine
state :abc do
mixin :abc_basic

rule %r/}/, Comment::Preproc, :pop!
rule %r/\.\w*/, Keyword, :abc_rest_of_line
rule %r/[\w_]+/, Name::Builtin, :abc_rest_of_line
end

state :abc_rest_of_line do
rule %r/\n/, Text::Whitespace, :pop!
rule %r/}/ do
token Comment::Preproc
pop!
pop!
end

mixin :abc_basic

rule %r/\S+/, Name
end
end
end
end
15 changes: 15 additions & 0 deletions spec/lexers/clean_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

describe Rouge::Lexers::Clean do
let(:subject) { Rouge::Lexers::Clean.new }

describe 'guessing' do
include Support::Guessing

it 'guesses by filename' do
assert_guess :filename => 'foo.dcl'
assert_guess :filename => 'foo.icl'
end
end
end
131 changes: 131 additions & 0 deletions spec/visual/samples/clean
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
implementation module Data.GenCompress

// Samples for testing were taken from Clean Platform ...
// https://gitlab.science.ru.nl/clean-and-itasks/clean-platform
// ... but cut down for brevity.

// -- The following are some additions to test features not included in the sample:

literals =
( 'a', '\x61', '\0141', '\n'
, 17.42E3 // lower-case e or no digits before . are invalid!
, 17.42
, 0x11, 052, 3, 0x3ff
, ['charlist sugar'], [ 'may start with spaces']
, "escapes: \x11 \X11 \052 \d17 \D42 \n \" \' ' \\ "
, ['in a charlist: \x11 \X11 \052 \d17 \D42 \n \" \' " \\ ']
, "unicode: ° →"
)

/* Clean comments can be nested: /* like so */
* Furthermore, // in multiline comments ignore closing */ on the remainder
* Hence this is still part of the comment, and it only closes here: */

/**
* By convention, documentation is in comments starting with two asterisks ...
*/

//* ... or starting with an asterisk in the case of singleline documentation.

// -- end additions

import StdGeneric, StdEnv
from Data.Maybe import :: Maybe(..)
import Data._Array, Data.Func

//--------------------------------------------------
// uncompressor monad

ret :: !.a !u:CompressSt -> (!Maybe .a,!u:CompressSt)
ret a st = (Just a, st)
(>>=) infixl 5
(>>=) pa pb = bind pa pb
where
bind pa pb st
#! (ma, st) = pa st
= case ma of
Nothing -> (Nothing, st)
Just x -> pb x st

//--------------------------------------------------

:: BitVector :== {#Int}
:: BitPos :== Int

:: CompressSt = { cs_pos :: !Int, cs_bits :: !.{#Int} }
mkCompressSt arr = { cs_pos = 0, cs_bits = arr}


:: Compress a :== a -> *CompressSt -> *CompressSt
:: Uncompress a :== .CompressSt -> .(.(Maybe a), .CompressSt)

compressBool :: !Bool !*CompressSt -> *CompressSt
compressBool bit {cs_pos = pos, cs_bits = bits}
#! s = size bits
#! int_pos = pos >> (IF_INT_64_OR_32 6 5)
#! bit_pos = pos bitand (IF_INT_64_OR_32 63 31)
| s == int_pos
= abort "reallocate"
#! int = bits.[int_pos]
#! bit_mask = 1 << bit_pos
#! new_int = if bit (int bitor bit_mask) (int bitand (bitnot bit_mask))
= {cs_pos = inc pos, cs_bits = {bits & [int_pos] = new_int}}

realToBinary32 :: !Real -> (!Int,!Int);
realToBinary32 _ = code {
.d 0 1 r
pop_b 0 | don't do anything
.o 0 2 ii
};
// Alternatively, with inline code:
realToBinary32 _ = code inline {
no_op
};

uncompressArray :: (u:CompressSt -> ((Maybe v:a),w:CompressSt)) -> .(x:CompressSt -> ((Maybe y:(b v:a)),z:CompressSt)) | Array b a, [x w <= u,y <= v,x w <= z]
uncompressArray f
= uncompressInt >>= \s -> uncompress_array 0 s (unsafeCreateArray s)
where
uncompress_array i s arr
| i == s
= ret arr
= f >>= \x -> uncompress_array (inc i) s {arr & [i] = x}

compressList :: (a *CompressSt -> *CompressSt) ![a] -> *CompressSt -> *CompressSt
compressList c xs = compressArray c (list_to_arr xs)
where
list_to_arr :: [b] -> {b} | Array {} b
list_to_arr xs = {x \\ x <- xs}

generic gCompress a :: !a -> *CompressSt -> *CompressSt
gCompress{|Int|} x = compressInt x
gCompress{|EITHER|} cl cr (LEFT x) = cl x o compressBool False
gCompress{|{}|} c xs = compressArray c xs
gCompress{|{!}|} c xs = compressArray c xs
gCompress{|[]|} c xs = compressList c xs


generic gCompressedSize a :: a -> Int
gCompressedSize{|Int|} _ = IF_INT_64_OR_32 64 32
gCompressedSize{|PAIR|} cx cy (PAIR x y) = cx x + cy y
gCompressedSize{|[]|} c xs = foldSt (\x st -> c x + st) xs (IF_INT_64_OR_32 64 32)
gCompressedSize{|{}|} c xs = foldSt (\x st -> c x + st) [x\\x<-:xs] (IF_INT_64_OR_32 64 32)
gCompressedSize{|{!}|} c xs = foldSt (\x st -> c x + st) [x\\x<-:xs] (IF_INT_64_OR_32 64 32)

generic gUncompress a :: (u:CompressSt -> ((Maybe a),u:CompressSt))
gUncompress{|PAIR|} fx fy = fx >>= \x -> fy >>= \y -> ret (PAIR x y)
gUncompress{|CONS|} f = f >>= ret o CONS
gUncompress{|FIELD|} f = f >>= \x -> ret $ FIELD x
gUncompress{|OBJECT|} f = f >>= \x -> ret $ OBJECT x

//-------------------------------------------------------------------------------------

uncompress :: (BitVector -> Maybe a) | gUncompress{|*|} a
uncompress = fst o gUncompress{|*|} o mkCompressSt

compress :: !a -> BitVector | gCompressedSize{|*|} a & gCompress{|*|} a
compress x
#! compressed_size = gCompressedSize{|*|} x
#! arr_size = (compressed_size + (IF_INT_64_OR_32 63 31)) >> (IF_INT_64_OR_32 6 5)
#! bits = createArray arr_size 0
= (gCompress{|*|} x (mkCompressSt bits)).cs_bits

0 comments on commit 6ad8346

Please sign in to comment.