diff --git a/lib/rouge/demos/clean b/lib/rouge/demos/clean new file mode 100644 index 0000000000..9f947f6388 --- /dev/null +++ b/lib/rouge/demos/clean @@ -0,0 +1,6 @@ +delete :: !a !.(Set a) -> Set a | < a +delete x Tip = Tip +delete x (Bin _ y l r) + | x < y = balanceR y (delete x l) r + | x > y = balanceL y l (delete x r) + | otherwise = glue l r diff --git a/lib/rouge/lexers/clean.rb b/lib/rouge/lexers/clean.rb new file mode 100644 index 0000000000..894c5289ec --- /dev/null +++ b/lib/rouge/lexers/clean.rb @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +module Rouge + module Lexers + class Clean < RegexLexer + title "Clean" + desc "The Clean programming language (clean.cs.ru.nl)" + + tag 'clean' + filenames '*.dcl', '*.icl' + + def self.keywords + @keywords ||= Set.new %w( + if otherwise + let in + with where + case of + infix infixl infixr + class instance + generic derive + special + implementation definition system module + from import qualified as + dynamic + code inline foreign export ccall stdcall + ) + end + + # These are literal patterns common to the ABC intermediate language and + # Clean. Clean has more extensive literal patterns (see :basic below). + state :common_literals do + rule %r/'(?:[^'\\]|\\(?:x[0-9a-fA-F]+|\d+|.))'/, Str::Char + + rule %r/[+~-]?\d+\.\d+(?:E[+-]?\d+)?\b/, Num::Float + rule %r/[+~-]?\d+E[+-]?\d+\b/, Num::Float + rule %r/[+~-]?\d+/, Num::Integer + + rule %r/"/, Str::Double, :string + end + + state :basic do + rule %r/\s+/m, Text::Whitespace + + rule %r/\/\/\*.*/, Comment::Doc + rule %r/\/\/.*/, Comment::Single + rule %r/\/\*\*/, Comment::Doc, :comment_doc + rule %r/\/\*/, Comment::Multiline, :comment + + rule %r/[+~-]?0[0-7]+/, Num::Oct + rule %r/[+~-]?0x[0-9a-fA-F]+/, Num::Hex + mixin :common_literals + rule %r/(\[)(\s*)(')(?=.*?'\])/ do + groups Punctuation, Text::Whitespace, Str::Single, Punctuation + push :charlist + end + end + + # nested commenting + state :comment_doc do + rule %r/\*\//, Comment::Doc, :pop! + rule %r/\/\/.*/, Comment::Doc # Singleline comments in multiline comments are skipped + rule %r/\/\*/, Comment::Doc, :comment + rule %r/[^*\/]+/, Comment::Doc + rule %r/[*\/]/, Comment::Doc + end + + # This is the same as the above, but with Multiline instead of Doc + state :comment do + rule %r/\*\//, Comment::Multiline, :pop! + rule %r/\/\/.*/, Comment::Multiline # Singleline comments in multiline comments are skipped + rule %r/\/\*/, Comment::Multiline, :comment + rule %r/[^*\/]+/, Comment::Multiline + rule %r/[*\/]/, Comment::Multiline + end + + state :root do + mixin :basic + + rule %r/code(\s+inline)?\s*{/, Comment::Preproc, :abc + + rule %r/_*[a-z][\w_`]*/ do |m| + if self.class.keywords.include?(m[0]) + token Keyword + else + token Name + end + end + + rule %r/_*[A-Z][\w_`]*/ do |m| + if m[0]=='True' || m[0]=='False' + token Keyword::Constant + else + token Keyword::Type + end + end + + rule %r/[^\w_\s`]/, Punctuation + rule %r/_\b/, Punctuation + end + + state :escapes do + rule %r/\\x[0-9a-fA-F]{1,2}/i, Str::Escape + rule %r/\\d\d{0,3}/i, Str::Escape + rule %r/\\0[0-7]{0,3}/, Str::Escape + rule %r/\\[0-7]{1,3}/, Str::Escape + rule %r/\\[nrfbtv\\"']/, Str::Escape + end + + state :string do + rule %r/"/, Str::Double, :pop! + mixin :escapes + rule %r/[^\\"]+/, Str::Double + end + + state :charlist do + rule %r/(')(\])/ do + groups Str::Single, Punctuation + pop! + end + mixin :escapes + rule %r/[^\\']/, Str::Single + end + + state :abc_basic do + rule %r/\s+/, Text::Whitespace + rule %r/\|.*/, Comment::Single + mixin :common_literals + end + + # The ABC intermediate language can be included, similar to C's inline + # assembly. For some information about ABC, see: + # https://en.wikipedia.org/wiki/Clean_(programming_language)#The_ABC-Machine + state :abc do + mixin :abc_basic + + rule %r/}/, Comment::Preproc, :pop! + rule %r/\.\w*/, Keyword, :abc_rest_of_line + rule %r/[\w_]+/, Name::Builtin, :abc_rest_of_line + end + + state :abc_rest_of_line do + rule %r/\n/, Text::Whitespace, :pop! + rule %r/}/ do + token Comment::Preproc + pop! + pop! + end + + mixin :abc_basic + + rule %r/\S+/, Name + end + end + end +end diff --git a/spec/lexers/clean_spec.rb b/spec/lexers/clean_spec.rb new file mode 100644 index 0000000000..6dc80f0344 --- /dev/null +++ b/spec/lexers/clean_spec.rb @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::Clean do + let(:subject) { Rouge::Lexers::Clean.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename' do + assert_guess :filename => 'foo.dcl' + assert_guess :filename => 'foo.icl' + end + end +end diff --git a/spec/visual/samples/clean b/spec/visual/samples/clean new file mode 100644 index 0000000000..3a5fae037e --- /dev/null +++ b/spec/visual/samples/clean @@ -0,0 +1,131 @@ +implementation module Data.GenCompress + +// Samples for testing were taken from Clean Platform ... +// https://gitlab.science.ru.nl/clean-and-itasks/clean-platform +// ... but cut down for brevity. + +// -- The following are some additions to test features not included in the sample: + +literals = + ( 'a', '\x61', '\0141', '\n' + , 17.42E3 // lower-case e or no digits before . are invalid! + , 17.42 + , 0x11, 052, 3, 0x3ff + , ['charlist sugar'], [ 'may start with spaces'] + , "escapes: \x11 \X11 \052 \d17 \D42 \n \" \' ' \\ " + , ['in a charlist: \x11 \X11 \052 \d17 \D42 \n \" \' " \\ '] + , "unicode: ° →" + ) + +/* Clean comments can be nested: /* like so */ + * Furthermore, // in multiline comments ignore closing */ on the remainder + * Hence this is still part of the comment, and it only closes here: */ + +/** + * By convention, documentation is in comments starting with two asterisks ... + */ + +//* ... or starting with an asterisk in the case of singleline documentation. + +// -- end additions + +import StdGeneric, StdEnv +from Data.Maybe import :: Maybe(..) +import Data._Array, Data.Func + +//-------------------------------------------------- +// uncompressor monad + +ret :: !.a !u:CompressSt -> (!Maybe .a,!u:CompressSt) +ret a st = (Just a, st) +(>>=) infixl 5 +(>>=) pa pb = bind pa pb +where + bind pa pb st + #! (ma, st) = pa st + = case ma of + Nothing -> (Nothing, st) + Just x -> pb x st + +//-------------------------------------------------- + +:: BitVector :== {#Int} +:: BitPos :== Int + +:: CompressSt = { cs_pos :: !Int, cs_bits :: !.{#Int} } +mkCompressSt arr = { cs_pos = 0, cs_bits = arr} + + +:: Compress a :== a -> *CompressSt -> *CompressSt +:: Uncompress a :== .CompressSt -> .(.(Maybe a), .CompressSt) + +compressBool :: !Bool !*CompressSt -> *CompressSt +compressBool bit {cs_pos = pos, cs_bits = bits} + #! s = size bits + #! int_pos = pos >> (IF_INT_64_OR_32 6 5) + #! bit_pos = pos bitand (IF_INT_64_OR_32 63 31) + | s == int_pos + = abort "reallocate" + #! int = bits.[int_pos] + #! bit_mask = 1 << bit_pos + #! new_int = if bit (int bitor bit_mask) (int bitand (bitnot bit_mask)) + = {cs_pos = inc pos, cs_bits = {bits & [int_pos] = new_int}} + +realToBinary32 :: !Real -> (!Int,!Int); +realToBinary32 _ = code { + .d 0 1 r + pop_b 0 | don't do anything + .o 0 2 ii +}; +// Alternatively, with inline code: +realToBinary32 _ = code inline { + no_op +}; + +uncompressArray :: (u:CompressSt -> ((Maybe v:a),w:CompressSt)) -> .(x:CompressSt -> ((Maybe y:(b v:a)),z:CompressSt)) | Array b a, [x w <= u,y <= v,x w <= z] +uncompressArray f + = uncompressInt >>= \s -> uncompress_array 0 s (unsafeCreateArray s) +where + uncompress_array i s arr + | i == s + = ret arr + = f >>= \x -> uncompress_array (inc i) s {arr & [i] = x} + +compressList :: (a *CompressSt -> *CompressSt) ![a] -> *CompressSt -> *CompressSt +compressList c xs = compressArray c (list_to_arr xs) +where + list_to_arr :: [b] -> {b} | Array {} b + list_to_arr xs = {x \\ x <- xs} + +generic gCompress a :: !a -> *CompressSt -> *CompressSt +gCompress{|Int|} x = compressInt x +gCompress{|EITHER|} cl cr (LEFT x) = cl x o compressBool False +gCompress{|{}|} c xs = compressArray c xs +gCompress{|{!}|} c xs = compressArray c xs +gCompress{|[]|} c xs = compressList c xs + + +generic gCompressedSize a :: a -> Int +gCompressedSize{|Int|} _ = IF_INT_64_OR_32 64 32 +gCompressedSize{|PAIR|} cx cy (PAIR x y) = cx x + cy y +gCompressedSize{|[]|} c xs = foldSt (\x st -> c x + st) xs (IF_INT_64_OR_32 64 32) +gCompressedSize{|{}|} c xs = foldSt (\x st -> c x + st) [x\\x<-:xs] (IF_INT_64_OR_32 64 32) +gCompressedSize{|{!}|} c xs = foldSt (\x st -> c x + st) [x\\x<-:xs] (IF_INT_64_OR_32 64 32) + +generic gUncompress a :: (u:CompressSt -> ((Maybe a),u:CompressSt)) +gUncompress{|PAIR|} fx fy = fx >>= \x -> fy >>= \y -> ret (PAIR x y) +gUncompress{|CONS|} f = f >>= ret o CONS +gUncompress{|FIELD|} f = f >>= \x -> ret $ FIELD x +gUncompress{|OBJECT|} f = f >>= \x -> ret $ OBJECT x + +//------------------------------------------------------------------------------------- + +uncompress :: (BitVector -> Maybe a) | gUncompress{|*|} a +uncompress = fst o gUncompress{|*|} o mkCompressSt + +compress :: !a -> BitVector | gCompressedSize{|*|} a & gCompress{|*|} a +compress x + #! compressed_size = gCompressedSize{|*|} x + #! arr_size = (compressed_size + (IF_INT_64_OR_32 63 31)) >> (IF_INT_64_OR_32 6 5) + #! bits = createArray arr_size 0 + = (gCompress{|*|} x (mkCompressSt bits)).cs_bits