-
Notifications
You must be signed in to change notification settings - Fork 730
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit adds a lexer for the Clean language.
- Loading branch information
1 parent
8533fea
commit 6ad8346
Showing
4 changed files
with
308 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
delete :: !a !.(Set a) -> Set a | < a | ||
delete x Tip = Tip | ||
delete x (Bin _ y l r) | ||
| x < y = balanceR y (delete x l) r | ||
| x > y = balanceL y l (delete x r) | ||
| otherwise = glue l r |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
# -*- coding: utf-8 -*- # | ||
# frozen_string_literal: true | ||
|
||
module Rouge | ||
module Lexers | ||
class Clean < RegexLexer | ||
title "Clean" | ||
desc "The Clean programming language (clean.cs.ru.nl)" | ||
|
||
tag 'clean' | ||
filenames '*.dcl', '*.icl' | ||
|
||
def self.keywords | ||
@keywords ||= Set.new %w( | ||
if otherwise | ||
let in | ||
with where | ||
case of | ||
infix infixl infixr | ||
class instance | ||
generic derive | ||
special | ||
implementation definition system module | ||
from import qualified as | ||
dynamic | ||
code inline foreign export ccall stdcall | ||
) | ||
end | ||
|
||
# These are literal patterns common to the ABC intermediate language and | ||
# Clean. Clean has more extensive literal patterns (see :basic below). | ||
state :common_literals do | ||
rule %r/'(?:[^'\\]|\\(?:x[0-9a-fA-F]+|\d+|.))'/, Str::Char | ||
|
||
rule %r/[+~-]?\d+\.\d+(?:E[+-]?\d+)?\b/, Num::Float | ||
rule %r/[+~-]?\d+E[+-]?\d+\b/, Num::Float | ||
rule %r/[+~-]?\d+/, Num::Integer | ||
|
||
rule %r/"/, Str::Double, :string | ||
end | ||
|
||
state :basic do | ||
rule %r/\s+/m, Text::Whitespace | ||
|
||
rule %r/\/\/\*.*/, Comment::Doc | ||
rule %r/\/\/.*/, Comment::Single | ||
rule %r/\/\*\*/, Comment::Doc, :comment_doc | ||
rule %r/\/\*/, Comment::Multiline, :comment | ||
|
||
rule %r/[+~-]?0[0-7]+/, Num::Oct | ||
rule %r/[+~-]?0x[0-9a-fA-F]+/, Num::Hex | ||
mixin :common_literals | ||
rule %r/(\[)(\s*)(')(?=.*?'\])/ do | ||
groups Punctuation, Text::Whitespace, Str::Single, Punctuation | ||
push :charlist | ||
end | ||
end | ||
|
||
# nested commenting | ||
state :comment_doc do | ||
rule %r/\*\//, Comment::Doc, :pop! | ||
rule %r/\/\/.*/, Comment::Doc # Singleline comments in multiline comments are skipped | ||
rule %r/\/\*/, Comment::Doc, :comment | ||
rule %r/[^*\/]+/, Comment::Doc | ||
rule %r/[*\/]/, Comment::Doc | ||
end | ||
|
||
# This is the same as the above, but with Multiline instead of Doc | ||
state :comment do | ||
rule %r/\*\//, Comment::Multiline, :pop! | ||
rule %r/\/\/.*/, Comment::Multiline # Singleline comments in multiline comments are skipped | ||
rule %r/\/\*/, Comment::Multiline, :comment | ||
rule %r/[^*\/]+/, Comment::Multiline | ||
rule %r/[*\/]/, Comment::Multiline | ||
end | ||
|
||
state :root do | ||
mixin :basic | ||
|
||
rule %r/code(\s+inline)?\s*{/, Comment::Preproc, :abc | ||
|
||
rule %r/_*[a-z][\w_`]*/ do |m| | ||
if self.class.keywords.include?(m[0]) | ||
token Keyword | ||
else | ||
token Name | ||
end | ||
end | ||
|
||
rule %r/_*[A-Z][\w_`]*/ do |m| | ||
if m[0]=='True' || m[0]=='False' | ||
token Keyword::Constant | ||
else | ||
token Keyword::Type | ||
end | ||
end | ||
|
||
rule %r/[^\w_\s`]/, Punctuation | ||
rule %r/_\b/, Punctuation | ||
end | ||
|
||
state :escapes do | ||
rule %r/\\x[0-9a-fA-F]{1,2}/i, Str::Escape | ||
rule %r/\\d\d{0,3}/i, Str::Escape | ||
rule %r/\\0[0-7]{0,3}/, Str::Escape | ||
rule %r/\\[0-7]{1,3}/, Str::Escape | ||
rule %r/\\[nrfbtv\\"']/, Str::Escape | ||
end | ||
|
||
state :string do | ||
rule %r/"/, Str::Double, :pop! | ||
mixin :escapes | ||
rule %r/[^\\"]+/, Str::Double | ||
end | ||
|
||
state :charlist do | ||
rule %r/(')(\])/ do | ||
groups Str::Single, Punctuation | ||
pop! | ||
end | ||
mixin :escapes | ||
rule %r/[^\\']/, Str::Single | ||
end | ||
|
||
state :abc_basic do | ||
rule %r/\s+/, Text::Whitespace | ||
rule %r/\|.*/, Comment::Single | ||
mixin :common_literals | ||
end | ||
|
||
# The ABC intermediate language can be included, similar to C's inline | ||
# assembly. For some information about ABC, see: | ||
# https://en.wikipedia.org/wiki/Clean_(programming_language)#The_ABC-Machine | ||
state :abc do | ||
mixin :abc_basic | ||
|
||
rule %r/}/, Comment::Preproc, :pop! | ||
rule %r/\.\w*/, Keyword, :abc_rest_of_line | ||
rule %r/[\w_]+/, Name::Builtin, :abc_rest_of_line | ||
end | ||
|
||
state :abc_rest_of_line do | ||
rule %r/\n/, Text::Whitespace, :pop! | ||
rule %r/}/ do | ||
token Comment::Preproc | ||
pop! | ||
pop! | ||
end | ||
|
||
mixin :abc_basic | ||
|
||
rule %r/\S+/, Name | ||
end | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# -*- coding: utf-8 -*- # | ||
# frozen_string_literal: true | ||
|
||
describe Rouge::Lexers::Clean do | ||
let(:subject) { Rouge::Lexers::Clean.new } | ||
|
||
describe 'guessing' do | ||
include Support::Guessing | ||
|
||
it 'guesses by filename' do | ||
assert_guess :filename => 'foo.dcl' | ||
assert_guess :filename => 'foo.icl' | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
implementation module Data.GenCompress | ||
|
||
// Samples for testing were taken from Clean Platform ... | ||
// https://gitlab.science.ru.nl/clean-and-itasks/clean-platform | ||
// ... but cut down for brevity. | ||
|
||
// -- The following are some additions to test features not included in the sample: | ||
|
||
literals = | ||
( 'a', '\x61', '\0141', '\n' | ||
, 17.42E3 // lower-case e or no digits before . are invalid! | ||
, 17.42 | ||
, 0x11, 052, 3, 0x3ff | ||
, ['charlist sugar'], [ 'may start with spaces'] | ||
, "escapes: \x11 \X11 \052 \d17 \D42 \n \" \' ' \\ " | ||
, ['in a charlist: \x11 \X11 \052 \d17 \D42 \n \" \' " \\ '] | ||
, "unicode: ° →" | ||
) | ||
|
||
/* Clean comments can be nested: /* like so */ | ||
* Furthermore, // in multiline comments ignore closing */ on the remainder | ||
* Hence this is still part of the comment, and it only closes here: */ | ||
|
||
/** | ||
* By convention, documentation is in comments starting with two asterisks ... | ||
*/ | ||
|
||
//* ... or starting with an asterisk in the case of singleline documentation. | ||
|
||
// -- end additions | ||
|
||
import StdGeneric, StdEnv | ||
from Data.Maybe import :: Maybe(..) | ||
import Data._Array, Data.Func | ||
|
||
//-------------------------------------------------- | ||
// uncompressor monad | ||
|
||
ret :: !.a !u:CompressSt -> (!Maybe .a,!u:CompressSt) | ||
ret a st = (Just a, st) | ||
(>>=) infixl 5 | ||
(>>=) pa pb = bind pa pb | ||
where | ||
bind pa pb st | ||
#! (ma, st) = pa st | ||
= case ma of | ||
Nothing -> (Nothing, st) | ||
Just x -> pb x st | ||
|
||
//-------------------------------------------------- | ||
|
||
:: BitVector :== {#Int} | ||
:: BitPos :== Int | ||
|
||
:: CompressSt = { cs_pos :: !Int, cs_bits :: !.{#Int} } | ||
mkCompressSt arr = { cs_pos = 0, cs_bits = arr} | ||
|
||
|
||
:: Compress a :== a -> *CompressSt -> *CompressSt | ||
:: Uncompress a :== .CompressSt -> .(.(Maybe a), .CompressSt) | ||
|
||
compressBool :: !Bool !*CompressSt -> *CompressSt | ||
compressBool bit {cs_pos = pos, cs_bits = bits} | ||
#! s = size bits | ||
#! int_pos = pos >> (IF_INT_64_OR_32 6 5) | ||
#! bit_pos = pos bitand (IF_INT_64_OR_32 63 31) | ||
| s == int_pos | ||
= abort "reallocate" | ||
#! int = bits.[int_pos] | ||
#! bit_mask = 1 << bit_pos | ||
#! new_int = if bit (int bitor bit_mask) (int bitand (bitnot bit_mask)) | ||
= {cs_pos = inc pos, cs_bits = {bits & [int_pos] = new_int}} | ||
|
||
realToBinary32 :: !Real -> (!Int,!Int); | ||
realToBinary32 _ = code { | ||
.d 0 1 r | ||
pop_b 0 | don't do anything | ||
.o 0 2 ii | ||
}; | ||
// Alternatively, with inline code: | ||
realToBinary32 _ = code inline { | ||
no_op | ||
}; | ||
|
||
uncompressArray :: (u:CompressSt -> ((Maybe v:a),w:CompressSt)) -> .(x:CompressSt -> ((Maybe y:(b v:a)),z:CompressSt)) | Array b a, [x w <= u,y <= v,x w <= z] | ||
uncompressArray f | ||
= uncompressInt >>= \s -> uncompress_array 0 s (unsafeCreateArray s) | ||
where | ||
uncompress_array i s arr | ||
| i == s | ||
= ret arr | ||
= f >>= \x -> uncompress_array (inc i) s {arr & [i] = x} | ||
|
||
compressList :: (a *CompressSt -> *CompressSt) ![a] -> *CompressSt -> *CompressSt | ||
compressList c xs = compressArray c (list_to_arr xs) | ||
where | ||
list_to_arr :: [b] -> {b} | Array {} b | ||
list_to_arr xs = {x \\ x <- xs} | ||
|
||
generic gCompress a :: !a -> *CompressSt -> *CompressSt | ||
gCompress{|Int|} x = compressInt x | ||
gCompress{|EITHER|} cl cr (LEFT x) = cl x o compressBool False | ||
gCompress{|{}|} c xs = compressArray c xs | ||
gCompress{|{!}|} c xs = compressArray c xs | ||
gCompress{|[]|} c xs = compressList c xs | ||
|
||
|
||
generic gCompressedSize a :: a -> Int | ||
gCompressedSize{|Int|} _ = IF_INT_64_OR_32 64 32 | ||
gCompressedSize{|PAIR|} cx cy (PAIR x y) = cx x + cy y | ||
gCompressedSize{|[]|} c xs = foldSt (\x st -> c x + st) xs (IF_INT_64_OR_32 64 32) | ||
gCompressedSize{|{}|} c xs = foldSt (\x st -> c x + st) [x\\x<-:xs] (IF_INT_64_OR_32 64 32) | ||
gCompressedSize{|{!}|} c xs = foldSt (\x st -> c x + st) [x\\x<-:xs] (IF_INT_64_OR_32 64 32) | ||
|
||
generic gUncompress a :: (u:CompressSt -> ((Maybe a),u:CompressSt)) | ||
gUncompress{|PAIR|} fx fy = fx >>= \x -> fy >>= \y -> ret (PAIR x y) | ||
gUncompress{|CONS|} f = f >>= ret o CONS | ||
gUncompress{|FIELD|} f = f >>= \x -> ret $ FIELD x | ||
gUncompress{|OBJECT|} f = f >>= \x -> ret $ OBJECT x | ||
|
||
//------------------------------------------------------------------------------------- | ||
|
||
uncompress :: (BitVector -> Maybe a) | gUncompress{|*|} a | ||
uncompress = fst o gUncompress{|*|} o mkCompressSt | ||
|
||
compress :: !a -> BitVector | gCompressedSize{|*|} a & gCompress{|*|} a | ||
compress x | ||
#! compressed_size = gCompressedSize{|*|} x | ||
#! arr_size = (compressed_size + (IF_INT_64_OR_32 63 31)) >> (IF_INT_64_OR_32 6 5) | ||
#! bits = createArray arr_size 0 | ||
= (gCompress{|*|} x (mkCompressSt bits)).cs_bits |