diff --git a/lib/rouge/demos/ada b/lib/rouge/demos/ada new file mode 100644 index 0000000000..36182fbc4c --- /dev/null +++ b/lib/rouge/demos/ada @@ -0,0 +1,26 @@ +with Ada.Directories; +with Ada.Direct_IO; +with Ada.Text_IO; + +procedure Extra_IO.Read_File (Name : String) is + + package Dirs renames Ada.Directories; + package Text_IO renames Ada.Text_IO; + + -- Get the size of the file for a new string. + Size : Natural := Natural (Dirs.Size (Name)); + subtype File_String is String (1 .. Size); + + -- Instantiate Direct_IO for our file type. + package FIO is new Ada.Direct_IO (File_String); + + File : FIO.File_Type; + Contents : File_String; + +begin + FIO.Open (File, FIO.In_File, Name); + FIO.Read (File, Contents); + FIO.Close (File); + + Text_IO.Put (Contents); +end Extra_IO.Read_File; diff --git a/lib/rouge/lexers/ada.rb b/lib/rouge/lexers/ada.rb new file mode 100644 index 0000000000..0e47ec71f0 --- /dev/null +++ b/lib/rouge/lexers/ada.rb @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +module Rouge + module Lexers + class Ada < RegexLexer + tag 'ada' + filenames '*.ada', '*.ads', '*.adb', '*.gpr' + mimetypes 'text/x-ada' + + title 'Ada' + desc 'The Ada 2012 programming language' + + # Ada identifiers are Unicode with underscores only allowed as separators. + ID = /\b[[:alpha:]](?:\p{Pc}?[[:alnum:]])*\b/ + + # Numerals can also contain underscores. + NUM = /\d(_?\d)*/ + XNUM = /\h(_?\h)*/ + EXP = /(E[-+]?#{NUM})?/i + + # Return a hash mapping lower-case identifiers to token classes. + def self.idents + @idents ||= Hash.new(Name).tap do |h| + %w( + abort abstract accept access aliased all array at begin body + case constant declare delay delta digits do else elsif end + exception exit for generic goto if in interface is limited + loop new null of others out overriding pragma private + protected raise range record renames requeue return reverse + select separate some synchronized tagged task terminate then + until use when while with + ).each {|w| h[w] = Keyword} + + %w(abs and mod not or rem xor).each {|w| h[w] = Operator::Word} + + %w( + entry function package procedure subtype type + ).each {|w| h[w] = Keyword::Declaration} + + %w( + boolean character constraint_error duration float integer + natural positive long_float long_integer long_long_float + long_long_integer program_error short_float short_integer + short_short_integer storage_error string tasking_error + wide_character wide_string wide_wide_character + wide_wide_string + ).each {|w| h[w] = Name::Builtin} + end + end + + state :whitespace do + rule %r{\s+}m, Text + rule %r{--.*$}, Comment::Single + end + + state :dquote_string do + rule %r{[^"\n]+}, Literal::String::Double + rule %r{""}, Literal::String::Escape + rule %r{"}, Literal::String::Double, :pop! + rule %r{\n}, Error, :pop! + end + + state :attr do + mixin :whitespace + rule ID, Name::Attribute, :pop! + rule %r{}, Text, :pop! + end + + # Handle a dotted name immediately following a declaration keyword. + state :decl_name do + mixin :whitespace + rule %r{body\b}i, Keyword::Declaration # package body Foo.Bar is... + rule %r{(#{ID})(\.)} do + groups Name::Namespace, Punctuation + end + # function "<=" (Left, Right: Type) is ... + rule %r{#{ID}|"(and|or|xor|/?=|<=?|>=?|\+|–|&\|/|mod|rem|\*?\*|abs|not)"}, + Name::Function, :pop! + rule %r{}, Text, :pop! + end + + # Handle a sequence of library unit names: with Ada.Foo, Ada.Bar; + # + # There's a chance we entered this state mistakenly since 'with' + # has multiple other uses in Ada (none of which are likely to + # appear at the beginning of a line). Try to bail as soon as + # possible if we see something suspicious like keywords. + # + # See ada_spec.rb for some examples. + state :libunit_name do + mixin :whitespace + + rule ID do |m| + t = self.class.idents[m[0].downcase] + if t <= Name + # Convert all kinds of Name to namespaces in this context. + token Name::Namespace + else + # Yikes, we're not supposed to get a keyword in a library unit name! + # We probably entered this state by mistake, so try to fix it. + token t + if t == Keyword::Declaration + goto :decl_name + else + pop! + end + end + end + + rule %r{[.,]}, Punctuation + rule %r{}, Text, :pop! + end + + state :root do + mixin :whitespace + + # String literals. + rule %r{'.'}, Literal::String::Char + rule %r{"[^"\n]*}, Literal::String::Double, :dquote_string + + # Real literals. + rule %r{#{NUM}\.#{NUM}#{EXP}}, Literal::Number::Float + rule %r{#{NUM}##{XNUM}\.#{XNUM}##{EXP}}, Literal::Number::Float + + # Integer literals. + rule %r{2#[01](_?[01])*##{EXP}}, Literal::Number::Bin + rule %r{8#[0-7](_?[0-7])*##{EXP}}, Literal::Number::Oct + rule %r{16##{XNUM}*##{EXP}}, Literal::Number::Hex + rule %r{#{NUM}##{XNUM}##{EXP}}, Literal::Number::Integer + rule %r{#{NUM}#\w+#}, Error + rule %r{#{NUM}#{EXP}}, Literal::Number::Integer + + # Special constructs. + rule %r{'}, Punctuation, :attr + rule %r{<<#{ID}>>}, Name::Label + + # Context clauses are tricky because the 'with' keyword is used + # for many purposes. Detect at beginning of the line only. + rule %r{^(?:(limited)(\s+))?(?:(private)(\s+))?(with)\b}i do + groups Keyword::Namespace, Text, Keyword::Namespace, Text, Keyword::Namespace + push :libunit_name + end + + # Operators and punctuation characters. + rule %r{[+*/&<=>|]|-|=>|\.\.|\*\*|[:>>|<>}, Operator + rule %r{[.,:;()]}, Punctuation + + rule ID do |m| + t = self.class.idents[m[0].downcase] + token t + if t == Keyword::Declaration + push :decl_name + end + end + + # Flag word-like things that don't match the ID pattern. + rule %r{\b(\p{Pc}|[[alpha]])\p{Word}*}, Error + end + end + end +end diff --git a/spec/lexers/ada_spec.rb b/spec/lexers/ada_spec.rb new file mode 100644 index 0000000000..e0ba1e8261 --- /dev/null +++ b/spec/lexers/ada_spec.rb @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::Ada do + let(:subject) { Rouge::Lexers::Ada.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename' do + assert_guess :filename => 'foo.ada' + assert_guess :filename => 'foo.adb' + assert_guess :filename => 'foo.ads' + end + + it 'guesses by mimetype' do + assert_guess :mimetype => 'text/x-ada' + end + end + + describe 'lexing' do + include Support::Lexing + + it 'classifies identifiers' do + assert_tokens_equal 'constant Boolean := A and B', + ['Keyword', 'constant'], + ['Text', ' '], + ['Name.Builtin', 'Boolean'], + ['Text', ' '], + ['Operator', ':='], + ['Text', ' '], + ['Name', 'A'], + ['Text', ' '], + ['Operator.Word', 'and'], + ['Text', ' '], + ['Name', 'B'] + end + + it 'accepts Unicode identifiers' do + assert_tokens_equal '東京', ['Name', '東京'] + assert_tokens_equal '0東京', ['Error', '0'], ['Name', '東京'] + assert_tokens_equal '東京0', ['Name', '東京0'] + end + + it 'rejects identifiers with double or trailing underscores' do + assert_tokens_equal '_ab', ['Error', '_ab'] + assert_tokens_equal 'a__b', ['Error', 'a__b'] + assert_tokens_equal 'a_b', ['Name', 'a_b'] + assert_tokens_equal 'ab_', ['Error', 'ab_'] + end + + it 'understands other connecting punctuation' do + assert_tokens_equal 'a﹏b', ['Name', 'a﹏b'] + assert_tokens_equal '﹏ab', ['Error', '﹏ab'] + assert_tokens_equal 'a﹏﹏b', ['Error', 'a﹏﹏b'] + assert_tokens_equal 'ab﹏', ['Error', 'ab﹏'] + end + + it 'classifies based number literals' do + assert_tokens_equal '2#0001_1110#', ['Literal.Number.Bin', '2#0001_1110#'] + assert_tokens_equal '2#0001__1110#', ['Error', '2#0001__1110#'] + assert_tokens_equal '8#1234_0000#', ['Literal.Number.Oct', '8#1234_0000#'] + assert_tokens_equal '16#abc_BBB_12#', ['Literal.Number.Hex', '16#abc_BBB_12#'] + assert_tokens_equal '4#1230000#e+5', ['Literal.Number.Integer', '4#1230000#e+5'] + assert_tokens_equal '2#0001_1110#e3', ['Literal.Number.Bin', '2#0001_1110#e3'] + + assert_tokens_equal '16#abc_BBB.12#', ['Literal.Number.Float', '16#abc_BBB.12#'] + end + + it 'recognizes exponents in integers and reals' do + assert_tokens_equal '1e6', ['Literal.Number.Integer', '1e6'] + assert_tokens_equal '123_456', ['Literal.Number.Integer', '123_456'] + assert_tokens_equal '3.14159_26', ['Literal.Number.Float', '3.14159_26'] + assert_tokens_equal '3.141_592e-20', ['Literal.Number.Float', '3.141_592e-20'] + end + + it 'highlights escape sequences inside doubly quoted strings' do + assert_tokens_equal '"Archimedes said ""Εύρηκα"""', + ['Literal.String.Double', '"Archimedes said '], + ['Literal.String.Escape', '""'], + ['Literal.String.Double', 'Εύρηκα'], + ['Literal.String.Escape', '""'], + ['Literal.String.Double', '"'] + end + + it 'marks function names in declarations' do + assert_tokens_equal 'Entry Foo IS', + ['Keyword.Declaration', 'Entry'], + ['Text', ' '], + ['Name.Function', 'Foo'], + ['Text', ' '], + ['Keyword', 'IS'] + + assert_tokens_equal 'package body Ada.Foo IS', + ['Keyword.Declaration', 'package'], + ['Text', ' '], + ['Keyword.Declaration', 'body'], + ['Text', ' '], + ['Name.Namespace', 'Ada'], + ['Punctuation', '.'], + ['Name.Function', 'Foo'], + ['Text', ' '], + ['Keyword', 'IS'] + end + + it 'allows both names and builtin names in context clauses' do + assert_tokens_equal 'limited with Math.Integer;', + ['Keyword.Namespace', 'limited'], + ['Text', ' '], + ['Keyword.Namespace', 'with'], + ['Text', ' '], + ['Name.Namespace', 'Math'], + ['Punctuation', '.'], + ['Name.Namespace', 'Integer'], + ['Punctuation', ';'] + end + + it 'recovers quickly after mistakenly entering :libunit_name' do + # A `with` keyword at the beginning of a line is 99.9% sure to be + # a context clause, but there are things that could be mistaken if + # they are indented strangely: + # + # generic + # with function Random return Integer is <>; + # procedure Foo; + # + # If that `with` is not indented, we should recover immediately: + assert_tokens_equal 'with function Random', + ['Keyword.Namespace', 'with'], + ['Text', ' '], + ['Keyword.Declaration', 'function'], + ['Text', ' '], + ['Name.Function', 'Random'] + + # Another case that's even less likely to be at BOL: + # + # type Painted_Point is new Point with + # record + # Paint : Color := White; + # end record; + # + # type Addition is new Binary_Operation with null record; + assert_tokens_equal 'with record Paint', + ['Keyword.Namespace', 'with'], + ['Text', ' '], + ['Keyword', 'record'], + ['Text', ' '], + ['Name', 'Paint'] + assert_tokens_equal 'with null record;', + ['Keyword.Namespace', 'with'], + ['Text', ' '], + ['Keyword', 'null'], + ['Text', ' '], + ['Keyword', 'record'], + ['Punctuation', ';'] + + # Finally: raise Runtime_Error with "NO!" + assert_tokens_equal 'with "NO!"', + ['Keyword.Namespace', 'with'], + ['Text', ' '], + ['Literal.String.Double', '"NO!"'] + end + end +end diff --git a/spec/visual/samples/ada b/spec/visual/samples/ada new file mode 100644 index 0000000000..17587fb99d --- /dev/null +++ b/spec/visual/samples/ada @@ -0,0 +1,129 @@ +generic + + type Element_Type is private; + type Array_Type is array (Integer range <>) of Element_Type; + with function ">" (Left, Right : Element_Type) return Boolean is <>; + +procedure Algorithms.Best_Sort (A : in out Array_Type); + + +package Scanner is + + type Token_Type is + (Whitespace, New_Line, Ident, Equals, Left_Par, Right_Par, Pipe, Question, + Asterisk, Plus); + + -- Scanning for a token can fail, or it can return a token. + type Result (Ok : Boolean) is record + Last : Positive; + case Ok is + when True => + Token : Token_Type; + when False => + null; + end case; + end record; + + -- Identify the token at the beginning of Text. + function Scan_Token (Text : String) return Result with + Pre => Text'Length > 0; + +end Scanner; + + +package 動物園 is + type 動物 is (犬, 猫); + function いう (だれ : 動物) return Wide_Wide_String is + (case だれ is when 犬 => """ワン"", ""ワン""", when 猫 => """にゃん"""); +end 動物園; + + +procedure Algorithms.Best_Sort (A : in out Array_Type) is +begin + + if A'Length <= 1 then + return; + end if; + + <> + for I in A'First .. A'Last - 1 loop + if A (I) > A (I + 1) then + Exchange (A (I), A (I + 1)); + goto Try_Again; + end if; + end loop; + +end Algorithms.Best_Sort; + + +with Ada.Characters.Latin_1; +package body Scanner is + + package Lat1 renames Ada.Characters.Latin_1; + + function Scan_Token (Text : String) return Result is + + Pos : Positive := Text'First; + + -- Shortcut functions for returning a token or an error at the + -- current position. + function Ok (Token : Token_Type) return Result is (True, Pos, Token); + function Error return Result is (False, Pos); + + begin + case Text (Pos) is + when Lat1.LF => + return Ok (New_Line); + when '=' => + return Ok (Equals); + when '(' => + return Ok (Left_Par); + when ')' => + return Ok (Right_Par); + when '|' => + return Ok (Pipe); + when '?' => + return Ok (Question); + when '*' => + return Ok (Asterisk); + when '+' => + return Ok (Plus); + + when ' ' | Lat1.HT | Lat1.CR => + while Pos < Text'Last and Text (Pos + 1) in ' ' | Lat1.HT | Lat1.CR + loop + Pos := Pos + 1; + end loop; + return Ok (Whitespace); + + when 'A' .. 'Z' | 'a' .. 'z' => + while Pos < Text'Last and + Text (Pos + 1) in 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' + loop + Pos := Pos + 1; + end loop; + return Ok (Ident); + + when others => + return Error; + end case; + end Scan_Token; +end Scanner; + + +Package MACHINE_CODE Is + Type REGISTER Is Range 0 .. 16#F#; + Type DISPLACEMENT Is Range 0 .. 16#FFF#; + + Type SI Is Record + CODE : OPCODE; + B : REGISTER; + D : DISPLACEMENT; + End Record; + + for SI Use Record + CODE at 0 Range 0 .. 7; + B at 0 Range 16 .. 19; -- Bits 8 .. 15 Unused + D at 0 Range 20 .. 31; + End Record; +End MACHINE_CODE;