Merge pull request github-linguist#364 from zacstewart/ragel-ruby

Add Ragel Ruby to languages
qqshfox · Feb 21, 2013 · c9bd609 · c9bd609
2 parents 7d50697 + 5e4623a
commit c9bd609
Show file tree

Hide file tree

Showing 4 changed files with 244 additions and 0 deletions.
diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml
@@ -997,6 +997,12 @@ Racket:
   - .rktd
   - .rktl
 
+Ragel in Ruby Host:
+  type: programming
+  lexer: Ragel in Ruby Host
+  color: "#ff9c2e"
+  primary_extension: .rl
+
 Raw token data:
   search_term: raw
   aliases:

diff --git a/samples/Ragel in Ruby Host/ephemeris_parser.rl b/samples/Ragel in Ruby Host/ephemeris_parser.rl
@@ -0,0 +1,96 @@
+=begin
+%%{
+
+  machine ephemeris_parser;
+
+  action mark { mark = p }
+
+  action parse_start_time {
+    parser.start_time = data[mark..p].pack('c*')
+  }
+
+  action parse_stop_time {
+    parser.stop_time = data[mark..p].pack('c*')
+  }
+
+  action parse_step_size {
+    parser.step_size = data[mark..p].pack('c*')
+  }
+
+  action parse_ephemeris_table {
+    fhold;
+    parser.ephemeris_table = data[mark..p].pack('c*')
+  }
+
+  ws = [ \t\r\n];
+
+  adbc = ('A.D.'|'B.C.');
+  year = digit{4};
+  month = upper lower{2};
+  date = digit{2};
+  hours =  digit{2};
+  minutes = digit{2};
+  seconds = digit{2} '.' digit{4};
+  tz = 'UT';
+  datetime = adbc ' ' year '-' month '-' date ' ' hours ':' minutes ':' seconds ' ' tz;
+
+  time_unit = ('minute' [s]? | 'calendar year' [s]?);
+
+  soe = '$$SOE' '\n';
+  eoe = '$$EOE' '\n';
+  ephemeris_table = (alnum | ws | [*-./:])*;
+
+  start_time = 'Start time' ' '* ':' ' ' datetime >mark %parse_start_time space* '\n';
+  stop_time  = 'Stop  time' ' '* ':' ' ' datetime >mark %parse_stop_time space* '\n';
+  step_size  = 'Step-size' ' '* ':' ' ' (digit+ ' '* time_unit) >mark $parse_step_size '\n';
+
+  ephemeris = soe ephemeris_table >mark %parse_ephemeris_table eoe;
+
+  main := (
+    any*
+    start_time
+    stop_time
+    step_size
+    any*
+    ephemeris
+    any*
+  );
+
+}%%
+=end
+
+require 'date'
+
+module Tengai
+  # Record type with the four fields that the ephemeris_parser machine's
+  # actions assign: start_time, stop_time, step_size and ephemeris_table.
+  EPHEMERIS_DATA = Struct.new(:start_time, :stop_time, :step_size, :ephemeris_table).freeze
+
+  # Parser built on the ephemeris_parser Ragel machine. It inherits the
+  # EPHEMERIS_DATA fields, so the object returned by .parse is also the result.
+  class EphemerisParser < EPHEMERIS_DATA
+    # Runs the machine over +data+ and returns a new EphemerisParser whose
+    # fields were assigned by the machine's parse_* actions.
+    #
+    # data - a String, which is converted to an array of signed byte values
+    #        with unpack('c*'); any other object is used unchanged and must
+    #        respond to #length.
+    def self.parse(data)
+      parser = new
+      data = data.unpack('c*') if data.is_a? String
+      eof = data.length
+
+      # Ragel directives: the Ragel compiler substitutes generated code here.
+      # The machine's actions refer to the locals parser and data by name.
+      %% write init;
+      %% write exec;
+
+      parser
+    end
+
+    # Stores the start time as the result of DateTime.parse(time) instead of
+    # the raw value handed over by the parse_start_time action.
+    def start_time=(time)
+      super parse_time(time)
+    end
+
+    # Stores the stop time as the result of DateTime.parse(time) instead of
+    # the raw value handed over by the parse_stop_time action.
+    def stop_time=(time)
+      super parse_time(time)
+    end
+
+    # Ragel directive: replaced by generated code for the machine's data.
+    %% write data;
+
+    # % fix syntax highlighting
+
+    private
+    # Converts +time+ with DateTime.parse; shared by both time setters.
+    def parse_time(time)
+      DateTime.parse(time)
+    end
+  end
+end
diff --git a/samples/Ragel in Ruby Host/simple_scanner.rl b/samples/Ragel in Ruby Host/simple_scanner.rl
@@ -0,0 +1,69 @@
+=begin
+%%{
+  machine simple_scanner;
+
+  action Emit {
+    emit data[(ts+8)..(te-7)].pack('c*')
+  }
+
+  foo = 'STARTFOO' any+ :>> 'ENDFOO';
+
+  main := |*
+    foo => Emit;
+    any;
+  *|;
+}%%
+=end
+
+
+# Scans a file for "STARTFOO[...]ENDFOO" blocks and outputs their contents.
+#
+# ENV['CHUNK_SIZE'] determines how much of the file to read in at a time, allowing you to control memory usage.
+#
+# Uses ragel's scanner functionality even though it's not strictly necessary.
+class SimpleScanner
+  attr_reader :path
+
+  def initialize(path)
+    @path = path
+    %% write data;
+    # % (this fixes syntax highlighting)
+  end
+
+  def emit(foo)
+    $stdout.puts foo
+  end
+
+  def perform
+    # So that ragel doesn't try to get it from data.length
+    pe = :ignored
+    eof = :ignored
+
+    %% write init;
+    # % (this fixes syntax highlighting)
+
+    leftover = []
+
+    File.open(path) do |f|
+      while chunk = f.read(ENV['CHUNK_SIZE'].to_i)
+        data = leftover + chunk.unpack('c*')
+        p ||= 0
+        pe = data.length
+
+        %% write exec;
+        # % (this fixes syntax highlighting)
+        if ts
+          leftover = data[ts..pe]
+          p = p - ts
+          ts = 0
+        else
+          leftover = []
+          p = 0
+        end
+      end
+    end
+  end
+end
+
+# Entry point: scan the file named by the first command-line argument.
+SimpleScanner.new(ARGV[0]).perform
diff --git a/samples/Ragel in Ruby Host/simple_tokenizer.rl b/samples/Ragel in Ruby Host/simple_tokenizer.rl
@@ -0,0 +1,73 @@
+=begin
+%%{
+  machine simple_tokenizer;
+
+  action MyTs {
+    my_ts = p
+  }
+  action MyTe {
+    my_te = p
+  }
+  action Emit {
+    emit data[my_ts...my_te].pack('c*')
+    my_ts = nil
+    my_te = nil    
+  }
+
+  foo = 'STARTFOO' any+ >MyTs :>> 'ENDFOO' >MyTe %Emit;
+  main := ( foo | any+ )*;
+
+}%%
+=end
+
+# Scans a file for "STARTFOO[...]ENDFOO" blocks and outputs their contents.
+#
+# ENV['CHUNK_SIZE'] determines how much of the file to read in at a time, allowing you to control memory usage.
+#
+# Does not use ragel's scanner functionality because no backtracking is needed.
+class SimpleTokenizer
+  attr_reader :path
+
+  def initialize(path)
+    @path = path
+    %% write data;
+    # % (this fixes syntax highlighting)
+  end
+
+  def emit(foo)
+    $stdout.puts foo
+  end
+
+  def perform
+    # So that ragel doesn't try to get it from data.length
+    pe = :ignored
+    eof = :ignored
+
+    %% write init;
+    # % (this fixes syntax highlighting)
+
+    leftover = []
+    my_ts = nil
+    my_te = nil
+
+    File.open(path) do |f|
+      while chunk = f.read(ENV['CHUNK_SIZE'].to_i)
+        data = leftover + chunk.unpack('c*')
+        p = 0
+        pe = data.length
+        %% write exec;
+        # % (this fixes syntax highlighting)
+        if my_ts
+          leftover = data[my_ts..-1]
+          my_te = my_te - my_ts if my_te
+          my_ts = 0
+        else
+          leftover = []
+        end
+      end
+    end
+  end
+end
+
+# Entry point: tokenize the file named by the first command-line argument.
+SimpleTokenizer.new(ARGV[0]).perform