forked from github-linguist/linguist
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request github-linguist#364 from zacstewart/ragel-ruby
Add Ragel Ruby to languages
- Loading branch information
Showing 4 changed files with 244 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
=begin | ||
%%{ | ||
|
||
# Ragel machine that picks the start time, stop time, step size and the | ||
# table between the $$SOE and $$EOE markers out of an ephemeris text. | ||
machine ephemeris_parser; | ||
|
||
# Remember the current position so a later action can slice from it. | ||
action mark { mark = p } | ||
|
||
# Each parse_* action packs the integers from mark through p back into a | ||
# string and assigns it to the matching attribute of parser. | ||
action parse_start_time { | ||
parser.start_time = data[mark..p].pack('c*') | ||
} | ||
|
||
action parse_stop_time { | ||
parser.stop_time = data[mark..p].pack('c*') | ||
} | ||
|
||
action parse_step_size { | ||
parser.step_size = data[mark..p].pack('c*') | ||
} | ||
|
||
# Same as the actions above, but fhold first moves p back one position | ||
# (see the Ragel guide) before the slice is taken. | ||
action parse_ephemeris_table { | ||
fhold; | ||
parser.ephemeris_table = data[mark..p].pack('c*') | ||
} | ||
|
||
ws = [ \t\r\n]; | ||
|
||
# A datetime looks like: A.D. 2012-Jan-01 00:00:00.0000 UT | ||
adbc = ('A.D.'|'B.C.'); | ||
year = digit{4}; | ||
month = upper lower{2}; | ||
date = digit{2}; | ||
hours = digit{2}; | ||
minutes = digit{2}; | ||
seconds = digit{2} '.' digit{4}; | ||
tz = 'UT'; | ||
datetime = adbc ' ' year '-' month '-' date ' ' hours ':' minutes ':' seconds ' ' tz; | ||
|
||
# Step sizes are expressed in minutes or calendar years, singular or plural. | ||
time_unit = ('minute' [s]? | 'calendar year' [s]?); | ||
|
||
# The table sits between a start marker line and an end marker line and may | ||
# contain alphanumerics, whitespace and a small set of punctuation. | ||
soe = '$$SOE' '\n'; | ||
eoe = '$$EOE' '\n'; | ||
ephemeris_table = (alnum | ws | [*-./:])*; | ||
|
||
# Header lines. For the two time lines, mark fires on entering datetime and | ||
# the parse action fires on leaving it. For step_size the parse action is an | ||
# all-transitions action, so step_size is reassigned on every character of | ||
# the parenthesised group. | ||
start_time = 'Start time' ' '* ':' ' ' datetime >mark %parse_start_time space* '\n'; | ||
stop_time = 'Stop time' ' '* ':' ' ' datetime >mark %parse_stop_time space* '\n'; | ||
step_size = 'Step-size' ' '* ':' ' ' (digit+ ' '* time_unit) >mark $parse_step_size '\n'; | ||
|
||
# mark fires on entering the table body, the parse action on leaving it. | ||
ephemeris = soe ephemeris_table >mark %parse_ephemeris_table eoe; | ||
|
||
# The three header lines must appear back to back. Arbitrary text may come | ||
# before them, between them and the table, and after the table. | ||
main := ( | ||
any* | ||
start_time | ||
stop_time | ||
step_size | ||
any* | ||
ephemeris | ||
any* | ||
); | ||
|
||
}%% | ||
=end | ||
|
||
require 'date' | ||
|
||
# Namespace holding the ephemeris parser and its result struct. | ||
module Tengai | ||
# Frozen Struct class with the four fields the parser fills in. | ||
EPHEMERIS_DATA = Struct.new(:start_time, :stop_time, :step_size, :ephemeris_table).freeze | ||
|
||
# Struct subclass whose class method parse runs the ephemeris_parser | ||
# Ragel machine over the input and stores the results on a new instance. | ||
class EphemerisParser < EPHEMERIS_DATA | ||
# Parses data and returns a new EphemerisParser. | ||
# A String argument is first unpacked into an array of signed 8-bit | ||
# integers; any other argument is used as given and must respond to length. | ||
# The local names parser, data and eof are read by the machine actions and | ||
# by the code Ragel generates in place of the write directives below. | ||
def self.parse(data) | ||
parser = new | ||
data = data.unpack('c*') if data.is_a? String | ||
eof = data.length | ||
|
||
%% write init; | ||
%% write exec; | ||
|
||
parser | ||
end | ||
|
||
# Stores the start time after converting the given text with parse_time. | ||
def start_time=(time) | ||
super parse_time(time) | ||
end | ||
|
||
# Stores the stop time after converting the given text with parse_time. | ||
def stop_time=(time) | ||
super parse_time(time) | ||
end | ||
|
||
# Ragel emits the state machine data for this class here. | ||
%% write data; | ||
|
||
# % fix syntax highlighting | ||
|
||
private | ||
# Converts the given text into a DateTime using DateTime.parse. | ||
def parse_time(time) | ||
DateTime.parse(time) | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
=begin | ||
%%{ | ||
# Ragel scanner that finds STARTFOO ... ENDFOO tokens and emits their contents. | ||
machine simple_scanner; | ||
|
||
# Passes the token body to emit: ts+8 skips the 8 characters of STARTFOO and | ||
# te-7 is the last position before the 6 characters of ENDFOO (te is one past | ||
# the end of the token in a Ragel scanner). | ||
action Emit { | ||
emit data[(ts+8)..(te-7)].pack('c*') | ||
} | ||
|
||
# One or more arbitrary characters between the two delimiters, joined to the | ||
# closing delimiter with the finish-guarded concatenation operator. | ||
foo = 'STARTFOO' any+ :>> 'ENDFOO'; | ||
|
||
# Scanner: a foo token triggers Emit, any other single character is consumed | ||
# with no action. | ||
main := |* | ||
foo => Emit; | ||
any; | ||
*|; | ||
}%% | ||
=end | ||
|
||
|
||
# Scans a file for "STARTFOO[...]ENDFOO" blocks and outputs their contents. | ||
# | ||
# ENV['CHUNK_SIZE'] determines how much of the file to read in at a time, allowing you to control memory usage. | ||
# | ||
# Uses ragel's scanner functionality even though it's not strictly necessary. | ||
class SimpleScanner | ||
attr_reader :path | ||
|
||
# Stores the path of the file to scan. | ||
# The write data directive below is replaced by Ragel with the state machine data. | ||
def initialize(path) | ||
@path = path | ||
%% write data; | ||
# % (this fixes syntax highlighting) | ||
end | ||
|
||
# Called by the Emit action with the contents of one block; prints it to standard output. | ||
def emit(foo) | ||
$stdout.puts foo | ||
end | ||
|
||
# Reads the file chunk by chunk and runs the scanner over each chunk, | ||
# carrying the bytes of an unfinished token over to the next chunk. | ||
def perform | ||
# So that ragel doesn't try to get it from data.length | ||
pe = :ignored | ||
eof = :ignored | ||
|
||
%% write init; | ||
# % (this fixes syntax highlighting) | ||
|
||
# Integers left over from the previous chunk that belong to a token still in progress. | ||
leftover = [] | ||
|
||
File.open(path) do |f| | ||
# NOTE(review): to_i turns a missing CHUNK_SIZE into 0, and IO#read(0) returns | ||
# an empty string rather than nil, so this loop may never end - confirm that | ||
# callers always set CHUNK_SIZE to a positive number. | ||
while chunk = f.read(ENV['CHUNK_SIZE'].to_i) | ||
data = leftover + chunk.unpack('c*') | ||
p ||= 0 | ||
pe = data.length | ||
|
||
%% write exec; | ||
# % (this fixes syntax highlighting) | ||
# A non-nil ts means a token was started but not finished in this chunk: | ||
# keep everything from ts onward and shift p and ts so that they index | ||
# into the data array built for the next chunk. | ||
if ts | ||
leftover = data[ts..pe] | ||
p = p - ts | ||
ts = 0 | ||
else | ||
# No token in progress: nothing to carry over, restart at position 0. | ||
leftover = [] | ||
p = 0 | ||
end | ||
end | ||
end | ||
end | ||
end | ||
|
||
# Script entry point: scan the file named by the first command-line argument. | ||
s = SimpleScanner.new ARGV[0] | ||
s.perform |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
=begin | ||
%%{ | ||
# Ragel machine that finds STARTFOO ... ENDFOO blocks without using a scanner. | ||
machine simple_tokenizer; | ||
|
||
# MyTs and MyTe record the current position as the start and the end of the | ||
# block contents. | ||
action MyTs { | ||
my_ts = p | ||
} | ||
action MyTe { | ||
my_te = p | ||
} | ||
# Emit passes the integers from my_ts up to but not including my_te to emit, | ||
# packed back into a string, then clears both positions. | ||
action Emit { | ||
emit data[my_ts...my_te].pack('c*') | ||
my_ts = nil | ||
my_te = nil | ||
} | ||
|
||
# MyTs fires on entering the contents, MyTe on entering the closing delimiter | ||
# and Emit on leaving it. | ||
foo = 'STARTFOO' any+ >MyTs :>> 'ENDFOO' >MyTe %Emit; | ||
# The input is any sequence of foo blocks and other characters. | ||
main := ( foo | any+ )*; | ||
|
||
}%% | ||
=end | ||
|
||
# Scans a file for "STARTFOO[...]ENDFOO" blocks and outputs their contents. | ||
# | ||
# ENV['CHUNK_SIZE'] determines how much of the file to read in at a time, allowing you to control memory usage. | ||
# | ||
# Does not use ragel's scanner functionality because no backtracking is needed. | ||
class SimpleTokenizer | ||
attr_reader :path | ||
|
||
# Stores the path of the file to scan. | ||
# The write data directive below is replaced by Ragel with the state machine data. | ||
def initialize(path) | ||
@path = path | ||
%% write data; | ||
# % (this fixes syntax highlighting) | ||
end | ||
|
||
# Called by the Emit action with the contents of one block; prints it to standard output. | ||
def emit(foo) | ||
$stdout.puts foo | ||
end | ||
|
||
# Reads the file chunk by chunk and runs the machine over each chunk, | ||
# carrying the bytes of an unfinished block over to the next chunk. | ||
def perform | ||
# So that ragel doesn't try to get it from data.length | ||
pe = :ignored | ||
eof = :ignored | ||
|
||
%% write init; | ||
# % (this fixes syntax highlighting) | ||
|
||
# leftover holds integers carried over from the previous chunk; my_ts and | ||
# my_te are the positions maintained by the MyTs, MyTe and Emit actions. | ||
leftover = [] | ||
my_ts = nil | ||
my_te = nil | ||
|
||
File.open(path) do |f| | ||
# NOTE(review): to_i turns a missing CHUNK_SIZE into 0, and IO#read(0) returns | ||
# an empty string rather than nil, so this loop may never end - confirm that | ||
# callers always set CHUNK_SIZE to a positive number. | ||
while chunk = f.read(ENV['CHUNK_SIZE'].to_i) | ||
data = leftover + chunk.unpack('c*') | ||
p = 0 | ||
pe = data.length | ||
%% write exec; | ||
# % (this fixes syntax highlighting) | ||
# A non-nil my_ts means a block was started but not emitted in this chunk: | ||
# keep everything from my_ts onward and shift my_te and my_ts so that they | ||
# index into the data array built for the next chunk. | ||
if my_ts | ||
leftover = data[my_ts..-1] | ||
my_te = my_te - my_ts if my_te | ||
my_ts = 0 | ||
else | ||
leftover = [] | ||
end | ||
end | ||
end | ||
end | ||
end | ||
|
||
# Script entry point: tokenize the file named by the first command-line argument. | ||
s = SimpleTokenizer.new ARGV[0] | ||
s.perform |