diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..324a67b --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,31 @@ +name: Ruby + +on: + push: + branches: + - master + + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + matrix: + ruby: + - "3.0" + - "2.7" + - "2.6" + - "jruby" + - "truffleruby" + + steps: + - uses: actions/checkout@v2 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + - name: Run RSpec + run: bundle exec rspec diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5c249f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +/.bundle/ +/.yardoc +/_yardoc/ +/coverage/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ + +# rspec failure tracking +.rspec_status + +Gemfile.lock diff --git a/.rspec b/.rspec new file mode 100644 index 0000000..34c5164 --- /dev/null +++ b/.rspec @@ -0,0 +1,3 @@ +--format documentation +--color +--require spec_helper diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..944c9fa --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,22 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog], +and this project adheres to [Semantic Versioning]. + +## [Unreleased] + +## [0.1.0] + +### Added + +- Initial implementation. 
([@skryukov]) + +[@skryukov]: https://github.com/skryukov + +[Unreleased]: https://github.com/skryukov/paco/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/skryukov/paco/commits/v0.1.0 + +[Keep a Changelog]: https://keepachangelog.com/en/1.0.0/ +[Semantic Versioning]: https://semver.org/spec/v2.0.0.html diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..5bbd468 --- /dev/null +++ b/Gemfile @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +# Specify your gem's dependencies in paco.gemspec +gemspec + +gem "rake", "~> 13.0" +gem "rspec", "~> 3.0" +gem "simplecov", require: false +gem "standard" diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..380edf1 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2021 Svyatoslav Kryukov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..93d067b --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# Paco + +Paco is a parser combinator library inspired by Haskell's [Parsec] and [Parsimmon]. + +## Installation + +Add this line to your application's Gemfile: + +```ruby +gem "paco" +``` + +And then execute: + + $ bundle install + +Or install it yourself as: + + $ gem install paco + +## Usage + +See [API documentation](docs/paco.md), [examples](examples) and [specs](spec). + +## Development + +After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rspec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. + +To install this gem onto your local machine, run `bundle exec rake install`. + +## Contributing + +Bug reports and pull requests are welcome on GitHub at https://github.com/skryukov/paco. + +## License + +The gem is available as open source under the terms of the [MIT License]. + +[MIT License]: https://opensource.org/licenses/MIT +[Parsec]: https://github.com/haskell/parsec +[Parsimmon]: https://github.com/jneen/parsimmon +[parsby]: https://github.com/jolmg/parsby diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..b6ae734 --- /dev/null +++ b/Rakefile @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +require "bundler/gem_tasks" +require "rspec/core/rake_task" + +RSpec::Core::RakeTask.new(:spec) + +task default: :spec diff --git a/bin/console b/bin/console new file mode 100755 index 0000000..f7d0145 --- /dev/null +++ b/bin/console @@ -0,0 +1,15 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "paco" + +# You can add fixtures and/or initialization code here to make experimenting +# with your gem easier. You can also use a different console, if you like. + +# (If you use this, don't forget to add pry to your Gemfile!) 
+# require "pry" +# Pry.start + +require "irb" +IRB.start(__FILE__) diff --git a/bin/setup b/bin/setup new file mode 100755 index 0000000..dce67d8 --- /dev/null +++ b/bin/setup @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +IFS=$'\n\t' +set -vx + +bundle install + +# Do any other automated setup that you need to do here diff --git a/docs/paco.md b/docs/paco.md new file mode 100644 index 0000000..706dccd --- /dev/null +++ b/docs/paco.md @@ -0,0 +1,335 @@ +# Documentation + +## Paco::Combinators: Main methods + +### Paco::Combinators.not_followed_by(parser) + +Returns a parser that runs the passed `parser` without consuming the input, and returns `nil` if the passed `parser` _does not match_ the input. Fails otherwise. + +### Paco::Combinators.succeed(result) + +Returns a parser that doesn't consume any input and always returns `result`. + +### Paco::Combinators.failed(message) + +Returns a parser that doesn't consume any input and always fails with passed `message`. + +### Paco::Combinators.lookahead(parser) + +Returns a parser that runs the passed `parser` without consuming the input, and returns an empty string. + +### Paco::Combinators.alt(*parsers) + +Accepts one or more parsers, and returns a parser that returns the value of the first parser that succeeds, backtracking in between. + +### Paco::Combinators.seq(*parsers) + +Accepts one or more parsers, and returns a parser that expects them to match in order, returns an array of all their results. + +### Paco::Combinators.seq_map(*parsers, &block) + +Returns a parser that matches all `parsers` sequentially, and passes their results as arguments to a `block`, and at the end returns its result. + +### Paco::Combinators.many(parser) + +Expects `parser` zero or more times, and returns an array of the results. + +### Paco::Combinators.sep_by(parser, separator) + +Returns a parser that expects **zero or more** matches for `parser`, separated by the parser `separator`. Returns an array of `parser` results. 
+ +```ruby +example = Paco::Combinators.sep_by( + Paco::Combinators.digits, + Paco::Combinators.string(",") +) +example.parse("1,1,2,3,5,8,13,21") #=> ["1", "1", "2", "3", "5", "8", "13", "21"] +``` + +### Paco::Combinators.sep_by_1(parser, separator) + +Returns a parser that expects **one or more** matches for `parser`, separated by the parser `separator`. Returns an array of `parser` results. + +### Paco::Combinators.wrap(before, after, parser) + +Expects the parser `before` before `parser` and `after` after `parser`. Returns the result of the parser. + +### Paco::Combinators.optional(parser) + +Returns a parser that returns the result of the passed `parser`, or `nil` if `parser` fails. + +### Paco::Combinators.lazy(desc = "", &block) + +Accepts a block that returns a parser, which is evaluated when the parser is used rather than when it is defined. This is useful for referencing parsers that haven't yet been defined, and for implementing recursive parsers. + +## Paco::Combinators: Text related methods + +### Paco::Combinators.string(matcher) + +Returns a parser that looks for a passed `matcher` string and returns its value on success. + +### Paco::Combinators.satisfy(&block) + +Returns a parser that returns a single character if passed block result is truthy: + +```ruby +lower = Paco::Combinators.satisfy do |char| + char == char.downcase +end + +lower.parse("a") #=> "a" +lower.parse("P") #=> ParseError +``` + +### Paco::Combinators.take_while(&block) + +Returns a parser that returns a string containing all the next characters that are truthy for the passed block. + +Alias for `satisfy(&block).many.join`. + +### Paco::Combinators.one_of(matcher) + +Returns a parser that looks for exactly one character from passed `matcher`, and returns its value on success. + +### Paco::Combinators.none_of(matcher) + +Returns a parser that looks for exactly one character _NOT_ from passed `matcher`, and returns its value on success. 
+ +### Paco::Combinators.regexp(regexp, group: 0) + +Returns a parser that looks for a match to the regexp and returns the entire text matched. The regexp will always match starting at the current parse location. When `group` is specified, it returns only the text in the specific regexp match group. + +### Paco::Combinators.regexp_char(regexp) + +Returns a parser that checks current character against the passed `regexp`. + +### Paco::Combinators.any_char + +Returns a parser that consumes and returns the next character of the input. + +### Paco::Combinators.remainder + +Returns a parser that consumes and returns the entire remainder of the input. + +### Paco::Combinators.eof + +Returns a parser that matches end of file and returns nil. + +### Paco::Combinators.cr + +Returns a parser that checks for the "carriage return" (`\r`) character. + +An alias for `Paco::Combinators.string("\r")` + +### Paco::Combinators.lf + +Returns a parser that checks for the "line feed" (`\n`) character. + +An alias for `Paco::Combinators.string("\n")` + +### Paco::Combinators.crlf + +Returns a parser that checks for the "carriage return" character followed by the "line feed" character (`\r\n`). + +An alias for `Paco::Combinators.string("\r\n")` + +### Paco::Combinators.newline + +Returns a parser that will match any kind of line ending. + +An alias for: + +```ruby +Combinators.alt( + Paco::Combinators.crlf, Paco::Combinators.lf, Paco::Combinators.cr +) +``` + +### Paco::Combinators.end_of_line + +Returns a parser that will match any kind of line ending *including* end of file. + +An alias for: + +```ruby +Combinators.alt( + Paco::Combinators.newline, Paco::Combinators.eof +) +``` + +### Paco::Combinators.letter + +Alias for `Paco::Combinators.regexp(/[a-z]/i)`. + +### Paco::Combinators.letters + +Alias for `Paco::Combinators.regexp(/[a-z]+/i)`. + +### Paco::Combinators.opt_letters + +Alias for `Paco::Combinators.regexp(/[a-z]*/i)`. 
+ +### Paco::Combinators.digit + +Alias for `Paco::Combinators.regexp(/[0-9]/)`. + +### Paco::Combinators.digits + +Alias for `Paco::Combinators.regexp(/[0-9]+/)`. + +### Paco::Combinators.opt_digits + +Alias for `Paco::Combinators.regexp(/[0-9]*/)`. + +### Paco::Combinators.ws + +Alias for `Paco::Combinators.regexp(/\s+/)`. + +### Paco::Combinators.opt_ws + +Alias for `Paco::Combinators.regexp(/\s*/)`. + +### Paco::Combinators.spaced(parser) + +Alias for `parser.trim(Paco::Combinators.opt_ws)`. + +## Paco::Parser methods + +### Paco::Parser#parse(input) + +Applies `parser` on the provided string `input` and returns a parsed result or raises a `ParseError` exception. + +```ruby +example = Paco::Combinators.string("Paco") + +example.parse("Paco") #=> "Paco" +``` + +### Paco::Parser#failure(ctx) + +Raises `ParseError`, used internally by `Paco`: + +```ruby + +def eof + Parser.new("end of file") do |ctx, parser| + parser.failure(ctx) unless ctx.eof? + nil + end +end +``` + +### Paco::Parser#or(other) + +Returns a new parser which tries `parser`, and if it fails uses `other`. + +```ruby +bool = Paco::Combinators.string("true").or Paco::Combinators.string("false") + +bool.parse("true") #=> "true" +bool.parse("false") #=> "false" +``` + +### Paco::Parser#skip(other) + +Expects `other` parser to follow `parser`, but returns only the value of `parser`. + +```ruby +example = Paco::Combinators.any_char.skip(Paco::Combinators.opt_ws) + +example.parse("P ") #=> "P" +example.parse("a") #=> "a" +``` + +### Paco::Parser#next(other) + +Expects `other` parser to follow `parser`, but returns only the value of `other` parser. + +```ruby +example = Paco::Combinators.regexp(/[+ ]*/).next(Paco::Combinators.digits) + +example.parse("42") #=> "42" +example.parse("+42") #=> "42" +example.parse(" + 42") #=> "42" +``` + +### Paco::Parser#fmap(&block) + +Transforms the output of `parser` with the given block. 
+ +```ruby +example = Paco::Combinators.regexp(/[0-9]+/) + .fmap(&:to_i) + .fmap { |num| num + 1 } + +example.parse("9") #=> 10 +``` + +### Paco::Parser#bind(&block) + +Returns a new parser which tries `parser`, and on success calls the `block` with the result of the parse, which is expected to return another parser, which will be tried next. This allows you to dynamically decide how to continue the parse. + +```ruby +include Paco::Combinators + +char_pairs = {"[" => "]", "(" => ")", "{" => "}", "<" => ">"} + +array_of_strings = string("%").next(any_char).bind do |char| + end_char = char_pairs[char] || char + + many(satisfy { |ch| ch != end_char }.skip(opt_ws)).skip(string(end_char)) +end + +array_of_strings.parse("%[a b c]") #=> ["a", "b", "c"] +array_of_strings.parse("%(a b c)") #=> ["a", "b", "c"] +array_of_strings.parse("%|a b c|") #=> ["a", "b", "c"] +``` + +### Paco::Parser#many + +Expects `parser` zero or more times, and returns an array of the results. + +### Paco::Parser#times(min, max = nil) + +Returns a parser that runs `parser` between `min` and `max` times, and returns an array of the results. When `max` is not specified, `max` = `min`. + +### Paco::Parser#at_least(num) + +Returns a parser that runs `parser` at least `num` times, and returns an array of the results. + +### Paco::Parser#at_most(num) + +Returns a parser that runs `parser` at most `num` times, and returns an array of the results. + +### Paco::Parser#result(value) + +Returns a new parser with the same behavior, but which returns the passed `value`. + +### Paco::Parser#fallback(value) + +Returns a new parser which tries `parser` and, if it fails, returns `value` without consuming any input. + +```ruby +example = Paco::Combinators.digit.fallback("0") + +example.parse("4") #=> "4" +example.parse("") #=> "0" +``` + +### Paco::Parser#trim(other) + +Expects `other` parser before and after `parser`. Returns the result of the parser. 
+ +### Paco::Parser#wrap(before, after) + +Expects the parser `before` before `parser` and `after` after `parser`. Returns the result of the parser. + +### Paco::Parser#not_followed_by(other) + +Returns a parser that runs the passed `other` parser without consuming the input, and returns the result of `parser` if the passed one _does not match_ the input. Fails otherwise. + +### Paco::Parser#join(separator = "") + +Returns a parser that runs `parser` and concatenates its results with the `separator`. + +### Paco::Parser#
diff --git a/examples/json_parser.rb b/examples/json_parser.rb
new file mode 100644
index 0000000..ffc3e24
--- /dev/null
+++ b/examples/json_parser.rb
@@ -0,0 +1,119 @@
+# frozen_string_literal: true
+
+require "paco"
+
+# Example JSON parser built from Paco combinators.
+module JsonParser
+  extend Paco
+
+  module_function
+
+  # Parses `io` as a single JSON value surrounded by optional whitespace.
+  def parse(io)
+    spaced(value).parse(io)
+  end
+
+  # Any JSON value; alternatives are tried in the listed order.
+  def value
+    memoize { alt(null, bool, number, str, array, object) }
+  end
+
+  def null
+    memoize { string("null").result(nil) }
+  end
+
+  def bool
+    memoize do
+      alt(
+        string("true").result(true),
+        string("false").result(false)
+      )
+    end
+  end
+
+  def sign
+    memoize { alt(string("-"), string("+")) }
+  end
+
+  # One or more digits, converted to an Integer.
+  def decimal
+    memoize { digits.fmap(&:to_i) }
+  end
+
+  # Number with optional sign, optional fraction and optional exponent.
+  def number
+    memoize do
+      seq(
+        optional(sign),
+        decimal,
+        optional(seq(
+          string("."),
+          digits
+        )),
+        optional(seq(
+          one_of("eE"),
+          optional(sign),
+          decimal
+        ))
+      ).fmap do |sign, whole, (_, fractional), (_, exponent_sign, exponent)|
+        n = whole
+        # The fraction is kept as a digit string: converting it to an Integer
+        # first would drop leading zeros and turn "1.05" into 1.5.
+        n += "0.#{fractional}".to_f if fractional
+        n *= -1 if sign == "-"
+        if exponent
+          e = exponent
+          e *= -1 if exponent_sign == "-"
+          n *= 10**e
+        end
+        n
+      end
+    end
+  end
+
+  # Double-quoted string; the body is any run of plain or escaped characters.
+  def str
+    memoize do
+      wrap(
+        string('"'),
+        string('"'),
+        many(alt(none_of('"\\'), escaped_chars)).join
+      )
+    end
+  end
+
+  # `lazy` defers the reference to `value`, which itself refers back to `array`.
+  def array
+    memoize do
+      wrap(
+        string("["),
+        opt_ws > string("]"),
+        sep_by(spaced(lazy { value }), string(","))
+      )
+    end
+  end
+
+  def object
+    memoize do
+      wrap(string("{"), opt_ws >
string("}"), + sep_by( + spaced(seq( + str < spaced(string(":")), + lazy { value } + )), + string(",") + )).fmap { |x| x.to_h } + end + end + + def four_hex_digits + memoize { regexp(/\h{4}/) } + end + + def escaped_chars + string("\\").next( + alt( + string('"'), + string("\\"), + string("/"), + string("f").result("\f"), + string("b").result("\b"), + string("r").result("\r"), + string("n").result("\n"), + string("t").result("\t"), + string("u").next(four_hex_digits.fmap { |s| [s.hex].pack("U") }) + ) + ) + end +end diff --git a/lib/paco.rb b/lib/paco.rb new file mode 100644 index 0000000..7d307cf --- /dev/null +++ b/lib/paco.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +require "paco/version" +require "paco/parse_error" +require "paco/context" +require "paco/parser" +require "paco/combinators" + +module Paco + def self.extended(base) + base.extend Combinators + end + + def self.included(base) + base.include Combinators + end +end diff --git a/lib/paco/combinators.rb b/lib/paco/combinators.rb new file mode 100644 index 0000000..518a637 --- /dev/null +++ b/lib/paco/combinators.rb @@ -0,0 +1,183 @@ +# frozen_string_literal: true + +require "monitor" + +require "paco/combinators/char" + +module Paco + module Combinators + def self.extended(base) + base.extend MonitorMixin + base.extend Char + end + + def self.included(base) + base.include MonitorMixin + base.include Char + end + + extend self + + # Returns a parser that runs the passed `parser` without consuming the input, and + # returns `null` if the passed `parser` _does not match_ the input. Fails otherwise. + # @param [Paco::Parser] parser + # @return [Paco::Parser] + def not_followed_by(parser) + Parser.new("not #{parser.desc}") do |ctx, pars| + start_pos = ctx.pos + begin + parser._parse(ctx) + rescue ParseError + ctx.pos = start_pos + nil + else + pars.failure(ctx) + end + end + end + + # Returns a parser that doesn't consume any input and always returns `result`. 
+ # @return [Paco::Parser] + def succeed(result) + Parser.new { result } + end + + # Returns a parser that doesn't consume any input and always fails with passed `message`. + # @param [String] message + # @return [Paco::Parser] + def failed(message) + Parser.new(message) { |ctx, parser| parser.failure(ctx) } + end + + # Returns a parser that runs the passed `parser` without consuming the input, + # and returns empty string. + # @param [Paco::Parser] parser + # @return [Paco::Parser] + def lookahead(parser) + Parser.new do |ctx| + start_pos = ctx.pos + parser._parse(ctx) + ctx.pos = start_pos + "" + end + end + + # Accepts any number of parsers, and returns a parser that returns the value of the first parser that succeeds, backtracking in between. + # @param [Array] parsers + # @return [Paco::Parser] + def alt(*parsers) + raise ArgumentError, "no parsers specified" if parsers.empty? + + Parser.new do |ctx| + result = nil + last_error = nil + start_pos = ctx.pos + parsers.each do |pars| + break result = {value: pars._parse(ctx)} + rescue ParseError => e + last_error = e + ctx.pos = start_pos + next + end + raise last_error unless result + result[:value] + end + end + + # Accepts one or more parsers, and returns a parser that expects them + # to match in order, returns an array of all their results. + # @param [Array] parsers + # @return [Paco::Parser] + def seq(*parsers) + raise ArgumentError, "no parsers specified" if parsers.empty? + + Parser.new do |ctx| + parsers.map { |parser| parser._parse(ctx) } + end + end + + # Returns a parser that matches all `parsers` sequentially, and passes + # their results as an arguments to a `block`, and at the end returns its result. + # @param [Array] parsers + # @return [Paco::Parser] + def seq_map(*parsers, &block) + raise ArgumentError, "no parsers specified" if parsers.empty? 
+
+      seq(*parsers).fmap do |results|
+        block.call(*results)
+      end
+    end
+
+    # Accepts a block that returns a parser. The block is called each time the returned parser runs,
+    # not when `lazy` itself is called.
+    # This is useful for referencing parsers that haven't yet been defined, and for implementing recursive parsers.
+    # @param [String] desc optional description for the parser
+    # @return [Paco::Parser]
+    def lazy(desc = "", &block)
+      Parser.new(desc) { |ctx| block.call._parse(ctx) }
+    end
+
+    # Returns a parser that expects zero or more matches for `parser`,
+    # separated by the parser `separator`. Returns an array of `parser` results.
+    # @param [Paco::Parser] parser
+    # @param [Paco::Parser] separator
+    # @return [Paco::Parser]
+    def sep_by(parser, separator)
+      alt(sep_by_1(parser, separator), succeed([]))
+    end
+
+    # Returns a parser that expects one or more matches for `parser`,
+    # separated by the parser `separator`. Returns an array of `parser` results.
+    # @param [Paco::Parser] parser
+    # @param [Paco::Parser] separator
+    # @return [Paco::Parser]
+    def sep_by_1(parser, separator)
+      seq_map(parser, many(separator.next(parser))) do |first, arr|
+        [first] + arr
+      end
+    end
+
+    # Expects the parser `before` before `parser` and `after` after `parser`. Returns the result of the parser.
+    # @param [Paco::Parser] before
+    # @param [Paco::Parser] after
+    # @param [Paco::Parser] parser
+    # @return [Paco::Parser]
+    def wrap(before, after, parser)
+      before.next(parser).skip(after)
+    end
+
+    # Expects `parser` zero or more times, and returns an array of the results.
+    # When an attempt fails, the input position is rewound to where that attempt
+    # started, so a partially matched repetition does not silently consume input.
+    # @param [Paco::Parser] parser
+    # @return [Paco::Parser]
+    def many(parser)
+      Parser.new do |ctx|
+        results = []
+        # NOTE: a `parser` that succeeds without consuming input never raises
+        # ParseError here, so this loop would not terminate for it.
+        loop do
+          start_pos = ctx.pos
+          results << parser._parse(ctx)
+        rescue ParseError
+          ctx.pos = start_pos
+          break
+        end
+        results
+      end
+    end
+
+    # Returns a parser that returns the result of the passed `parser`, or nil if `parser` fails.
+ # @param [Paco::Parser] parser + # @return [Paco::Parser] + def optional(parser) + alt(parser, succeed(nil)) + end + + # Helper used for memoization + def memoize(&block) + key = block.source_location + synchronize do + @_paco_memoized ||= {} + return @_paco_memoized[key] if @_paco_memoized.key?(key) + + @_paco_memoized[key] = block.call + end + end + end +end diff --git a/lib/paco/combinators/char.rb b/lib/paco/combinators/char.rb new file mode 100644 index 0000000..fb50a3c --- /dev/null +++ b/lib/paco/combinators/char.rb @@ -0,0 +1,206 @@ +# frozen_string_literal: true + +module Paco + module Combinators + module Char + # Returns a parser that returns a single character if passed block result is truthy: + # + # @example + # lower = Combinators.satisfy do |char| + # char == char.downcase + # end + # + # lower.parse("a") #=> "a" + # lower.parse("P") #=> ParseError + # + # @param [String] desc optional description for the parser + # @param [Proc] block proc with one argument – a next char of the input + # @return [Paco::Parser] + def satisfy(desc = "", &block) + Parser.new(desc) do |ctx, parser| + parser.failure(ctx) if ctx.eof? + + char = ctx.read(1) + parser.failure(ctx) unless block.call(char) + + ctx.pos += 1 + char + end + end + + # Returns a parser that looks for a passed `matcher` string and returns its value on success. + # @param [String] matcher + # @return [Paco::Parser] + def string(matcher) + Parser.new(matcher) do |ctx, parser| + src = ctx.read(matcher.length) + parser.failure(ctx) if src != matcher + + ctx.pos += matcher.length + src + end + end + + # Returns a parser that looks for a match to the regexp and returns the entire text matched. + # The regexp will always match starting at the current parse location. + # When `group` is specified, it returns only the text in the specific regexp match group. 
+      # @param [Regexp] regexp
+      # @param [Integer] group capture group to return; 0 (the default) is the whole match
+      # @return [Paco::Parser]
+      def regexp(regexp, group: 0)
+        # \A anchors at the start of the remaining input only. `^` would also match
+        # after any newline, letting the parser succeed on text further ahead.
+        anchored_regexp = Regexp.new("\\A(?:#{regexp.source})", regexp.options)
+        Parser.new(regexp.inspect) do |ctx, parser|
+          match = anchored_regexp.match(ctx.read_all)
+          parser.failure(ctx) if match.nil?
+
+          # Always advance by the whole match, even when only one group is returned.
+          ctx.pos += match[0].length
+          match[group]
+        end
+      end
+
+      # Returns a parser that checks the current character against the passed `regexp`.
+      # @param [Regexp] regexp
+      # @return [Paco::Parser]
+      def regexp_char(regexp)
+        satisfy(regexp.inspect) { |char| regexp.match?(char) }
+      end
+
+      # Returns a parser that looks for exactly one character from passed
+      # `matcher`, and returns its value on success.
+      # @param [String, Array] matcher
+      # @return [Paco::Parser]
+      def one_of(matcher)
+        satisfy(matcher.to_s) { |char| matcher.include?(char) }
+      end
+
+      # Returns a parser that looks for exactly one character _NOT_ from passed
+      # `matcher`, and returns its value on success.
+      # @param [String, Array] matcher
+      # @return [Paco::Parser]
+      def none_of(matcher)
+        satisfy("not #{matcher}") { |char| !matcher.include?(char) }
+      end
+
+      # Returns a parser that consumes and returns the next character of the input.
+      # @return [Paco::Parser]
+      def any_char
+        memoize { satisfy("any_char") { |ch| ch.length > 0 } }
+      end
+
+      # Returns a parser that consumes and returns the entire remainder of the input.
+      # @return [Paco::Parser]
+      def remainder
+        memoize do
+          Parser.new("remainder of the input") do |ctx, parser|
+            result = ctx.read_all
+            ctx.pos += result.length
+            result
+          end
+        end
+      end
+
+      # Returns a parser that returns a string containing all the next
+      # characters that are truthy for the passed block.
+      # @param [Proc] block proc with one argument – a next char of the input
+      # @return [Paco::Parser]
+      def take_while(&block)
+        satisfy(&block).many.join
+      end
+
+      # Returns a parser that matches end of file and returns nil.
+ # @return [Paco::Parser] + def eof + memoize do + Parser.new("end of file") do |ctx, parser| + parser.failure(ctx) unless ctx.eof? + nil + end + end + end + + # Returns a parser that checks for the "carriage return" (`\r`) character. + # @return [Paco::Parser] + def cr + memoize { string("\r") } + end + + # Returns a parser that checks for the "line feed" (`\n`) character. + # @return [Paco::Parser] + def lf + memoize { string("\n") } + end + + # Returns a parser that checks for the "carriage return" character followed by the "line feed" character (`\r\n`). + # @return [Paco::Parser] + def crlf + memoize { string("\r\n") } + end + + # Returns a parser that will match any kind of line ending. + # @return [Paco::Parser] + def newline + memoize { alt(crlf, lf, cr) } + end + + # Returns a parser that will match any kind of line ending *including* end of file. + # @return [Paco::Parser] + def end_of_line + memoize { alt(newline, eof) } + end + + # Alias for `Paco::Combinators.regexp(/[a-z]/i)`. + # @return [Paco::Parser] + def letter + memoize { regexp_char(/[a-z]/i) } + end + + # Alias for `Paco::Combinators.regexp(/[a-z]+/i)`. + # @return [Paco::Parser] + def letters + memoize { seq(letter, letter.many).fmap { |x| x.flatten.join } } + end + + # Alias for `Paco::Combinators.regexp(/[a-z]*/i)`. + # @return [Paco::Parser] + def opt_letters + memoize { letters | succeed("") } + end + + # Alias for `Paco::Combinators.regexp(/[0-9]/)`. + # @return [Paco::Parser] + def digit + memoize { regexp_char(/[0-9]/) } + end + + # Alias for `Paco::Combinators.regexp(/[0-9]+/)`. + # @return [Paco::Parser] + def digits + memoize { seq(digit, digit.many).fmap { |x| x.flatten.join } } + end + + # Alias for `Paco::Combinators.regexp(/[0-9]*/)`. + # @return [Paco::Parser] + def opt_digits + memoize { digits | succeed("") } + end + + # Alias for `Paco::Combinators.regexp(/\s+/)`. 
+ # @return [Paco::Parser] + def ws + memoize { regexp(/\s+/) } + end + + # Alias for `Paco::Combinators.regexp(/\s*/)`. + # @return [Paco::Parser] + def opt_ws + memoize { regexp(/\s*/) } + end + + # Alias for `parser.trim(Paco::Combinators.opt_ws)`. + # @param [Paco::Parser] parser + # @return [Paco::Parser] + def spaced(parser) + parser.trim(opt_ws) + end + end + end +end diff --git a/lib/paco/context.rb b/lib/paco/context.rb new file mode 100644 index 0000000..9c6bcc2 --- /dev/null +++ b/lib/paco/context.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true +module Paco + class Context + attr_reader :input, :last_pos, :pos + + def pos=(np) + # TODO: is that needed? + @last_pos = @pos + @pos = np + end + + def initialize(input, pos = 0) + @input = input + @pos = pos + end + + def read(n) + input[pos, n] + end + + def read_all + input[pos..-1] + end + + def eof? + pos >= input.length + end + + def index(from = nil) + from ||= pos + lines = input[0..from].lines + + { + line: lines.length, + column: lines[-1]&.length || 0, + pos: from + } + end + end +end diff --git a/lib/paco/parse_error.rb b/lib/paco/parse_error.rb new file mode 100644 index 0000000..f829790 --- /dev/null +++ b/lib/paco/parse_error.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true +module Paco + class Error < StandardError; end + + class ParseError < Error + # @param [Paco::Context] ctx + def initialize(ctx, expected) + @ctx = ctx + @pos = ctx.pos + @expected = expected + + # TODO: make this possible to show every parsing message? or last n? 
+ # puts "" + # puts "#{ctx.pos}/#{ctx.input.length}: #{ctx.input[ctx.last_pos..ctx.pos]}" + # puts "expected: #{expected}" + # puts "" + end + + def message + index = @ctx.index(@pos) + <<~MSG + Parsing error + line #{index[:line]}, column #{index[:column]}: + unexpected #{@ctx.input[@pos] || "end of file"} + expecting #{@expected} + MSG + end + end +end
diff --git a/lib/paco/parser.rb b/lib/paco/parser.rb
new file mode 100644
index 0000000..438504e
--- /dev/null
+++ b/lib/paco/parser.rb
@@ -0,0 +1,168 @@
+# frozen_string_literal: true
+
+require "paco/combinators"
+
+module Paco
+  # A parser: a description plus a block that is called with the parsing
+  # context and the parser itself.
+  class Parser
+    attr_reader :desc
+
+    # @param [String] desc description passed to ParseError when this parser fails
+    # @param [Proc] block called as `block.call(ctx, parser)` by `_parse`
+    def initialize(desc = "", &block)
+      @desc = desc
+      @block = block
+    end
+
+    # Runs the parser against `input` and requires the end of input right after it.
+    # @param [String, Paco::Context] input wrapped into a Context unless it already is one
+    # @return the value produced by the parser
+    def parse(input)
+      ctx = input.is_a?(Context) ? input : Context.new(input)
+      skip(Paco::Combinators.eof)._parse(ctx)
+    end
+
+    # Runs the parser block against an existing context.
+    # @param [Paco::Context] ctx
+    def _parse(ctx)
+      @block.call(ctx, self)
+      # TODO: add ability for debugging
+      # puts ""
+      # puts "#{@block.source_location} succeed."
+      # puts "#{ctx.input.length}/#{ctx.pos}: " + ctx.input[ctx.last_pos..ctx.pos].inspect
+      # puts ""
+      # res
+    end
+
+    # Raises ParseError
+    # @param [Paco::Context] ctx
+    # @raise [Paco::ParseError]
+    def failure(ctx)
+      raise ParseError.new(ctx, desc), "", []
+    end
+
+    # Returns a new parser which tries `parser`, and if it fails uses `other`.
+    # The input position is rewound before `other` runs, so `other` starts
+    # where `parser` started even if `parser` consumed input before failing.
+    # @param [Paco::Parser] other
+    # @return [Paco::Parser]
+    def or(other)
+      Parser.new do |ctx|
+        start_pos = ctx.pos
+        _parse(ctx)
+      rescue ParseError
+        ctx.pos = start_pos
+        other._parse(ctx)
+      end
+    end
+    alias_method :|, :or
+
+    # Expects `other` parser to follow `parser`, but returns only the value of `parser`.
+    # @param [Paco::Parser] other
+    # @return [Paco::Parser]
+    def skip(other)
+      Paco::Combinators.seq(self, other).fmap { |results| results[0] }
+    end
+    alias_method :<, :skip
+
+    # Expects `other` parser to follow `parser`, but returns only the value of `other` parser.
+    # @param [Paco::Parser] other
+    # @return [Paco::Parser]
+    def next(other)
+      bind { other }
+    end
+    alias_method :>, :next
+
+    # Transforms the output of `parser` with the given block.
+ # @return [Paco::Parser] + def fmap(&block) + Parser.new do |ctx| + block.call(_parse(ctx)) + end + end + + # Returns a new parser which tries `parser`, and on success + # calls the `block` with the result of the parse, which is expected + # to return another parser, which will be tried next. This allows you + # to dynamically decide how to continue the parse, which is impossible + # with the other Paco::Combinators. + # @return [Paco::Parser] + def bind(&block) + Parser.new do |ctx| + block.call(_parse(ctx))._parse(ctx) + end + end + alias_method :chain, :bind + + # Expects `parser` zero or more times, and returns an array of the results. + # @return [Paco::Parser] + def many + Paco::Combinators.many(self) + end + + # Returns a new parser with the same behavior, but which returns passed `value`. + # @return [Paco::Parser] + def result(value) + fmap { value } + end + + # Returns a new parser which tries `parser` and, if it fails, returns `value` without consuming any input. + # @return [Paco::Parser] + def fallback(value) + self.or(Paco::Combinators.succeed(value)) + end + + # Expects `other` parser before and after `parser`, and returns the result of the parser. + # @param [Paco::Parser] other + # @return [Paco::Parser] + def trim(other) + other.next(self).skip(other) + end + + # Expects the parser `before` before `parser` and `after` after `parser. Returns the result of the parser. + # @param [Paco::Parser] before + # @param [Paco::Parser] after + # @return [Paco::Parser] + def wrap(before, after) + Paco::Combinators.wrap(before, after, self) + end + + # Returns a parser that runs passed `other` parser without consuming the input, and + # returns result of the `parser` if the passed one _does not match_ the input. Fails otherwise. + # @param [Paco::Parser] other + # @return [Paco::Parser] + def not_followed_by(other) + skip(Paco::Combinators.not_followed_by(other)) + end + + # Returns a parser that runs `parser` and concatenate it results with the `separator`. 
+ # @param [String] separator + # @return [Paco::Parser] + def join(separator = "") + fmap { |result| result.join(separator) } + end + + # Returns a parser that runs `parser` between `min` and `max` times, + # and returns an array of the results. When `max` is not specified, `max` = `min`. + # @param [Integer] min + # @param [Integer] max + # @return [Paco::Parser] + # @raise [ArgumentError] immediately, when `times` is called, if `min` is negative or `max` is less than `min` + def times(min, max = nil) + max ||= min + if min < 0 || max < min + raise ArgumentError, "invalid attributes: min `#{min}`, max `#{max}`" + end + + Parser.new do |ctx| + # the first `min` runs are mandatory: an error raised by any of them propagates + results = min.times.map { _parse(ctx) } + # the remaining `max - min` runs are optional: collecting stops at the first ParseError + (max - min).times.each do + results << _parse(ctx) + rescue ParseError + break + end + + results + end + end + + # Returns a parser that runs `parser` at least `num` times, + # and returns an array of the results. + # @param [Integer] num + # @return [Paco::Parser] + def at_least(num) + Paco::Combinators.seq_map(times(num), many) do |head, rest| + head + rest + end + end + + # Returns a parser that runs `parser` at most `num` times, + # and returns an array of the results. + # @param [Integer] num + # @return [Paco::Parser] + def at_most(num) + times(0, num) + end + end +end diff --git a/lib/paco/version.rb b/lib/paco/version.rb new file mode 100644 index 0000000..641839d --- /dev/null +++ b/lib/paco/version.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +module Paco + VERSION = "0.1.0" +end diff --git a/paco.gemspec b/paco.gemspec new file mode 100644 index 0000000..3527db9 --- /dev/null +++ b/paco.gemspec @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +require_relative "lib/paco/version" + +Gem::Specification.new do |spec| + spec.name = "paco" + spec.version = Paco::VERSION + spec.authors = ["Svyatoslav Kryukov"] + spec.email = ["s.g.kryukov@yandex.ru"] + + spec.summary = "Parser combinator library" + spec.description = "Paco is a parser combinator library." 
+ spec.homepage = "https://github.com/skryukov/paco" + spec.license = "MIT" + spec.required_ruby_version = ">= 2.6.0" + + spec.metadata = { + "bug_tracker_uri" => "#{spec.homepage}/issues", + "changelog_uri" => "#{spec.homepage}/blob/master/CHANGELOG.md", + "documentation_uri" => "#{spec.homepage}/blob/master/README.md", + "homepage_uri" => spec.homepage, + "source_code_uri" => spec.homepage + } + + spec.files = Dir.glob("lib/**/*") + Dir.glob("bin/**/*") + %w[README.md LICENSE.txt CHANGELOG.md] + + spec.require_paths = ["lib"] +end diff --git a/spec/paco/combinators/char_spec.rb b/spec/paco/combinators/char_spec.rb new file mode 100644 index 0000000..c3d7422 --- /dev/null +++ b/spec/paco/combinators/char_spec.rb @@ -0,0 +1,297 @@ +# frozen_string_literal: true +require "spec_helper" + +RSpec.describe Paco::Combinators::Char, :include_combinators do + describe "#string" do + it "matches a string" do + expect(string("Paco").parse("Paco")).to eq "Paco" + end + + it "raises an error" do + expect { string("Paco").parse("paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#satisfy" do + let(:example) { satisfy { |ch| ch == ch.downcase }.skip(remainder) } + + it "matches characters from string and returns an array" do + expect(example.parse("paco")).to eq "p" + end + + it "raises an error" do + expect { example.parse("Paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#take_while" do + let(:example) { take_while { |ch| ch == ch.downcase }.skip(remainder) } + + it "matches characters from string and returns an array" do + expect(example.parse("come here, Paco")).to eq "come here, " + end + + it "returns empty string if no matches found" do + expect(example.parse("Paco")).to eq "" + end + end + + describe "#one_of" do + it "matches a character from string" do + expect(one_of("abc").parse("b")).to eq "b" + end + + it "matches a character from array" do + expect(one_of(%w[a b c]).parse("c")).to eq "c" + end + + it "raises an error" do + 
expect { one_of("abc").parse("d") }.to raise_error(Paco::ParseError) + end + end + + describe "#none_of" do + it "matches a character not from string" do + expect(none_of("abc").parse("p")).to eq "p" + end + + it "matches a character not from array" do + expect(none_of(%w[a b c]).parse("A")).to eq "A" + end + + it "raises an error" do + expect { none_of("a b c").parse("b") }.to raise_error(Paco::ParseError) + end + end + + describe "#regexp" do + it "returns matched part of the string" do + expect(regexp(/\w{4}/).parse("Paco")).to eq("Paco") + end + + it "returns matched parts of the string" do + parser = seq(regexp(/pa/i), regexp(/\w+/)) + expect(parser.parse("Paco")).to eq(%w[Pa co]) + end + + it "raises an error" do + expect { regexp(/\w{4}/).parse("Alf") }.to raise_error(Paco::ParseError) + end + + context "with groups in regex" do + it "returns matched part of the string" do + expect(regexp(/(\w)\w*/).parse("Paco")).to eq("Paco") + end + + it "returns specified group" do + expect(regexp(/(\w{4}).*/, group: 1).parse("Paco!!!!111")).to eq("Paco") + end + end + end + + describe "#regexp_char" do + let(:example) { regexp_char(/\w/).skip(remainder) } + + it "returns matched char" do + expect(example.parse("Paco")).to eq("P") + end + + it "raises an error" do + expect { example.parse("П") }.to raise_error(Paco::ParseError) + end + end + + describe "#cr" do + it "returns cr" do + expect(cr.parse("\r")).to eq("\r") + end + + it "raises an error" do + expect { cr.parse("\n") }.to raise_error(Paco::ParseError) + end + end + + describe "#lf" do + it "returns lf" do + expect(lf.parse("\n")).to eq("\n") + end + + it "raises an error" do + expect { lf.parse("\r") }.to raise_error(Paco::ParseError) + end + end + + describe "#crlf" do + it "returns crlf" do + expect(crlf.parse("\r\n")).to eq("\r\n") + end + + it "raises an error" do + expect { crlf.parse("\r") }.to raise_error(Paco::ParseError) + end + end + + describe "#newline" do + subject { newline } + + it "returns 
parsed chars", :aggregate_failures do + expect(subject.parse("\n")).to eq("\n") + expect(subject.parse("\r")).to eq("\r") + expect(subject.parse("\r\n")).to eq("\r\n") + end + + it "raises an error" do + expect { subject.parse("") }.to raise_error(Paco::ParseError) + expect { subject.parse("paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#end_of_line" do + subject { end_of_line } + + it "returns parsed chars", :aggregate_failures do + expect(subject.parse("\n")).to eq("\n") + expect(subject.parse("\r")).to eq("\r") + expect(subject.parse("\r\n")).to eq("\r\n") + expect(subject.parse("")).to be_nil + end + + it "raises an error" do + expect { subject.parse("paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#any_char" do + it "returns parsed char" do + expect(any_char.parse("П")).to eq("П") + end + + it "raises an error" do + expect { any_char.parse("") }.to raise_error(Paco::ParseError) + end + end + + describe "#remainder" do + it "returns parsed char" do + expect(remainder.parse("Paco <3")).to eq("Paco <3") + end + + it "returns empty string when eof" do + expect(remainder.parse("")).to eq("") + end + end + + describe "#eof" do + it "returns parsed char" do + expect(eof.parse("")).to be_nil + end + + it "raises an error" do + expect { eof.parse("Paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#letter" do + it "returns parsed char" do + expect(letter.parse("Z")).to eq("Z") + end + + it "raises an error" do + expect { letter.parse("1") }.to raise_error(Paco::ParseError) + end + + it "raises an error for non a-z letters" do + expect { letter.parse("П") }.to raise_error(Paco::ParseError) + end + end + + describe "#letters" do + it "returns parsed char" do + expect(letters.parse("Paco")).to eq("Paco") + end + + it "raises an error for non a-z letters" do + expect { letters.parse("Пако") }.to raise_error(Paco::ParseError) + end + + it "raises an error" do + expect { letters.parse("42") }.to 
raise_error(Paco::ParseError) + end + end + + describe "#opt_letters" do + it "returns parsed chars" do + expect(opt_letters.parse("Paco")).to eq("Paco") + end + + it "returns empty string" do + expect(opt_letters.skip(remainder).parse("Пако")).to eq("") + end + end + + describe "#digit" do + it "returns parsed char" do + expect(digit.skip(remainder).parse("42!")).to eq("4") + end + + it "raises an error" do + expect { digit.parse("a") }.to raise_error(Paco::ParseError) + end + end + + describe "#digits" do + it "returns parsed chars" do + expect(digits.skip(remainder).parse("42!")).to eq("42") + end + + it "raises an error" do + expect { digits.parse("Paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#opt_digits" do + it "returns parsed chars" do + expect(opt_digits.skip(remainder).parse("42!")).to eq("42") + end + + it "returns empty string" do + expect(opt_digits.skip(remainder).parse("Paco")).to eq("") + end + end + + describe "#ws" do + it "returns parsed chars" do + expect(ws.skip(remainder).parse(" Paco")).to eq(" ") + end + + it "raises an error" do + expect { ws.parse("Paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#opt_ws" do + it "returns parsed chars" do + expect(opt_ws.skip(remainder).parse(" Paco")).to eq(" ") + end + + it "returns empty string" do + expect(opt_ws.skip(remainder).parse("Paco")).to eq("") + end + end + + describe "#spaced" do + let(:example) { spaced(letters).skip(remainder) } + + it "returns parser results" do + expect(example.parse(" Hello Paco!")).to eq("Hello") + end + + it "returns parser results when no spaces" do + expect(example.parse("Paco")).to eq("Paco") + end + + it "raises an error" do + expect { example.parse("!") }.to raise_error(Paco::ParseError) + end + end +end diff --git a/spec/paco/combinators_spec.rb b/spec/paco/combinators_spec.rb new file mode 100644 index 0000000..93ef700 --- /dev/null +++ b/spec/paco/combinators_spec.rb @@ -0,0 +1,192 @@ +# frozen_string_literal: true + 
+require "spec_helper" + +RSpec.describe Paco::Combinators, :include_combinators do + describe "#not_followed_by" do + it "returns nil" do + parser = seq(not_followed_by(string("a")), string("b")) + expect(parser.parse("b")).to eq([nil, "b"]) + end + + it "raises an error" do + expect { not_followed_by(string("a")).parse("a") }.to raise_error(Paco::ParseError) + end + end + + describe "#lookahead" do + it "returns empty string" do + parser = seq(lookahead(string("42")), digits) + expect(parser.parse("42")).to eq(["", "42"]) + end + + it "raises an error" do + expect { lookahead(string("Alf")).parse("Paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#succeed" do + it "returns passed value" do + parser = seq(succeed("Paco"), remainder) + expect(parser.parse("<3")).to eq(%w[Paco <3]) + end + end + + describe "#failed" do + it "raises an error" do + parser = seq(failed("message"), remainder) + expect { parser.parse("Paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#alt" do + let(:alt_true_or_false) { alt(string("true"), string("false")) } + let(:alt_t_or_true) { alt(string("t").skip(remainder), string("true")) } + + it "returns passed parser result" do + expect(alt_true_or_false.parse("true")).to eq("true") + end + + it "returns passed parser result" do + expect(alt_true_or_false.parse("false")).to eq("false") + end + + it "returns first passed parser result" do + expect(alt_t_or_true.parse("true")).to eq("t") + end + + it "raises an error" do + expect { alt_true_or_false.parse("null") }.to raise_error(Paco::ParseError) + end + + it "raises an error when no parsers passed" do + expect { alt.parse("Paco") }.to raise_error(ArgumentError) + end + end + + describe "#seq" do + let(:example) { seq(string("pa"), string("co")) } + + it "returns array of parsers results" do + expect(example.parse("paco")).to eq(%w[pa co]) + end + + it "raises an error" do + expect { example.parse("Paco") }.to raise_error(Paco::ParseError) + end + + it 
"raises an error when no parsers passed" do + expect { seq.parse("Paco") }.to raise_error(ArgumentError) + end + end + + describe "#many" do + let(:example) { many(digit).skip(remainder) } + + it "returns parsed result" do + expect(example.parse("123")).to eq(%w[1 2 3]) + end + + it "returns empty array when parser fails" do + expect(example.parse("Paco")).to eq([]) + end + end + + describe "#optional" do + let(:example) { optional(string("Paco")).skip(remainder) } + + it "returns parsed result" do + expect(example.parse("Paco!")).to eq("Paco") + end + + it "returns nil when parser fails" do + expect(example.parse("paco")).to be_nil + end + end + + describe "#seq_map" do + let(:example) do + seq_map(string("pa"), string("co")) { |x, y| y + x }.skip(remainder) + end + + it "returns result of the block" do + expect(example.parse("paco!")).to eq("copa") + end + + it "raises an error when parser fails" do + expect { example.parse("Paco") }.to raise_error(Paco::ParseError) + end + + it "raises an error when no parsers passed" do + expect { seq_map { |x| x }.parse("Paco") }.to raise_error(ArgumentError) + end + end + + describe "#sep_by" do + let(:example) { sep_by(digits, string(",")) } + + it "returns array of parsed results" do + expect(example.parse("1,2,3")).to eq(%w[1 2 3]) + end + + it "returns array of parsed results with trailing separator" do + expect(example.parse("1,2,3,")).to eq(%w[1 2 3]) + end + + it "returns empty array when nothing to parse" do + expect(example.skip(remainder).parse("paco")).to eq([]) + end + + it "raises an error when parser fails" do + expect { example.parse(",2,3") }.to raise_error(Paco::ParseError) + end + end + + describe "#sep_by_1" do + let(:example) { sep_by_1(digits, string(",")) } + + it "returns array of parsed results" do + expect(example.parse("1,2,3")).to eq(%w[1 2 3]) + end + + it "returns array of parsed results with trailing separator" do + expect(example.parse("1,2,3,")).to eq(%w[1 2 3]) + end + + it "raises an error 
when nothing to parse" do + expect { example.skip(remainder).parse("paco") }.to raise_error(Paco::ParseError) + end + + it "raises an error when parser fails" do + expect { example.parse(",2,3") }.to raise_error(Paco::ParseError) + end + end + + describe "#wrap" do + let(:example) { wrap(string("{"), string("}"), letters) } + + it "returns parser result" do + expect(example.parse("{Paco}")).to eq("Paco") + end + + it "raises an error when wrapped parser fails" do + expect { example.parse("{Пако}") }.to raise_error(Paco::ParseError) + end + + it "raises an error when wrapping parser fails" do + expect { example.parse("{Paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#lazy" do + let(:example) { lazy { failed("message") } } + + it "doesn't call the block on reference" do + expect { example }.not_to raise_error + end + + it "calls the block on parsing" do + expect { example.parse("Paco") }.to raise_error(Paco::ParseError) + end + end +end diff --git a/spec/paco/parser_spec.rb b/spec/paco/parser_spec.rb new file mode 100644 index 0000000..84a7afe --- /dev/null +++ b/spec/paco/parser_spec.rb @@ -0,0 +1,222 @@ +# frozen_string_literal: true + +require "spec_helper" + +RSpec.describe Paco::Parser, :include_combinators do + let(:parser) { string("Paco") } + + describe "#parse" do + it "parses string" do + expect(string("Paco").parse("Paco")).to eq("Paco") + end + + it "parses Paco::Context" do + context = Paco::Context.new("Paco") + expect(string("Paco").parse(context)).to eq("Paco") + end + end + + describe "#or" do + let(:example) { failed("msg").or(string("Paco")) } + + it "returns passed parser result" do + expect(example.parse("Paco")).to eq("Paco") + end + + it "raises an error when no parsers passed" do + expect { example.parse("paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#skip" do + let(:example) { string("Pa").skip(string("co")) } + + it "returns left parser result" do + expect(example.parse("Paco")).to eq("Pa") + end + + 
it "raises an error when no parsers passed" do + expect { example.parse("paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#next" do + let(:example) { string("Pa").next(string("co")) } + + it "returns right parser result" do + expect(example.parse("Paco")).to eq("co") + end + + it "raises an error when no parsers passed" do + expect { example.parse("paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#fmap" do + let(:example) { string("Paco").fmap(&:upcase) } + + it "returns block result" do + expect(example.parse("Paco")).to eq("PACO") + end + end + + describe "#bind" do + let(:example) { letters.bind { |res| ws.next(string(res.upcase)) } } + + it "returns parser result" do + expect(example.parse("Paco PACO")).to eq("PACO") + end + end + + describe "#many" do + let(:example) { digit.many.skip(remainder) } + + it "returns parsed result" do + expect(example.parse("123")).to eq(%w[1 2 3]) + end + + it "returns empty array when parser fails" do + expect(example.parse("Paco")).to eq([]) + end + end + + describe "#result" do + let(:example) { string("Paco").result(true) } + + it "returns the result" do + expect(example.parse("Paco")).to eq(true) + end + + it "raises an error when no parsers passed" do + expect { example.parse("paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#fallback" do + let(:example) { string("Paco").fallback("").skip(remainder) } + + it "returns parsers result" do + expect(example.parse("Paco")).to eq("Paco") + end + + it "returns fallback when parser fails" do + expect(example.parse("paco")).to eq("") + end + end + + describe "#trim" do + let(:example) { letters.trim(string(" ")) } + + it "returns parser result" do + expect(example.parse(" Paco ")).to eq("Paco") + end + + it "raises an error when wrapped parser fails" do + expect { example.parse(" Пако ") }.to raise_error(Paco::ParseError) + end + + it "raises an error when wrapping parser fails" do + expect { example.parse(" Paco") }.to 
raise_error(Paco::ParseError) + end + end + + describe "#wrap" do + let(:example) { letters.wrap(string("{"), string("}")) } + + it "returns parser result" do + expect(example.parse("{Paco}")).to eq("Paco") + end + + it "raises an error when wrapped parser fails" do + expect { example.parse("{Пако}") }.to raise_error(Paco::ParseError) + end + + it "raises an error when wrapping parser fails" do + expect { example.parse("{Paco") }.to raise_error(Paco::ParseError) + end + end + + describe "#not_followed_by" do + let(:example) { string("a").not_followed_by(string("b")).skip(remainder) } + + it "returns result of the parser" do + expect(example.parse("ac")).to eq("a") + end + + it "raises an error" do + expect { example.parse("ab") }.to raise_error(Paco::ParseError) + end + end + + describe "#join" do + it "returns joined result of the parser" do + expect(letter.many.join.parse("abc")).to eq("abc") + end + + it "returns joined result of the parser" do + expect(letter.many.join(",").parse("abc")).to eq("a,b,c") + end + end + + describe "#times" do + let(:example) { digit.times(2).skip(remainder) } + + it "returns array of results of the parser" do + expect(example.parse("1111")).to eq(%w[1 1]) + end + + it "raises an error" do + expect { example.parse("1a") }.to raise_error(Paco::ParseError) + end + + context "with min and max are specified" do + let(:example) { digit.times(2, 3).skip(remainder) } + + it "returns array of results of the parser" do + expect(example.parse("1111")).to eq(%w[1 1 1]) + end + + it "returns array of results of the parser" do + expect(example.parse("11a")).to eq(%w[1 1]) + end + + it "raises an error" do + expect { example.parse("1a") }.to raise_error(Paco::ParseError) + end + end + + context "with invalid arguments" do + it "raises an error" do + expect { digit.times(-1, 3).parse("11") }.to raise_error(ArgumentError) + end + + it "raises an error" do + expect { digit.times(3, 2).parse("11") }.to raise_error(ArgumentError) + end + end + end + + 
describe "#at_least" do + let(:example) { digit.at_least(2).skip(remainder) } + + it "returns array of results of the parser" do + expect(example.parse("111a")).to eq(%w[1 1 1]) + end + + it "raises an error" do + expect { example.parse("1a") }.to raise_error(Paco::ParseError) + end + end + + describe "#at_most" do + let(:example) { digit.at_most(2).skip(remainder) } + + it "returns array of results of the parser" do + expect(example.parse("1111")).to eq(%w[1 1]) + end + + it "returns array of results of the parser" do + expect(example.parse("Paco")).to eq(%w[]) + end + end +end diff --git a/spec/paco_spec.rb b/spec/paco_spec.rb new file mode 100644 index 0000000..758bb45 --- /dev/null +++ b/spec/paco_spec.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +RSpec.describe Paco do + it "has a version number" do + expect(Paco::VERSION).not_to be nil + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 0000000..550fd18 --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require "simplecov" + +SimpleCov.start do + add_filter "/spec/" + enable_coverage :branch +end + +require "paco" + +RSpec.configure do |config| + # Enable flags like --only-failures and --next-failure + config.example_status_persistence_file_path = ".rspec_status" + + # Disable RSpec exposing methods globally on `Module` and `main` + config.disable_monkey_patching! + + config.expect_with :rspec do |c| + c.syntax = :expect + end + + config.order = :random + Kernel.srand config.seed + + config.include Paco::Combinators, :include_combinators +end