From d605312d64648e440e58dd19ea1f6566a1d2c10a Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Mon, 8 Aug 2022 09:39:07 -0400
Subject: [PATCH] Initial commit

---
 .github/workflows/main.yml |   24 +
 .gitignore                 |   12 +
 .gitmodules                |    3 +
 CHANGELOG.md               |    5 +
 Gemfile                    |    5 +
 Gemfile.lock               |   30 +
 KNOWN_FAILURES             |  652 +++++++++++++++++++++
 LICENSE                    |   21 +
 README.md                  |   25 +
 Rakefile                   |   64 +++
 bin/console                |    8 +
 bin/lex                    |   20 +
 ext/yarp/extconf.rb        |    4 +
 ext/yarp/yarp.c            | 1106 ++++++++++++++++++++++++++++++++++++
 ext/yarp/yarp.h            |  260 +++++++++
 lib/yarp.rb                |  199 +++++++
 lib/yarp/version.rb        |    5 +
 test/fixtures/lex.rb       |  183 ++++++
 test/lex_test.rb           |   33 ++
 test/test_helper.rb        |    6 +
 vendor/spec                |    1 +
 yarp.gemspec               |   32 ++
 22 files changed, 2698 insertions(+)
 create mode 100644 .github/workflows/main.yml
 create mode 100644 .gitignore
 create mode 100644 .gitmodules
 create mode 100644 CHANGELOG.md
 create mode 100644 Gemfile
 create mode 100644 Gemfile.lock
 create mode 100644 KNOWN_FAILURES
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 Rakefile
 create mode 100755 bin/console
 create mode 100755 bin/lex
 create mode 100644 ext/yarp/extconf.rb
 create mode 100644 ext/yarp/yarp.c
 create mode 100644 ext/yarp/yarp.h
 create mode 100644 lib/yarp.rb
 create mode 100644 lib/yarp/version.rb
 create mode 100644 test/fixtures/lex.rb
 create mode 100644 test/lex_test.rb
 create mode 100644 test/test_helper.rb
 create mode 160000 vendor/spec
 create mode 100644 yarp.gemspec

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 000000000..8d89d26bf
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,24 @@
+name: Ruby
+
+on:
+  push:
+    branches:
+      - main
+
+  pull_request:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Set up Ruby
+      uses: ruby/setup-ruby@v1
+      with:
+        ruby-version: head
+        bundler-cache: true
+
+    - name: Run the default task
+      run: bundle exec rake
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..a5fbc8cc6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+/.bundle/
+/.vscode/
+/.yardoc
+/_yardoc/
+/coverage/
+/doc/
+/pkg/
+/spec/reports/
+/tmp/
+
+/lib/yarp/yarp.*
+test.rb
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..5975e2e26
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "vendor/spec"]
+	path = vendor/spec
+	url = git@github.com:ruby/spec.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 000000000..ace59941d
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,5 @@
+## [Unreleased]
+
+## [0.1.0] - 2022-08-08
+
+- Initial release
diff --git a/Gemfile b/Gemfile
new file mode 100644
index 000000000..be173b205
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+
+source "https://rubygems.org"
+
+gemspec
diff --git a/Gemfile.lock b/Gemfile.lock
new file mode 100644
index 000000000..bfb8e4425
--- /dev/null
+++ b/Gemfile.lock
@@ -0,0 +1,30 @@
+PATH
+  remote: .
+  specs:
+    yarp (0.1.0)
+
+GEM
+  remote: https://rubygems.org/
+  specs:
+    minitest (5.16.2)
+    power_assert (2.0.1)
+    rake (13.0.6)
+    rake-compiler (1.2.0)
+      rake
+    test-unit (3.5.3)
+      power_assert
+
+PLATFORMS
+  arm64-darwin-21
+  x86_64-linux
+
+DEPENDENCIES
+  bundler (~> 2)
+  minitest (~> 5)
+  rake (~> 13)
+  rake-compiler (~> 1)
+  test-unit (~> 3)
+  yarp!
+
+BUNDLED WITH
+   2.3.6
diff --git a/KNOWN_FAILURES b/KNOWN_FAILURES
new file mode 100644
index 000000000..a0ef0b424
--- /dev/null
+++ b/KNOWN_FAILURES
@@ -0,0 +1,652 @@
+vendor/spec/command_line/backtrace_limit_spec.rb
+vendor/spec/command_line/dash_n_spec.rb
+vendor/spec/command_line/feature_spec.rb
+vendor/spec/command_line/fixtures/debug.rb
+vendor/spec/command_line/fixtures/freeze_flag_required_diff_enc.rb
+vendor/spec/command_line/rubyopt_spec.rb
+vendor/spec/core/argf/readpartial_spec.rb
+vendor/spec/core/array/comparison_spec.rb
+vendor/spec/core/array/cycle_spec.rb
+vendor/spec/core/array/element_reference_spec.rb
+vendor/spec/core/array/element_set_spec.rb
+vendor/spec/core/array/equal_value_spec.rb
+vendor/spec/core/array/fill_spec.rb
+vendor/spec/core/array/fixtures/classes.rb
+vendor/spec/core/array/fixtures/encoded_strings.rb
+vendor/spec/core/array/hash_spec.rb
+vendor/spec/core/array/include_spec.rb
+vendor/spec/core/array/intersection_spec.rb
+vendor/spec/core/array/minus_spec.rb
+vendor/spec/core/array/multiply_spec.rb
+vendor/spec/core/array/pack/m_spec.rb
+vendor/spec/core/array/pack/u_spec.rb
+vendor/spec/core/array/rindex_spec.rb
+vendor/spec/core/array/shared/inspect.rb
+vendor/spec/core/array/sum_spec.rb
+vendor/spec/core/array/union_spec.rb
+vendor/spec/core/array/uniq_spec.rb
+vendor/spec/core/basicobject/basicobject_spec.rb
+vendor/spec/core/basicobject/equal_spec.rb
+vendor/spec/core/basicobject/equal_value_spec.rb
+vendor/spec/core/basicobject/instance_eval_spec.rb
+vendor/spec/core/basicobject/instance_exec_spec.rb
+vendor/spec/core/basicobject/not_equal_spec.rb
+vendor/spec/core/basicobject/not_spec.rb
+vendor/spec/core/binding/local_variable_get_spec.rb
+vendor/spec/core/binding/local_variable_set_spec.rb
+vendor/spec/core/comparable/clamp_spec.rb
+vendor/spec/core/comparable/equal_value_spec.rb
+vendor/spec/core/comparable/gt_spec.rb
+vendor/spec/core/comparable/gte_spec.rb
+vendor/spec/core/comparable/lt_spec.rb
+vendor/spec/core/comparable/lte_spec.rb
+vendor/spec/core/complex/divide_spec.rb
+vendor/spec/core/complex/equal_value_spec.rb
+vendor/spec/core/complex/multiply_spec.rb
+vendor/spec/core/complex/to_f_spec.rb
+vendor/spec/core/complex/to_i_spec.rb
+vendor/spec/core/complex/to_r_spec.rb
+vendor/spec/core/complex/uminus_spec.rb
+vendor/spec/core/dir/children_spec.rb
+vendor/spec/core/dir/element_reference_spec.rb
+vendor/spec/core/dir/entries_spec.rb
+vendor/spec/core/dir/fixtures/common.rb
+vendor/spec/core/dir/glob_spec.rb
+vendor/spec/core/dir/pwd_spec.rb
+vendor/spec/core/dir/shared/exist.rb
+vendor/spec/core/dir/shared/glob.rb
+vendor/spec/core/encoding/converter/convert_spec.rb
+vendor/spec/core/encoding/converter/last_error_spec.rb
+vendor/spec/core/encoding/converter/primitive_convert_spec.rb
+vendor/spec/core/encoding/converter/replacement_spec.rb
+vendor/spec/core/encoding/find_spec.rb
+vendor/spec/core/enumerable/grep_v_spec.rb
+vendor/spec/core/enumerable/shared/inject.rb
+vendor/spec/core/enumerable/slice_after_spec.rb
+vendor/spec/core/enumerable/slice_before_spec.rb
+vendor/spec/core/enumerable/sum_spec.rb
+vendor/spec/core/env/element_set_spec.rb
+vendor/spec/core/env/fetch_spec.rb
+vendor/spec/core/env/to_s_spec.rb
+vendor/spec/core/exception/hierarchy_spec.rb
+vendor/spec/core/exception/inspect_spec.rb
+vendor/spec/core/exception/interrupt_spec.rb
+vendor/spec/core/exception/name_spec.rb
+vendor/spec/core/exception/signal_exception_spec.rb
+vendor/spec/core/exception/top_level_spec.rb
+vendor/spec/core/fiber/resume_spec.rb
+vendor/spec/core/file/atime_spec.rb
+vendor/spec/core/file/basename_spec.rb
+vendor/spec/core/file/expand_path_spec.rb
+vendor/spec/core/file/extname_spec.rb
+vendor/spec/core/file/flock_spec.rb
+vendor/spec/core/file/mkfifo_spec.rb
+vendor/spec/core/file/open_spec.rb
+vendor/spec/core/file/printf_spec.rb
+vendor/spec/core/file/readlink_spec.rb
+vendor/spec/core/file/shared/fnmatch.rb
+vendor/spec/core/float/case_compare_spec.rb
+vendor/spec/core/float/ceil_spec.rb
+vendor/spec/core/float/divide_spec.rb
+vendor/spec/core/float/equal_value_spec.rb
+vendor/spec/core/float/exponent_spec.rb
+vendor/spec/core/float/fixtures/coerce.rb
+vendor/spec/core/float/floor_spec.rb
+vendor/spec/core/float/gt_spec.rb
+vendor/spec/core/float/gte_spec.rb
+vendor/spec/core/float/lt_spec.rb
+vendor/spec/core/float/lte_spec.rb
+vendor/spec/core/float/minus_spec.rb
+vendor/spec/core/float/modulo_spec.rb
+vendor/spec/core/float/multiply_spec.rb
+vendor/spec/core/float/next_float_spec.rb
+vendor/spec/core/float/plus_spec.rb
+vendor/spec/core/float/prev_float_spec.rb
+vendor/spec/core/float/round_spec.rb
+vendor/spec/core/float/shared/modulo.rb
+vendor/spec/core/float/shared/to_i.rb
+vendor/spec/core/float/uminus_spec.rb
+vendor/spec/core/float/uplus_spec.rb
+vendor/spec/core/hash/default_proc_spec.rb
+vendor/spec/core/hash/element_reference_spec.rb
+vendor/spec/core/hash/element_set_spec.rb
+vendor/spec/core/hash/equal_value_spec.rb
+vendor/spec/core/hash/fetch_spec.rb
+vendor/spec/core/hash/fetch_values_spec.rb
+vendor/spec/core/hash/gt_spec.rb
+vendor/spec/core/hash/gte_spec.rb
+vendor/spec/core/hash/lt_spec.rb
+vendor/spec/core/hash/lte_spec.rb
+vendor/spec/core/hash/ruby2_keywords_hash_spec.rb
+vendor/spec/core/hash/shared/each.rb
+vendor/spec/core/hash/shared/eql.rb
+vendor/spec/core/hash/shared/equal.rb
+vendor/spec/core/hash/shared/index.rb
+vendor/spec/core/hash/shared/store.rb
+vendor/spec/core/hash/shared/to_s.rb
+vendor/spec/core/hash/slice_spec.rb
+vendor/spec/core/hash/to_proc_spec.rb
+vendor/spec/core/hash/transform_keys_spec.rb
+vendor/spec/core/integer/case_compare_spec.rb
+vendor/spec/core/integer/div_spec.rb
+vendor/spec/core/integer/divide_spec.rb
+vendor/spec/core/integer/dup_spec.rb
+vendor/spec/core/integer/equal_value_spec.rb
+vendor/spec/core/integer/exponent_spec.rb
+vendor/spec/core/integer/gt_spec.rb
+vendor/spec/core/integer/gte_spec.rb
+vendor/spec/core/integer/lt_spec.rb
+vendor/spec/core/integer/lte_spec.rb
+vendor/spec/core/integer/minus_spec.rb
+vendor/spec/core/integer/modulo_spec.rb
+vendor/spec/core/integer/multiply_spec.rb
+vendor/spec/core/integer/plus_spec.rb
+vendor/spec/core/integer/remainder_spec.rb
+vendor/spec/core/integer/round_spec.rb
+vendor/spec/core/integer/shared/abs.rb
+vendor/spec/core/integer/shared/equal.rb
+vendor/spec/core/integer/shared/exponent.rb
+vendor/spec/core/integer/to_f_spec.rb
+vendor/spec/core/integer/uminus_spec.rb
+vendor/spec/core/io/close_spec.rb
+vendor/spec/core/io/fixtures/classes.rb
+vendor/spec/core/io/getc_spec.rb
+vendor/spec/core/io/gets_spec.rb
+vendor/spec/core/io/inspect_spec.rb
+vendor/spec/core/io/read_spec.rb
+vendor/spec/core/io/shared/chars.rb
+vendor/spec/core/io/shared/write.rb
+vendor/spec/core/io/ungetc_spec.rb
+vendor/spec/core/io/write_spec.rb
+vendor/spec/core/kernel/Complex_spec.rb
+vendor/spec/core/kernel/at_exit_spec.rb
+vendor/spec/core/kernel/backtick_spec.rb
+vendor/spec/core/kernel/chomp_spec.rb
+vendor/spec/core/kernel/chop_spec.rb
+vendor/spec/core/kernel/comparison_spec.rb
+vendor/spec/core/kernel/eval_spec.rb
+vendor/spec/core/kernel/fixtures/chop.rb
+vendor/spec/core/kernel/fixtures/chop_f.rb
+vendor/spec/core/kernel/fixtures/classes.rb
+vendor/spec/core/kernel/freeze_spec.rb
+vendor/spec/core/kernel/global_variables_spec.rb
+vendor/spec/core/kernel/instance_variable_defined_spec.rb
+vendor/spec/core/kernel/instance_variable_get_spec.rb
+vendor/spec/core/kernel/instance_variable_set_spec.rb
+vendor/spec/core/kernel/instance_variables_spec.rb
+vendor/spec/core/kernel/lambda_spec.rb
+vendor/spec/core/kernel/match_spec.rb
+vendor/spec/core/kernel/not_match_spec.rb
+vendor/spec/core/kernel/open_spec.rb
+vendor/spec/core/kernel/p_spec.rb
+vendor/spec/core/kernel/printf_spec.rb
+vendor/spec/core/kernel/remove_instance_variable_spec.rb
+vendor/spec/core/kernel/shared/kind_of.rb
+vendor/spec/core/kernel/shared/sprintf.rb
+vendor/spec/core/kernel/shared/sprintf_encoding.rb
+vendor/spec/core/kernel/sprintf_spec.rb
+vendor/spec/core/kernel/trace_var_spec.rb
+vendor/spec/core/kernel/warn_spec.rb
+vendor/spec/core/main/using_spec.rb
+vendor/spec/core/marshal/dump_spec.rb
+vendor/spec/core/marshal/fixtures/marshal_data.rb
+vendor/spec/core/marshal/shared/load.rb
+vendor/spec/core/matchdata/begin_spec.rb
+vendor/spec/core/matchdata/dup_spec.rb
+vendor/spec/core/matchdata/end_spec.rb
+vendor/spec/core/matchdata/equal_value_spec.rb
+vendor/spec/core/matchdata/offset_spec.rb
+vendor/spec/core/math/asin_spec.rb
+vendor/spec/core/math/atan_spec.rb
+vendor/spec/core/math/cos_spec.rb
+vendor/spec/core/math/sin_spec.rb
+vendor/spec/core/method/arity_spec.rb
+vendor/spec/core/method/case_compare_spec.rb
+vendor/spec/core/method/compose_spec.rb
+vendor/spec/core/method/element_reference_spec.rb
+vendor/spec/core/method/equal_value_spec.rb
+vendor/spec/core/method/fixtures/classes.rb
+vendor/spec/core/method/parameters_spec.rb
+vendor/spec/core/method/to_proc_spec.rb
+vendor/spec/core/module/attr_writer_spec.rb
+vendor/spec/core/module/autoload_spec.rb
+vendor/spec/core/module/class_variable_defined_spec.rb
+vendor/spec/core/module/class_variable_get_spec.rb
+vendor/spec/core/module/class_variable_set_spec.rb
+vendor/spec/core/module/class_variables_spec.rb
+vendor/spec/core/module/comparison_spec.rb
+vendor/spec/core/module/const_added_spec.rb
+vendor/spec/core/module/const_defined_spec.rb
+vendor/spec/core/module/const_get_spec.rb
+vendor/spec/core/module/const_missing_spec.rb
+vendor/spec/core/module/const_source_location_spec.rb
+vendor/spec/core/module/define_method_spec.rb
+vendor/spec/core/module/define_singleton_method_spec.rb
+vendor/spec/core/module/equal_value_spec.rb
+vendor/spec/core/module/fixtures/classes.rb
+vendor/spec/core/module/fixtures/constant_unicode.rb
+vendor/spec/core/module/fixtures/name.rb
+vendor/spec/core/module/fixtures/repeated_concurrent_autoload.rb
+vendor/spec/core/module/module_function_spec.rb
+vendor/spec/core/module/refine_spec.rb
+vendor/spec/core/module/remove_class_variable_spec.rb
+vendor/spec/core/module/ruby2_keywords_spec.rb
+vendor/spec/core/module/shared/set_visibility.rb
+vendor/spec/core/numeric/abs2_spec.rb
+vendor/spec/core/numeric/div_spec.rb
+vendor/spec/core/numeric/divmod_spec.rb
+vendor/spec/core/numeric/eql_spec.rb
+vendor/spec/core/numeric/modulo_spec.rb
+vendor/spec/core/numeric/negative_spec.rb
+vendor/spec/core/numeric/positive_spec.rb
+vendor/spec/core/numeric/remainder_spec.rb
+vendor/spec/core/numeric/shared/abs.rb
+vendor/spec/core/numeric/shared/arg.rb
+vendor/spec/core/numeric/shared/quo.rb
+vendor/spec/core/numeric/shared/step.rb
+vendor/spec/core/numeric/step_spec.rb
+vendor/spec/core/numeric/uminus_spec.rb
+vendor/spec/core/numeric/uplus_spec.rb
+vendor/spec/core/numeric/zero_spec.rb
+vendor/spec/core/objectspace/define_finalizer_spec.rb
+vendor/spec/core/objectspace/fixtures/classes.rb
+vendor/spec/core/objectspace/weakmap/each_key_spec.rb
+vendor/spec/core/objectspace/weakmap/each_pair_spec.rb
+vendor/spec/core/objectspace/weakmap/each_spec.rb
+vendor/spec/core/objectspace/weakmap/each_value_spec.rb
+vendor/spec/core/objectspace/weakmap/keys_spec.rb
+vendor/spec/core/objectspace/weakmap/values_spec.rb
+vendor/spec/core/proc/arity_spec.rb
+vendor/spec/core/proc/case_compare_spec.rb
+vendor/spec/core/proc/compose_spec.rb
+vendor/spec/core/proc/curry_spec.rb
+vendor/spec/core/proc/element_reference_spec.rb
+vendor/spec/core/proc/equal_value_spec.rb
+vendor/spec/core/proc/lambda_spec.rb
+vendor/spec/core/proc/new_spec.rb
+vendor/spec/core/proc/parameters_spec.rb
+vendor/spec/core/proc/ruby2_keywords_spec.rb
+vendor/spec/core/proc/shared/call.rb
+vendor/spec/core/proc/shared/call_arguments.rb
+vendor/spec/core/proc/source_location_spec.rb
+vendor/spec/core/process/clock_getres_spec.rb
+vendor/spec/core/process/egid_spec.rb
+vendor/spec/core/process/euid_spec.rb
+vendor/spec/core/process/exec_spec.rb
+vendor/spec/core/process/fixtures/kill.rb
+vendor/spec/core/process/spawn_spec.rb
+vendor/spec/core/process/uid_spec.rb
+vendor/spec/core/queue/append_spec.rb
+vendor/spec/core/range/case_compare_spec.rb
+vendor/spec/core/range/each_spec.rb
+vendor/spec/core/range/equal_value_spec.rb
+vendor/spec/core/range/fixtures/classes.rb
+vendor/spec/core/range/max_spec.rb
+vendor/spec/core/range/min_spec.rb
+vendor/spec/core/range/minmax_spec.rb
+vendor/spec/core/range/new_spec.rb
+vendor/spec/core/range/shared/cover.rb
+vendor/spec/core/range/shared/cover_and_include.rb
+vendor/spec/core/range/step_spec.rb
+vendor/spec/core/range/to_a_spec.rb
+vendor/spec/core/rational/comparison_spec.rb
+vendor/spec/core/rational/divide_spec.rb
+vendor/spec/core/rational/equal_value_spec.rb
+vendor/spec/core/rational/exponent_spec.rb
+vendor/spec/core/rational/minus_spec.rb
+vendor/spec/core/rational/modulo_spec.rb
+vendor/spec/core/rational/multiply_spec.rb
+vendor/spec/core/rational/plus_spec.rb
+vendor/spec/core/regexp/encoding_spec.rb
+vendor/spec/core/regexp/equal_value_spec.rb
+vendor/spec/core/regexp/fixed_encoding_spec.rb
+vendor/spec/core/regexp/inspect_spec.rb
+vendor/spec/core/regexp/match_spec.rb
+vendor/spec/core/regexp/shared/new.rb
+vendor/spec/core/regexp/source_spec.rb
+vendor/spec/core/regexp/union_spec.rb
+vendor/spec/core/signal/trap_spec.rb
+vendor/spec/core/sizedqueue/append_spec.rb
+vendor/spec/core/sizedqueue/enq_spec.rb
+vendor/spec/core/sizedqueue/max_spec.rb
+vendor/spec/core/sizedqueue/new_spec.rb
+vendor/spec/core/sizedqueue/num_waiting_spec.rb
+vendor/spec/core/sizedqueue/push_spec.rb
+vendor/spec/core/string/append_spec.rb
+vendor/spec/core/string/ascii_only_spec.rb
+vendor/spec/core/string/b_spec.rb
+vendor/spec/core/string/bytes_spec.rb
+vendor/spec/core/string/capitalize_spec.rb
+vendor/spec/core/string/case_compare_spec.rb
+vendor/spec/core/string/casecmp_spec.rb
+vendor/spec/core/string/center_spec.rb
+vendor/spec/core/string/chomp_spec.rb
+vendor/spec/core/string/chop_spec.rb
+vendor/spec/core/string/comparison_spec.rb
+vendor/spec/core/string/delete_spec.rb
+vendor/spec/core/string/downcase_spec.rb
+vendor/spec/core/string/dump_spec.rb
+vendor/spec/core/string/element_reference_spec.rb
+vendor/spec/core/string/element_set_spec.rb
+vendor/spec/core/string/encode_spec.rb
+vendor/spec/core/string/equal_value_spec.rb
+vendor/spec/core/string/fixtures/classes.rb
+vendor/spec/core/string/fixtures/iso-8859-9-encoding.rb
+vendor/spec/core/string/fixtures/utf-8-encoding.rb
+vendor/spec/core/string/force_encoding_spec.rb
+vendor/spec/core/string/gsub_spec.rb
+vendor/spec/core/string/include_spec.rb
+vendor/spec/core/string/index_spec.rb
+vendor/spec/core/string/insert_spec.rb
+vendor/spec/core/string/inspect_spec.rb
+vendor/spec/core/string/ljust_spec.rb
+vendor/spec/core/string/lstrip_spec.rb
+vendor/spec/core/string/match_spec.rb
+vendor/spec/core/string/modulo_spec.rb
+vendor/spec/core/string/multiply_spec.rb
+vendor/spec/core/string/next_spec.rb
+vendor/spec/core/string/ord_spec.rb
+vendor/spec/core/string/plus_spec.rb
+vendor/spec/core/string/reverse_spec.rb
+vendor/spec/core/string/rindex_spec.rb
+vendor/spec/core/string/rjust_spec.rb
+vendor/spec/core/string/rpartition_spec.rb
+vendor/spec/core/string/rstrip_spec.rb
+vendor/spec/core/string/scan_spec.rb
+vendor/spec/core/string/scrub_spec.rb
+vendor/spec/core/string/shared/dedup.rb
+vendor/spec/core/string/shared/each_line.rb
+vendor/spec/core/string/shared/encode.rb
+vendor/spec/core/string/shared/equal_value.rb
+vendor/spec/core/string/shared/length.rb
+vendor/spec/core/string/shared/slice.rb
+vendor/spec/core/string/shared/to_sym.rb
+vendor/spec/core/string/slice_spec.rb
+vendor/spec/core/string/split_spec.rb
+vendor/spec/core/string/sub_spec.rb
+vendor/spec/core/string/succ_spec.rb
+vendor/spec/core/string/swapcase_spec.rb
+vendor/spec/core/string/to_f_spec.rb
+vendor/spec/core/string/tr_s_spec.rb
+vendor/spec/core/string/tr_spec.rb
+vendor/spec/core/string/uminus_spec.rb
+vendor/spec/core/string/undump_spec.rb
+vendor/spec/core/string/unicode_normalize_spec.rb
+vendor/spec/core/string/unpack/m_spec.rb
+vendor/spec/core/string/unpack/u_spec.rb
+vendor/spec/core/string/upcase_spec.rb
+vendor/spec/core/string/upto_spec.rb
+vendor/spec/core/struct/element_reference_spec.rb
+vendor/spec/core/struct/equal_value_spec.rb
+vendor/spec/core/struct/hash_spec.rb
+vendor/spec/core/struct/instance_variable_get_spec.rb
+vendor/spec/core/struct/instance_variables_spec.rb
+vendor/spec/core/struct/new_spec.rb
+vendor/spec/core/symbol/all_symbols_spec.rb
+vendor/spec/core/symbol/capitalize_spec.rb
+vendor/spec/core/symbol/casecmp_spec.rb
+vendor/spec/core/symbol/downcase_spec.rb
+vendor/spec/core/symbol/element_reference_spec.rb
+vendor/spec/core/symbol/empty_spec.rb
+vendor/spec/core/symbol/encoding_spec.rb
+vendor/spec/core/symbol/equal_value_spec.rb
+vendor/spec/core/symbol/inspect_spec.rb
+vendor/spec/core/symbol/match_spec.rb
+vendor/spec/core/symbol/name_spec.rb
+vendor/spec/core/symbol/shared/id2name.rb
+vendor/spec/core/symbol/shared/length.rb
+vendor/spec/core/symbol/shared/succ.rb
+vendor/spec/core/symbol/swapcase_spec.rb
+vendor/spec/core/symbol/to_proc_spec.rb
+vendor/spec/core/symbol/to_sym_spec.rb
+vendor/spec/core/symbol/upcase_spec.rb
+vendor/spec/core/thread/fixtures/classes.rb
+vendor/spec/core/thread/raise_spec.rb
+vendor/spec/core/thread/report_on_exception_spec.rb
+vendor/spec/core/time/at_spec.rb
+vendor/spec/core/time/comparison_spec.rb
+vendor/spec/core/time/fixtures/classes.rb
+vendor/spec/core/time/getlocal_spec.rb
+vendor/spec/core/time/minus_spec.rb
+vendor/spec/core/time/new_spec.rb
+vendor/spec/core/time/plus_spec.rb
+vendor/spec/core/time/shared/gm.rb
+vendor/spec/core/time/shared/inspect.rb
+vendor/spec/core/time/shared/now.rb
+vendor/spec/core/time/strftime_spec.rb
+vendor/spec/core/tracepoint/eval_script_spec.rb
+vendor/spec/core/tracepoint/parameters_spec.rb
+vendor/spec/core/unboundmethod/arity_spec.rb
+vendor/spec/core/warning/warn_spec.rb
+vendor/spec/fixtures/class.rb
+vendor/spec/fixtures/class_variables.rb
+vendor/spec/language/alias_spec.rb
+vendor/spec/language/array_spec.rb
+vendor/spec/language/block_spec.rb
+vendor/spec/language/case_spec.rb
+vendor/spec/language/class_spec.rb
+vendor/spec/language/class_variable_spec.rb
+vendor/spec/language/comment_spec.rb
+vendor/spec/language/constants_spec.rb
+vendor/spec/language/def_spec.rb
+vendor/spec/language/defined_spec.rb
+vendor/spec/language/delegation_spec.rb
+vendor/spec/language/ensure_spec.rb
+vendor/spec/language/fixtures/binary_symbol.rb
+vendor/spec/language/fixtures/bytes_magic_comment.rb
+vendor/spec/language/fixtures/classes.rb
+vendor/spec/language/fixtures/freeze_magic_comment_required_diff_enc.rb
+vendor/spec/language/fixtures/rescue_captures.rb
+vendor/spec/language/fixtures/send.rb
+vendor/spec/language/fixtures/squiggly_heredoc.rb
+vendor/spec/language/fixtures/utf16-le-nobom.rb
+vendor/spec/language/fixtures/utf8-bom.rb
+vendor/spec/language/fixtures/variables.rb
+vendor/spec/language/hash_spec.rb
+vendor/spec/language/heredoc_spec.rb
+vendor/spec/language/keyword_arguments_spec.rb
+vendor/spec/language/lambda_spec.rb
+vendor/spec/language/line_spec.rb
+vendor/spec/language/magic_comment_spec.rb
+vendor/spec/language/match_spec.rb
+vendor/spec/language/method_spec.rb
+vendor/spec/language/numbered_parameters_spec.rb
+vendor/spec/language/numbers_spec.rb
+vendor/spec/language/optional_assignments_spec.rb
+vendor/spec/language/pattern_matching_spec.rb
+vendor/spec/language/precedence_spec.rb
+vendor/spec/language/predefined/data_spec.rb
+vendor/spec/language/predefined/fixtures/data1.rb
+vendor/spec/language/predefined/fixtures/data3.rb
+vendor/spec/language/predefined/fixtures/data4.rb
+vendor/spec/language/predefined/fixtures/data5.rb
+vendor/spec/language/predefined/fixtures/data_offset.rb
+vendor/spec/language/predefined/fixtures/data_only.rb
+vendor/spec/language/predefined/fixtures/empty_data.rb
+vendor/spec/language/predefined/toplevel_binding_spec.rb
+vendor/spec/language/predefined_spec.rb
+vendor/spec/language/proc_spec.rb
+vendor/spec/language/regexp/character_classes_spec.rb
+vendor/spec/language/regexp/encoding_spec.rb
+vendor/spec/language/regexp/escapes_spec.rb
+vendor/spec/language/regexp/interpolation_spec.rb
+vendor/spec/language/regexp/modifiers_spec.rb
+vendor/spec/language/regexp/repetition_spec.rb
+vendor/spec/language/regexp_spec.rb
+vendor/spec/language/rescue_spec.rb
+vendor/spec/language/return_spec.rb
+vendor/spec/language/safe_navigator_spec.rb
+vendor/spec/language/send_spec.rb
+vendor/spec/language/string_spec.rb
+vendor/spec/language/super_spec.rb
+vendor/spec/language/symbol_spec.rb
+vendor/spec/language/undef_spec.rb
+vendor/spec/language/variables_spec.rb
+vendor/spec/language/yield_spec.rb
+vendor/spec/library/abbrev/abbrev_spec.rb
+vendor/spec/library/base64/decode64_spec.rb
+vendor/spec/library/bigdecimal/BigDecimal_spec.rb
+vendor/spec/library/bigdecimal/case_compare_spec.rb
+vendor/spec/library/bigdecimal/divide_spec.rb
+vendor/spec/library/bigdecimal/divmod_spec.rb
+vendor/spec/library/bigdecimal/equal_value_spec.rb
+vendor/spec/library/bigdecimal/exponent_spec.rb
+vendor/spec/library/bigdecimal/modulo_spec.rb
+vendor/spec/library/bigdecimal/multiply_spec.rb
+vendor/spec/library/bigdecimal/precs_spec.rb
+vendor/spec/library/bigdecimal/to_s_spec.rb
+vendor/spec/library/bigdecimal/truncate_spec.rb
+vendor/spec/library/bigdecimal/uminus_spec.rb
+vendor/spec/library/bigdecimal/uplus_spec.rb
+vendor/spec/library/bigmath/log_spec.rb
+vendor/spec/library/cgi/cookie/initialize_spec.rb
+vendor/spec/library/cgi/cookie/parse_spec.rb
+vendor/spec/library/cgi/cookie/to_s_spec.rb
+vendor/spec/library/cgi/cookie/value_spec.rb
+vendor/spec/library/cgi/escapeHTML_spec.rb
+vendor/spec/library/cgi/escape_spec.rb
+vendor/spec/library/cgi/htmlextension/a_spec.rb
+vendor/spec/library/cgi/out_spec.rb
+vendor/spec/library/cgi/pretty_spec.rb
+vendor/spec/library/cgi/queryextension/multipart_spec.rb
+vendor/spec/library/cgi/shared/http_header.rb
+vendor/spec/library/cgi/unescapeHTML_spec.rb
+vendor/spec/library/cgi/unescape_spec.rb
+vendor/spec/library/cmath/math/shared/asin.rb
+vendor/spec/library/cmath/math/shared/atan.rb
+vendor/spec/library/cmath/math/shared/cos.rb
+vendor/spec/library/cmath/math/shared/sin.rb
+vendor/spec/library/coverage/fixtures/eval_code.rb
+vendor/spec/library/csv/generate_line_spec.rb
+vendor/spec/library/csv/parse_spec.rb
+vendor/spec/library/date/strftime_spec.rb
+vendor/spec/library/datetime/strftime_spec.rb
+vendor/spec/library/delegate/delegator/case_compare_spec.rb
+vendor/spec/library/delegate/delegator/compare_spec.rb
+vendor/spec/library/delegate/delegator/complement_spec.rb
+vendor/spec/library/delegate/delegator/equal_value_spec.rb
+vendor/spec/library/delegate/delegator/marshal_spec.rb
+vendor/spec/library/delegate/delegator/not_equal_spec.rb
+vendor/spec/library/delegate/delegator/not_spec.rb
+vendor/spec/library/digest/instance/append_spec.rb
+vendor/spec/library/digest/md5/append_spec.rb
+vendor/spec/library/digest/sha256/append_spec.rb
+vendor/spec/library/digest/sha384/append_spec.rb
+vendor/spec/library/digest/sha384/shared/constants.rb
+vendor/spec/library/digest/sha512/append_spec.rb
+vendor/spec/library/erb/def_class_spec.rb
+vendor/spec/library/erb/def_method_spec.rb
+vendor/spec/library/erb/def_module_spec.rb
+vendor/spec/library/erb/defmethod/def_erb_method_spec.rb
+vendor/spec/library/erb/new_spec.rb
+vendor/spec/library/erb/result_spec.rb
+vendor/spec/library/erb/run_spec.rb
+vendor/spec/library/erb/src_spec.rb
+vendor/spec/library/erb/util/shared/html_escape.rb
+vendor/spec/library/erb/util/shared/url_encode.rb
+vendor/spec/library/find/fixtures/common.rb
+vendor/spec/library/matrix/build_spec.rb
+vendor/spec/library/matrix/coerce_spec.rb
+vendor/spec/library/matrix/constructor_spec.rb
+vendor/spec/library/matrix/divide_spec.rb
+vendor/spec/library/matrix/equal_value_spec.rb
+vendor/spec/library/net/ftp/close_spec.rb
+vendor/spec/library/net/ftp/closed_spec.rb
+vendor/spec/library/net/ftp/fixtures/server.rb
+vendor/spec/library/net/ftp/initialize_spec.rb
+vendor/spec/library/net/ftp/mkdir_spec.rb
+vendor/spec/library/net/ftp/pwd_spec.rb
+vendor/spec/library/net/http/httpgenericrequest/exec_spec.rb
+vendor/spec/library/net/http/httpresponse/read_new_spec.rb
+vendor/spec/library/objectspace/memsize_of_spec.rb
+vendor/spec/library/openstruct/shared/inspect.rb
+vendor/spec/library/pathname/divide_spec.rb
+vendor/spec/library/pathname/plus_spec.rb
+vendor/spec/library/rbconfig/rbconfig_spec.rb
+vendor/spec/library/rexml/attributes/append_spec.rb
+vendor/spec/library/rexml/document/add_spec.rb
+vendor/spec/library/rexml/document/write_spec.rb
+vendor/spec/library/rexml/text/normalize_spec.rb
+vendor/spec/library/rexml/text/read_with_substitution_spec.rb
+vendor/spec/library/rexml/text/unnormalize_spec.rb
+vendor/spec/library/rexml/text/value_spec.rb
+vendor/spec/library/ripper/sexp_spec.rb
+vendor/spec/library/securerandom/base64_spec.rb
+vendor/spec/library/set/append_spec.rb
+vendor/spec/library/set/case_compare_spec.rb
+vendor/spec/library/set/case_equality_spec.rb
+vendor/spec/library/set/comparison_spec.rb
+vendor/spec/library/set/intersection_spec.rb
+vendor/spec/library/set/minus_spec.rb
+vendor/spec/library/set/plus_spec.rb
+vendor/spec/library/set/shared/include.rb
+vendor/spec/library/set/sortedset/add_spec.rb
+vendor/spec/library/set/sortedset/append_spec.rb
+vendor/spec/library/set/sortedset/case_equality_spec.rb
+vendor/spec/library/set/sortedset/intersection_spec.rb
+vendor/spec/library/set/sortedset/minus_spec.rb
+vendor/spec/library/set/sortedset/plus_spec.rb
+vendor/spec/library/set/sortedset/union_spec.rb
+vendor/spec/library/set/union_spec.rb
+vendor/spec/library/shellwords/shellwords_spec.rb
+vendor/spec/library/socket/addrinfo/getaddrinfo_spec.rb
+vendor/spec/library/socket/addrinfo/marshal_dump_spec.rb
+vendor/spec/library/socket/ancillarydata/initialize_spec.rb
+vendor/spec/library/socket/ancillarydata/unix_rights_spec.rb
+vendor/spec/library/socket/basicsocket/local_address_spec.rb
+vendor/spec/library/socket/basicsocket/remote_address_spec.rb
+vendor/spec/library/socket/basicsocket/sendmsg_nonblock_spec.rb
+vendor/spec/library/socket/basicsocket/sendmsg_spec.rb
+vendor/spec/library/socket/shared/pack_sockaddr.rb
+vendor/spec/library/socket/socket/gethostbyaddr_spec.rb
+vendor/spec/library/socket/socket/getifaddrs_spec.rb
+vendor/spec/library/stringio/fixtures/classes.rb
+vendor/spec/library/stringio/getch_spec.rb
+vendor/spec/library/stringio/printf_spec.rb
+vendor/spec/library/stringio/puts_spec.rb
+vendor/spec/library/stringio/shared/codepoints.rb
+vendor/spec/library/stringio/shared/each_char.rb
+vendor/spec/library/stringscanner/append_spec.rb
+vendor/spec/library/stringscanner/inspect_spec.rb
+vendor/spec/library/stringscanner/shared/peek.rb
+vendor/spec/library/stringscanner/unscan_spec.rb
+vendor/spec/library/syslog/inspect_spec.rb
+vendor/spec/library/time/to_date_spec.rb
+vendor/spec/library/tmpdir/dir/mktmpdir_spec.rb
+vendor/spec/library/uri/equality_spec.rb
+vendor/spec/library/yaml/fixtures/strings.rb
+vendor/spec/library/yaml/shared/load.rb
+vendor/spec/library/yaml/to_yaml_spec.rb
+vendor/spec/library/zlib/gzipfile/close_spec.rb
+vendor/spec/library/zlib/gzipfile/comment_spec.rb
+vendor/spec/library/zlib/gzipfile/orig_name_spec.rb
+vendor/spec/library/zlib/gzipreader/ungetc_spec.rb
+vendor/spec/library/zlib/gzipwriter/mtime_spec.rb
+vendor/spec/optional/capi/bignum_spec.rb
+vendor/spec/optional/capi/class_spec.rb
+vendor/spec/optional/capi/encoding_spec.rb
+vendor/spec/optional/capi/hash_spec.rb
+vendor/spec/optional/capi/io_spec.rb
+vendor/spec/optional/capi/kernel_spec.rb
+vendor/spec/optional/capi/numeric_spec.rb
+vendor/spec/optional/capi/object_spec.rb
+vendor/spec/optional/capi/range_spec.rb
+vendor/spec/optional/capi/spec_helper.rb
+vendor/spec/optional/capi/string_spec.rb
+vendor/spec/optional/capi/symbol_spec.rb
+vendor/spec/optional/capi/thread_spec.rb
+vendor/spec/security/cve_2014_8080_spec.rb
+vendor/spec/security/cve_2019_8322_spec.rb
+vendor/spec/shared/process/exit.rb
+vendor/spec/shared/rational/comparison.rb
+vendor/spec/shared/rational/divide.rb
+vendor/spec/shared/rational/equal_value.rb
+vendor/spec/shared/rational/exponent.rb
+vendor/spec/shared/rational/multiply.rb
+vendor/spec/shared/rational/plus.rb
+vendor/spec/shared/rational/round.rb
+vendor/spec/shared/string/end_with.rb
+vendor/spec/shared/string/start_with.rb
+vendor/spec/shared/time/strftime_for_date.rb
+vendor/spec/shared/time/strftime_for_time.rb
+vendor/spec/spec_helper.rb
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000..a5efe60fc
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2022 Kevin Newton
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..0f4d5496a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,25 @@
+# Yet Another Ruby Parser
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+```ruby
+gem "yarp"
+```
+
+And then execute:
+
+    $ bundle install
+
+Or install it yourself as:
+
+    $ gem install yarp
+
+## Contributing
+
+Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-syntax-tree/yarp.
+
+## License
+
+The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
diff --git a/Rakefile b/Rakefile
new file mode 100644
index 000000000..04ffe10b5
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+require "bundler/gem_tasks"
+require "rake/extensiontask"
+require "rake/testtask"
+
+Rake::ExtensionTask.new(:compile) do |ext|
+  ext.name = "yarp"
+  ext.ext_dir = "ext/yarp" # directory holding the extension sources
+  ext.lib_dir = "lib/yarp" # directory that receives the built library
+  ext.gem_spec = Gem::Specification.load("yarp.gemspec")
+end
+
+Rake::TestTask.new(test: :compile) do |t| # the extension is compiled before the tests run
+  t.libs << "test"
+  t.libs << "lib"
+  t.test_files = FileList["test/**/*_test.rb"]
+end
+
+desc "Lex ruby/spec files and compare with compat_lex"
+task lex: :compile do
+  require "bundler/setup"
+  require "yarp"
+  require "ripper"
+
+  failures_path = File.expand_path("KNOWN_FAILURES", __dir__)
+  known_failures = File.readlines(failures_path, chomp: true)
+  colorize = ->(code, string) { "\033[#{code}m#{string}\033[0m" }
+
+  passing = 0
+  failing = 0
+
+  # FILEPATHS may hold a glob that narrows the run to a subset of files.
+  filepaths =
+    if ENV["FILEPATHS"]
+      Dir[ENV["FILEPATHS"]]
+    else
+      Dir["vendor/spec/**/*.rb"]
+    end
+
+  filepaths.each do |filepath|
+    result =
+      YARP.ripper_lex(filepath).zip(YARP.compat_lex(filepath)).all? do |(ripper, yarp)|
+        break false if yarp.nil?
+        ripper[0...-1] == yarp[0...-1]
+      end
+
+    print result ? colorize.call(32, ".") : colorize.call(31, "E")
+
+    if result
+      known_failures.delete(filepath)
+      passing += 1
+    else
+      known_failures << filepath unless known_failures.include?(filepath)
+      failing += 1
+    end
+  end
+
+  # Only a full run rewrites the list; a filtered run has not seen every file.
+  File.write(failures_path, known_failures.sort.join("\n") + "\n") unless ENV["FILEPATHS"]
+  puts "\n\nPASS=#{passing}\nFAIL=#{failing}"
+end
+
+task default: :test
diff --git a/bin/console b/bin/console
new file mode 100755
index 000000000..84f54072d
--- /dev/null
+++ b/bin/console
@@ -0,0 +1,8 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require "bundler/setup"
+require "yarp"
+
+require "irb"
+IRB.start(__FILE__) # open an IRB session with yarp already required above
diff --git a/bin/lex b/bin/lex
new file mode 100755
index 000000000..9f5d44877
--- /dev/null
+++ b/bin/lex
@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require "bundler/setup"
+require "ripper"
+require "yarp"
+
+filepath = ARGV.first || abort("Usage: bin/lex FILEPATH")
+pattern = "%-70s %-70s"
+
+puts pattern % ["Ripper lex", "YARP lex"]
+puts pattern % ["-" * 70, "-" * 70]
+
+YARP.ripper_lex(filepath).zip(YARP.compat_lex(filepath)).each do |(ripper, yarp)|
+  # Rows are compared without their final element; matches print grey, mismatches bold red.
+  matched = ripper[0...-1].inspect == (yarp || [])[0...-1].inspect
+
+  color = matched ? "38;5;102" : "1;31"
+  puts "\033[#{color}m#{pattern}\033[0m" % [ripper.inspect, yarp.inspect]
+end
diff --git a/ext/yarp/extconf.rb b/ext/yarp/extconf.rb
new file mode 100644
index 000000000..132fe710a
--- /dev/null
+++ b/ext/yarp/extconf.rb
@@ -0,0 +1,4 @@
+# frozen_string_literal: true
+
+require "mkmf"
+create_makefile "yarp/yarp" # write the Makefile used to build the C extension
diff --git a/ext/yarp/yarp.c b/ext/yarp/yarp.c
new file mode 100644
index 000000000..b5f03adb7
--- /dev/null
+++ b/ext/yarp/yarp.c
@@ -0,0 +1,1106 @@
+#include "yarp.h"
+
+/******************************************************************************/
+/* Basic character checks                                                     */
+/******************************************************************************/
+
+static inline bool // true when *c is an ASCII binary digit ('0' or '1')
+is_binary_number_char(const char *c) {
+  return *c == '0' || *c == '1';
+}
+
+static inline bool // true when *c is an ASCII octal digit ('0' through '7')
+is_octal_number_char(const char *c) {
+  return *c >= '0' && *c <= '7';
+}
+
+static inline bool // true when *c is an ASCII decimal digit ('0' through '9')
+is_decimal_number_char(const char *c) {
+  return *c >= '0' && *c <= '9';
+}
+
+static inline bool // true when *c is an ASCII hex digit (0-9, a-f or A-F)
+is_hexadecimal_number_char(const char *c) {
+  return (*c >= '0' && *c <= '9') || (*c >= 'a' && *c <= 'f') || (*c >= 'A' && *c <= 'F');
+}
+
+static inline bool // true when *c is an ASCII letter or '_'; other bytes are rejected
+is_identifier_start_char(const char *c) {
+  return (*c >= 'a' && *c <= 'z') || (*c >= 'A' && *c <= 'Z') || (*c == '_');
+}
+
+static inline bool // true when *c may start an identifier or is a decimal digit
+is_identifier_char(const char *c) {
+  return is_identifier_start_char(c) || is_decimal_number_char(c);
+}
+
+static inline bool // true for space, tab, form feed, carriage return and vertical tab
+is_non_newline_whitespace_char(const char *c) {
+  return *c == ' ' || *c == '\t' || *c == '\f' || *c == '\r' || *c == '\v';
+}
+
+static inline bool // true for non-newline whitespace or '\n'
+is_whitespace_char(const char *c) {
+  return is_non_newline_whitespace_char(c) || *c == '\n';
+}
+
+/******************************************************************************/
+/* Lexer check helpers                                                        */
+/******************************************************************************/
+
+// If the next character to be read matches the given value, then advances the
+// current token's end pointer past it and returns true. Otherwise returns false.
+static inline bool
+match(yp_parser_t *parser, char value) {
+  if (*parser->current.end == value) {
+    parser->current.end++;
+    return true;
+  }
+  return false;
+}
+
+// Returns the character that closes a literal opened with the given
+// character. The bracket-like openers ( [ { < are closed by ) ] } >
+// respectively; any other opener is closed by a second occurrence of the
+// same character.
+static char
+terminator(const char start) {
+  if (start == '(') return ')';
+  if (start == '[') return ']';
+  if (start == '{') return '}';
+  if (start == '<') return '>';
+  return start;
+}
+
+/******************************************************************************/
+/* Lex mode manipulations                                                     */
+/******************************************************************************/
+
+// Push a new lex mode onto the stack. While the index is within the
+// pre-allocated stack the mode lives in the next slot; beyond that it lives in
+// a heap allocation. Either way the given mode is copied into its storage.
+static void
+push_lex_mode(yp_parser_t *parser, yp_lex_mode_t lex_mode) {
+  lex_mode.prev = parser->lex_modes.current;
+  parser->lex_modes.index++;
+
+  if (parser->lex_modes.index > YP_LEX_STACK_SIZE - 1) {
+    parser->lex_modes.current = (yp_lex_mode_t *) malloc(sizeof(yp_lex_mode_t));
+  } else {
+    parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
+  }
+  *parser->lex_modes.current = lex_mode; // heap modes used to be left uninitialized
+}
+
+// Pop the current lex mode off the stack. At the bottom of the stack the mode
+// is reset to the default instead of being removed. Modes in the pre-allocated
+// stack are dropped by stepping the index back; heap modes are freed.
+static void
+pop_lex_mode(yp_parser_t *parser) {
+  if (parser->lex_modes.index == 0) {
+    parser->lex_modes.current->mode = YP_LEX_DEFAULT;
+    return;
+  }
+
+  yp_lex_mode_t *popped = parser->lex_modes.current;
+  bool on_heap = parser->lex_modes.index >= YP_LEX_STACK_SIZE;
+
+  parser->lex_modes.index--;
+  parser->lex_modes.current = on_heap ? popped->prev : &parser->lex_modes.stack[parser->lex_modes.index];
+  if (on_heap) free(popped);
+}
+
+/******************************************************************************/
+/* Specific token lexers                                                      */
+/******************************************************************************/
+
+static yp_token_type_t // INTEGER, FLOAT, or INVALID for a malformed exponent
+lex_optional_float_suffix(yp_parser_t *parser) {
+  yp_token_type_t type = YP_TOKEN_INTEGER;
+
+  // Here we're going to attempt to parse the optional decimal portion of a
+  // float. If it's not there, then it's okay and we'll just continue on.
+  if (*parser->current.end == '.') {
+    if ((parser->current.end + 1 < parser->end) && is_decimal_number_char(parser->current.end + 1)) {
+      parser->current.end += 2; // skip the '.' and the first fractional digit
+      while (is_decimal_number_char(parser->current.end)) {
+        parser->current.end++;
+        match(parser, '_'); // a single underscore may follow each digit
+      }
+
+      type = YP_TOKEN_FLOAT;
+    } else {
+      // If we had a . followed by something other than a digit, then it's not
+      // a float suffix on a number; it's a method call or something else.
+      return type;
+    }
+  }
+
+  // Here we're going to attempt to parse the optional exponent portion of a
+  // float. If it's not there, it's okay and we'll just continue on.
+  if (match(parser, 'e') || match(parser, 'E')) {
+    (void) (match(parser, '+') || match(parser, '-')); // optional exponent sign
+
+    if (is_decimal_number_char(parser->current.end)) {
+      parser->current.end++;
+      while (is_decimal_number_char(parser->current.end)) {
+        parser->current.end++;
+        match(parser, '_');
+      }
+
+      type = YP_TOKEN_FLOAT;
+    } else {
+      return YP_TOKEN_INVALID; // the exponent marker was not followed by a digit
+    }
+  }
+
+  return type;
+}
+
+static yp_token_type_t // INTEGER, FLOAT or INVALID; rational/imaginary suffixes are not handled here
+lex_numeric_prefix(yp_parser_t *parser) {
+  yp_token_type_t type = YP_TOKEN_INTEGER;
+
+  if (parser->current.end[-1] == '0') { // the character just before the end pointer is a 0
+    switch (*parser->current.end) {
+      // 0d1111 is a decimal number
+      case 'd': case 'D':
+        if (!is_decimal_number_char(++parser->current.end)) return YP_TOKEN_INVALID;
+        while (is_decimal_number_char(parser->current.end)) {
+          parser->current.end++;
+          match(parser, '_');
+        }
+        break;
+
+      // 0b1111 is a binary number
+      case 'b': case 'B':
+        if (!is_binary_number_char(++parser->current.end)) return YP_TOKEN_INVALID;
+        while (is_binary_number_char(parser->current.end)) {
+          parser->current.end++;
+          match(parser, '_');
+        }
+        break;
+
+      // 0o1111 is an octal number
+      case 'o': case 'O':
+        if (!is_octal_number_char(++parser->current.end)) return YP_TOKEN_INVALID;
+        // fall through
+
+      // 01111 is an octal number
+      case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+        while (is_octal_number_char(parser->current.end)) {
+          parser->current.end++;
+          match(parser, '_');
+        }
+        break;
+
+      // 0x1111 is a hexadecimal number
+      case 'x': case 'X':
+        if (!is_hexadecimal_number_char(++parser->current.end)) return YP_TOKEN_INVALID;
+        while (is_hexadecimal_number_char(parser->current.end)) {
+          parser->current.end++;
+          match(parser, '_');
+        }
+        break;
+
+      // 0.xxx is a float
+      case '.': {
+        type = lex_optional_float_suffix(parser);
+        break;
+      }
+
+      // 0exxx is a float
+      case 'e': case 'E': {
+        type = lex_optional_float_suffix(parser);
+        break;
+      }
+    }
+  } else {
+    // If it didn't start with a 0, then we'll lex as far as we can into a
+    // decimal number.
+    while (is_decimal_number_char(parser->current.end)) {
+      parser->current.end++;
+      match(parser, '_');
+    }
+
+    // Afterward, we'll lex as far as we can into an optional float suffix.
+    type = lex_optional_float_suffix(parser);
+  }
+
+  // If the last character that we consumed was an underscore, then this is
+  // actually an invalid numeric value, and we should return an invalid token.
+  if (parser->current.end[-1] == '_') return YP_TOKEN_INVALID;
+  return type;
+}
+
+static yp_token_type_t
+lex_numeric(yp_parser_t *parser) {
+  yp_token_type_t type = lex_numeric_prefix(parser);
+  if (type == YP_TOKEN_INVALID) return type;
+
+  // An optional r suffix may itself be followed by an optional i suffix.
+  if (match(parser, 'r')) type = YP_TOKEN_RATIONAL_NUMBER;
+  if (match(parser, 'i')) type = YP_TOKEN_IMAGINARY_NUMBER;
+
+  return type;
+}
+
+static yp_token_type_t // classifies and consumes the characters at the end pointer as a $-variable name
+lex_global_variable(yp_parser_t *parser) {
+  switch (*parser->current.end) {
+    case '~':   // $~: match-data
+    case '*':   // $*: argv
+    case '$':   // $$: pid
+    case '?':   // $?: last status
+    case '!':   // $!: error string
+    case '@':   // $@: error position
+    case '/':   // $/: input record separator
+    case '\\':  // $\: output record separator
+    case ';':   // $;: field separator
+    case ',':   // $,: output field separator
+    case '.':   // $.: last read line number
+    case '=':   // $=: ignorecase
+    case ':':   // $:: load path
+    case '<':   // $<: reading filename
+    case '>':   // $>: default output handle
+    case '\"':  // $": already loaded files
+      parser->current.end++;
+      return YP_TOKEN_GLOBAL_VARIABLE;
+
+    case '&':   // $&: last match
+    case '`':   // $`: string before last match
+    case '\'':  // $': string after last match
+    case '+':   // $+: string matches last paren.
+      parser->current.end++;
+      return YP_TOKEN_BACK_REFERENCE;
+
+    case '1': case '2': case '3': case '4': case '5':
+    case '6': case '7': case '8': case '9':
+      do { parser->current.end++; } while (is_decimal_number_char(parser->current.end));
+      return YP_TOKEN_NTH_REFERENCE;
+
+    default: // identifier-style names; a leading 0 also lands here
+      if (is_identifier_char(parser->current.end)) {
+        do { parser->current.end++; } while (is_identifier_char(parser->current.end));
+        return YP_TOKEN_GLOBAL_VARIABLE;
+      }
+
+      // If we get here, then we have a $ followed by something that isn't
+      // recognized as a global variable.
+      return YP_TOKEN_INVALID;
+  }
+}
+
+static yp_token_type_t // a keyword token, CONSTANT, or IDENTIFIER
+lex_identifier(yp_parser_t *parser) {
+  // Lex as far as we can into the current identifier.
+  while (is_identifier_char(parser->current.end)) {
+    parser->current.end++;
+  }
+
+  off_t width = parser->current.end - parser->current.start;
+
+#define KEYWORD(value, size, token) if (width == size && strncmp(parser->current.start, value, size) == 0) return YP_TOKEN_KEYWORD_##token;
+
+  if ((parser->current.end + 1 < parser->end) && (parser->current.end[1] != '=') && (match(parser, '!') || match(parser, '?'))) {
+    width++; // the trailing ! or ? is part of the name, unless an = follows it
+    if (parser->previous.type != YP_TOKEN_DOT) {
+      KEYWORD("defined?", 8, DEFINED)
+    }
+    return YP_TOKEN_IDENTIFIER;
+  }
+
+  if (parser->previous.type != YP_TOKEN_DOT) { // keywords are not recognized directly after a dot
+    KEYWORD("__ENCODING__", 12, __ENCODING__)
+    KEYWORD("__LINE__", 8, __LINE__)
+    KEYWORD("__FILE__", 8, __FILE__)
+    KEYWORD("alias", 5, ALIAS)
+    KEYWORD("and", 3, AND)
+    KEYWORD("begin", 5, BEGIN)
+    KEYWORD("BEGIN", 5, BEGIN_UPCASE)
+    KEYWORD("break", 5, BREAK)
+    KEYWORD("case", 4, CASE)
+    KEYWORD("class", 5, CLASS)
+    KEYWORD("def", 3, DEF)
+    KEYWORD("do", 2, DO)
+    KEYWORD("else", 4, ELSE)
+    KEYWORD("elsif", 5, ELSIF)
+    KEYWORD("end", 3, END)
+    KEYWORD("END", 3, END_UPCASE)
+    KEYWORD("ensure", 6, ENSURE)
+    KEYWORD("false", 5, FALSE)
+    KEYWORD("for", 3, FOR)
+    KEYWORD("if", 2, IF)
+    KEYWORD("in", 2, IN)
+    KEYWORD("module", 6, MODULE)
+    KEYWORD("next", 4, NEXT)
+    KEYWORD("nil", 3, NIL)
+    KEYWORD("not", 3, NOT)
+    KEYWORD("or", 2, OR)
+    KEYWORD("redo", 4, REDO)
+    KEYWORD("rescue", 6, RESCUE)
+    KEYWORD("retry", 5, RETRY)
+    KEYWORD("return", 6, RETURN)
+    KEYWORD("self", 4, SELF)
+    KEYWORD("super", 5, SUPER)
+    KEYWORD("then", 4, THEN)
+    KEYWORD("true", 4, TRUE)
+    KEYWORD("undef", 5, UNDEF)
+    KEYWORD("unless", 6, UNLESS)
+    KEYWORD("until", 5, UNTIL)
+    KEYWORD("when", 4, WHEN)
+    KEYWORD("while", 5, WHILE)
+    KEYWORD("yield", 5, YIELD)
+  }
+
+#undef KEYWORD
+
+  char start = parser->current.start[0]; // an uppercase ASCII first letter makes it a constant
+  return start >= 'A' && start <= 'Z' ? YP_TOKEN_CONSTANT : YP_TOKEN_IDENTIFIER;
+}
+
+// This is the overall lexer function. It is responsible for advancing both
+// parser->current.start and parser->current.end such that they point to the
+// beginning and end of the next token. It should return the type of token that
+// was found.
+static yp_token_type_t
+lex_token_type(yp_parser_t *parser) {
+  switch (parser->lex_modes.current->mode) {
+    case YP_LEX_DEFAULT:
+    case YP_LEX_EMBEXPR: {
+      // First, we're going to skip past any whitespace at the front of the next
+      // token.
+      while (is_non_newline_whitespace_char(parser->current.end)) {
+        parser->current.end++;
+      }
+
+      // Next, we'll set to start of this token to be the current end.
+      parser->current.start = parser->current.end;
+
+      // Finally, we'll check the current character to determine the next token.
+      switch (*parser->current.end++) {
+        case '\0': // NUL or end of script
+        case '\004': // ^D
+        case '\032': // ^Z
+          return YP_TOKEN_EOF;
+
+        case '#': // comments
+          while (*parser->current.end != '\n' && *parser->current.end != '\0') {
+            parser->current.end++;
+          }
+          (void) match(parser, '\n');
+          return YP_TOKEN_COMMENT;
+
+        case '\n': {
+          parser->lineno++;
+          return YP_TOKEN_NEWLINE;
+        }
+
+        // , ( ) ;
+        case ',': return YP_TOKEN_COMMA;
+        case '(': return YP_TOKEN_PARENTHESIS_LEFT;
+        case ')': return YP_TOKEN_PARENTHESIS_RIGHT;
+        case ';': return YP_TOKEN_SEMICOLON;
+
+        // [ []
+        case '[':
+          if (parser->previous.type == YP_TOKEN_DOT && match(parser, ']')) {
+            return YP_TOKEN_BRACKET_LEFT_RIGHT;
+          }
+          return YP_TOKEN_BRACKET_LEFT;
+
+        // ]
+        case ']': return YP_TOKEN_BRACKET_RIGHT;
+
+        // {
+        case '{':
+          if (parser->previous.type == YP_TOKEN_MINUS_GREATER) return YP_TOKEN_LAMBDA_BEGIN;
+          return YP_TOKEN_BRACE_LEFT;
+
+        // }
+        case '}':
+          if (parser->lex_modes.current->mode == YP_LEX_EMBEXPR) {
+            pop_lex_mode(parser);
+            return YP_TOKEN_EMBEXPR_END;
+          }
+          return YP_TOKEN_BRACE_RIGHT;
+
+        // * ** **= *=
+        case '*':
+          if (match(parser, '*')) return match(parser, '=') ? YP_TOKEN_STAR_STAR_EQUAL : YP_TOKEN_STAR_STAR;
+          return match(parser, '=') ? YP_TOKEN_STAR_EQUAL : YP_TOKEN_STAR;
+
+        // ! != !~ !@
+        case '!':
+          if (match(parser, '=')) return YP_TOKEN_BANG_EQUAL;
+          if (match(parser, '~')) return YP_TOKEN_BANG_TILDE;
+          if ((parser->previous.type == YP_TOKEN_KEYWORD_DEF || parser->previous.type == YP_TOKEN_DOT) && match(parser, '@')) return YP_TOKEN_BANG_AT;
+          return YP_TOKEN_BANG;
+
+        // = => =~ == === =begin
+        case '=':
+          if (parser->current.end[-2] == '\n' && (strncmp(parser->current.end, "begin\n", 6) == 0)) {
+            parser->current.end += 6;
+            push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_EMBDOC, .term = '\0', .interp = false });
+            return YP_TOKEN_EMBDOC_BEGIN;
+          }
+
+          if (match(parser, '>')) return YP_TOKEN_EQUAL_GREATER;
+          if (match(parser, '~')) return YP_TOKEN_EQUAL_TILDE;
+          if (match(parser, '=')) return match(parser, '=') ? YP_TOKEN_EQUAL_EQUAL_EQUAL : YP_TOKEN_EQUAL_EQUAL;
+          return YP_TOKEN_EQUAL;
+
+        // < << <<= <= <=>
+        case '<':
+          if (match(parser, '<')) {
+            if (match(parser, '=')) return YP_TOKEN_LESS_LESS_EQUAL;
+
+            // We don't yet handle heredocs.
+            if (match(parser, '-') || match(parser, '~')) return YP_TOKEN_EOF;
+
+            return YP_TOKEN_LESS_LESS;
+          }
+          if (match(parser, '=')) return match(parser, '>') ? YP_TOKEN_LESS_EQUAL_GREATER : YP_TOKEN_LESS_EQUAL;
+          return YP_TOKEN_LESS;
+
+        // > >> >>= >=
+        case '>':
+          if (match(parser, '>')) return match(parser, '=') ? YP_TOKEN_GREATER_GREATER_EQUAL : YP_TOKEN_GREATER_GREATER;
+          return match(parser, '=') ? YP_TOKEN_GREATER_EQUAL : YP_TOKEN_GREATER;
+
+        // double-quoted string literal
+        case '"':
+          push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_STRING, .term = '"', .interp = true });
+          return YP_TOKEN_STRING_BEGIN;
+
+        // xstring literal
+        case '`':
+          push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_STRING, .term = '`', .interp = true });
+          return YP_TOKEN_BACKTICK;
+
+        // single-quoted string literal
+        case '\'':
+          push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_STRING, .term = '\'', .interp = false });
+          return YP_TOKEN_STRING_BEGIN;
+
+        // ? character literal
+        case '?':
+          if (is_identifier_char(parser->current.end)) {
+            parser->current.end++;
+            return YP_TOKEN_CHARACTER_LITERAL;
+          }
+          return YP_TOKEN_QUESTION_MARK;
+
+        // & && &&= &=
+        case '&':
+          if (match(parser, '&')) return match(parser, '=') ? YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL : YP_TOKEN_AMPERSAND_AMPERSAND;
+          return match(parser, '=') ? YP_TOKEN_AMPERSAND_EQUAL : YP_TOKEN_AMPERSAND;
+
+        // | || ||= |=
+        case '|':
+          if (match(parser, '|')) return match(parser, '=') ? YP_TOKEN_PIPE_PIPE_EQUAL : YP_TOKEN_PIPE_PIPE;
+          return match(parser, '=') ? YP_TOKEN_PIPE_EQUAL : YP_TOKEN_PIPE;
+
+        // + += +@
+        case '+':
+          if (match(parser, '=')) return YP_TOKEN_PLUS_EQUAL;
+          if ((parser->previous.type == YP_TOKEN_KEYWORD_DEF || parser->previous.type == YP_TOKEN_DOT) && match(parser, '@')) return YP_TOKEN_PLUS_AT;
+          return YP_TOKEN_PLUS;
+
+        // - -= -@
+        case '-':
+          if (match(parser, '>')) return YP_TOKEN_MINUS_GREATER;
+          if (match(parser, '=')) return YP_TOKEN_MINUS_EQUAL;
+          if ((parser->previous.type == YP_TOKEN_KEYWORD_DEF || parser->previous.type == YP_TOKEN_DOT) && match(parser, '@')) return YP_TOKEN_MINUS_AT;
+          return YP_TOKEN_MINUS;
+
+        // . .. ...
+        case '.':
+          if (!match(parser, '.')) return YP_TOKEN_DOT;
+          return match(parser, '.') ? YP_TOKEN_DOT_DOT_DOT : YP_TOKEN_DOT_DOT;
+
+        // integer
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+          return lex_numeric(parser);
+
+        // :: symbol
+        case ':':
+          if (match(parser, ':')) return YP_TOKEN_COLON_COLON;
+          if (is_identifier_char(parser->current.end)) {
+            push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_SYMBOL, .term = '\0' });
+            return YP_TOKEN_SYMBOL_BEGIN;
+          }
+          return YP_TOKEN_COLON;
+
+        // / /=
+        case '/':
+          if (match(parser, '=')) return YP_TOKEN_SLASH_EQUAL;
+          if (*parser->current.end == ' ') return YP_TOKEN_SLASH;
+
+          push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_REGEXP, .term = '/' });
+          return YP_TOKEN_REGEXP_BEGIN;
+
+        // ^ ^=
+        case '^': return match(parser, '=') ? YP_TOKEN_CARET_EQUAL : YP_TOKEN_CARET;
+
+        // ~ ~@
+        case '~':
+          if ((parser->previous.type == YP_TOKEN_KEYWORD_DEF || parser->previous.type == YP_TOKEN_DOT) && match(parser, '@')) return YP_TOKEN_TILDE_AT;
+          return YP_TOKEN_TILDE;
+
+        // TODO
+        case '\\':
+          return YP_TOKEN_INVALID;
+
+        // % %= %i %I %q %Q %w %W
+        case '%':
+          switch (*parser->current.end) {
+            case '=':
+              parser->current.end++;
+              return YP_TOKEN_PERCENT_EQUAL;
+            case 'i':
+              parser->current.end++;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_LIST, .term = terminator(*parser->current.end++), .interp = false });
+              return YP_TOKEN_PERCENT_LOWER_I;
+            case 'I':
+              parser->current.end++;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_LIST, .term = terminator(*parser->current.end++), .interp = true });
+              return YP_TOKEN_PERCENT_UPPER_I;
+            case 'r':
+              parser->current.end++;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_REGEXP, .term = terminator(*parser->current.end++), .interp = true });
+              return YP_TOKEN_REGEXP_BEGIN;
+            case 'q':
+              parser->current.end++;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_STRING, .term = terminator(*parser->current.end++), .interp = false });
+              return YP_TOKEN_STRING_BEGIN;
+            case 'Q':
+              parser->current.end++;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_STRING, .term = terminator(*parser->current.end++), .interp = true });
+              return YP_TOKEN_STRING_BEGIN;
+            case 'w':
+              parser->current.end++;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_LIST, .term = terminator(*parser->current.end++), .interp = false });
+              return YP_TOKEN_PERCENT_LOWER_W;
+            case 'W':
+              parser->current.end++;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_LIST, .term = terminator(*parser->current.end++), .interp = true });
+              return YP_TOKEN_PERCENT_UPPER_W;
+            case 'x':
+              parser->current.end++;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_STRING, .term = terminator(*parser->current.end++), .interp = true });
+              return YP_TOKEN_PERCENT_LOWER_X;
+            default:
+              return YP_TOKEN_PERCENT;
+          }
+
+        // global variable
+        case '$': return lex_global_variable(parser);
+
+        // instance variable, class variable
+        case '@': {
+          yp_token_type_t type = match(parser, '@') ? YP_TOKEN_CLASS_VARIABLE : YP_TOKEN_INSTANCE_VARIABLE;
+
+          if (is_identifier_start_char(parser->current.end)) {
+            do { parser->current.end++; } while (is_identifier_char(parser->current.end));
+            return type;
+          }
+
+          return YP_TOKEN_INVALID;
+        }
+
+        default: {
+          // If this isn't the beginning of an identifier, then it's an invalid
+          // token as we've exhausted all of the other options.
+          if (!is_identifier_start_char(parser->current.start)) {
+            return YP_TOKEN_INVALID;
+          }
+
+          yp_token_type_t type = lex_identifier(parser);
+
+          // If we're lexing in a place that allows labels and we've hit a
+          // colon, then we can return a label token.
+          if ((parser->current.end[0] == ':') && (parser->current.end[1] != ':')) {
+            parser->current.end++;
+            return YP_TOKEN_LABEL;
+          }
+
+          return type;
+        }
+      }
+    }
+    case YP_LEX_EMBDOC: {
+      parser->current.start = parser->current.end;
+
+      // If we've hit the end of the embedded documentation then we'll return that token here.
+      if (strncmp(parser->current.end, "=end\n", 5) == 0) {
+        parser->current.end += 5;
+        pop_lex_mode(parser);
+        return YP_TOKEN_EMBDOC_END;
+      }
+
+      // Otherwise, we'll parse until the end of the line and return a line of
+      // embedded documentation.
+      while ((parser->current.end < parser->end) && (*parser->current.end++ != '\n'));
+
+      // If we've still got content, then we'll return a line of embedded
+      // documentation.
+      if (parser->current.end < parser->end) {
+        parser->lineno++;
+        return YP_TOKEN_EMBDOC_LINE;
+      }
+
+      // Otherwise, fall back to error recovery.
+      return parser->error_handler->unterminated_embdoc(parser);
+    }
+    case YP_LEX_LIST: {
+      // If there's any whitespace at the start of the list, then we're going to
+      // trim it off the beginning and create a new token.
+      if (is_whitespace_char(parser->current.end)) {
+        parser->current.start = parser->current.end;
+
+        do {
+          if (*parser->current.end == '\n') parser->lineno++;
+          parser->current.end++;
+        } while (is_whitespace_char(parser->current.end));
+
+        return YP_TOKEN_WORDS_SEP;
+      }
+
+      // Next, we'll set to start of this token to be the current end.
+      parser->current.start = parser->current.end;
+
+      // Lex as far as we can into the word.
+      while (parser->current.end < parser->end) {
+        // If we've hit whitespace, then we must have received content by now,
+        // so we can return an element of the list.
+        if (is_whitespace_char(parser->current.end)) {
+          return YP_TOKEN_STRING_CONTENT;
+        }
+
+        if (*parser->current.end == parser->lex_modes.current->term) {
+          // If we've hit the terminator and we've already skipped past content,
+          // then we can return a list node.
+          if (parser->current.start < parser->current.end) {
+            return YP_TOKEN_STRING_CONTENT;
+          }
+
+          // Otherwise, switch back to the default state and return the end of
+          // the list.
+          parser->current.end++;
+          pop_lex_mode(parser);
+          return YP_TOKEN_STRING_END;
+        }
+
+        // Otherwise, just skip past the content as it's part of an element of
+        // the list.
+        parser->current.end++;
+      }
+
+      // Otherwise, fall back to error recovery.
+      return parser->error_handler->unterminated_list(parser);
+    }
+    case YP_LEX_REGEXP: {
+      // First, we'll set to start of this token to be the current end.
+      parser->current.start = parser->current.end;
+
+      // If we've hit the end of the string, then we can return to the default
+      // state of the lexer and return a string ending token.
+      if (match(parser, parser->lex_modes.current->term)) {
+        // Since we've hit the terminator of the regular expression, we now need
+        // to parse the options.
+        bool options = true;
+        while (options) {
+          switch (*parser->current.end) {
+            case 'e': case 'i': case 'm': case 'n': case 's': case 'u': case 'x':
+              parser->current.end++;
+              break;
+            default:
+              options = false;
+              break;
+          }
+        }
+
+        pop_lex_mode(parser);
+        return YP_TOKEN_REGEXP_END;
+      }
+
+      // Otherwise, we'll lex as far as we can into the regular expression. If
+      // we hit the end of the regular expression, then we'll return everything
+      // up to that point.
+      while (parser->current.end < parser->end) {
+        // If we hit the terminator, then return this element of the string.
+        if (*parser->current.end == parser->lex_modes.current->term) {
+          return YP_TOKEN_STRING_CONTENT;
+        }
+
+        // If we hit a newline, make sure to do the required bookkeeping.
+        if (*parser->current.end == '\n') parser->lineno++;
+
+        // If we've hit a #, then check if it's used as the beginning of either
+        // an embedded variable or an embedded expression.
+        if (*parser->current.end == '#') {
+          switch (parser->current.end[1]) {
+            case '{':
+              // In this case it's the start of an embedded expression.
+
+              // If we have already consumed content, then we need to return
+              // that content as string content first.
+              if (parser->current.end > parser->current.start) {
+                return YP_TOKEN_STRING_CONTENT;
+              }
+
+              parser->current.end += 2;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_EMBEXPR });
+              return YP_TOKEN_EMBEXPR_BEGIN;
+          }
+        }
+
+        parser->current.end++;
+      }
+
+      // Otherwise, fall back to error recovery.
+      return parser->error_handler->unterminated_regexp(parser);
+    }
+    case YP_LEX_STRING: {
+      // First, we'll set to start of this token to be the current end.
+      parser->current.start = parser->current.end;
+
+      // If we've hit the end of the string, then we can return to the default
+      // state of the lexer and return a string ending token.
+      if (match(parser, parser->lex_modes.current->term)) {
+        pop_lex_mode(parser);
+        return YP_TOKEN_STRING_END;
+      }
+
+      // Otherwise, we'll lex as far as we can into the string. If we hit the
+      // end of the string, then we'll return everything up to that point.
+      while (parser->current.end < parser->end) {
+        // If we hit the terminator, then return this element of the string.
+        if (*parser->current.end == parser->lex_modes.current->term) {
+          return YP_TOKEN_STRING_CONTENT;
+        }
+
+        // If we hit a newline, make sure to do the required bookkeeping.
+        if (*parser->current.end == '\n') parser->lineno++;
+
+        // If our current lex state allows interpolation and we've hit a #, then
+        // check if it's used as the beginning of either an embedded variable or
+        // an embedded expression.
+        if (parser->lex_modes.current->interp && *parser->current.end == '#') {
+          switch (parser->current.end[1]) {
+            case '@':
+              // In this case it could be an embedded instance or class
+              // variable.
+              break;
+            case '$':
+              // In this case it could be an embedded global variable.
+              break;
+            case '{':
+              // In this case it's the start of an embedded expression.
+
+              // If we have already consumed content, then we need to return
+              // that content as string content first.
+              if (parser->current.end > parser->current.start) {
+                return YP_TOKEN_STRING_CONTENT;
+              }
+
+              parser->current.end += 2;
+              push_lex_mode(parser, (yp_lex_mode_t) { .mode = YP_LEX_EMBEXPR });
+              return YP_TOKEN_EMBEXPR_BEGIN;
+          }
+        }
+
+        parser->current.end++;
+      }
+
+      // Otherwise, fall back to error recovery.
+      return parser->error_handler->unterminated_string(parser);
+    }
+    case YP_LEX_SYMBOL: {
+      // First, we'll set to start of this token to be the current end.
+      parser->current.start = parser->current.end;
+
+      // Lex as far as we can into the symbol.
+      if (parser->current.end < parser->end && is_identifier_start_char(parser->current.end++)) {
+        pop_lex_mode(parser);
+
+        yp_token_type_t type = lex_identifier(parser);
+        return match(parser, '=') ? YP_TOKEN_IDENTIFIER : type;
+      }
+
+      // If we get here then we have the start of a symbol with no content. In
+      // that case return an invalid token.
+      return YP_TOKEN_INVALID;
+    }
+  }
+
+  // We shouldn't be able to get here at all, but some compilers can't figure
+  // that out, so just returning a value here to make them happy.
+  return YP_TOKEN_INVALID;
+}
+
+/******************************************************************************/
+/* External functions                                                         */
+/******************************************************************************/
+
+// Initialize a parser to lex `size` bytes of source starting at `source`. Any
+// field not assigned below (e.g. the previous token) starts out zeroed.
+void
+yp_parser_init(yp_parser_t *parser, const char *source, off_t size, yp_error_handler_t *error_handler) {
+  // Reset every field first so that nothing is left uninitialized.
+  *parser = (yp_parser_t) { .lineno = 1, .error_handler = error_handler };
+
+  // The lexer starts out in the default mode at the bottom of the mode stack.
+  parser->lex_modes.index = 0;
+  parser->lex_modes.stack[0].mode = YP_LEX_DEFAULT;
+  parser->lex_modes.current = &parser->lex_modes.stack[0];
+
+  // Both the source bounds and the (empty) current token begin at `source`.
+  parser->start = parser->current.start = parser->current.end = source;
+  parser->end = source + size;
+}
+
+// Copy current into previous, then store the type of the next token on current.
+void
+yp_lex_token(yp_parser_t *parser) {
+  parser->previous = parser->current;
+  parser->current.type = lex_token_type(parser);
+}
+
+/******************************************************************************/
+/* C-extension functions                                                      */
+/******************************************************************************/
+
+// By default, the lexer won't attempt to recover from lexer errors at all. This
+// function provides that implementation.
+static yp_token_type_t
+unrecoverable(yp_parser_t *parser) {
+  return YP_TOKEN_EOF;
+}
+
+// Build the [[start, end], :TYPE, "value"] array describing the current token,
+// where start and end are byte offsets from the beginning of the source.
+static VALUE
+token_inspect(yp_parser_t *parser) {
+  const yp_token_t *token = &parser->current;
+  const char *name = NULL;
+
+  // Work out the name of the symbol that represents the type of token. The
+  // invalid token doesn't actually exist in Ripper, but it's named here too
+  // since that gives us a little more information when our tests fail.
+  switch (token->type) {
+#define CASE(type) case YP_TOKEN_##type: name = #type; break;
+
+    CASE(INVALID)
+    CASE(AMPERSAND)
+    CASE(AMPERSAND_AMPERSAND)
+    CASE(AMPERSAND_AMPERSAND_EQUAL)
+    CASE(AMPERSAND_EQUAL)
+    CASE(BACK_REFERENCE)
+    CASE(BACKTICK)
+    CASE(BANG)
+    CASE(BANG_AT)
+    CASE(BANG_EQUAL)
+    CASE(BANG_TILDE)
+    CASE(BRACE_LEFT)
+    CASE(BRACE_RIGHT)
+    CASE(BRACKET_LEFT)
+    CASE(BRACKET_LEFT_RIGHT)
+    CASE(BRACKET_RIGHT)
+    CASE(CARET)
+    CASE(CARET_EQUAL)
+    CASE(CHARACTER_LITERAL)
+    CASE(CLASS_VARIABLE)
+    CASE(COLON)
+    CASE(COLON_COLON)
+    CASE(COMMA)
+    CASE(COMMENT)
+    CASE(CONSTANT)
+    CASE(DOT)
+    CASE(DOT_DOT)
+    CASE(DOT_DOT_DOT)
+    CASE(EMBDOC_BEGIN)
+    CASE(EMBDOC_END)
+    CASE(EMBDOC_LINE)
+    CASE(EMBEXPR_BEGIN)
+    CASE(EMBEXPR_END)
+    CASE(EQUAL)
+    CASE(EQUAL_EQUAL)
+    CASE(EQUAL_EQUAL_EQUAL)
+    CASE(EQUAL_GREATER)
+    CASE(EQUAL_TILDE)
+    CASE(FLOAT)
+    CASE(GREATER)
+    CASE(GREATER_EQUAL)
+    CASE(GREATER_GREATER)
+    CASE(GREATER_GREATER_EQUAL)
+    CASE(GLOBAL_VARIABLE)
+    CASE(IDENTIFIER)
+    CASE(IMAGINARY_NUMBER)
+    CASE(INTEGER)
+    CASE(INSTANCE_VARIABLE)
+    CASE(KEYWORD___ENCODING__)
+    CASE(KEYWORD___LINE__)
+    CASE(KEYWORD___FILE__)
+    CASE(KEYWORD_ALIAS)
+    CASE(KEYWORD_AND)
+    CASE(KEYWORD_BEGIN)
+    CASE(KEYWORD_BEGIN_UPCASE)
+    CASE(KEYWORD_BREAK)
+    CASE(KEYWORD_CASE)
+    CASE(KEYWORD_CLASS)
+    CASE(KEYWORD_DEF)
+    CASE(KEYWORD_DEFINED)
+    CASE(KEYWORD_DO)
+    CASE(KEYWORD_ELSE)
+    CASE(KEYWORD_ELSIF)
+    CASE(KEYWORD_END)
+    CASE(KEYWORD_END_UPCASE)
+    CASE(KEYWORD_ENSURE)
+    CASE(KEYWORD_FALSE)
+    CASE(KEYWORD_FOR)
+    CASE(KEYWORD_IF)
+    CASE(KEYWORD_IN)
+    CASE(KEYWORD_MODULE)
+    CASE(KEYWORD_NEXT)
+    CASE(KEYWORD_NIL)
+    CASE(KEYWORD_NOT)
+    CASE(KEYWORD_OR)
+    CASE(KEYWORD_REDO)
+    CASE(KEYWORD_RESCUE)
+    CASE(KEYWORD_RETRY)
+    CASE(KEYWORD_RETURN)
+    CASE(KEYWORD_SELF)
+    CASE(KEYWORD_SUPER)
+    CASE(KEYWORD_THEN)
+    CASE(KEYWORD_TRUE)
+    CASE(KEYWORD_UNDEF)
+    CASE(KEYWORD_UNLESS)
+    CASE(KEYWORD_UNTIL)
+    CASE(KEYWORD_WHEN)
+    CASE(KEYWORD_WHILE)
+    CASE(KEYWORD_YIELD)
+    CASE(LABEL)
+    CASE(LAMBDA_BEGIN)
+    CASE(LESS)
+    CASE(LESS_EQUAL)
+    CASE(LESS_EQUAL_GREATER)
+    CASE(LESS_LESS)
+    CASE(LESS_LESS_EQUAL)
+    CASE(MINUS)
+    CASE(MINUS_AT)
+    CASE(MINUS_EQUAL)
+    CASE(MINUS_GREATER)
+    CASE(NEWLINE)
+    CASE(NTH_REFERENCE)
+    CASE(PARENTHESIS_LEFT)
+    CASE(PARENTHESIS_RIGHT)
+    CASE(PERCENT)
+    CASE(PERCENT_EQUAL)
+    CASE(PERCENT_LOWER_I)
+    CASE(PERCENT_LOWER_W)
+    CASE(PERCENT_LOWER_X)
+    CASE(PERCENT_UPPER_I)
+    CASE(PERCENT_UPPER_W)
+    CASE(PIPE)
+    CASE(PIPE_EQUAL)
+    CASE(PIPE_PIPE)
+    CASE(PIPE_PIPE_EQUAL)
+    CASE(PLUS)
+    CASE(PLUS_AT)
+    CASE(PLUS_EQUAL)
+    CASE(QUESTION_MARK)
+    CASE(RATIONAL_NUMBER)
+    CASE(REGEXP_BEGIN)
+    CASE(REGEXP_END)
+    CASE(SEMICOLON)
+    CASE(SLASH)
+    CASE(SLASH_EQUAL)
+    CASE(STAR)
+    CASE(STAR_EQUAL)
+    CASE(STAR_STAR)
+    CASE(STAR_STAR_EQUAL)
+    CASE(STRING_BEGIN)
+    CASE(STRING_CONTENT)
+    CASE(STRING_END)
+    CASE(SYMBOL_BEGIN)
+    CASE(TILDE)
+    CASE(TILDE_AT)
+    CASE(WORDS_SEP)
+
+#undef CASE
+
+    default:
+      rb_bug("Unknown token type: %d", token->type);
+  }
+
+  // The location is the pair of offsets that delimit the token.
+  VALUE location = rb_ary_new();
+  rb_ary_push(location, LONG2FIX(token->start - parser->start));
+  rb_ary_push(location, LONG2FIX(token->end - parser->start));
+
+  // Assemble the location, the type symbol, and the source text, in that order.
+  VALUE parts = rb_ary_new();
+  rb_ary_push(parts, location);
+  rb_ary_push(parts, ID2SYM(rb_intern(name)));
+  rb_ary_push(parts, rb_str_new(token->start, token->end - token->start));
+  return parts;
+}
+
+// Lexes until EOF, yielding each token to the block (the rb_ensure body).
+static VALUE
+each_token_yield(VALUE arg) {
+  yp_parser_t *parser = (yp_parser_t *) arg;
+  for (yp_lex_token(parser); parser->current.type != YP_TOKEN_EOF; yp_lex_token(parser)) {
+    rb_yield(token_inspect(parser));
+  }
+  return Qnil;
+}
+
+// Unmaps the source; rb_ensure runs this even if the block raises or breaks.
+static VALUE
+each_token_unmap(VALUE arg) {
+  yp_parser_t *parser = (yp_parser_t *) arg;
+  munmap((void *) parser->start, (size_t) (parser->end - parser->start));
+  return Qnil;
+}
+
+// YARP.each_token(filepath): yields [[start, end], type, value] per token. On
+// an open/fstat/mmap failure it reports through perror and returns nil.
+static VALUE
+each_token(VALUE self, VALUE rb_filepath) {
+  int fd = open(StringValueCStr(rb_filepath), O_RDONLY);
+  if (fd == -1) {
+    perror("open");
+    return Qnil;
+  }
+
+  // Stat then mmap the file. perror runs before close so errno isn't clobbered.
+  struct stat sb;
+  if (fstat(fd, &sb) == -1) {
+    perror("fstat");
+    close(fd);
+    return Qnil;
+  }
+
+  const char *source = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+  if (source == MAP_FAILED) perror("mmap");
+  close(fd);
+  if (source == MAP_FAILED) return Qnil;
+
+  yp_error_handler_t default_error_handler = {
+    .unterminated_embdoc = unrecoverable, .unterminated_list = unrecoverable,
+    .unterminated_regexp = unrecoverable, .unterminated_string = unrecoverable
+  };
+  yp_parser_t parser;
+  yp_parser_init(&parser, source, sb.st_size, &default_error_handler);
+  return rb_ensure(each_token_yield, (VALUE) &parser, each_token_unmap, (VALUE) &parser);
+}
+
+// Defines the YARP module and exposes each_token on it as a singleton method.
+void
+Init_yarp(void) {
+  rb_define_singleton_method(rb_define_module("YARP"), "each_token", each_token, 1);
+}
diff --git a/ext/yarp/yarp.h b/ext/yarp/yarp.h
new file mode 100644
index 000000000..ec8eb055b
--- /dev/null
+++ b/ext/yarp/yarp.h
@@ -0,0 +1,260 @@
+#ifndef YARP_H
+#define YARP_H
+
+#include <ruby.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+typedef enum {
+  YP_TOKEN_EOF = 0,                   // final token in the file
+  YP_TOKEN_INVALID,                   // an invalid token (no Ripper equivalent)
+  YP_TOKEN_AMPERSAND,                 // &
+  YP_TOKEN_AMPERSAND_AMPERSAND,       // &&
+  YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL, // &&=
+  YP_TOKEN_AMPERSAND_EQUAL,           // &=
+  YP_TOKEN_BACK_REFERENCE,            // a back reference
+  YP_TOKEN_BACKTICK,                  // `
+  YP_TOKEN_BANG,                      // !
+  YP_TOKEN_BANG_AT,                   // !@
+  YP_TOKEN_BANG_EQUAL,                // !=
+  YP_TOKEN_BANG_TILDE,                // !~
+  YP_TOKEN_BRACE_LEFT,                // {
+  YP_TOKEN_BRACE_RIGHT,               // }
+  YP_TOKEN_BRACKET_LEFT,              // [
+  YP_TOKEN_BRACKET_LEFT_RIGHT,        // []
+  YP_TOKEN_BRACKET_RIGHT,             // ]
+  YP_TOKEN_CARET,                     // ^
+  YP_TOKEN_CARET_EQUAL,               // ^=
+  YP_TOKEN_CHARACTER_LITERAL,         // a character literal
+  YP_TOKEN_CLASS_VARIABLE,            // a class variable
+  YP_TOKEN_COLON,                     // :
+  YP_TOKEN_COLON_COLON,               // ::
+  YP_TOKEN_COMMA,                     // ,
+  YP_TOKEN_COMMENT,                   // a comment
+  YP_TOKEN_CONSTANT,                  // a constant
+  YP_TOKEN_DOT,                       // .
+  YP_TOKEN_DOT_DOT,                   // ..
+  YP_TOKEN_DOT_DOT_DOT,               // ...
+  YP_TOKEN_EMBDOC_BEGIN,              // =begin
+  YP_TOKEN_EMBDOC_END,                // =end
+  YP_TOKEN_EMBDOC_LINE,               // a line inside of embedded documentation
+  YP_TOKEN_EMBEXPR_BEGIN,             // #{
+  YP_TOKEN_EMBEXPR_END,               // } (closing an embedded expression)
+  YP_TOKEN_EQUAL,                     // =
+  YP_TOKEN_EQUAL_EQUAL,               // ==
+  YP_TOKEN_EQUAL_EQUAL_EQUAL,         // ===
+  YP_TOKEN_EQUAL_GREATER,             // =>
+  YP_TOKEN_EQUAL_TILDE,               // =~
+  YP_TOKEN_FLOAT,                     // a floating point number
+  YP_TOKEN_GREATER,                   // >
+  YP_TOKEN_GREATER_EQUAL,             // >=
+  YP_TOKEN_GREATER_GREATER,           // >>
+  YP_TOKEN_GREATER_GREATER_EQUAL,     // >>=
+  YP_TOKEN_GLOBAL_VARIABLE,           // a global variable
+  YP_TOKEN_IDENTIFIER,                // an identifier
+  YP_TOKEN_IMAGINARY_NUMBER,          // an imaginary number literal
+  YP_TOKEN_INSTANCE_VARIABLE,         // an instance variable
+  YP_TOKEN_INTEGER,                   // an integer (any base)
+  YP_TOKEN_KEYWORD___ENCODING__,      // __ENCODING__
+  YP_TOKEN_KEYWORD___LINE__,          // __LINE__
+  YP_TOKEN_KEYWORD___FILE__,          // __FILE__
+  YP_TOKEN_KEYWORD_ALIAS,             // alias
+  YP_TOKEN_KEYWORD_AND,               // and
+  YP_TOKEN_KEYWORD_BEGIN,             // begin
+  YP_TOKEN_KEYWORD_BEGIN_UPCASE,      // BEGIN
+  YP_TOKEN_KEYWORD_BREAK,             // break
+  YP_TOKEN_KEYWORD_CASE,              // case
+  YP_TOKEN_KEYWORD_CLASS,             // class
+  YP_TOKEN_KEYWORD_DEF,               // def
+  YP_TOKEN_KEYWORD_DEFINED,           // defined?
+  YP_TOKEN_KEYWORD_DO,                // do
+  YP_TOKEN_KEYWORD_ELSE,              // else
+  YP_TOKEN_KEYWORD_ELSIF,             // elsif
+  YP_TOKEN_KEYWORD_END,               // end
+  YP_TOKEN_KEYWORD_END_UPCASE,        // END
+  YP_TOKEN_KEYWORD_ENSURE,            // ensure
+  YP_TOKEN_KEYWORD_FALSE,             // false
+  YP_TOKEN_KEYWORD_FOR,               // for
+  YP_TOKEN_KEYWORD_IF,                // if
+  YP_TOKEN_KEYWORD_IN,                // in
+  YP_TOKEN_KEYWORD_MODULE,            // module
+  YP_TOKEN_KEYWORD_NEXT,              // next
+  YP_TOKEN_KEYWORD_NIL,               // nil
+  YP_TOKEN_KEYWORD_NOT,               // not
+  YP_TOKEN_KEYWORD_OR,                // or
+  YP_TOKEN_KEYWORD_REDO,              // redo
+  YP_TOKEN_KEYWORD_RESCUE,            // rescue
+  YP_TOKEN_KEYWORD_RETRY,             // retry
+  YP_TOKEN_KEYWORD_RETURN,            // return
+  YP_TOKEN_KEYWORD_SELF,              // self
+  YP_TOKEN_KEYWORD_SUPER,             // super
+  YP_TOKEN_KEYWORD_THEN,              // then
+  YP_TOKEN_KEYWORD_TRUE,              // true
+  YP_TOKEN_KEYWORD_UNDEF,             // undef
+  YP_TOKEN_KEYWORD_UNLESS,            // unless
+  YP_TOKEN_KEYWORD_UNTIL,             // until
+  YP_TOKEN_KEYWORD_WHEN,              // when
+  YP_TOKEN_KEYWORD_WHILE,             // while
+  YP_TOKEN_KEYWORD_YIELD,             // yield
+  YP_TOKEN_LABEL,                     // a label
+  YP_TOKEN_LAMBDA_BEGIN,              // { (opening the body of a lambda)
+  YP_TOKEN_LESS,                      // <
+  YP_TOKEN_LESS_EQUAL,                // <=
+  YP_TOKEN_LESS_EQUAL_GREATER,        // <=>
+  YP_TOKEN_LESS_LESS,                 // <<
+  YP_TOKEN_LESS_LESS_EQUAL,           // <<=
+  YP_TOKEN_MINUS,                     // -
+  YP_TOKEN_MINUS_AT,                  // -@
+  YP_TOKEN_MINUS_EQUAL,               // -=
+  YP_TOKEN_MINUS_GREATER,             // ->
+  YP_TOKEN_NEWLINE,                   // a newline character outside of other tokens
+  YP_TOKEN_NTH_REFERENCE,             // an nth global variable reference
+  YP_TOKEN_PARENTHESIS_LEFT,          // (
+  YP_TOKEN_PARENTHESIS_RIGHT,         // )
+  YP_TOKEN_PERCENT,                   // %
+  YP_TOKEN_PERCENT_EQUAL,             // %=
+  YP_TOKEN_PERCENT_LOWER_I,           // %i (begins a symbol list)
+  YP_TOKEN_PERCENT_LOWER_W,           // %w (begins a word list)
+  YP_TOKEN_PERCENT_LOWER_X,           // %x (begins an xstring)
+  YP_TOKEN_PERCENT_UPPER_I,           // %I (begins a symbol list)
+  YP_TOKEN_PERCENT_UPPER_W,           // %W (begins a word list)
+  YP_TOKEN_PIPE,                      // |
+  YP_TOKEN_PIPE_EQUAL,                // |=
+  YP_TOKEN_PIPE_PIPE,                 // ||
+  YP_TOKEN_PIPE_PIPE_EQUAL,           // ||=
+  YP_TOKEN_PLUS,                      // +
+  YP_TOKEN_PLUS_AT,                   // +@
+  YP_TOKEN_PLUS_EQUAL,                // +=
+  YP_TOKEN_QUESTION_MARK,             // ?
+  YP_TOKEN_RATIONAL_NUMBER,           // a rational number literal
+  YP_TOKEN_REGEXP_BEGIN,              // the beginning of a regular expression
+  YP_TOKEN_REGEXP_END,                // the end of a regular expression
+  YP_TOKEN_SEMICOLON,                 // ;
+  YP_TOKEN_SLASH,                     // /
+  YP_TOKEN_SLASH_EQUAL,               // /=
+  YP_TOKEN_STAR,                      // *
+  YP_TOKEN_STAR_EQUAL,                // *=
+  YP_TOKEN_STAR_STAR,                 // **
+  YP_TOKEN_STAR_STAR_EQUAL,           // **=
+  YP_TOKEN_STRING_BEGIN,              // the beginning of a string
+  YP_TOKEN_STRING_CONTENT,            // the contents of a string
+  YP_TOKEN_STRING_END,                // the end of a string
+  YP_TOKEN_SYMBOL_BEGIN,              // the beginning of a symbol
+  YP_TOKEN_TILDE,                     // ~
+  YP_TOKEN_TILDE_AT,                  // ~@
+  YP_TOKEN_WORDS_SEP,                 // a separator between words in a list
+} yp_token_type_t;
+
+// This struct represents a token in the Ruby source. We use it to track both
+// type and location information.
+typedef struct {
+  yp_token_type_t type; // the kind of token that was lexed
+  const char *start;    // pointer into the source where the token begins
+  const char *end;      // pointer into the source just past the token's end
+} yp_token_t;
+
+// When lexing Ruby source, the lexer has a small amount of state to tell which
+// kind of token it is currently lexing. For example, when we find the start of
+// a string, the first token that we return is a YP_TOKEN_STRING_BEGIN token.
+// After that the lexer is in the YP_LEX_STRING mode, and will return tokens
+// that are found as part of a string.
+typedef struct yp_lex_mode {
+  enum {
+    // This is the initial state, used when any given token is being lexed.
+    YP_LEX_DEFAULT,
+
+    // This state is used when we're lexing an embdoc (a =begin..=end comment).
+    YP_LEX_EMBDOC,
+
+    // This state is used when we're lexing as normal but inside an embedded
+    // expression of a string.
+    YP_LEX_EMBEXPR,
+
+    // This state is used when we are lexing a list of tokens, as in a %w word
+    // list literal or a %i symbol list literal.
+    YP_LEX_LIST,
+
+    // This state is used when a regular expression has been begun and we are
+    // looking for the terminator.
+    YP_LEX_REGEXP,
+
+    // This state is used when we are lexing a string or a string-like token, as
+    // in string content with either quote or an xstring.
+    YP_LEX_STRING,
+
+    // This state is used when a symbol has already been begun (e.g., by a
+    // colon) and we still need to lex the rest of the symbol.
+    YP_LEX_SYMBOL,
+  } mode;
+
+  // This is the terminator of the current state. It is used when lexing a
+  // regular expression, a string (single or double quoted), or an xstring.
+  char term;
+
+  // Whether or not interpolation is allowed in this lex state. This corresponds
+  // to some LEX_LIST states (e.g., %W) and LEX_STRING states (e.g., double
+  // quotes).
+  bool interp;
+
+  // The previous lex state so that it knows how to pop.
+  struct yp_lex_mode *prev;
+} yp_lex_mode_t;
+
+// We pre-allocate this many lex states so that parsing rarely needs to call
+// malloc. Deep nesting only really happens when doing string interpolation.
+// NOTE(review): confirm push_lex_mode handles nesting deeper than this.
+#define YP_LEX_STACK_SIZE 4
+
+// A forward declaration since our error handler struct accepts a parser for
+// each of its function calls.
+typedef struct yp_parser yp_parser_t;
+
+// This struct is the extension point for error recovery: a default
+// implementation is provided, but folks can supply their own. The lexer calls
+// the matching function when the source ends before a list, regexp, or string
+// (presumably also an embdoc) has been terminated.
+//
+// Each function receives the parser and is expected to set the parser's state
+// to whatever it should be to recover. If it can't, it should return
+// YP_TOKEN_INVALID. NOTE(review): the default returns YP_TOKEN_EOF — confirm.
+typedef struct {
+  yp_token_type_t (*unterminated_embdoc)(yp_parser_t *parser);
+  yp_token_type_t (*unterminated_list)(yp_parser_t *parser);
+  yp_token_type_t (*unterminated_regexp)(yp_parser_t *parser);
+  yp_token_type_t (*unterminated_string)(yp_parser_t *parser);
+} yp_error_handler_t;
+
+// This struct represents the overall parser. It contains a reference to the
+// source file, as well as pointers that indicate where in the source it's
+// currently parsing. It also contains the most recent and current token that
+// it's considering.
+struct yp_parser {
+  struct {
+    yp_lex_mode_t *current; // the current state of the lexer
+    yp_lex_mode_t stack[YP_LEX_STACK_SIZE]; // the stack of lexer states
+    size_t index;            // the current index into the lexer state stack
+  } lex_modes;
+
+  const char *start;         // the pointer to the start of the source
+  const char *end;           // the pointer to one past the end of the source
+  yp_token_t previous;       // the previous token we were considering
+  yp_token_t current;        // the current token we're considering
+  int lineno;                // the current line number (starts at 1)
+
+  yp_error_handler_t *error_handler; // the callbacks used for error recovery
+};
+
+// Initialize a parser to lex the `size` bytes of source starting at `source`.
+void
+yp_parser_init(yp_parser_t *parser, const char *source, off_t size, yp_error_handler_t *error_handler);
+
+// Copy the current token into previous, then lex the next token into current.
+void
+yp_lex_token(yp_parser_t *parser);
+
+#endif
diff --git a/lib/yarp.rb b/lib/yarp.rb
new file mode 100644
index 000000000..35220bd92
--- /dev/null
+++ b/lib/yarp.rb
@@ -0,0 +1,201 @@
+# frozen_string_literal: true
+
+require "ripper"
+
+require_relative "yarp/yarp"
+require_relative "yarp/version"
+
+module YARP
+  # This lexes with the Ripper lex. It drops any space events and normalizes all
+  # ignored newlines into regular newlines.
+  def self.ripper_lex(filepath)
+    Ripper.lex(File.read(filepath)).each_with_object([]) do |token, tokens|
+      case token[1]
+      when :on_ignored_nl
+        tokens << [token[0], :on_nl, token[2], token[3]]
+      when :on_sp
+        # skip
+      else
+        tokens << token
+      end
+    end
+  end
+
+  # Returns an array of tokens that closely resembles that of the Ripper lexer.
+  # The only difference is that since we don't keep track of lexer state in the
+  # same way, it's going to always return the NONE state.
+  def self.compat_lex(filepath)
+    offsets = [0]
+    File.foreach(filepath) { |line| offsets << offsets.last + line.bytesize }
+
+    lexer_state = Ripper::Lexer::State.new(0)
+    tokens = []
+
+    each_token(filepath) do |((start_char, _), type, value)|
+      line_number, line_offset =
+        offsets.each_with_index.detect do |(offset, line)|
+          break [line, offsets[line - 1]] if start_char < offset
+        end
+
+      line_number ||= offsets.length + 1
+      line_offset ||= offsets.last
+
+      line_byte = start_char - line_offset
+      event = RIPPER.fetch(type)
+
+      unescaped =
+        if %i[on_comment on_tstring_content].include?(event) && value.include?("\\")
+          # Ripper unescapes string content and comments, so we need to do the
+          # same here.
+          value.force_encoding("UTF-8").unicode_normalize
+        else
+          value
+        end
+
+      tokens << [[line_number, line_byte], event, unescaped, lexer_state]
+    end
+
+    tokens
+  end
+
+  RIPPER = {
+    AMPERSAND: :on_op,
+    AMPERSAND_AMPERSAND: :on_op,
+    AMPERSAND_AMPERSAND_EQUAL: :on_op,
+    AMPERSAND_EQUAL: :on_op,
+    BACK_REFERENCE: :on_backref,
+    BACKTICK: :on_backtick,
+    BANG: :on_op,
+    BANG_AT: :on_op,
+    BANG_EQUAL: :on_op,
+    BANG_TILDE: :on_op,
+    BRACE_LEFT: :on_lbrace,
+    BRACE_RIGHT: :on_rbrace,
+    BRACKET_LEFT: :on_lbracket,
+    BRACKET_LEFT_RIGHT: :on_op,
+    BRACKET_RIGHT: :on_rbracket,
+    CARET: :on_op,
+    CARET_EQUAL: :on_op,
+    CHARACTER_LITERAL: :on_CHAR,
+    CLASS_VARIABLE: :on_cvar,
+    COLON: :on_op,
+    COLON_COLON: :on_op,
+    COMMA: :on_comma,
+    COMMENT: :on_comment,
+    CONSTANT: :on_const,
+    DOT: :on_period,
+    DOT_DOT: :on_op,
+    DOT_DOT_DOT: :on_op,
+    EMBDOC_BEGIN: :on_embdoc_beg,
+    EMBDOC_END: :on_embdoc_end,
+    EMBDOC_LINE: :on_embdoc,
+    EMBEXPR_BEGIN: :on_embexpr_beg,
+    EMBEXPR_END: :on_embexpr_end,
+    EQUAL: :on_op,
+    EQUAL_EQUAL: :on_op,
+    EQUAL_EQUAL_EQUAL: :on_op,
+    EQUAL_GREATER: :on_op,
+    EQUAL_TILDE: :on_op,
+    FLOAT: :on_float,
+    GREATER: :on_op,
+    GREATER_EQUAL: :on_op,
+    GREATER_GREATER: :on_op,
+    GREATER_GREATER_EQUAL: :on_op,
+    GLOBAL_VARIABLE: :on_gvar,
+    IDENTIFIER: :on_ident,
+    IMAGINARY_NUMBER: :on_imaginary,
+    INTEGER: :on_int,
+    INSTANCE_VARIABLE: :on_ivar,
+    INVALID: :INVALID,
+    KEYWORD___ENCODING__: :on_kw,
+    KEYWORD___LINE__: :on_kw,
+    KEYWORD___FILE__: :on_kw,
+    KEYWORD_ALIAS: :on_kw,
+    KEYWORD_AND: :on_kw,
+    KEYWORD_BEGIN: :on_kw,
+    KEYWORD_BEGIN_UPCASE: :on_kw,
+    KEYWORD_BREAK: :on_kw,
+    KEYWORD_CASE: :on_kw,
+    KEYWORD_CLASS: :on_kw,
+    KEYWORD_DEF: :on_kw,
+    KEYWORD_DEFINED: :on_kw,
+    KEYWORD_DO: :on_kw,
+    KEYWORD_ELSE: :on_kw,
+    KEYWORD_ELSIF: :on_kw,
+    KEYWORD_END: :on_kw,
+    KEYWORD_END_UPCASE: :on_kw,
+    KEYWORD_ENSURE: :on_kw,
+    KEYWORD_FALSE: :on_kw,
+    KEYWORD_FOR: :on_kw,
+    KEYWORD_IF: :on_kw,
+    KEYWORD_IN: :on_kw,
+    KEYWORD_MODULE: :on_kw,
+    KEYWORD_NEXT: :on_kw,
+    KEYWORD_NIL: :on_kw,
+    KEYWORD_NOT: :on_kw,
+    KEYWORD_OR: :on_kw,
+    KEYWORD_REDO: :on_kw,
+    KEYWORD_RESCUE: :on_kw,
+    KEYWORD_RETRY: :on_kw,
+    KEYWORD_RETURN: :on_kw,
+    KEYWORD_SELF: :on_kw,
+    KEYWORD_SUPER: :on_kw,
+    KEYWORD_THEN: :on_kw,
+    KEYWORD_TRUE: :on_kw,
+    KEYWORD_UNDEF: :on_kw,
+    KEYWORD_UNLESS: :on_kw,
+    KEYWORD_UNTIL: :on_kw,
+    KEYWORD_WHEN: :on_kw,
+    KEYWORD_WHILE: :on_kw,
+    KEYWORD_YIELD: :on_kw,
+    LABEL: :on_label,
+    LAMBDA_BEGIN: :on_tlambeg,
+    LESS: :on_op,
+    LESS_EQUAL: :on_op,
+    LESS_EQUAL_GREATER: :on_op,
+    LESS_LESS: :on_op,
+    LESS_LESS_EQUAL: :on_op,
+    MINUS: :on_op,
+    MINUS_AT: :on_op,
+    MINUS_EQUAL: :on_op,
+    MINUS_GREATER: :on_tlambda,
+    NEWLINE: :on_nl,
+    NTH_REFERENCE: :on_backref,
+    PARENTHESIS_LEFT: :on_lparen,
+    PARENTHESIS_RIGHT: :on_rparen,
+    PERCENT: :on_op,
+    PERCENT_EQUAL: :on_op,
+    PERCENT_LOWER_I: :on_qsymbols_beg,
+    PERCENT_LOWER_W: :on_qwords_beg,
+    PERCENT_LOWER_X: :on_backtick,
+    PERCENT_UPPER_I: :on_symbols_beg,
+    PERCENT_UPPER_W: :on_words_beg,
+    PIPE: :on_op,
+    PIPE_EQUAL: :on_op,
+    PIPE_PIPE: :on_op,
+    PIPE_PIPE_EQUAL: :on_op,
+    PLUS: :on_op,
+    PLUS_AT: :on_op,
+    PLUS_EQUAL: :on_op,
+    QUESTION_MARK: :on_op,
+    RATIONAL_NUMBER: :on_rational,
+    REGEXP_BEGIN: :on_regexp_beg,
+    REGEXP_END: :on_regexp_end,
+    SEMICOLON: :on_semicolon,
+    SLASH: :on_op,
+    SLASH_EQUAL: :on_op,
+    STAR: :on_op,
+    STAR_EQUAL: :on_op,
+    STAR_STAR: :on_op,
+    STAR_STAR_EQUAL: :on_op,
+    STRING_BEGIN: :on_tstring_beg,
+    STRING_CONTENT: :on_tstring_content,
+    STRING_END: :on_tstring_end,
+    SYMBOL_BEGIN: :on_symbeg,
+    TILDE: :on_op,
+    TILDE_AT: :on_op,
+    WORDS_SEP: :on_words_sep,
+  }.freeze
+
+  private_constant :RIPPER
+end
diff --git a/lib/yarp/version.rb b/lib/yarp/version.rb
new file mode 100644
index 000000000..a1bdd04ae
--- /dev/null
+++ b/lib/yarp/version.rb
@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+
+module YARP
+  VERSION = "0.1.0" # the version of the gem, read by yarp.gemspec
+end
diff --git a/test/fixtures/lex.rb b/test/fixtures/lex.rb
new file mode 100644
index 000000000..a1b0b38df
--- /dev/null
+++ b/test/fixtures/lex.rb
@@ -0,0 +1,183 @@
+# This file isn't actually valid Ruby. It's used to exercise the lexer.
+
+&
+&&
+&&=
+&=
+!
+
+# If the lexer is in a state where it can accept a method name (either by
+# defining a method or by calling a method), it will accept a !@.
+def !@() end
+foo.!@
+
+!=
+!~
+{}
+[]
+^
+^=
+?a
+@@abc
+,
+..
+...
+
+=begin
+embdoc
+content
+=end
+
+"#{abc}"
+
+=
+==
+===
+100 => 100
+=~
+>
+>=
+>>
+>>=
+$~
+$*
+$$
+$?
+$!
+$@
+$/
+$\
+$;
+$,
+$.
+$=
+$:
+$<
+$>
+$"
+abc
+1i
+1ri
+0
+0d100
+0d100_100
+0D100
+0D100_100
+0b100
+0b100_100
+0B100
+0B100_100
+0o100
+0o100_100
+0O100
+0O100_100
+0100
+0100_100
+0x100
+0x100_100
+0X100
+0X100_100
+@abc
+__ENCODING__
+__LINE__
+__FILE__
+alias
+and
+begin
+BEGIN
+break
+case
+class
+def
+defined?
+do
+else
+elsif
+end
+END
+ensure
+false
+for
+if
+in
+module
+next
+nil
+not
+or
+redo
+rescue
+retry
+return
+self
+super
+then
+true
+undef
+unless
+until
+when
+while
+yield
+{ label: abc }
+<
+<=
+<=>
+<<
+<<=
+-
+
+# If the lexer is in a state where it can accept a method name (either by
+# defining a method or by calling a method), it will accept a -@.
+def -@() end
+abc.-@
+
+-=
+()
+100 % 100
+100 %= 100
+%i[abc def   ghi]
+%w[abc def   ghi]
+%I[abc def   ghi]
+%W[abc def   ghi]
+|
+|=
+100 || 100
+||=
++
++=
+
+# If the lexer is in a state where it can accept a method name (either by
+# defining a method or by calling a method), it will accept a +@.
+def +@() end
+abc.+@
+
+?
+1r
+%r{abc}
+;
+100 / 100
+100 /= 100
+*
+*=
+**
+**=
+
+# Lexing strings involves a whole state change. It ends up being at minimum
+# three tokens: the beginning, the content, and then end. It gets more
+# complicated if you have interpolation.
+"abc"
+%q[abc]
+%Q[abc]
+
+:abc
+:ABC
+
+~
+
+# If the lexer is in a state where it can accept a method name (either by
+# defining a method or by calling a method), it will accept a ~@.
+def ~@() end
+abc.~@
+
+`abc`
diff --git a/test/lex_test.rb b/test/lex_test.rb
new file mode 100644
index 000000000..d149b1d1a
--- /dev/null
+++ b/test/lex_test.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class LexTest < Test::Unit::TestCase
+  test "lex ext/yarp/extconf.rb" do
+    assert_lex File.expand_path("../ext/yarp/extconf.rb", __dir__)
+  end
+
+  test "lex test/fixtures/lex.rb" do
+    assert_lex File.expand_path("fixtures/lex.rb", __dir__)
+  end
+
+  test "lex test/test_helper.rb" do
+    assert_lex File.expand_path("test_helper.rb", __dir__)
+  end
+
+  test "lex test/yarp_test.rb" do
+    assert_lex __FILE__
+  end
+
+  test "lex yarp.gemspec" do
+    assert_lex File.expand_path("../yarp.gemspec", __dir__)
+  end
+
+  private
+
+  def assert_lex(filepath)
+    YARP.ripper_lex(filepath).zip(YARP.compat_lex(filepath)).each do |(ripper, yarp)|
+      assert_equal ripper[0...-1], yarp[0...-1]
+    end
+  end
+end
diff --git a/test/test_helper.rb b/test/test_helper.rb
new file mode 100644
index 000000000..a178814fb
--- /dev/null
+++ b/test/test_helper.rb
@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift File.expand_path("../lib", __dir__)
+require "yarp"
+
+require "test-unit"
diff --git a/vendor/spec b/vendor/spec
new file mode 160000
index 000000000..b8a82400c
--- /dev/null
+++ b/vendor/spec
@@ -0,0 +1 @@
+Subproject commit b8a82400c04d8badf1f455a4d36809592d2328d7
diff --git a/yarp.gemspec b/yarp.gemspec
new file mode 100644
index 000000000..576a2c09f
--- /dev/null
+++ b/yarp.gemspec
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+require_relative "lib/yarp/version"
+
+Gem::Specification.new do |spec|
+  spec.name = "yarp"
+  spec.version = YARP::VERSION
+  spec.authors = ["Kevin Newton"]
+  spec.email = ["kddnewton@gmail.com"]
+
+  spec.summary = "Yet Another Ruby Parser"
+  spec.homepage = "https://github.com/ruby-syntax-tree/yarp"
+  spec.license = "MIT"
+  spec.required_ruby_version = ">= 2.6.0"
+
+  spec.files = Dir.chdir(File.expand_path(__dir__)) do
+    `git ls-files -z`.split("\x0").reject do |f|
+      (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
+    end
+  end
+
+  spec.bindir = "exe"
+  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+  spec.extensions    = ["ext/yarp/extconf.rb"]
+
+  spec.add_development_dependency "bundler", "~> 2"
+  spec.add_development_dependency "minitest", "~> 5"
+  spec.add_development_dependency "rake", "~> 13"
+  spec.add_development_dependency "rake-compiler", "~> 1"
+  spec.add_development_dependency "test-unit", "~> 3"
+end