forked from elixir-lang/elixir
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3f4455a
commit f2ea47f
Showing
2 changed files
with
173 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
defmodule EEx.Tokenizer do
  @moduledoc """
  A tokenizer for EEx templates given as charlists.
  """

  @doc """
  Tokenizes the given charlist. It returns a list of four kinds of tokens:

    * `{ :text, contents }`
    * `{ :expr, marker, contents }`
    * `{ :start_expr, marker, contents }`
    * `{ :end_expr, marker, contents }`

  `marker` is the charlist following `<%` (currently only `~c"="` or empty)
  and `contents` is the charlist between the delimiters.
  """
  def tokenize(list) do
    # Tokens are accumulated in reverse; flip them once at the end.
    Enum.reverse(tokenize(list, [], []))
  end

  defp tokenize(~c"<%" ++ t, buffer, acc) do
    { marker, t } = retrieve_marker(t)
    # `expr` comes back reversed; `tip_expr_token_name` relies on that.
    { expr, rest } = tokenize_expr(t, [])

    token = tip_expr_token_name(expr)
    expr = Enum.reverse(expr)

    # If it is not a start or end token, it may still be a middle token
    # (a key-value block such as `elsif: foo`). The rebinding must happen
    # OUTSIDE the `if` — assignments inside `if` do not leak their scope.
    token = if token == :expr, do: middle_expr_token_name(expr), else: token

    acc = tokenize_text(buffer, acc)
    tokenize(rest, [], [{ token, marker, expr } | acc])
  end

  defp tokenize([h | t], buffer, acc) do
    # Plain text: accumulate characters (reversed) until the next `<%`.
    tokenize(t, [h | buffer], acc)
  end

  defp tokenize([], buffer, acc) do
    tokenize_text(buffer, acc)
  end

  # Retrieve the marker that immediately follows `<%` (only `=` for now).

  defp retrieve_marker(~c"=" ++ t) do
    { ~c"=", t }
  end

  defp retrieve_marker(t) do
    { ~c"", t }
  end

  # Collect an expression (in reverse order) until the closing `%>`.
  # NOTE(review): an unterminated `<%` has no `[]` clause and raises
  # FunctionClauseError — preserved from the original behavior.

  defp tokenize_expr(~c"%>" ++ t, buffer) do
    { buffer, t }
  end

  defp tokenize_expr([h | t], buffer) do
    tokenize_expr(t, [h | buffer])
  end

  # Receives the REVERSED expression contents and checks whether it is a
  # start or an end token. Start tokens finish with `do` or `->` (hence the
  # reversed prefixes `od` and `>-`) while end tokens contain only the word
  # `end`.

  defp tip_expr_token_name([h | t]) when h == ?\s or h == ?\t do
    # Skip trailing whitespace (leading here, since the input is reversed).
    tip_expr_token_name(t)
  end

  defp tip_expr_token_name(~c"od" ++ [h | _]) when h == ?\s or h == ?\t or h == ?) do
    :start_expr
  end

  defp tip_expr_token_name(~c">-" ++ [h | _]) when h == ?\s or h == ?\t or h == ?) do
    :start_expr
  end

  defp tip_expr_token_name(~c"dne" ++ t) do
    # `end` must be the whole expression, not just a suffix of it.
    if only_spaces?(t), do: :end_expr, else: :expr
  end

  defp tip_expr_token_name(_) do
    :expr
  end

  # Receives the (forward-order) expression contents and checks whether it
  # matches a key-value arg syntax, like `elsif: foo`.

  defp middle_expr_token_name([h | t]) when h == ?\s or h == ?\t do
    middle_expr_token_name(t)
  end

  defp middle_expr_token_name([h | t]) when h >= ?a and h <= ?z do
    if valid_key_identifier?(t), do: :middle_expr, else: :expr
  end

  defp middle_expr_token_name(_) do
    :expr
  end

  # A key identifier is a run of [a-zA-Z0-9] characters terminated by `:`.
  defp valid_key_identifier?([h | t])
       when h >= ?a and h <= ?z
       when h >= ?A and h <= ?Z
       when h >= ?0 and h <= ?9 do
    valid_key_identifier?(t)
  end

  defp valid_key_identifier?([?: | _]) do
    true
  end

  defp valid_key_identifier?(_) do
    false
  end

  defp only_spaces?([h | t]) when h == ?\s or h == ?\t, do: only_spaces?(t)
  defp only_spaces?(other), do: other == []

  # Turn the buffered (reversed) text into a `:text` token, prepending it
  # to the given accumulator. An empty buffer produces no token.

  defp tokenize_text([], acc) do
    acc
  end

  defp tokenize_text(buffer, acc) do
    [{ :text, Enum.reverse(buffer) } | acc]
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# `__DIR__` replaces the pre-1.0 `__FILE__` relative_to idiom.
Code.require_file "../../test_helper", __DIR__

defmodule EEx.TokenizerTest do
  use ExUnit.Case

  # `alias` replaces the pre-1.0 `require ... as:` aliasing form.
  alias EEx.Tokenizer, as: T

  test "simple strings" do
    assert T.tokenize(~c"foo") == [{ :text, ~c"foo" }]
  end

  test "strings with embedded code" do
    assert T.tokenize(~c"foo <% bar %>") ==
             [{ :text, ~c"foo " }, { :expr, [], ~c" bar " }]
  end

  test "strings with embedded equals code" do
    assert T.tokenize(~c"foo <%= bar %>") ==
             [{ :text, ~c"foo " }, { :expr, ~c"=", ~c" bar " }]
  end

  test "strings with embedded do end" do
    assert T.tokenize(~c"foo <% if true do %>bar<% end %>") == [
             { :text, ~c"foo " },
             { :start_expr, ~c"", ~c" if true do " },
             { :text, ~c"bar" },
             { :end_expr, ~c"", ~c" end " }
           ]
  end

  test "strings with embedded -> end" do
    assert T.tokenize(~c"foo <% if(true)-> %>bar<% end %>") == [
             { :text, ~c"foo " },
             { :start_expr, ~c"", ~c" if(true)-> " },
             { :text, ~c"bar" },
             { :end_expr, ~c"", ~c" end " }
           ]
  end

  test "strings with embedded key-value blocks" do
    assert T.tokenize(~c"foo <% if true do %>bar<% elsif: false %>baz<% end %>") == [
             { :text, ~c"foo " },
             { :start_expr, ~c"", ~c" if true do " },
             { :text, ~c"bar" },
             { :middle_expr, ~c"", ~c" elsif: false " },
             { :text, ~c"baz" },
             { :end_expr, ~c"", ~c" end " }
           ]
  end
end