Zeta is an HTTP access log parser that provides a formatting syntax for parsing data.
Requires Erlang/OTP R14 or newer.
Clone the repository and run make, or add the following to your rebar configuration file:
{deps, [ ... {zeta, ".*", {git, "git://github.com/s1n4/zeta.git", "master"}} ]}.
The formatting syntax is similar to the Apache web server's access log format.
The syntax Zeta provides for parsing the Combined Log Format (the Common Log Format plus the referer and user-agent headers) looks like this:
~h ~l ~u ~t "~r" ~s ~B "~{referer}" "~{user-agent}"
~h => host
~l => hyphen (which is skipped when parsing)
~u => user
~t => date, time and timezone (e.g. [27/May/2014:19:28:31 +0200])
~r => request line (e.g. GET /resource HTTP/1.1)
~s => status code
~b => content-length in bytes or “-”
~B => content-length in bytes
~{...} => header name
Parsing one line of data:
> Fmt = "~h ~l ~u ~t \"~r\" ~s ~B \"~{referer}\" \"~{user-agent}\"",
> Data = "127.0.0.1 - - [22/Jun/2014:19:28:31 -0200] \"GET / HTTP/1.1\" 200 5 \"-\" \"curl/7.33.0\"",
> zeta:parse(Fmt, Data).
{ok, [{host, <<"127.0.0.1">>},
{user, <<"-">>},
{datetime, <<"22/Jun/2014:19:28:31">>},
{timezone, <<"-0200">>},
{method, <<"GET">>},
{url, <<"/">>},
{version, <<"HTTP/1.1">>},
{status, <<"200">>},
{content_length, <<"5">>},
{<<"referer">>, <<"-">>},
{<<"user-agent">>, <<"curl/7.33.0">>}]}
> zeta:parse(Fmt, Data, auto).
{ok, [{host, {127, 0, 0, 1}},
{user, <<"-">>},
{datetime, {{2014, 6, 22}, {19, 28, 31}}},
{timezone, <<"-0200">>},
{method, <<"GET">>},
{url, <<"/">>},
{version, <<"HTTP/1.1">>},
{status, 200},
{content_length, 5},
{<<"referer">>, <<"-">>},
{<<"user-agent">>, <<"curl/7.33.0">>}]}
Parsing a file in streaming fashion:
> Fmt = "~h ~l ~u ~t \"~r\" ~s ~B \"~{referer}\" \"~{user-agent}\"",
> {ok, Line1, F1} = zeta:parse_file("access.log", Fmt).
> {ok, Line2, F2} = F1().
> ...
> eof = FX().
Parses log data according to a format.
parse(Fmt, Data) -> parse(Fmt, Data, binary)
parse(Fmt, Data, TypeOrFun) -> {ok, LogData} | {error, any()}
Fmt = Data = list() | binary()
TypeOrFun = binary | list | auto | fun((Key = atom(), Value = binary()) -> Value1)
KV = {host, binary() | list() | term()}
| {user, binary() | list() | term()}
| {datetime, binary() | list() | calendar:datetime() | term()}
| {timezone, binary() | list() | term()}
| {method, binary() | list() | term()}
| {url, binary() | list() | term()}
| {version, binary() | list() | term()}
| {status, binary() | list() | integer() | term()}
| {content_length, binary() | list() | integer() | term()}
| {HeaderName = binary() | list() | term(), HeaderValue = binary() | list() | term()}
LogData = [KV]
Passing binary or list as the third argument will convert every value to a binary or a list, respectively.
By passing auto as the third argument, Zeta will convert host to inet:ip_address(), datetime to calendar:datetime(), and status/content_length to integers; it will return everything else as is (a binary).
By passing a fun as the third argument, the fun will be called with each key and value, and the value it returns will be used in the LogData.
The following example illustrates the use of such a fun.
F = fun(user, _) -> nil;
(host, X) -> X;
(header_name, X) -> list_to_atom(binary_to_list(X)); %% but don't do this
(_, X) -> X
end,
parse(Fmt, Data, F).
Parses a file in streaming fashion.
parse_file(Filename) ->
parse_file(Filename, <<"~h ~l ~u ~t \"~r\" ~s ~B \"~{referer}\" \"~{user-agent}\"">>)
parse_file(Filename, Fmt) ->
parse_file(Filename, Fmt, binary)
parse_file(Filename, Fmt, TypeOrFun) ->
{ok, LogData, ParserFun} | {error, any(), ParserFun} | {error, any()}
Filename = file:name_all()
Fmt = list() | binary()
TypeOrFun = binary | list | auto | fun((Key = atom(), Value = binary()) -> Value1)
LogData = [KV] (as defined for parse/3 above)
ParserFun = fun(() -> {ok, LogData, ParserFun} | {error, any(), ParserFun} | eof)
Parses the first line of the file and produces an anonymous function that parses the next line. Each call to such a function in turn produces another function that can be called to parse the following line of the file, until eof is returned.
Sina Samavati (@sinasamavati)
MIT, see LICENSE file for more details.