Skip to content
Browse files

First commit of UBF update, only the encoder/decoder so far.

  • Loading branch information...
0 parents commit e9181d5a4c0b616e3cbc54d8687935c31aa21587 @komone committed Mar 29, 2009
Showing with 310 additions and 0 deletions.
  1. +7 −0 .gitignore
  2. +2 −0 Emakefile
  3. +15 −0 README
  4. +286 −0 src/ubf.erl
7 .gitignore
@@ -0,0 +1,7 @@
+*.dump
+*.beam
+*.out
+*.log
+Thumbs.db
+work/*
+.DS_Store
2 Emakefile
@@ -0,0 +1,2 @@
+% -*- mode: erlang -*-
+{"src/*", [{i, "include"}, {outdir, "ebin"}, debug_info, strict_record_tests]}.
15 README
@@ -0,0 +1,15 @@
+======
+README
+======
+
+UNIVERSAL BINARY FORMAT
+
+This is the original UBF(A) encoder/decoder by Joe Armstrong. All I have done
+is to update it to remove compile time warnings generated by the later versions
+of Erlang/OTP (Currently R13A).
+
+SEE: http://www.sics.se/~joe/ubf/site/home.html
+
+Updating and organizing the remainder of these files is a work in progress.
+
+/sd
286 src/ubf.erl
@@ -0,0 +1,286 @@
+%% This is the original UBF(A) encoder/decoder by
+%% Joe Armstrong. All I have done is to update it
+%% to remove compile time warnings.
+%% SEE: http://www.sics.se/~joe/ubf/site/home.html
+
+-module(ubf).
+-vsn("1.12").
+-author("Joe Armstrong").
+-updated_by("Steve Davis").
+
+%-compile(export_all).
+
+-export([decode/1, decode/2, encode/1, encode/2]).
+-export([encode_print/1, ubf2term/1, deabstract/1]).
+
+-import(lists, [foldl/3, reverse/1, map/2, seq/2, sort/1]).
+
+bug() ->
+ C = decode("{'abc"),
+ decode("d'}$", C).
+
+%% Decoding rules
+%% {'#S', String} -> String
+%% Int -> Int
+%% [ ] -> List
+%% {...} -> Tuple
+
+%% decode_init() -> Cont
+%% decode(Str, Cont) -> {more, Cont'} | {done, Term, Str}
+%% encode(Str) -> Bytes
+
+%% intro() -> single line terminated with \n
+
+ubf2term(Str) ->
+ {done, Term, _} = decode(Str),
+ Term.
+
+decode_init() ->
+ { more, fun(I) -> ubf:decode(I, [[]], dict:new()) end }.
+
+decode(Str) ->
+ %% io:format("decode:~s~n",[Str]),
+ decode(Str, decode_init()). %get_init()).
+
+decode(S, {more, Fun}) ->
+ %% io:format("decode:~s~n",[S]),
+ Fun(S).
+
+decode1(S, Stack, D) ->
+ %% io:format("AAAHere:|~s| Stack=~p~n", [S,Stack]),
+ decode(S, Stack, D).
+
+decode([$'|T], Stack, Dict) ->
+ get_stuff(T, $', [], Stack, Dict);
+decode([$~|T], [[Int|Stack]|S1], Dict) when is_integer(Int), Int >= 0 ->
+ collect_binary(Int, T, [], [Stack|S1], Dict);
+decode([$~|_T], _Stack, _Dict) ->
+ exit(tilde);
+decode([$%|T], Stack, Dict) ->
+ get_stuff(T, $%, [], Stack, Dict);
+decode([$"|T], Stack, Dict) ->
+ get_stuff(T, $", [], Stack, Dict);
+decode([$`|T], Stack, Dict) ->
+ get_stuff(T, $`, [], Stack, Dict);
+decode([$-|T], Stack, Dict) ->
+ collect_int(T, 0, '-', Stack, Dict);
+decode([H|T], Stack, Dict) when $0 =< H, H =< $9 ->
+ collect_int(T, H-$0, '+', Stack, Dict);
+decode([${|T], Stack, Dict) ->
+ decode1(T, [[]|Stack], Dict);
+decode([$}|T], [H|Stack], Dict) ->
+ decode1(T, push(list_to_tuple(reverse(H)),Stack), Dict);
+decode([$&|T], [ [H1,H2|T1] | Stack], Dict) ->
+ decode1(T, [[[H1|H2]|T1]|Stack], Dict);
+decode([$#|T], Stack, Dict) ->
+ decode1(T, push([], Stack), Dict);
+decode([$$|T], [[X]], _Dict) ->
+ {done, X, T};
+decode([$>,Key|T], [[Val|R]|Stack], Dict) ->
+ decode1(T, [R|Stack], dict:store(Key,Val,Dict));
+decode([H|T], Stack, Dict) ->
+ case special(H) of
+ true ->
+ decode1(T, Stack, Dict);
+ false ->
+ decode1(T, push(dict:fetch(H, Dict), Stack), Dict)
+ end;
+decode([], Stack, Dict) ->
+ {more, fun(I) -> decode1(I, Stack, Dict) end};
+decode(X, Stack, Dict) ->
+ io:format("OOOOO:~p ~p ~p~n",[X, Stack, Dict]),
+ exit(aaaaaa).
+
+get_stuff([$\\,H|T], Stop, L, Stack, Dict) ->
+ get_stuff(T, Stop, [H|L], Stack, Dict);
+get_stuff([$'|T], $', L, Stack, Dict) ->
+ decode1(T, push(list_to_atom(reverse(L)),Stack), Dict);
+get_stuff([$"|T], $", L, Stack, Dict) ->
+ decode1(T, push({'#S',reverse(L)},Stack), Dict);
+get_stuff([$`|T], $`, L, [Top|Stack], Dict) ->
+ decode1(T, push({'$TYPE', reverse(L), Top},Stack), Dict);
+get_stuff([$%|T], $%, _L, Stack, Dict) ->
+ decode1(T, Stack, Dict);
+get_stuff([H|T], Stop, L, Stack, Dict) ->
+ get_stuff(T, Stop, [H|L], Stack, Dict);
+get_stuff([], Stop, L, Stack, Dict) ->
+ {more, fun(I) ->
+ get_stuff(I, Stop, L, Stack, Dict) end}.
+
+collect_binary(0, T, L, Stack, Dict) ->
+ expect_tilde(T, push(list_to_binary(L),Stack), Dict);
+collect_binary(N, [H|T], L, Stack, Dict) ->
+ collect_binary(N-1, T, [H|L], Stack, Dict);
+collect_binary(N, [], L, Stack, Dict) ->
+ {more, fun(I) -> collect_binary(N, I, L, Stack, Dict) end}.
+
+expect_tilde([$~|T], Stack, Dict) ->
+ decode(T, Stack, Dict);
+expect_tilde([], Stack, Dict) ->
+ {more, fun(I) -> expect_tilde(I, Stack, Dict) end};
+expect_tilde([H|_], _, _) ->
+ exit({expect_tilde, H}).
+
+push(X, [Top|Rest]) ->
+ [[X|Top]|Rest];
+push(X, Y) ->
+ io:format("** bad push:~p ~p~n",[X,Y]).
+
+special($ ) -> true;
+special(${) -> true;
+special($}) -> true;
+special($,) -> true;
+special($#) -> true;
+special($&) -> true;
+special($%) -> true;
+special($>) -> true;
+special($\n) -> true;
+special($\r) -> true;
+special($\t) -> true;
+special($$) -> true;
+special($") -> true;
+special($') -> true;
+special($~) -> true;
+special(_) -> false.
+
+special_chars() ->
+ " 0123456789{},~%#>\n\r\s\t\"'-&$".
+
+collect_int([H|T], N, Sign, Stack, Dict) when $0 =< H, H =< $9 ->
+ collect_int(T, N*10 + H - $0, Sign, Stack, Dict);
+collect_int([], N, Sign, Stack, Dict) ->
+ {more, fun(I) -> collect_int(I, N, Sign, Stack, Dict) end};
+collect_int(T, N, '+', Stack, Dict) ->
+ decode1(T, push(N, Stack), Dict);
+collect_int(T, N, '-', Stack, Dict) ->
+ decode1(T, push(-N, Stack), Dict).
+
+%%---------------------------------------------------------------------
+
+
+encode_print(X) ->
+ io:format("~s~n",[encode(X)]).
+
+
+encode(X) ->
+ element(1, encode(X, dict:new())).
+
+
+encode(X, Dict0) ->
+ {Dict1, L1} = initial_dict(X, Dict0),
+ case (catch do_encode(X, Dict1)) of
+ {'EXIT', What} ->
+ io:format("What=~p~n",[What]),
+ exit(encode);
+ L ->
+ {flatten([L1, L,$$]), Dict1}
+ end.
+
+initial_dict(X, Dict0) ->
+ Free = seq(32,255) -- special_chars(),
+ Most = analyse(X),
+ %% io:format("Analysis:~p~n",[Most]),
+ load_dict(Most, Free, Dict0, []).
+
+load_dict([{N,X}|T], [Key|T1], Dict0, L) when N > 0->
+ load_dict(T, T1, dict:store(X, Key, Dict0),
+ [encode_obj(X),">",Key|L]);
+load_dict(_, _, Dict, L) ->
+ {Dict, L}.
+
+analyse(T) ->
+ KV = dict:to_list(analyse(T, dict:new())),
+ %% The Range is the Number of things times its size
+ %% If the size is greater than 0
+ KV1 = map(fun rank/1, KV),
+ reverse(sort(KV1)).
+
+rank({X, K}) when is_atom(X) ->
+ case length(atom_to_list(X)) of
+ N when N > 1, K > 1 ->
+ {(N-1) * K, X};
+ _ ->
+ {0, X}
+ end;
+rank({X, K}) when is_integer(X) ->
+ case length(integer_to_list(X)) of
+ N when N > 1, K > 1 ->
+ {(N-1) * K, X};
+ _ ->
+ {0, X}
+ end;
+rank({X, _}) ->
+ {0, X}.
+
+analyse({'#S', Str}, Dict) ->
+ analyse(Str, Dict);
+analyse(T, Dict) when is_tuple(T) ->
+ foldl(fun analyse/2, Dict, tuple_to_list(T));
+analyse(X, Dict) ->
+ case dict:find(X, Dict) of
+ {ok, Val} ->
+ dict:store(X, Val+1, Dict);
+ error ->
+ dict:store(X, 1, Dict)
+ end.
+
+flatten(L) ->
+ binary_to_list(list_to_binary(L)).
+
+encode_obj(X) when is_atom(X) -> encode_atom(X);
+encode_obj(X) when is_integer(X) -> integer_to_list(X);
+encode_obj(X) when is_binary(X) -> encode_binary(X).
+
+encode_string(S) -> [$",add_string(S, $"), $"].
+encode_atom(X) -> [$',add_string(atom_to_list(X), $'), $'].
+encode_binary(X) -> [integer_to_list(size(X)), $~,X,$~].
+
+do_encode(X, Dict) when is_atom(X); is_integer(X); is_binary(X) ->
+ case dict:find(X, Dict) of
+ {ok, Y} ->
+ Y;
+ error ->
+ encode_obj(X)
+ end;
+do_encode({'#S', Str}, _Dict) ->
+ %% This *is* a string
+ encode_string(Str);
+do_encode([H|T], Dict) ->
+ S1 = do_encode(T, Dict),
+ S2 = do_encode(H, Dict),
+ [S1,S2,$&];
+do_encode(T, Dict) when is_tuple(T) ->
+ S1 = encode_tuple(1, T, Dict),
+ [${,S1,$}];
+do_encode([], _Dict) ->
+ $#.
+
+encode_list([H|T], Dict, L) ->
+ encode_list(T, Dict, [$&,do_encode(H, Dict)|L]);
+encode_list([], _Dict, L) ->
+ reverse(L).
+
+encode_tuple(N, T, _Dict) when N > size(T) ->
+ "";
+encode_tuple(N, T, Dict) ->
+ S1 = do_encode(element(N, T), Dict),
+ S2 = encode_tuple(N+1, T, Dict),
+ [S1,possible_comma(N, T),S2].
+
+possible_comma(N, T) when N < size(T) -> $,;
+possible_comma(_, _) -> [].
+
+%% The ascii printables are in the range 32..126 includive
+
+add_string([$\\|T], Quote) -> [$\\,$\\|add_string(T, Quote)];
+add_string([Quote|T], Quote) -> [$\\,Quote|add_string(T, Quote)];
+add_string([H|T], Quote) when H >= 0, H=< 255 -> [H|add_string(T, Quote)];
+add_string([H|_], _Quote) -> exit({string_character,H});
+add_string([], _) -> [].
+
+deabstract({'#S',S}) -> S;
+deabstract(T) when is_tuple(T) ->
+ list_to_tuple(map(fun deabstract/1, tuple_to_list(T)));
+deabstract([H|T]) -> [deabstract(H)|deabstract(T)];
+deabstract(T) -> T.
+

0 comments on commit e9181d5

Please sign in to comment.
Something went wrong with that request. Please try again.