Permalink
Browse files

* Replaces the first atom of the atom table (since that's the module …

…name).

* Replaces all occurences of the atom in the literal table (i.e. the
  constant pool).
  • Loading branch information...
1 parent 6d8c8cb commit 7655f35c0ca615174e7380863be12d5eb7deb215 @klajo committed Nov 8, 2009
Showing with 219 additions and 0 deletions.
  1. +171 −0 beam/src/beam_renamer.erl
  2. +48 −0 beam/test/beam_renamer_tests.erl
View
@@ -0,0 +1,171 @@
+-module(beam_renamer).
+
+-export([rename/2]).
+
+-define(beam_num_bytes_alignment, 4). %% according to spec below
+
+%% In order to load a module under a different name, the module name
+%% has to be changed within the beam file itself. The following code
+%% snippet does just that. It's based on a specification of the beam
+%% format (a fairly old one, from March 1 2000, but it seems there are
+%% not changes changes which affect the code below):
+%%
+%% http://www.erlang.se/~bjorn/beam_file_format.html
+%%
+%% BEWARE of modules which refer to themselves! This is where things
+%% start to become interesting... If ?MODULE is used in a function
+%% call, things should be ok (the module name is replaced in the
+%% function call). The same goes for a ?MODULE which stands on its
+%% own in a statement (like the sole return value). But if it's
+%% embedded for example within a tuple or list with only constant
+%% values, it's added to the constant pool which is a separate chunk
+%% within the beam file. The current code doesn't replace occurences
+%% within the constant pool. Although possible, I'll leave that for
+%% later. :-)
+%%
+%% The rename function does two things: It replaces the first atom of
+%% the atom table (since apparently that's where the module name is).
+%% Since the new name may be shorter or longer than the old name, one
+%% might have to adjust the length of the atom table chunk
+%% accordingly. Finally it updates the top-level form size, since the
+%% atom table chunk might have grown or shrunk.
+%%
+%% From the above beam format specification:
+%%
+%% This file format is based on EA IFF 85 - Standard for
+%% Interchange Format Files. This "standard" is not widely used;
+%% the only uses I know of is the IFF graphic file format for the
+%% Amiga and Blorb (a resource file format for Interactive Fiction
+%% games). Despite of this, I decided to use IFF instead of
+%% inventing my of own format, because IFF is almost right.
+%%
+%% The only thing that is not right is the even alignment of
+%% chunks. I use four-byte alignment instead. Because of this
+%% change, Beam files starts with 'FOR1' instead of 'FORM' to
+%% allow reader programs to distinguish "classic" IFF from "beam"
+%% IFF. The name 'FOR1' is included in the IFF document as a
+%% future way to extend IFF.
+%%
+%% In the description of the chunks that follow, the word
+%% mandatory means that the module cannot be loaded without it.
+%%
+%%
+%% FORM HEADER
+%%
+%% 4 bytes 'FOR1' Magic number indicating an IFF form. This is an
+%% extension to IFF indicating that all chunks are
+%% four-byte aligned.
+%% 4 bytes n Form length (file length - 8)
+%% 4 bytes 'BEAM' Form type
+%% n-8 bytes ... The chunks, concatenated.
+%%
+%%
+%% ATOM TABLE CHUNK
+%%
+%% The atom table chunk is mandatory. The first atom in the table must
+%% be the module name.
+%%
+%% 4 bytes 'Atom' chunk ID
+%% 4 bytes size total chunk length
+%% 4 bytes n number of atoms
+%% xx bytes ... Atoms. Each atom is a string preceeded
+%% by the length in a byte.
+%%
+%% The following section about the constant pool (literal table) was
+%% reverse engineered from the source (beam_lib etc), since it wasn't
+%% included in the beam format specification referred above.
+%%
+%% CONSTANT POOL/LITERAL TABLE CHUNK
+%%
+%% The literal table chunk is optional.
+%%
+%% 4 bytes 'LitT' chunk ID
+%% 4 bytes size total chunk length
+%% 4 bytes size size of uncompressed constants
+%% xx bytes ... zlib compressed constants
+%%
+%% Once uncompressed, the format of the constants are as follows:
+%%
+%% 4 bytes size unknown
+%% 4 bytes size size of first literal
+%% xx bytes ... term_to_binary encoded literal
+%% 4 bytes size size of next literal
+%% ...
+rename(BeamBin0, Name) ->
+ Name0 = get_module_name(BeamBin0),
+ NameBin = atom_to_binary(Name, latin1),
+ BeamBin1 = replace_in_atab(BeamBin0, NameBin),
+ BeamBin = replace_in_const_pool(BeamBin1, Name0, Name),
+ update_form_size(BeamBin).
+
+%% Replace the first atom of the atom table with the new name
+get_module_name(<<"Atom", _CnkSz:32, _NumAtoms:32,
+ NameSz:8, Name:NameSz/binary, _Rest/binary>>) ->
+ binary_to_atom(Name, latin1);
+get_module_name(<<_, Rest/binary>>) ->
+ get_module_name(Rest).
+
+%% Replace the first atom of the atom table with the new name
+replace_in_atab(<<"Atom", CnkSz0:32, Cnk:CnkSz0/binary, Rest/binary>>, Name) ->
+ <<NumAtoms:32, NameSz0:8, _Name0:NameSz0/binary, CnkRest/binary>> = Cnk,
+ NumPad0 = num_pad_bytes(CnkSz0),
+ <<_:NumPad0/unit:8, NextCnks/binary>> = Rest,
+ NameSz = size(Name),
+ CnkSz = CnkSz0 + NameSz - NameSz0,
+ NumPad = num_pad_bytes(CnkSz),
+ <<"Atom", CnkSz:32, NumAtoms:32, NameSz:8, Name:NameSz/binary,
+ CnkRest/binary, 0:NumPad/unit:8, NextCnks/binary>>;
+replace_in_atab(<<C, Rest/binary>>, Name) ->
+ <<C, (replace_in_atab(Rest, Name))/binary>>.
+
+%% Replace all occurences of the atom in the constant pool
+replace_in_const_pool(<<"LitT", CnkSz0:32, Cnk0:CnkSz0/binary, Rest/binary>>,
+ Name0, Name) ->
+ <<_UncompSz0:32, Consts0/binary>> = Cnk0,
+ NumPad0 = num_pad_bytes(CnkSz0),
+ <<_:NumPad0/unit:8, NextCnks/binary>> = Rest,
+ <<N:32, Pool0/binary>> = zlib:uncompress(Consts0),
+ Pool = replace_in_const_pool_1(Pool0, Name0, Name),
+ UncompBin = <<N:32, Pool/binary>>,
+ Consts = zlib:compress(UncompBin),
+ UncompSz = size(UncompBin),
+ Cnk = <<UncompSz:32, Consts/binary>>,
+ CnkSz = size(Cnk),
+ NumPad = num_pad_bytes(CnkSz),
+
+ <<"LitT", CnkSz:32, Cnk:CnkSz/binary, 0:NumPad/unit:8, NextCnks/binary>>;
+replace_in_const_pool(<<C, Rest/binary>>, Name0, Name) ->
+ <<C, (replace_in_const_pool(Rest, Name0, Name))/binary>>;
+replace_in_const_pool(<<>>, _Name0, _Name) ->
+ <<>>.
+
+replace_in_const_pool_1(<<BSz0:32, B0:BSz0/binary, Rest/binary>>, Name0, Name)->
+ B = term_to_binary(replace_in_const_term(binary_to_term(B0), Name0, Name)),
+ BSz = size(B),
+ <<BSz:32, B:BSz/binary, (replace_in_const_pool_1(Rest,Name0,Name))/binary>>;
+replace_in_const_pool_1(<<>>, _Name0, _Name) ->
+ <<>>.
+
+replace_in_const_term(T, Name0, Name) when is_list(T) ->
+ lists:map(fun(SubT) -> replace_in_const_term(SubT, Name0, Name) end,
+ T);
+replace_in_const_term(T, Name0, Name) when is_tuple(T) ->
+ list_to_tuple(replace_in_const_term(tuple_to_list(T), Name0, Name));
+replace_in_const_term(T, Name0, Name) when T =:= Name0 ->
+ Name;
+replace_in_const_term(T, _Name0, _Name) ->
+ T.
+
+%% Calculate the number of padding bytes that have to be added for the
+%% BinSize to be an even multiple of ?beam_num_bytes_alignment.
+num_pad_bytes(BinSize) ->
+ case ?beam_num_bytes_alignment - (BinSize rem ?beam_num_bytes_alignment) of
+ 4 -> 0;
+ N -> N
+ end.
+
+
+%% Update the size within the top-level form
+update_form_size(<<"FOR1", _OldSz:32, Rest/binary>> = Bin) ->
+ Sz = size(Bin) - 8,
+ <<"FOR1", Sz:32, Rest/binary>>.
@@ -0,0 +1,48 @@
+-module(beam_renamer_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+replaces_in_atom_table_test() ->
+ 'x^' = run_literal(x, 'x^', x).
+
+replaces_in_constant_pool_test() ->
+ ['x^'] = run_literal(x, 'x^', [x]),
+ ['x^', 'x^'] = run_literal(x, 'x^', [x, x]),
+ {'x^', 'x^'} = run_literal(x, 'x^', {x, x}),
+ {[{'x^'}]} = run_literal(x, 'x^', {[{x}]}).
+
+run_literal(Name0, Name, Term) ->
+ run_with_renamed_module(
+ fun() -> Name:f() end,
+ mk_module(Name0, [erl_syntax:abstract(Term)]),
+ Name).
+
+run_with_renamed_module(Fun, BeamBin, Name) ->
+ Bin = beam_renamer:rename(BeamBin, Name),
+ unload_module(Name),
+ {module, _} = code:load_binary(Name, "dummy.beam", Bin),
+ try Fun()
+ after unload_module(Name)
+ end.
+
+unload_module(ModName) ->
+ code:purge(ModName),
+ code:delete(ModName).
+
+
+mk_module(ModName, FuncBody) ->
+ {ok, ModName, Bin} = compile:forms(mk_module_forms(ModName, FuncBody)),
+ Bin.
+
+mk_module_forms(ModName, FuncBody) ->
+ erl_syntax:revert_forms(
+ [erl_syntax:attribute(
+ erl_syntax:atom(module),
+ [erl_syntax:atom(ModName)]),
+ erl_syntax:attribute(
+ erl_syntax:atom(compile),
+ [erl_syntax:atom(export_all)]),
+ erl_syntax:function(
+ erl_syntax:atom(f),
+ [erl_syntax:clause([], FuncBody)])]).
+

0 comments on commit 7655f35

Please sign in to comment.