Skip to content
Permalink
Browse files

s/revamb/revng/g

  • Loading branch information...
aleclearmind committed Feb 11, 2019
1 parent 2382c5e commit 53db84f7d6a47f239a383e9377bc01333948213c
@@ -4,7 +4,7 @@

cmake_minimum_required(VERSION 2.8)

project(revamb)
project(revng)

include(cmake/Common.cmake)
install(FILES cmake/revngConfig.cmake cmake/Common.cmake
@@ -69,7 +69,7 @@ foreach(ARCH arm mips mipsel x86_64 i386 s390x)
-I"${CMAKE_CURRENT_SOURCE_DIR}/include")
add_custom_target("early-linked-module-${OUTPUT}" ALL DEPENDS "${OUTPUT}")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT}"
DESTINATION share/revamb)
DESTINATION share/revng)

# Enable the support for C exceptions to avoid optimizations that break
# exception support when linking a module with isolated functions
@@ -89,7 +89,7 @@ foreach(ARCH arm mips mipsel x86_64 i386 s390x)
${SUPPORT_MODULES_CONFIG_${CONFIG}})
add_custom_target("support-module-${OUTPUT}" ALL DEPENDS "${OUTPUT}")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT}"
DESTINATION share/revamb)
DESTINATION share/revng)
endforeach()
endforeach()

@@ -160,9 +160,9 @@ configure_file(runtime/early-linked.c "${CMAKE_BINARY_DIR}/early-linked.c" COPYO
configure_file(scripts/revng "${CMAKE_BINARY_DIR}/revng" COPYONLY)
configure_file(scripts/revng-merge-dynamic "${CMAKE_BINARY_DIR}/revng-merge-dynamic" COPYONLY)
install(PROGRAMS scripts/revng scripts/revng-merge-dynamic DESTINATION bin)
install(FILES runtime/support.c DESTINATION share/revamb)
install(FILES runtime/support.h DESTINATION share/revamb)
install(FILES include/revng/Runtime/commonconstants.h DESTINATION share/revamb)
install(FILES runtime/support.c DESTINATION share/revng)
install(FILES runtime/support.h DESTINATION share/revng)
install(FILES include/revng/Runtime/commonconstants.h DESTINATION share/revng)

# Remove -rdynamic
set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS)
@@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.

PROJECT_NAME = revamb
PROJECT_NAME = revng

# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
@@ -2,17 +2,17 @@
Purpose
*******

`revamb` is a static binary translator. Given an input ELF binary for one of the
`revng` is a static binary translator. Given an input ELF binary for one of the
supported architectures (currently MIPS, ARM and x86-64) it will analyze it and
emit an equivalent LLVM IR. To do so, `revamb` employs the QEMU intermediate
emit an equivalent LLVM IR. To do so, `revng` employs the QEMU intermediate
representation (a series of TCG instructions) and then translates them to LLVM
IR.

************
How to build
************

`revamb` employs CMake as a build system. The build system will try to
`revng` employs CMake as a build system. The build system will try to
automatically detect the QEMU installation and the GCC toolchains required to
build the test binaries.

@@ -2,7 +2,7 @@
Identifying the required components
***********************************

`revamb` requires three main components: LLVM, QEMU and one or more GCC
`revng` requires three main components: LLVM, QEMU and one or more GCC
toolchains.

LLVM
@@ -31,9 +31,9 @@ using the ``QEMU_INSTALL_PATH`` variable:
GCC
===

The `revamb` build system will try to automatically detect toolchains to compile
The `revng` build system will try to automatically detect toolchains to compile
code for the supported architectures. The toolchains are required to correctly
run the `revamb` test suite.
run the `revng` test suite.

The autodetection mechanism looks in all the directories in the ``PATH``
environment variable for an executable matching the pattern ``$ARCH*-musl*-gcc``
@@ -49,7 +49,7 @@ architecture using the CMake variable ``C_COMPILER_$ARCH``:
-DC_COMPILER_x86_64="/home/me/my-x86_64-compiler" \
../
The `revamb` build system also provides an option to specify additional flags to
The `revng` build system also provides an option to specify additional flags to
employ when using the above-mentioned compilers for compilation and linking of
test binaries. This can be done using the variables ``TEST_CFLAGS_$ARCH`` (for
compile-time flags) and ``TEST_LINK_LIBRARIES_$ARCH`` (for linking).
@@ -68,7 +68,7 @@ Common CMake options
********************

In the following we provide some useful-to-know CMake variables, that are not
specific to `revamb`. They can be specified using the ``-D`` flag, e.g.:
specific to `revng`. They can be specified using the ``-D`` flag, e.g.:

.. code-block:: sh
@@ -31,14 +31,14 @@ if(NOT "${RST2HTML}" STREQUAL RST2HTML-NOTFOUND)
MAIN_DEPENDENCY "${INPUT_FILE}")
set(DOC_DEPS ${DOC_DEPS} ${OUTPUT_FILE})
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_FILE}
DESTINATION ${CMAKE_INSTALL_PREFIX}/share/doc/revamb/)
DESTINATION ${CMAKE_INSTALL_PREFIX}/share/doc/revng/)
endforeach()
endif()

foreach(INPUT_FILE ${DOC_COPY})
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${INPUT_FILE}" "${INPUT_FILE}" COPYONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${INPUT_FILE}
DESTINATION ${CMAKE_INSTALL_PREFIX}/share/doc/revamb/)
DESTINATION ${CMAKE_INSTALL_PREFIX}/share/doc/revng/)
endforeach()

add_custom_target(docs ALL DEPENDS ${DOC_DEPS})
@@ -3,15 +3,15 @@ From the IR to the executable
*****************************

This document aims to describe how to obtain a working executable from the IR
generated by `revamb`. This is for documentation purposes only, standard users can
generated by `revng`. This is for documentation purposes only, standard users can
simply use the `translate` script, which will take care of everything. In the
following we will assume the output of `revamb` is an LLVM IR file named
following we will assume the output of `revng` is an LLVM IR file named
`translated.ll`.

Support functions
=================

The IR produced by `revamb` is mostly self-contained, since all the QEMU helper
The IR produced by `revng` is mostly self-contained, since all the QEMU helper
functions are statically linked in the output module (unless ``--no-link`` was
specified, in which case the module won't be functional). However, besides the
QEMU helper functions, some additional support functions are required to obtain
@@ -29,7 +29,7 @@ provides a quick summary of the required functions as they are implemented in
kernel. `main` also performs a call to `syscall_init`, a function defined
in the generated module, taken from QEMU, which carries out some
syscall-related initializations. Finally, the entry point of the module
generated by `revamb` (the `root` function) is called.
generated by `revng` (the `root` function) is called.

:path: Function invoked by the QEMU Linux syscall emulation layer to *filter*
the path being opened by the translated program. For instance, it is
@@ -60,9 +60,9 @@ be dumped to the path specified by `REVAMB_TRACE_PATH`, if available. This is
optional at compile-time, since it introduces an overhead even if disabled at
run-time.

`revamb` distribution provides a pre-compiled version of both the flavors in the
`revng` distribution provides a pre-compiled version of both the flavors in the
form of LLVM IR: `support-x86_64-normal.ll` and `support-x86_64-trace.ll`. They
have to be linked into the module generated by `revamb`:
have to be linked into the module generated by `revng`:

.. code-block:: sh
@@ -109,7 +109,7 @@ and `librt.so`.
In addition to this, we also have to tell the linker to force the *segment
variables* (see `GeneratedIRReference.rst`_) at the appropriate location in
memory. In fact, each segment of the original binary should be loaded exactly at
the same address where it was originally supposed to be loaded. `revamb` emits
the same address where it was originally supposed to be loaded. `revng` emits
the required mapping automatically in a file with the same name as the output
file plus a ``.li.csv`` suffix. This file is a CSV file composed of three
columns:
@@ -1,8 +1,8 @@
***********************
revamb Output Reference
revng Output Reference
***********************

The main goal of revamb is to produce an LLVM module reproducing the behavior of
The main goal of revng is to produce an LLVM module reproducing the behavior of
the input program. The module should be compilable and work out of the
box. However, such module contains also a rich set of additional information
recovered during the analysis process that the user can exploit to develop any
@@ -55,7 +55,7 @@ And it has been translated as follows:

.. code-block:: sh
./revamb --no-link --functions-boundaries --use-debug-symbols --debug-info ll example example.ll
./revng --no-link --functions-boundaries --use-debug-symbols --debug-info ll example example.ll
Global variables
================
@@ -146,7 +146,7 @@ Input architecture description
==============================

The generated module also contains a *named metadata node*:
`revamb.input.architecture`. Currently it's composed of a metadata tuple with
`revng.input.architecture`. Currently it's composed of a metadata tuple with
two values:

:u32 DelaySlotSize: the size, in number of instructions of the delay slot of the
@@ -157,7 +157,7 @@ Here's how this information appears in our example:

.. code-block:: llvm

!revamb.input.architecture = !{!0}
!revng.input.architecture = !{!0}
!0 = !{i32 0, !"pc"}

There is no delay slot on x86-64 and the CSV representing the program counter is
@@ -248,17 +248,17 @@ Here's how it looks like in our example:
i64 4194536, label %bb.myfunction
i64 4194542, label %bb._start
i64 4194559, label %bb._start.0x11
], !revamb.block.type !1
], !revng.block.type !1

dispatcher.default: ; preds = %dispatcher.entry
call void @unknownPC()
unreachable

anypc: ; preds = %entrypoint
br label %dispatcher.entry, !revamb.block.type !2
br label %dispatcher.entry, !revng.block.type !2

unexpectedpc: ; preds = %entrypoint
br label %dispatcher.entry, !revamb.block.type !3
br label %dispatcher.entry, !revng.block.type !3

; ...

@@ -399,7 +399,7 @@ second call.
Function calls
--------------

revamb can detect function calls. The terminator of a basic block can be
revng can detect function calls. The terminator of a basic block can be
considered a function call if it's preceded by a call to a function called
`function_call`. This function takes three parameters:

@@ -433,7 +433,7 @@ and the third one is the return address (``0x4000ff``).
Function boundaries
-------------------

revamb can identify function boundaries. This information is also encoded in the
revng can identify function boundaries. This information is also encoded in the
generated module by associating two types of metadata (`func_entry` and `func`)
to the terminator instruction of each basic block.

@@ -494,11 +494,11 @@ function handling a syscall or a floating point division. These functions can
take arguments and can read and modify freely all the CSV.

Helper functions are obtained from QEMU in the form of LLVM IR (e.g.,
``libtinycode-helpers-mips.ll``) and are statically linked by revamb before
``libtinycode-helpers-mips.ll``) and are statically linked by revng before
emitting the module.

The presence of helper functions also imports a quite large number of data
structures, which are not directly related to revamb's output.
structures, which are not directly related to revng's output.

Note that a helper function might be present multiple times with different
suffixes. This happens every time a helper function takes as an argument a
@@ -509,28 +509,28 @@ which parts of the CPU state is touched by an helper.
Currently, there is no complete documentation of all the helper functions. The
best way to understand which helper function does what, is to create a simple
assembly snippet using a specific feature (e.g., performing a syscall) and
translate it using revamb.
translate it using revng.

Function isolation pass output reference
========================================

This section of the document aims to describe how to apply the function
isolation pass of revamb-dump to a simple example, to describe what to expect
isolation pass of revng-dump to a simple example, to describe what to expect
as output of this pass and the assumptions made in the isolation pass.

All the following examples originate from the translation of the simple program
already shown in the beginning of this document.

Once we have applied the translation to the original binary we can apply the
function isolation pass using the `revamb-dump` utility like this:
function isolation pass using the `revng-dump` utility like this:

.. code-block:: sh
revamb-dump --functions-isolation=example.isolated-functions.ll example.ll
revng-dump --functions-isolation=example.isolated-functions.ll example.ll
As you can see by comparing the original IR and the one to which the function
isolation pass has been applied the main difference is that, on the basis of the
information recovered by the function boundaries analysis applied by revamb, now
information recovered by the function boundaries analysis applied by revng, now
the code is organized in different LLVM functions.

As a reference we can see that the basic block `bb.myfunction` that belonged to
@@ -568,7 +568,7 @@ Now with the actual call appears like this:
br label %bb._start.0x11

Always on the basis of the information recovered by the analysis performed by
revamb we are able to emit `ret` instructions where needed.
revng we are able to emit `ret` instructions where needed.

As a reference at the end of the basic block ``bb.myfunction`` the branch to the
dispatcher:
@@ -1,10 +1,10 @@
***************************************************
Using `revamb` with Python: a simple instrumenation
Using `revng` with Python: a simple instrumentation
***************************************************

In this document we'll guide the user through using `revamb`'s output from
In this document we'll guide the user through using `revng`'s output from
Python. Among the many possibilities that arise from the LLVM IR provided by
`revamb`, in this document we'll show how it's possible to perform a simple
`revng`, in this document we'll show how it's possible to perform a simple
instrumentation of an existing binary, by injecting some code in the generated
LLVM IR and recompiling it.

@@ -31,7 +31,7 @@ We can compile `hello.c` for ARM and link it statically:
armv7a-hardfloat-linux-uclibceabi-gcc hello.c -o hello -static
Using the `translate`_ tool we can have `revamb` produce the
Using the `translate`_ tool we can have `revng` produce the
LLVM IR and recompile it for us. The output should be a working
``hello.translated`` program for x86-64 (our host architecture):

@@ -47,7 +47,7 @@ Tracing all the syscall invocations
===================================

For this example, we'll write a simple Python script (``instrument.py``) which
takes in input the `revamb` generated LLVM IR, identifies all the syscalls and
takes in input the `revng` generated LLVM IR, identifies all the syscalls and
instruments them by injecting the code to print the number of the syscall to be
performed.

@@ -70,7 +70,7 @@ object and load the input LLVM IR:
buffer = llvm.create_memory_buffer_with_contents_of_file(sys.argv[1])
module = context.parse_ir(buffer)
Now that we have a reference to the module produced by `revamb` we can collect the
Now that we have a reference to the module produced by `revng` we can collect the
objects required to perform the `dprintf` call, i.e., the function itself, the
CSV representing the register `r7`, a constant integer representing `stderr` and
the format string for `dprintf`:
@@ -1,9 +1,9 @@
***********
revamb-dump
revng-dump
***********

----------------------------------------
extract information from `revamb` output
extract information from `revng` output
----------------------------------------

:Author: Alessandro Di Federico <ale+revng@clearmind.me>
@@ -16,20 +16,20 @@ extract information from `revamb` output
SYNOPSIS
========

revamb-dump [options] INFILE
revng-dump [options] INFILE

DESCRIPTION
===========

`revamb-dump` is a simple tool to extract some high level information from the
IR produced by `revamb`.
`revng-dump` is a simple tool to extract some high level information from the
IR produced by `revng`.

OPTIONS
=======

Note that all the options specifying a path support the special value ``-``
which indicates ``stdout``. Note also that `revamb-dump` expresses the *name of
a basic block* as represented by `revamb` in the generated module (typically
which indicates ``stdout``. Note also that `revng-dump` expresses the *name of
a basic block* as represented by `revng` in the generated module (typically
``bb.0xaddress`` or ``bb.symbol.0xoffset``).

:``-c``, ``--cfg``: Path where the control-flow graph should be stored. The

0 comments on commit 53db84f

Please sign in to comment.
You can’t perform that action at this time.