Skip to content

Commit

Permalink
feat: (WIP) changing to use DynamoRIO code coverage info for init cod…
Browse files Browse the repository at this point in the history
…e removal

- See issue #13 for more detail
- Improving the documentation
- Update Lighttpd compilation and running script.
- Add Lighttpd bin files
  • Loading branch information
xjtuwxg committed Mar 20, 2022
1 parent a0d8a06 commit d570013
Show file tree
Hide file tree
Showing 51 changed files with 1,652 additions and 105 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,4 @@ tags
.history/
.vscode/
.gdb_history
peda-session-*
13 changes: 1 addition & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,8 @@

## Intro
This project aims to dynamically customize code of a running process. The major component is an extended CRIU/CRIT tool that can rewrite the saved process images.
This repo contains a modified version of CRIU that can edit a process, disable code path, and insert library pages to a process at arbitrary VMA location.

Table of Contents
---
* [<a href="docs/build_dynacut.md">Build DynaCut</a>](#build-dynacut)
* [<a href="docs/customize_toy_program.md">Dynamically remove unwanted features for a toy program</a>](#dynamically-remove-unwanted-features-for-a-toy-program)
* [<a href="docs/customize_servers.md">Dynamically remove unwanted features for server applications</a>](#dynamically-remove-unwanted-features-for-server-applications)
* [<a href="docs/init_removal_toy_program.md">Remove initialization code for a toy example</a>](#remove-initialization-code-for-a-toy-example)
* [<a href="docs/init_removal_servers.md">Remove initialization code for server applications</a>](#remove-initialization-code-for-server-applications)

Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc)

Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc). (To generate the ToC of README.md, just run `./gh-md-toc README.md`)
This repo contains a modified version of [DynamoRIO](https://dynamorio.org/) to dump code coverage of execution phases, and a modified version of [CRIU](https://criu.org/) that can edit a process, disable code paths, and insert library pages into a process at arbitrary VMA locations.

## [Build DynaCut](docs/build_dynacut.md)

Expand Down
2 changes: 1 addition & 1 deletion criu/lib/py/add_sig_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ def config_add_sig_handler(filepath, library_address_trap, jump_address, pid):
config_list+= ','
config_list += '{{ {0},{1} }}'.format(contents_list[i], offset_to_write)
#Add traps in the binary
pycriu.process_edit.modify_binary_dynamic(filepath, library_address_trap, int(contents_list[i], 16), pid)
pycriu.process_edit.pedit_update_a_byte(filepath, library_address_trap, int(contents_list[i], 16), pid, 0xCC)

with open(os.path.join(filepath, 'config.h'), 'wb+') as config_file:
config_file.write("%s" % config_list)
Expand Down
40 changes: 34 additions & 6 deletions criu/lib/py/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def mbd(opts):
raise Exception("Offset address cannot be empty!")
ps_img = pycriu.images.load(dinf(opts, 'pstree.img'))
for p in ps_img['entries']:
pycriu.process_edit.modify_binary_dynamic(directory, int(start_address, 16), int(offset, 16), get_task_id(p, 'pid'))
pycriu.process_edit.pedit_update_a_byte(directory, int(start_address, 16), int(offset, 16), get_task_id(p, 'pid'), 0xCC)

def remove_init(opts):
start_address=opts['startaddress']
Expand Down Expand Up @@ -533,12 +533,14 @@ def pedit_insert_sighandler(opts):
process_img_dir = opts['dir']
sighandler_lib = opts['sighandler_path'] # optional in crit cmdline
vma_start_address = opts['addr']
#verifier = opts['verify']
if not sighandler_lib:
sys.stderr.write("crit: error: too few arguments (No sighandler path)")
sys.exit(1)

# Retrieve the handler address by adding the base VMA and the handler offset
nm = subprocess.Popen(["nm", opts['sighandler_path']], stdout=subprocess.PIPE)
# TODO: the sighandler MUST use trap_handler as its handler name
grep = subprocess.Popen(["grep", "trap_handler"], stdin=nm.stdout, stdout=subprocess.PIPE)
result, _ = grep.communicate()
handler_offset = int(result.split()[0], 16)
Expand Down Expand Up @@ -570,8 +572,8 @@ def pedit_insert_int3(opts):
sys.exit(1)
ps_img = pycriu.images.load(dinf(opts, 'pstree.img'))
for p in ps_img['entries']:
pycriu.process_edit.modify_binary_dynamic(process_img_dir,
int(base, 16), int(offset, 16), get_task_id(p, 'pid'))
pycriu.process_edit.pedit_update_a_byte(process_img_dir,
int(base, 16), int(offset, 16), get_task_id(p, 'pid'), 0xCC)

# Update (modify) a single byte with the value.
# Use: ./criu/crit/crit edit update byte <img dir> <base_addr> -offset <offset> -value <value>
Expand All @@ -588,6 +590,26 @@ def pedit_update_byte(opts):
for p in ps_img['entries']:
pycriu.process_edit.pedit_update_a_byte(process_img_dir,
int(base, 16), int(offset, 16), get_task_id(p, 'pid'), int(value,16))

# Remove init code by replacing the 1st byte with int3
# Use: ./criu/crit/crit edit rm init <img dir> <base_addr> -file <file>
# Example: ./criu/crit/crit edit rm init loop.img 0x555555554000 -file init.log
def pedit_rm_init_log(opts):
    """Overwrite each logged init-code location with 0xCC (int3).

    Reads the file named by ``opts['file']``, parses every non-blank line
    as a hexadecimal offset, and for each process listed in ``pstree.img``
    asks ``pedit_update_a_byte`` to write 0xCC at ``opts['addr']`` plus
    that offset inside the image directory ``opts['dir']``.

    Exits with status 1 when no log file was given on the command line.
    Raises ValueError if a non-blank line is not a valid hex number.
    """
    base = opts['addr']
    process_img_dir = opts['dir']
    init_log = opts['file']
    if not init_log:
        sys.stderr.write("crit: error: too few arguments (no init log file)")
        sys.exit(1)

    # Parse the log once, up front. Blank lines (e.g. a trailing empty line
    # left by the shell pipeline that produced the log) are skipped instead
    # of crashing int() with a ValueError.
    with open(init_log) as log_file:
        offsets = [int(line, 16) for line in log_file if line.strip()]
    base_address = int(base, 16)

    ps_img = pycriu.images.load(dinf(opts, 'pstree.img'))
    for p in ps_img['entries']:
        pid = get_task_id(p, 'pid')
        for offset in offsets:
            pycriu.process_edit.pedit_update_a_byte(
                process_img_dir, base_address, offset, pid, 0xCC)

# ===== End Process Editing functions =====

def pedit_insert(opts):
Expand All @@ -598,8 +620,10 @@ def pedit_insert(opts):
switcher.get(opts['what'])(opts)

def pedit_rm(opts):
print("TODO: choice rm not implemented yet.")
print(opts)
switcher = {
'init': pedit_rm_init_log
}
switcher.get(opts['what'])(opts)

def pedit_update(opts):
print(opts)
Expand All @@ -608,6 +632,8 @@ def pedit_update(opts):
}
switcher.get(opts['what'])(opts)

## The main entry for "crit edit"
# ./criu/crit/crit edit insert|rm|update ...
def process_edit(opts):
switcher = {
'insert': pedit_insert,
Expand Down Expand Up @@ -668,13 +694,15 @@ def main():
# Process Edit
edit_parser = subparsers.add_parser('edit', help="edit criu process images")
edit_parser.add_argument('choice', choices=['insert', 'rm', 'update'])
edit_parser.add_argument('what', choices=['sighandler', 'int3', 'pages', 'byte'],
edit_parser.add_argument('what', choices=['sighandler', 'int3', 'pages', 'byte', 'init'],
help='insert a \'signal handler\' or \'int3\'; remove pages')
edit_parser.add_argument('dir')
edit_parser.add_argument('addr', help='Address of the sighandler VMA, or the base VMA to replace int3')
edit_parser.add_argument('-path','--sighandler_path', help='Path to the signal handler (shared library) to be loaded')
edit_parser.add_argument('-file', help='File containing a list of address')
edit_parser.add_argument('-offset', help='Offset in the binary')
edit_parser.add_argument('-value', help='Value to be updated')
edit_parser.add_argument('-verify', help='Insert removed code verifier')
edit_parser.set_defaults(func=process_edit)

# Add VMAs
Expand Down
43 changes: 6 additions & 37 deletions criu/lib/py/process_edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,39 +32,7 @@ def modify_binary(filepath, address, pid):
f.seek(-1,1)
f.write(b'\xCC')

# Adds traps into the CRIU process image (for dynamically linked binary)
def modify_binary_dynamic(filepath, address, library_offset, pid):
pedit_update_a_byte(filepath, address, library_offset, pid, 0xCC)
"""
def modify_binary_dynamic(filepath, address, library_offset, pid):
pgmap_img, _= pycriu.utils.readImages(pid, filepath)
pgmap_list = pgmap_img['entries']
pages_id = pgmap_list[0]['pages_id']
trap_address = address + library_offset
pg_offset = 0
binary_offset = 0
for i in range(1, len(pgmap_list)):
nr_pages = pgmap_list[i]["nr_pages"]
for key in pgmap_list[i]:
if(key == "vaddr"):
map_address = pgmap_list[i][key]
if(map_address <= trap_address < (map_address + nr_pages*4096)):
binary_offset = pg_offset + (trap_address - map_address)
print("The int3 @ 0x{:04x}, offset in pages-1.img: 0x{:04x}".
format(trap_address, binary_offset))
break
pg_offset = pg_offset + 4096*nr_pages
# Modify binary
with open(os.path.join(filepath, 'pages-%s.img' % pages_id), mode='r+b') as f:
f.seek(binary_offset,0)
bytes_data = f.read(1)
print("The data @ 0x{:04x} is 0x{}".format(binary_offset, bytes_data.encode('hex')))
f.seek(-1,1)
f.write(b'\xCC')
"""

# Update a byte in the CRIU process image
# Update a byte in the CRIU process image (i.e., pages.img)
def pedit_update_a_byte(filepath, address, library_offset, pid, value):
pgmap_img, _= pycriu.utils.readImages(pid, filepath)
pgmap_list = pgmap_img['entries']
Expand All @@ -79,16 +47,17 @@ def pedit_update_a_byte(filepath, address, library_offset, pid, value):
map_address = pgmap_list[i][key]
if(map_address <= trap_address < (map_address + nr_pages*4096)):
binary_offset = pg_offset + (trap_address - map_address)
print("The int3 @ 0x{:04x}, <page index, offset> in pages-1.img: <{:d}, 0x{:03x}>".
format(trap_address, nr_pages, binary_offset))
#print("The int3 @ 0x{:04x}, <page index, offset> in pages-1.img: <{:d}, 0x{:03x}>".
# format(trap_address, nr_pages, binary_offset))
break
pg_offset = pg_offset + 4096*nr_pages

# Modify binary
# Modify pages.img
with open(os.path.join(filepath, 'pages-%s.img' % pages_id), mode='r+b') as f:
f.seek(binary_offset,0)
bytes_data = f.read(1)
print("The data @ 0x{:04x} is 0x{}".format(binary_offset, bytes_data.encode('hex')))
#print("The data @ 0x{:04x} is 0x{}".format(binary_offset, bytes_data.encode('hex')))
print("{{0x{:04x}, 0x{}}},".format(trap_address, bytes_data.encode('hex')))
f.seek(-1,1)
#f.write(b'\xCC')
#f.write(value.to_bytes(1, "little")) # python3
Expand Down
4 changes: 3 additions & 1 deletion docs/init_removal_servers.md
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# Remove initialization code for server applications
# Remove initialization code for server applications

## Lighttpd
60 changes: 53 additions & 7 deletions docs/init_removal_toy_program.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ In real_func function (real functional code).
The toy application has two init functions (`init1` and `init2`) and a real function. It uses a simple loop to simulate the event loop of server applications.
In this example, we want to **automatically identify** the init functions and remove them from the process's memory space.


## Remove initialization code using code coverage information
## Obtain initialization code block locations through code coverage trace comparison
Export the DynamoRIO home path:
```
source ./export_drio.sh
Expand All @@ -29,16 +28,16 @@ Run the `init-example` using `dynamorio/drcov`. We want to ask `drcov` to dump t
```
./dynamorio/exports/bin64/drrun -root ./dynamorio/build -c ./dynamorio/build/clients/lib64/debug/libdrcov.so -dump_text -nudge_dump -- ./tests/example/init-example
```
Open another terminal and use nudge to dump the execution log for the initialization phase:
Open another terminal and use `nudge` to signal the target application after the initialization has finished. It will dump the execution log for the initialization execution phase:
```
./dynamorio/build/bin64/nudgeunix -pid $(pidof init-example) -client 0 2
./dynamorio/build/bin64/nudgeunix -pid $(pidof init-example) -client 0 2
```

You will get two log files. This first one is the code coverage of the 1st execution phase (init phase); the 2nd file is the code coverage of the 2nd execution phase (serving phase):
And then wait for the `init-example` to finish. You will get **two log files**. The first one is the code coverage of the 1st execution phase (init phase); the 2nd file is the code coverage of the 2nd execution phase (serving phase):
```
❯ ls drcov.init-example.885048.000*
drcov.init-example.885048.0000.proc.log drcov.init-example.885048.0001.proc.log
```

Next, run the `./tracediff.py` tool to find the basic blocks that only belong to the initialization execution phase. Note module[ 5] is the code section of the `init-example`:
```
❯ ./tools/scripts/tracediff.py -u drcov.init-example.885048.0000.proc.log -b drcov.init-example.885048.0001.proc.log | grep "\[ 5\]:"
Expand All @@ -48,7 +47,54 @@ Next, run the `./tracediff.py` tool to find the basic blocks that only belong to
[1740] module[ 5]: 0x0000000000000016, 5
... ...
```
In this case, there are 29 basic block that only belongs to the initialization phase.
In this case, there are 29 basic blocks in `init-example` code that are only used during the initialization phase.

You can also use this command to directly dump the `offset` into a file:
```
❯ ./tools/scripts/tracediff.py -u drcov.init-example.1150388.0000.proc.log -b drcov.init-example.1150388.0001.proc.log | grep "\[ 5\]:" | awk '{print substr($4, 1, length($4)-1)}' > init.log
```

## Remove the initialization code through process rewriting
Next, use the init code locations from the log to rewrite the process image.

You need to first identify the code segment location:
```
❯ ./criu/crit/crit x init-example.img mems
1136271
exe /home/.../DynaCut/tests/example/init-example
555555554000-555555555000 r-- /home/.../DynaCut/tests/example/init-example
555555555000-555555556000 r-x /home/.../DynaCut/tests/example/init-example + 0x1000
... ...
```
Here, `0x555555555000` is the place where the `init-example`'s code segment is loaded. Use this information and the `init.log` to rewrite the process:
```
❯ ./criu/crit/crit edit rm init init-example.img 555555555000 -file init.log
{0x5555555550a0, 0xf3}
{0x555555555260, 0xf3}
{0x555555555000, 0xf3}
...
```
After this step, the initialization code of the process should be removed.

## Insert an initialization code removal verifier
You may also want to insert a verifier to double-check the correctness of the program logic after the initialization code has been removed.

The verifier is a signal handler that captures unexpected `int3` execution, replaces that byte with its original value, and continues the execution. TODO: here I manually change the write permission for the code pages; Abhijit has the code for automatically enabling the write permission (`remove_init.py` +43).
```
❯ ./criu/crit/crit edit insert sighandler init-example.img 0x1000000 -path $PWD/tests/sighandler/init_removal_verifier.so
```

Restore the process snapshot:
```
❯ ./tools/scripts/restore.sh init-example.img
[ 6]
[ 5]
...
[ 0]
In real_func function (real functional code).
```

---

## Abhijit's document
In this example, we simulate initialization functions removal.
Expand Down
31 changes: 31 additions & 0 deletions docs/lighttpd.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Remove initialization code of Lighttpd
## Build Lighttpd from source
Under `DynaCut` root directory:
```
❯ cd tests/lighttpd
❯ ./build_lighttpd.sh
```
Run Lighttpd server:
```
❯ ./run_lighttpd.sh
##
## Basic lighttpd configuration.
... ...
2022-03-18 17:29:54: server.c.1513) server started (lighttpd/1.4.59)
```

## Lighttpd initialization code profiling

```
❯ ../../tools/scripts/tracediff.py -u drcov.lighttpd.488622.0000.proc.log -b drcov.lighttpd.488622.0001.proc.log | grep "\[ 5\]:" | awk '{print substr($4, 1, length($4)-1)}' > lighttpd-init.log.0
```


```
❯ ./criu/crit/crit x lighttpd.img mems
489967
exe /home/xiaoguang/works/proc-edit/DynaCut/tests/lighttpd/lighttpd-1.4.59/src/lighttpd
5607c229a000-5607c22a6000 r-- /home/xiaoguang/works/proc-edit/DynaCut/tests/lighttpd/lighttpd-1.4.59/src/lighttpd
5607c22a6000-5607c22fa000 r-x /home/xiaoguang/works/proc-edit/DynaCut/tests/lighttpd/lighttpd-1.4.59/src/lighttpd + 0xc000
... ...
```
41 changes: 41 additions & 0 deletions tests/lighttpd/bin/.libs/mod_access.lai
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# mod_access.la - a libtool library file
# Generated by libtool (GNU libtool) 2.4.6
#
# Please DO NOT delete this file!
# It is necessary for linking the library.

# The name that we can dlopen(3).
dlname='mod_access.so'

# Names of this library.
library_names='mod_access.so mod_access.so mod_access.so'

# The name of the static archive.
old_library=''

# Linker flags that cannot go in dependency_libs.
inherited_linker_flags=''

# Libraries that this one depends upon.
dependency_libs=''

# Names of additional weak libraries provided by this library
weak_library_names=''

# Version information for mod_access.
current=0
age=0
revision=0

# Is this an already installed library?
installed=yes

# Should we warn about portability when linking against -modules?
shouldnotlink=yes

# Files to dlopen/dlpreopen
dlopen=''
dlpreopen=''

# Directory that this library needs to be installed in:
libdir='/usr/local/lib'
Loading

0 comments on commit d570013

Please sign in to comment.