Skip to content

Commit

Permalink
New working model of strlen (#1725)
Browse files Browse the repository at this point in the history
* New working model of strlen - Symbolic Test yet to be updated

* Updated modeling test for all symbolic input

* Fixed code climate issue

* Adding binary for new test

* Updated test to use sets for compairison

* Removed length compairison

* Fix argument concretization indexing

* Preserve both model implementation strategies

* Corrected formatting

* Removed whitespace for code climate

* Removed whitespace for code climate

Co-authored-by: Eric Kilmer <eric.d.kilmer@gmail.com>
  • Loading branch information
sschriner and ekilmer committed Jul 7, 2020
1 parent 7d26c9d commit 14ef9ec
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 73 deletions.
152 changes: 92 additions & 60 deletions manticore/native/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,50 @@ def variadic(func):
return func


def _find_zero(cpu, constrs, ptr):
def is_definitely_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory is NULL.
This supports both concrete & symbolic byte values.
:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or constrained to NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().must_be_true(constrs, byte == 0)
else:
return byte == 0


def cannot_be_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory is not NULL or cannot be NULL
:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is not NULL or cannot be NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().must_be_true(constrs, byte != 0)
else:
return byte != 0


def can_be_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory can be NULL
:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or can be NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().can_be_true(constrs, byte == 0)
else:
return byte == 0


def _find_zero(cpu, constrs, ptr: Union[int, BitVec]) -> int:
"""
Helper for finding the closest NULL or, effectively NULL byte from a starting address.
Expand All @@ -46,15 +89,9 @@ def _find_zero(cpu, constrs, ptr):

offset = 0
while True:
byt = cpu.read_int(ptr + offset, 8)

if issymbolic(byt):
if not SelectedSolver.instance().can_be_true(constrs, byt != 0):
break
else:
if byt == 0:
break

byte = cpu.read_int(ptr + offset, 8)
if is_definitely_NULL(byte, constrs):
break
offset += 1

return offset
Expand All @@ -78,7 +115,7 @@ def strcmp(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]):
been tracking and just replace it with the symbolic subtraction of
the two
:param State state: Current program state
:param state: Current program state
:param s1: Address of string 1
:param s2: Address of string 2
:return: Symbolic strcmp result
Expand Down Expand Up @@ -116,76 +153,71 @@ def strcmp(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]):
return ret


def strlen(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
def strlen_exact(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
"""
strlen symbolic model.
strlen symbolic model
Algorithm: Walks from end of string not including NULL building ITE tree when current byte is symbolic.
Strategy: produce a state for every symbolic string length for better accuracy
:param State state: current program state
Algorithm: Counts the number of characters in a string forking every time a symbolic byte
is found that can be NULL but is not constrained to NULL.
:param state: current program state
:param s: Address of string
:return: Symbolic strlen result
:rtype: Expression or int
"""

cpu = state.cpu

if issymbolic(s):
raise ConcretizeArgument(state.cpu, 1)

zero_idx = _find_zero(cpu, state.constraints, s)
cpu = state.cpu
constrs = state.constraints

ret = zero_idx
# Initialize offset based on whether state has been forked in strlen
if "strlen" not in state.context:
offset = 0
else:
offset = state.context["strlen"]

for offset in range(zero_idx - 1, -1, -1):
byt = cpu.read_int(s + offset, 8)
if issymbolic(byt):
ret = ITEBV(cpu.address_bit_size, byt == 0, offset, ret)
c = cpu.read_int(s + offset, 8)
while not is_definitely_NULL(c, constrs):
# If the byte can be NULL concretize and fork states
if can_be_NULL(c, constrs):
state.context["strlen"] = offset
raise Concretize("Forking on possible NULL strlen", expression=(c == 0), policy="ALL")

return ret
offset += 1
c = cpu.read_int(s + offset, 8)

return offset

def is_definitely_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory is NULL.
This supports both concrete & symbolic byte values.

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or constrained to NULL
def strlen_approx(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
"""
if issymbolic(byte):
return SelectedSolver.instance().must_be_true(constrs, byte == 0)
else:
return byte == 0
strlen symbolic model
Strategy: build a result tree to limit state explosion results approximate
def cannot_be_NULL(byte, constrs) -> bool:
Algorithm: Walks from end of string not including NULL building ITE tree when current byte is symbolic.
:param state: current program state
:param s: Address of string
:return: Symbolic strlen result
"""
Checks if a given byte read from memory is not NULL or cannot be NULL

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is not NULL or cannot be NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().must_be_true(constrs, byte != 0)
else:
return byte != 0
if issymbolic(s):
raise ConcretizeArgument(state.cpu, 1)

cpu = state.cpu
zero_idx = _find_zero(cpu, state.constraints, s)

def can_be_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory can be NULL
ret = zero_idx

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or can be NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().can_be_true(constrs, byte == 0)
else:
return byte == 0
for offset in range(zero_idx - 1, -1, -1):
byt = cpu.read_int(s + offset, 8)
if issymbolic(byt):
ret = ITEBV(cpu.address_bit_size, byt == 0, offset, ret)

return ret


def strcpy(state: State, dst: Union[int, BitVec], src: Union[int, BitVec]) -> Union[int, BitVec]:
Expand All @@ -202,10 +234,10 @@ def strcpy(state: State, dst: Union[int, BitVec], src: Union[int, BitVec]) -> Un
:return: pointer to the dst
"""
if issymbolic(src):
raise ConcretizeArgument(state.cpu, 1)
raise ConcretizeArgument(state.cpu, 2)

if issymbolic(dst):
raise ConcretizeArgument(state.cpu, 0)
raise ConcretizeArgument(state.cpu, 1)

cpu = state.cpu
constrs = state.constraints
Expand All @@ -222,7 +254,7 @@ def strcpy(state: State, dst: Union[int, BitVec], src: Union[int, BitVec]) -> Un
while not is_definitely_NULL(src_val, constrs):
cpu.write_int(dst + offset, src_val, 8)

# If a byte can be NULL set the src_val for concretize and fork states
# If a src byte can be NULL concretize and fork states
if can_be_NULL(src_val, constrs):
state.context["strcpy"] = offset
raise Concretize("Forking on NULL strcpy", expression=(src_val == 0), policy="ALL")
Expand Down
Binary file added tests/native/binaries/sym_strlen_test
Binary file not shown.
14 changes: 14 additions & 0 deletions tests/native/binaries/sym_strlen_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#define LEN 5

int main() {
char str[LEN];
read(0, str, sizeof(str));

int a = strlen(str);
printf("Length of string is: %d", a);

return 0;
}
83 changes: 70 additions & 13 deletions tests/native/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
variadic,
isvariadic,
strcmp,
strlen,
strlen_exact,
strlen_approx,
strcpy,
is_definitely_NULL,
cannot_be_NULL,
Expand Down Expand Up @@ -160,72 +161,128 @@ def test_symbolic_concrete(self):
class StrlenTest(ModelTest):
def test_concrete(self):
s = self._push_string("abc\0")
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertEqual(ret, 3)
ret = strlen_approx(self.state, s)
self.assertEqual(ret, 3)

def test_concrete_empty(self):
s = self._push_string("\0")
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertEqual(ret, 0)
ret = strlen_approx(self.state, s)
self.assertEqual(ret, 0)

def test_symbolic_effective_null(self):
sy = self.state.symbolicate_buffer("ab+")
self.state.constrain(sy[2] == 0)
s = self._push_string(sy)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertEqual(ret, 2)
ret = strlen_approx(self.state, s)
self.assertEqual(ret, 2)

def test_symbolic(self):
def test_symbolic_no_fork(self):
sy = self.state.symbolicate_buffer("+++\0")
s = self._push_string(sy)

ret = strlen(self.state, s)
ret = strlen_approx(self.state, s)
self.assertItemsEqual(
range(4), Z3Solver.instance().get_all_values(self.state.constraints, ret)
)

self.state.constrain(sy[0] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 0))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 0))
self._clear_constraints()

self.state.constrain(sy[0] != 0)
self.state.constrain(sy[1] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 1))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 1))
self._clear_constraints()

self.state.constrain(sy[0] != 0)
self.state.constrain(sy[1] != 0)
self.state.constrain(sy[2] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 2))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 2))
self._clear_constraints()

self.state.constrain(sy[0] != 0)
self.state.constrain(sy[1] != 0)
self.state.constrain(sy[2] != 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 3))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 3))

def test_symbolic_fork(self):
# This binary is compiled using gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
# with flags: -g -static -fno-builtin
BIN_PATH = os.path.join(os.path.dirname(__file__), "binaries", "sym_strlen_test")
tmp_dir = tempfile.TemporaryDirectory(prefix="mcore_test_sym_")
m = Manticore(BIN_PATH, stdin_size=10, workspace_url=str(tmp_dir.name))

addr_of_strlen = 0x04404D0

@m.hook(addr_of_strlen)
def strlen_model(state):
state.invoke_model(strlen_exact)

m.run()
m.finalize()

# Expected stdout outputs
expected = {
"Length of string is: 0",
"Length of string is: 1",
"Length of string is: 2",
"Length of string is: 3",
"Length of string is: 4",
"Length of string is: 5",
}

# Make a list of the generated output states
outputs = f"{str(m.workspace)}/test_*.stdout"
stdouts = set()
for out in glob(outputs):
with open(out) as f:
stdouts.add(f.read())

# Assert that every expected stdout has a matching output
self.assertEqual(expected, stdouts)

def test_symbolic_mixed(self):
sy = self.state.symbolicate_buffer("a+b+\0")
s = self._push_string(sy)

self.state.constrain(sy[1] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 1))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 1))
self._clear_constraints()

self.state.constrain(sy[1] != 0)
self.state.constrain(sy[3] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 3))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 3))
self._clear_constraints()

self.state.constrain(sy[1] != 0)
self.state.constrain(sy[3] != 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 4))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 4))


Expand Down

0 comments on commit 14ef9ec

Please sign in to comment.