Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New working model of strlen #1725

Merged
merged 11 commits into from
Jul 7, 2020
152 changes: 92 additions & 60 deletions manticore/native/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,50 @@ def variadic(func):
return func


def _find_zero(cpu, constrs, ptr):
def is_definitely_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory is NULL.
This supports both concrete & symbolic byte values.

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or constrained to NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().must_be_true(constrs, byte == 0)
else:
return byte == 0


def cannot_be_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory is not NULL or cannot be NULL

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is not NULL or cannot be NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().must_be_true(constrs, byte != 0)
else:
return byte != 0


def can_be_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory can be NULL

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or can be NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().can_be_true(constrs, byte == 0)
else:
return byte == 0


def _find_zero(cpu, constrs, ptr: Union[int, BitVec]) -> int:
"""
Helper for finding the closest NULL or, effectively NULL byte from a starting address.

Expand All @@ -46,15 +89,9 @@ def _find_zero(cpu, constrs, ptr):

offset = 0
while True:
byt = cpu.read_int(ptr + offset, 8)

if issymbolic(byt):
if not SelectedSolver.instance().can_be_true(constrs, byt != 0):
break
else:
if byt == 0:
break

byte = cpu.read_int(ptr + offset, 8)
if is_definitely_NULL(byte, constrs):
break
offset += 1

return offset
Expand All @@ -78,7 +115,7 @@ def strcmp(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]):
been tracking and just replace it with the symbolic subtraction of
the two

:param State state: Current program state
:param state: Current program state
:param s1: Address of string 1
:param s2: Address of string 2
:return: Symbolic strcmp result
Expand Down Expand Up @@ -116,76 +153,71 @@ def strcmp(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]):
return ret


def strlen(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
def strlen_exact(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
"""
strlen symbolic model.
strlen symbolic model

Algorithm: Walks from end of string not including NULL building ITE tree when current byte is symbolic.
Strategy: produce a state for every symbolic string length for better accuracy

:param State state: current program state
Algorithm: Counts the number of characters in a string forking every time a symbolic byte
is found that can be NULL but is not constrained to NULL.

:param state: current program state
:param s: Address of string
:return: Symbolic strlen result
:rtype: Expression or int
"""

cpu = state.cpu

if issymbolic(s):
raise ConcretizeArgument(state.cpu, 1)

zero_idx = _find_zero(cpu, state.constraints, s)
cpu = state.cpu
constrs = state.constraints

ret = zero_idx
# Initialize offset based on whether state has been forked in strlen
if "strlen" not in state.context:
offset = 0
else:
offset = state.context["strlen"]

for offset in range(zero_idx - 1, -1, -1):
byt = cpu.read_int(s + offset, 8)
if issymbolic(byt):
ret = ITEBV(cpu.address_bit_size, byt == 0, offset, ret)
c = cpu.read_int(s + offset, 8)
while not is_definitely_NULL(c, constrs):
# If the byte can be NULL concretize and fork states
if can_be_NULL(c, constrs):
state.context["strlen"] = offset
raise Concretize("Forking on possible NULL strlen", expression=(c == 0), policy="ALL")
sschriner marked this conversation as resolved.
Show resolved Hide resolved

return ret
offset += 1
c = cpu.read_int(s + offset, 8)

return offset

def is_definitely_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory is NULL.
This supports both concrete & symbolic byte values.

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or constrained to NULL
def strlen_approx(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
"""
if issymbolic(byte):
return SelectedSolver.instance().must_be_true(constrs, byte == 0)
else:
return byte == 0
strlen symbolic model

Strategy: build a result tree to limit state explosion results approximate

def cannot_be_NULL(byte, constrs) -> bool:
Algorithm: Walks from end of string not including NULL building ITE tree when current byte is symbolic.
:param state: current program state
:param s: Address of string
:return: Symbolic strlen result
"""
Checks if a given byte read from memory is not NULL or cannot be NULL

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is not NULL or cannot be NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().must_be_true(constrs, byte != 0)
else:
return byte != 0
if issymbolic(s):
raise ConcretizeArgument(state.cpu, 1)

cpu = state.cpu
zero_idx = _find_zero(cpu, state.constraints, s)

def can_be_NULL(byte, constrs) -> bool:
"""
Checks if a given byte read from memory can be NULL
ret = zero_idx

:param byte: byte read from memory to be examined
:param constrs: state constraints
:return: whether a given byte is NULL or can be NULL
"""
if issymbolic(byte):
return SelectedSolver.instance().can_be_true(constrs, byte == 0)
else:
return byte == 0
for offset in range(zero_idx - 1, -1, -1):
byt = cpu.read_int(s + offset, 8)
if issymbolic(byt):
ret = ITEBV(cpu.address_bit_size, byt == 0, offset, ret)

return ret


def strcpy(state: State, dst: Union[int, BitVec], src: Union[int, BitVec]) -> Union[int, BitVec]:
Expand All @@ -202,10 +234,10 @@ def strcpy(state: State, dst: Union[int, BitVec], src: Union[int, BitVec]) -> Un
:return: pointer to the dst
"""
if issymbolic(src):
raise ConcretizeArgument(state.cpu, 1)
raise ConcretizeArgument(state.cpu, 2)

if issymbolic(dst):
raise ConcretizeArgument(state.cpu, 0)
raise ConcretizeArgument(state.cpu, 1)
ekilmer marked this conversation as resolved.
Show resolved Hide resolved

cpu = state.cpu
constrs = state.constraints
Expand All @@ -222,7 +254,7 @@ def strcpy(state: State, dst: Union[int, BitVec], src: Union[int, BitVec]) -> Un
while not is_definitely_NULL(src_val, constrs):
cpu.write_int(dst + offset, src_val, 8)

# If a byte can be NULL set the src_val for concretize and fork states
# If a src byte can be NULL concretize and fork states
if can_be_NULL(src_val, constrs):
state.context["strcpy"] = offset
raise Concretize("Forking on NULL strcpy", expression=(src_val == 0), policy="ALL")
Expand Down
Binary file added tests/native/binaries/sym_strlen_test
Binary file not shown.
14 changes: 14 additions & 0 deletions tests/native/binaries/sym_strlen_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#define LEN 5

int main() {
char str[LEN];
read(0, str, sizeof(str));

int a = strlen(str);
printf("Length of string is: %d", a);

return 0;
}
83 changes: 70 additions & 13 deletions tests/native/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
variadic,
isvariadic,
strcmp,
strlen,
strlen_exact,
strlen_approx,
strcpy,
is_definitely_NULL,
cannot_be_NULL,
Expand Down Expand Up @@ -160,72 +161,128 @@ def test_symbolic_concrete(self):
class StrlenTest(ModelTest):
def test_concrete(self):
s = self._push_string("abc\0")
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertEqual(ret, 3)
ret = strlen_approx(self.state, s)
self.assertEqual(ret, 3)

def test_concrete_empty(self):
s = self._push_string("\0")
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertEqual(ret, 0)
ret = strlen_approx(self.state, s)
self.assertEqual(ret, 0)

def test_symbolic_effective_null(self):
sy = self.state.symbolicate_buffer("ab+")
self.state.constrain(sy[2] == 0)
s = self._push_string(sy)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertEqual(ret, 2)
ret = strlen_approx(self.state, s)
self.assertEqual(ret, 2)

def test_symbolic(self):
def test_symbolic_no_fork(self):
sy = self.state.symbolicate_buffer("+++\0")
s = self._push_string(sy)

ret = strlen(self.state, s)
ret = strlen_approx(self.state, s)
self.assertItemsEqual(
range(4), Z3Solver.instance().get_all_values(self.state.constraints, ret)
)

self.state.constrain(sy[0] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 0))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 0))
self._clear_constraints()

self.state.constrain(sy[0] != 0)
self.state.constrain(sy[1] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 1))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 1))
self._clear_constraints()

self.state.constrain(sy[0] != 0)
self.state.constrain(sy[1] != 0)
self.state.constrain(sy[2] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 2))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 2))
self._clear_constraints()

self.state.constrain(sy[0] != 0)
self.state.constrain(sy[1] != 0)
self.state.constrain(sy[2] != 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 3))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 3))

def test_symbolic_fork(self):
# This binary is compiled using gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
# with flags: -g -static -fno-builtin
BIN_PATH = os.path.join(os.path.dirname(__file__), "binaries", "sym_strlen_test")
tmp_dir = tempfile.TemporaryDirectory(prefix="mcore_test_sym_")
m = Manticore(BIN_PATH, stdin_size=10, workspace_url=str(tmp_dir.name))

addr_of_strlen = 0x04404D0

@m.hook(addr_of_strlen)
def strlen_model(state):
state.invoke_model(strlen_exact)

m.run()
m.finalize()

# Expected stdout outputs
expected = {
"Length of string is: 0",
"Length of string is: 1",
"Length of string is: 2",
"Length of string is: 3",
"Length of string is: 4",
"Length of string is: 5",
}

# Make a list of the generated output states
outputs = f"{str(m.workspace)}/test_*.stdout"
stdouts = set()
for out in glob(outputs):
with open(out) as f:
stdouts.add(f.read())

# Assert that every expected stdout has a matching output
self.assertEqual(expected, stdouts)

def test_symbolic_mixed(self):
sy = self.state.symbolicate_buffer("a+b+\0")
s = self._push_string(sy)

self.state.constrain(sy[1] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 1))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 1))
self._clear_constraints()

self.state.constrain(sy[1] != 0)
self.state.constrain(sy[3] == 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 3))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 3))
self._clear_constraints()

self.state.constrain(sy[1] != 0)
self.state.constrain(sy[3] != 0)
ret = strlen(self.state, s)
ret = strlen_exact(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 4))
ret = strlen_approx(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 4))


Expand Down