Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New working model of strlen #1725

Merged
merged 11 commits into from
Jul 7, 2020
42 changes: 25 additions & 17 deletions manticore/native/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def strcmp(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]):
been tracking and just replace it with the symbolic subtraction of
the two

:param State state: Current program state
:param state: Current program state
:param s1: Address of string 1
:param s2: Address of string 2
:return: Symbolic strcmp result
Expand Down Expand Up @@ -116,33 +116,41 @@ def strcmp(state: State, s1: Union[int, BitVec], s2: Union[int, BitVec]):
return ret


def strlen(state: State, s: Union[int, BitVec]) -> Union[int, BitVec]:
def strlen(state: State, addr: Union[int, BitVec]) -> Union[int, BitVec]:
"""
strlen symbolic model.
strlen symbolic model

Algorithm: Walks from end of string not including NULL building ITE tree when current byte is symbolic.
Algorithm: Counts the number of characters in a string forking every time a symbolic byte
is found that can be NULL but is not constrained to NULL.

:param State state: current program state
:param state: current program state
:param s: Address of string
:return: Symbolic strlen result
:rtype: Expression or int
"""

cpu = state.cpu
if issymbolic(addr):
raise ConcretizeArgument(state.cpu, 0)
ekilmer marked this conversation as resolved.
Show resolved Hide resolved

if issymbolic(s):
raise ConcretizeArgument(state.cpu, 1)
cpu = state.cpu
constrs = state.constraints

zero_idx = _find_zero(cpu, state.constraints, s)
# Initialize offset based on whether state has been forked in strlen
if "strlen" not in state.context:
offset = 0
else:
offset = state.context["strlen"]

ret = zero_idx
c = cpu.read_int(addr + offset, 8)
while not is_definitely_NULL(c, constrs):
# If the byte can be NULL concretize and fork states
if can_be_NULL(c, constrs):
state.context["strlen"] = offset
raise Concretize("Forking on possible NULL strlen", expression=(c == 0), policy="ALL")
sschriner marked this conversation as resolved.
Show resolved Hide resolved

for offset in range(zero_idx - 1, -1, -1):
byt = cpu.read_int(s + offset, 8)
if issymbolic(byt):
ret = ITEBV(cpu.address_bit_size, byt == 0, offset, ret)
offset += 1
c = cpu.read_int(addr + offset, 8)

return ret
return offset


def is_definitely_NULL(byte, constrs) -> bool:
Expand Down Expand Up @@ -222,7 +230,7 @@ def strcpy(state: State, dst: Union[int, BitVec], src: Union[int, BitVec]) -> Un
while not is_definitely_NULL(src_val, constrs):
cpu.write_int(dst + offset, src_val, 8)

# If a byte can be NULL set the src_val for concretize and fork states
# If a src byte can be NULL concretize and fork states
if can_be_NULL(src_val, constrs):
state.context["strcpy"] = offset
raise Concretize("Forking on NULL strcpy", expression=(src_val == 0), policy="ALL")
Expand Down
Binary file added tests/native/binaries/sym_strlen_test
Binary file not shown.
14 changes: 14 additions & 0 deletions tests/native/binaries/sym_strlen_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#define LEN 5

int main() {
char str[LEN];
read(0, str, sizeof(str));

int a = strlen(str);
printf("Length of string is: %d", a);

return 0;
}
52 changes: 46 additions & 6 deletions tests/native/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,15 +175,10 @@ def test_symbolic_effective_null(self):
ret = strlen(self.state, s)
self.assertEqual(ret, 2)

def test_symbolic(self):
def test_symbolic_no_fork(self):
sy = self.state.symbolicate_buffer("+++\0")
s = self._push_string(sy)

ret = strlen(self.state, s)
self.assertItemsEqual(
range(4), Z3Solver.instance().get_all_values(self.state.constraints, ret)
)

self.state.constrain(sy[0] == 0)
ret = strlen(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 0))
Expand All @@ -208,6 +203,51 @@ def test_symbolic(self):
ret = strlen(self.state, s)
self.assertTrue(self.state.must_be_true(ret == 3))

def test_symbolic_fork(self):
# This binary is compiled using gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
# with flags: -g -static -fno-builtin
BIN_PATH = os.path.join(os.path.dirname(__file__), "binaries", "sym_strlen_test")
tmp_dir = tempfile.TemporaryDirectory(prefix="mcore_test_sym_")
m = Manticore(BIN_PATH, stdin_size=10, workspace_url=str(tmp_dir.name))

addr_of_strlen = 0x04404D0

@m.hook(addr_of_strlen)
def strlen_model(state):
state.invoke_model(strlen)

m.run()
m.finalize()

# Expected stdout outputs
expected = [
"Length of string is: 0",
"Length of string is: 1",
"Length of string is: 2",
"Length of string is: 3",
"Length of string is: 4",
"Length of string is: 5",
]

# Make a list of the generated output states
outputs = f"{str(m.workspace)}/test_*.stdout"
stdouts = []
for out in glob(outputs):
with open(out) as f:
stdouts.append(f.read())

# Check the number of output states is the number of expected stdouts
self.assertEqual(len(stdouts), len(expected))

# Assert that every expected stdout has a matching output
for e in expected:
match = False
for s in stdouts:
if e == s:
match = True
break
self.assertTrue(match)
ekilmer marked this conversation as resolved.
Show resolved Hide resolved

def test_symbolic_mixed(self):
sy = self.state.symbolicate_buffer("a+b+\0")
s = self._push_string(sy)
Expand Down