From df0456bf7cf21560602e404fbbfd968cc51d7501 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Thu, 26 Mar 2020 16:19:21 -0400 Subject: [PATCH 01/16] Add an implementation of sys_llseek for Linux --- manticore/platforms/linux.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 8aef0aace..ae4557d02 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1349,6 +1349,18 @@ def sys_lseek(self, fd, offset, whence): ) return -e.err + def sys_llseek(self, fd, offset_high, offset_low, resultp, whence): + signed_offset_high = self._to_signed_dword(offset_high) + signed_offset_low = self._to_signed_dword(offset_low) + signed_offset = (signed_offset_high << 32) | signed_offset_low + try: + return self._get_fd(fd).seek(signed_offset, whence) + except FdError as e: + logger.info( + "LSEEK: Not valid file descriptor on llseek. Fd not seekable. Returning EBADF" + ) + return -e.err + def sys_read(self, fd, buf, count): data: bytes = bytes() if count != 0: From aedb8428d105920497838f592c79807faba1f369 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Thu, 26 Mar 2020 16:19:33 -0400 Subject: [PATCH 02/16] Reformat a couple funny string literals --- manticore/platforms/linux.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index ae4557d02..942471d70 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1345,7 +1345,7 @@ def sys_lseek(self, fd, offset, whence): return self._get_fd(fd).seek(signed_offset, whence) except FdError as e: logger.info( - ("LSEEK: Not valid file descriptor on lseek." "Fd not seekable. Returning EBADF") + "LSEEK: Not valid file descriptor on lseek. Fd not seekable. Returning EBADF" ) return -e.err @@ -1373,7 +1373,7 @@ def sys_read(self, fd, buf, count): # Read the data and put it in memory data = self._get_fd(fd).read(count) except FdError as e: - logger.info(("READ: Not valid file descriptor on read." " Returning EBADF")) + logger.info("READ: Not valid file descriptor on read. Returning EBADF") return -e.err self.syscall_trace.append(("_read", fd, data)) self.current.write_bytes(buf, data) From c0983dc7d33f52e0f9b69f13c2b144de7bf1f256 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Thu, 26 Mar 2020 16:28:27 -0400 Subject: [PATCH 03/16] Add a docstring for `sys_llseek` This was taken mainly from the manpage. --- manticore/platforms/linux.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 942471d70..c51a75562 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1338,7 +1338,6 @@ def sys_lseek(self, fd, offset, whence): SEEK_END: The file offset is set to the size of the file plus offset bytes. :return: offset from file beginning, or EBADF (fd is not a valid file descriptor or is not open) - """ signed_offset = self._to_signed_dword(offset) try: @@ -1350,6 +1349,28 @@ def sys_lseek(self, fd, offset, whence): return -e.err def sys_llseek(self, fd, offset_high, offset_low, resultp, whence): + """ + _llseek - reposition read/write file offset + + The _llseek() system call repositions the offset of the open + file description associated with the file descriptor fd to + (offset_high<<32) | offset_low bytes relative to the beginning of the + file, the current file offset, or the end of the file, depending on + whether whence is SEEK_SET, SEEK_CUR, or SEEK_END, respectively. It + returns the resulting file position in the argument result. + + This system call exists on various 32-bit platforms to support seeking + to large file offsets. + + :param fd: a valid file descriptor + :param offset_high: the high 32 bits of the byte offset + :param offset_low: the low 32 bits of the byte offset + :param whence: SEEK_SET: The file offset is set to offset bytes. + SEEK_CUR: The file offset is set to its current location plus offset bytes. + SEEK_END: The file offset is set to the size of the file plus offset bytes. + + :return: offset from file beginning, or EBADF (fd is not a valid file descriptor or is not open) + """ signed_offset_high = self._to_signed_dword(offset_high) signed_offset_low = self._to_signed_dword(offset_low) signed_offset = (signed_offset_high << 32) | signed_offset_low From 98a65d25c356467896a5579621a2a28b068ea074 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Fri, 27 Mar 2020 15:22:08 -0400 Subject: [PATCH 04/16] Fix a broken log format string --- manticore/platforms/linux.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index c51a75562..290a76c1b 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -238,10 +238,7 @@ def __init__(self, constraints, path="sfile", mode="rw", max_size=100, wildcard= if symbols_cnt > max_size: logger.warning( - ( - "Found more wildcards in the file than free ", - "symbolic values allowed (%d > %d)", - ), + "Found more wildcards in the file than free symbolic values allowed (%d > %d)", symbols_cnt, max_size, ) From 5f3bc51d2dd63bcf87177ebee2cf54dad85ed54b Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Fri, 27 Mar 2020 15:24:22 -0400 Subject: [PATCH 05/16] Improve logging & exception messages in linux platform --- manticore/platforms/linux.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 290a76c1b..95cf4e68b 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1228,7 +1228,7 @@ def _close(self, fd): ) # Keep track for SymbolicFile testcase generation self.files[fd] = None except IndexError: - raise FdError(f"Bad file descriptor ({fd})") + raise FdError(f"Bad file descriptor ({fd})", os.EBADF) def _dup(self, fd): """ @@ -1248,7 +1248,7 @@ def _is_fd_open(self, fd): def _get_fd(self, fd): if not self._is_fd_open(fd): - raise FdError + raise FdError(f"File descriptor is not open", errno.EBADF) else: return self.files[fd] @@ -1304,13 +1304,13 @@ def sys_getcwd(self, buf, size): if size > 0 and size < length: logger.info( - "GETCWD: size is greater than 0, but is smaller than the length" - "of the path + 1. Returning ERANGE" + "GETCWD: size is greater than 0, but is smaller than the length " + "of the path + 1. Returning -errno.ERANGE" ) return -errno.ERANGE if not self.current.memory.access_ok(slice(buf, buf + length), "w"): - logger.info("GETCWD: buf within invalid memory. Returning EFAULT") + logger.info("GETCWD: buf within invalid memory. Returning -errno.EFAULT") return -errno.EFAULT self.current.write_string(buf, current_dir) @@ -1341,7 +1341,7 @@ def sys_lseek(self, fd, offset, whence): return self._get_fd(fd).seek(signed_offset, whence) except FdError as e: logger.info( - "LSEEK: Not valid file descriptor on lseek. Fd not seekable. Returning EBADF" + f"LSEEK: Not valid file descriptor on lseek. Fd not seekable. Returning {-e.err}" ) return -e.err @@ -1384,14 +1384,14 @@ def sys_read(self, fd, buf, count): if count != 0: # TODO check count bytes from buf if buf not in self.current.memory: # or not self.current.memory.isValid(buf+count): - logger.info("READ: buf points to invalid address. Returning EFAULT") + logger.info("READ: buf points to invalid address. Returning -errno.EFAULT") return -errno.EFAULT try: # Read the data and put it in memory data = self._get_fd(fd).read(count) except FdError as e: - logger.info("READ: Not valid file descriptor on read. Returning EBADF") + logger.info(f"READ: Not valid file descriptor ({fd}). Returning -{e.err}") return -e.err self.syscall_trace.append(("_read", fd, data)) self.current.write_bytes(buf, data) @@ -1422,7 +1422,7 @@ def sys_write(self, fd, buf, count): # TODO check count bytes from buf if buf not in cpu.memory or buf + count not in cpu.memory: - logger.debug("WRITE: buf points to invalid address. Returning EFAULT") + logger.debug("WRITE: buf points to invalid address. Returning -errno.EFAULT") return -errno.EFAULT if fd > 2 and write_fd.is_full(): @@ -1608,11 +1608,11 @@ def sys_openat(self, dirfd, buf, flags, mode): try: dir_entry = self._get_fd(dirfd) except FdError as e: - logger.info("openat: Not valid file descriptor. Returning EBADF") + logger.info(f"openat: Not valid file descriptor. Returning {-e.err}") return -e.err if not isinstance(dir_entry, Directory): - logger.info("openat: Not directory descriptor. Returning ENOTDIR") + logger.info("openat: Not directory descriptor. Returning -errno.ENOTDIR") return -errno.ENOTDIR dir_path = dir_entry.name @@ -1704,7 +1704,7 @@ def sys_dup(self, fd): """ if not self._is_fd_open(fd): - logger.info("DUP: Passed fd is not open. Returning EBADF") + logger.info(f"DUP: Passed fd is not open ({fd}). Returning -errno.EBADF") return -errno.EBADF newfd = self._dup(fd) @@ -1721,12 +1721,12 @@ def sys_dup2(self, fd, newfd): try: file = self._get_fd(fd) except FdError as e: - logger.info("DUP2: Passed fd is not open. Returning EBADF") + logger.info("DUP2: fd ({fd}) is not open. Returning {-e.err}") return -e.err soft_max, hard_max = self._rlimits[self.RLIMIT_NOFILE] if newfd >= soft_max: - logger.info("DUP2: newfd is above max descriptor table size") + logger.info(f"DUP2: newfd ({newfd}) is above max descriptor table size") return -errno.EBADF if self._is_fd_open(newfd): @@ -2166,7 +2166,7 @@ def sys_accept4(self, sockfd, addr, addrlen, flags): def sys_recv(self, sockfd, buf, count, flags, trace_str="_recv"): data: bytes = bytes() if not self.current.memory.access_ok(slice(buf, buf + count), "w"): - logger.info("RECV: buf within invalid memory. Returning EFAULT") + logger.info("RECV: buf within invalid memory. Returning -errno.EFAULT") return -errno.EFAULT try: @@ -2246,7 +2246,7 @@ def sys_getrandom(self, buf, size, flags): return 0 if buf not in self.current.memory: - logger.info("getrandom: Provided an invalid address. Returning EFAULT") + logger.info("getrandom: Provided an invalid address. Returning -errno.EFAULT") return -errno.EFAULT if flags & ~(GRND_NONBLOCK | GRND_RANDOM): @@ -2539,7 +2539,7 @@ def sys_fstat(self, fd, buf): try: stat = self._get_fd(fd).stat() except FdError as e: - logger.info("Calling fstat with invalid fd, returning EBADF") + logger.info(f"Calling fstat with invalid fd, returning {-e.err}") return -e.err def add(width, val): @@ -2582,7 +2582,7 @@ def sys_fstat64(self, fd, buf): try: stat = self._get_fd(fd).stat() except FdError as e: - logger.info("Calling fstat with invalid fd, returning EBADF") + logger.info(f"Calling fstat with invalid fd, returning {-e.err}") return -e.err def add(width, val): From cb3a9249a3d0f1d83762f5804233207206a35a3d Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Fri, 27 Mar 2020 15:25:49 -0400 Subject: [PATCH 06/16] Update docstring for sys_lseek --- manticore/platforms/linux.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 95cf4e68b..320d29bd8 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1330,9 +1330,9 @@ def sys_lseek(self, fd, offset, whence): :param fd: a valid file descriptor :param offset: the offset in bytes - :param whence: SEEK_SET: The file offset is set to offset bytes. - SEEK_CUR: The file offset is set to its current location plus offset bytes. - SEEK_END: The file offset is set to the size of the file plus offset bytes. + :param whence: os.SEEK_SET: The file offset is set to offset bytes. + os.SEEK_CUR: The file offset is set to its current location plus offset bytes. + os.SEEK_END: The file offset is set to the size of the file plus offset bytes. :return: offset from file beginning, or EBADF (fd is not a valid file descriptor or is not open) """ From ee77f23dc73ffe635516dc1720382fcf10c3a2d3 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Fri, 27 Mar 2020 15:28:44 -0400 Subject: [PATCH 07/16] Remove redundant space in mmap mode strings --- tests/native/test_syscalls.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/native/test_syscalls.py b/tests/native/test_syscalls.py index 9d3f3e97b..c199050d2 100644 --- a/tests/native/test_syscalls.py +++ b/tests/native/test_syscalls.py @@ -27,7 +27,7 @@ def tearDown(self): f.close() def test_time(self): - self.linux.current.memory.mmap(0x1000, 0x1000, "rw ") + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") time_0 = self.linux.sys_time(0) self.linux.sys_clock_gettime(1, 0x1100) @@ -52,7 +52,7 @@ def test_time(self): def test_directories(self): tmpdir = get_random_filename() - self.linux.current.memory.mmap(0x1000, 0x1000, "rw ") + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") self.linux.current.write_string(0x1100, tmpdir) self.assertFalse(os.path.exists(tmpdir)) @@ -62,7 +62,7 @@ def test_directories(self): self.assertFalse(os.path.exists(tmpdir)) def test_pipe(self): - self.linux.current.memory.mmap(0x1000, 0x1000, "rw ") + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") self.linux.sys_pipe(0x1100) fd1 = self.linux.current.read_int(0x1100, 8 * 4) @@ -80,7 +80,7 @@ def test_pipe(self): def test_ftruncate(self): fname = get_random_filename() - self.linux.current.memory.mmap(0x1000, 0x1000, "rw ") + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") self.linux.current.write_string(0x1100, fname) fd = self.linux.sys_open(0x1100, os.O_RDWR, 0o777) @@ -101,7 +101,7 @@ def test_ftruncate(self): def test_link(self): fname = get_random_filename() newname = get_random_filename() - self.linux.current.memory.mmap(0x1000, 0x1000, "rw ") + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") self.linux.current.write_string(0x1100, fname) self.linux.current.write_string(0x1180, newname) @@ -127,7 +127,7 @@ def test_link(self): def test_chmod(self): fname = get_random_filename() - self.linux.current.memory.mmap(0x1000, 0x1000, "rw ") + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") self.linux.current.write_string(0x1100, fname) print("Creating", fname) @@ -143,7 +143,7 @@ def test_chmod(self): self.assertEqual(-errno.EPERM, self.linux.sys_chown(0x1100, 0, 0)) def test_recvfrom(self): - self.linux.current.memory.mmap(0x1000, 0x1000, "rw ") + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") sock_fd = self.linux.sys_socket(socket.AF_INET, socket.SOCK_STREAM, 0) self.assertEqual(sock_fd, 3) From 99cd6ba514b70c5fd41373a93d5ca8532babb456 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Fri, 27 Mar 2020 15:29:47 -0400 Subject: [PATCH 08/16] Clean up temp files after linux platform tests --- tests/native/test_syscalls.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tests/native/test_syscalls.py b/tests/native/test_syscalls.py index c199050d2..7372d41a0 100644 --- a/tests/native/test_syscalls.py +++ b/tests/native/test_syscalls.py @@ -1,5 +1,6 @@ import random import socket +import tempfile import unittest import os @@ -10,21 +11,24 @@ from manticore.platforms.platform import SyscallNotImplemented -def get_random_filename(): - return f"/tmp/mcore_test_{int(random.getrandbits(32))}" - - class LinuxTest(unittest.TestCase): _multiprocess_can_split_ = True BIN_PATH = os.path.join(os.path.dirname(__file__), "binaries", "basic_linux_amd64") def setUp(self): + self.tmp_dir = tempfile.TemporaryDirectory(prefix='mcore_test_') self.linux = linux.SLinux(self.BIN_PATH) def tearDown(self): for f in self.linux.files: if isinstance(f, linux.File): f.close() + self.tmp_dir.cleanup() + + def get_path(self, basename: str) -> str: + "Returns an absolute path with the given basename" + return f"{self.tmp_dir.name}/{basename}" + def test_time(self): self.linux.current.memory.mmap(0x1000, 0x1000, "rw") @@ -50,16 +54,16 @@ def test_time(self): self.assertGreater(time_2_final, time_2_0, "Time did not increase!") def test_directories(self): - tmpdir = get_random_filename() + dname = self.get_path("test_directories") self.linux.current.memory.mmap(0x1000, 0x1000, "rw") - self.linux.current.write_string(0x1100, tmpdir) + self.linux.current.write_string(0x1100, dname) - self.assertFalse(os.path.exists(tmpdir)) + self.assertFalse(os.path.exists(dname)) self.linux.sys_mkdir(0x1100, mode=0o777) - self.assertTrue(os.path.exists(tmpdir)) + self.assertTrue(os.path.exists(dname)) self.linux.sys_rmdir(0x1100) - self.assertFalse(os.path.exists(tmpdir)) + self.assertFalse(os.path.exists(dname)) def test_pipe(self): self.linux.current.memory.mmap(0x1000, 0x1000, "rw") @@ -79,7 +83,7 @@ def test_pipe(self): ) def test_ftruncate(self): - fname = get_random_filename() + fname = self.get_path("test_ftruncate") self.linux.current.memory.mmap(0x1000, 0x1000, "rw") self.linux.current.write_string(0x1100, fname) @@ -99,8 +103,8 @@ def test_ftruncate(self): ) def test_link(self): - fname = get_random_filename() - newname = get_random_filename() + fname = self.get_path("test_link_from") + newname = self.get_path("test_link_to") self.linux.current.memory.mmap(0x1000, 0x1000, "rw") self.linux.current.write_string(0x1100, fname) self.linux.current.write_string(0x1180, newname) @@ -126,7 +130,7 @@ def test_link(self): self.assertFalse(os.path.exists(newname)) def test_chmod(self): - fname = get_random_filename() + fname = self.get_path("test_chmod") self.linux.current.memory.mmap(0x1000, 0x1000, "rw") self.linux.current.write_string(0x1100, fname) From cdbc3e49c04e9f685d20382b89328d4740415fc0 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Fri, 27 Mar 2020 15:30:18 -0400 Subject: [PATCH 09/16] Add some tests for the `sys_lseek` --- tests/native/test_syscalls.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/native/test_syscalls.py b/tests/native/test_syscalls.py index 7372d41a0..4dc2a0c2b 100644 --- a/tests/native/test_syscalls.py +++ b/tests/native/test_syscalls.py @@ -194,6 +194,39 @@ def test_multiple_sockets(self): conn_fd = self.linux.sys_accept(sock_fd, None, 0) self.assertEqual(conn_fd, 4) + def test_lseek(self): + fname = self.get_path("test_lseek") + assert len(fname) < 0x100 + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") + self.linux.current.write_string(0x1100, fname) + + fd = self.linux.sys_open(0x1100, os.O_RDWR, 0o777) + buf = b"1" * 1000 + self.linux.current.write_bytes(0x1200, buf) + self.linux.sys_write(fd, 0x1200, len(buf)) + + pos = self.linux.sys_lseek(fd, 100, os.SEEK_SET) + self.assertEqual(100, pos) + + pos = self.linux.sys_lseek(fd, -50, os.SEEK_CUR) + self.assertEqual(50, pos) + + pos = self.linux.sys_lseek(fd, 50, os.SEEK_CUR) + self.assertEqual(100, pos) + + pos = self.linux.sys_lseek(fd, 0, os.SEEK_END) + self.assertEqual(1000, pos) + + pos = self.linux.sys_lseek(fd, -50, os.SEEK_END) + self.assertEqual(950, pos) + + pos = self.linux.sys_lseek(fd, 50, os.SEEK_END) + self.assertEqual(1050, pos) + + self.linux.sys_close(fd) + pos = self.linux.sys_lseek(fd, 0, os.SEEK_SET) + self.assertEqual(-errno.EBADF, pos) + def test_unimplemented(self): stubs = linux_syscall_stubs.SyscallStubs(default_to_fail=False) From 2b3a8f5eca4949db1b705702a6585da9a7a66826 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Fri, 27 Mar 2020 15:31:20 -0400 Subject: [PATCH 10/16] blacken tests/native/test_syscalls.py --- tests/native/test_syscalls.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/native/test_syscalls.py b/tests/native/test_syscalls.py index 4dc2a0c2b..7e5fedf46 100644 --- a/tests/native/test_syscalls.py +++ b/tests/native/test_syscalls.py @@ -16,7 +16,7 @@ class LinuxTest(unittest.TestCase): BIN_PATH = os.path.join(os.path.dirname(__file__), "binaries", "basic_linux_amd64") def setUp(self): - self.tmp_dir = tempfile.TemporaryDirectory(prefix='mcore_test_') + self.tmp_dir = tempfile.TemporaryDirectory(prefix="mcore_test_") self.linux = linux.SLinux(self.BIN_PATH) def tearDown(self): @@ -29,7 +29,6 @@ def get_path(self, basename: str) -> str: "Returns an absolute path with the given basename" return f"{self.tmp_dir.name}/{basename}" - def test_time(self): self.linux.current.memory.mmap(0x1000, 0x1000, "rw") From 7b6f3aea547595aaed4cb2868cfd6c5bc6b22445 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Fri, 27 Mar 2020 19:53:26 -0400 Subject: [PATCH 11/16] Fix implementation of sys_llseek; expand & refine tests --- manticore/platforms/linux.py | 21 +++++--- tests/native/test_syscalls.py | 97 +++++++++++++++++++++++++++++++++-- 2 files changed, 107 insertions(+), 11 deletions(-) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 320d29bd8..c4e513ea7 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1353,8 +1353,9 @@ def sys_llseek(self, fd, offset_high, offset_low, resultp, whence): file description associated with the file descriptor fd to (offset_high<<32) | offset_low bytes relative to the beginning of the file, the current file offset, or the end of the file, depending on - whether whence is SEEK_SET, SEEK_CUR, or SEEK_END, respectively. It - returns the resulting file position in the argument result. + whether whence is os.SEEK_SET, os.SEEK_CUR, or os.SEEK_END, + respectively. It returns the resulting file position in the argument + result. This system call exists on various 32-bit platforms to support seeking to large file offsets. @@ -1362,20 +1363,24 @@ def sys_llseek(self, fd, offset_high, offset_low, resultp, whence): :param fd: a valid file descriptor :param offset_high: the high 32 bits of the byte offset :param offset_low: the low 32 bits of the byte offset - :param whence: SEEK_SET: The file offset is set to offset bytes. - SEEK_CUR: The file offset is set to its current location plus offset bytes. - SEEK_END: The file offset is set to the size of the file plus offset bytes. + :param resultp: a pointer to write the position into on success + :param whence: os.SEEK_SET: The file offset is set to offset bytes. + os.SEEK_CUR: The file offset is set to its current location plus offset bytes. + os.SEEK_END: The file offset is set to the size of the file plus offset bytes. - :return: offset from file beginning, or EBADF (fd is not a valid file descriptor or is not open) + :return: 0 on success, negative on error """ signed_offset_high = self._to_signed_dword(offset_high) signed_offset_low = self._to_signed_dword(offset_low) signed_offset = (signed_offset_high << 32) | signed_offset_low try: - return self._get_fd(fd).seek(signed_offset, whence) + pos = self._get_fd(fd).seek(signed_offset, whence) + posbuf = struct.pack("q", pos) # `loff_t * resultp` in linux, which is `long long` + self.current.write_bytes(resultp, posbuf) + return 0 except FdError as e: logger.info( - "LSEEK: Not valid file descriptor on llseek. Fd not seekable. Returning EBADF" + f"LSEEK: Not valid file descriptor on llseek. Fd not seekable. Returning {-e.err}" ) return -e.err diff --git a/tests/native/test_syscalls.py b/tests/native/test_syscalls.py index 7e5fedf46..d0aa5b9d6 100644 --- a/tests/native/test_syscalls.py +++ b/tests/native/test_syscalls.py @@ -1,4 +1,5 @@ import random +import struct import socket import tempfile import unittest @@ -201,6 +202,7 @@ def test_lseek(self): fd = self.linux.sys_open(0x1100, os.O_RDWR, 0o777) buf = b"1" * 1000 + self.assertEqual(len(buf), 1000) self.linux.current.write_bytes(0x1200, buf) self.linux.sys_write(fd, 0x1200, len(buf)) @@ -214,18 +216,107 @@ def test_lseek(self): self.assertEqual(100, pos) pos = self.linux.sys_lseek(fd, 0, os.SEEK_END) - self.assertEqual(1000, pos) + self.assertEqual(len(buf), pos) pos = self.linux.sys_lseek(fd, -50, os.SEEK_END) - self.assertEqual(950, pos) + self.assertEqual(len(buf) - 50, pos) pos = self.linux.sys_lseek(fd, 50, os.SEEK_END) - self.assertEqual(1050, pos) + self.assertEqual(len(buf) + 50, pos) self.linux.sys_close(fd) pos = self.linux.sys_lseek(fd, 0, os.SEEK_SET) self.assertEqual(-errno.EBADF, pos) + @unittest.expectedFailure + def test_lseek_end_broken(self): + fname = self.get_path("test_lseek") + assert len(fname) < 0x100 + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") + self.linux.current.write_string(0x1100, fname) + + fd = self.linux.sys_open(0x1100, os.O_RDWR, 0o777) + buf = b"1" * 1000 + self.assertEqual(len(buf), 1000) + self.linux.current.write_bytes(0x1200, buf) + self.linux.sys_write(fd, 0x1200, len(buf)) + + # FIXME: currently broken -- raises a Python OSError invalid argument exception! + pos = self.linux.sys_lseek(fd, -2 * len(buf), os.SEEK_END) + self.assertEqual(-errno.EBADF, pos) + + def test_llseek(self): + fname = self.get_path("test_llseek") + assert len(fname) < 0x100 + # map some memory we can play with + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") + # open a file descriptor for `fname` + self.linux.current.write_string(0x1100, fname) + fd = self.linux.sys_open(0x1100, os.O_RDWR, 0o777) + # write some bogus data to the file + buf = b"1" * 1000 + self.assertEqual(len(buf), 1000) + self.linux.current.write_bytes(0x1200, buf) + self.linux.sys_write(fd, 0x1200, len(buf)) + + # set up a location & some helpers for the result pointer for `sys_llseek` + result_struct = struct.Struct("q") + resultp = 0x1900 + result_size = result_struct.size + + def read_resultp(): + "reads the `loff_t` value -- a long long -- from the result pointer" + data = self.linux.current.read_bytes(resultp, result_struct.size) + return result_struct.unpack(b"".join(data))[0] + + # now actually test some things about sys_llseek + res = self.linux.sys_llseek(fd, 0, 100, resultp, os.SEEK_SET) + self.assertEqual(res, 0) + self.assertEqual(read_resultp(), 100) + + res = self.linux.sys_llseek(fd, 1, 0, resultp, os.SEEK_CUR) + self.assertEqual(res, 0) + self.assertEqual(read_resultp(), 4294967396) + + res = self.linux.sys_llseek(fd, 0, -1000, resultp, os.SEEK_CUR) + self.assertEqual(res, 0) + self.assertEqual(read_resultp(), 4294966396) + + res = self.linux.sys_llseek(fd, 0, 0, resultp, os.SEEK_END) + self.assertEqual(res, 0) + self.assertEqual(read_resultp(), len(buf)) + + res = self.linux.sys_llseek(fd, 0, 50, resultp, os.SEEK_END) + self.assertEqual(res, 0) + self.assertEqual(read_resultp(), len(buf) + 50) + + res = self.linux.sys_llseek(fd, 0, -50, resultp, os.SEEK_END) + self.assertEqual(res, 0) + self.assertEqual(read_resultp(), len(buf) - 50) + + self.linux.sys_close(fd) + res = self.linux.sys_llseek(fd, 0, 0, resultp, os.SEEK_SET) + self.assertEqual(-errno.EBADF, res) + + @unittest.expectedFailure + def test_llseek_end_broken(self): + fname = self.get_path("test_llseek_end_broken") + assert len(fname) < 0x100 + # map some memory we can play with + self.linux.current.memory.mmap(0x1000, 0x1000, "rw") + # open a file descriptor for `fname` + self.linux.current.write_string(0x1100, fname) + fd = self.linux.sys_open(0x1100, os.O_RDWR, 0o777) + # write some bogus data to the file + buf = b"1" * 1000 + self.assertEqual(len(buf), 1000) + self.linux.current.write_bytes(0x1200, buf) + self.linux.sys_write(fd, 0x1200, len(buf)) + + # FIXME: currently broken -- raises a Python OSError invalid argument exception! + res = self.linux.sys_llseek(fd, 0, -2 * len(buf), resultp, os.SEEK_END) + self.assertTrue(res < 0) + def test_unimplemented(self): stubs = linux_syscall_stubs.SyscallStubs(default_to_fail=False) From 13690ad5b7546393be9bbdfb0aab33e860af881b Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Fri, 27 Mar 2020 19:59:33 -0400 Subject: [PATCH 12/16] blacken manticore/platforms/linux.py --- manticore/platforms/linux.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index c4e513ea7..89e67729d 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1375,7 +1375,7 @@ def sys_llseek(self, fd, offset_high, offset_low, resultp, whence): signed_offset = (signed_offset_high << 32) | signed_offset_low try: pos = self._get_fd(fd).seek(signed_offset, whence) - posbuf = struct.pack("q", pos) # `loff_t * resultp` in linux, which is `long long` + posbuf = struct.pack("q", pos) # `loff_t * resultp` in linux, which is `long long` self.current.write_bytes(resultp, posbuf) return 0 except FdError as e: From bd28600ef35cdef06ab9522d4911a07350df3777 Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Mon, 30 Mar 2020 15:01:49 -0400 Subject: [PATCH 13/16] Add some type signatures --- manticore/native/cpu/abstractcpu.py | 2 +- manticore/platforms/linux.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/manticore/native/cpu/abstractcpu.py b/manticore/native/cpu/abstractcpu.py index 2cf89175e..5237a40e5 100644 --- a/manticore/native/cpu/abstractcpu.py +++ b/manticore/native/cpu/abstractcpu.py @@ -726,7 +726,7 @@ def read_int(self, where, size=None, force=False): self._publish("did_read_memory", where, value, size) return value - def write_bytes(self, where, data, force=False): + def write_bytes(self, where: int, data, force: bool = False) -> None: """ Write a concrete or symbolic (or mixed) buffer to memory diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 89e67729d..7cdd7fdd0 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1320,7 +1320,7 @@ def sys_getcwd(self, buf, size): except OSError as e: return -e.errno - def sys_lseek(self, fd, offset, whence): + def sys_lseek(self, fd: File, offset: int, whence: int) -> int: """ lseek - reposition read/write file offset @@ -1345,7 +1345,9 @@ def sys_lseek(self, fd, offset, whence): ) return -e.err - def sys_llseek(self, fd, offset_high, offset_low, resultp, whence): + def sys_llseek( + self, fd: File, offset_high: int, offset_low: int, resultp: int, whence: int + ) -> int: """ _llseek - reposition read/write file offset From 779276bf7d6e0f04a8d82d910e015e4bf918e66d Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Mon, 30 Mar 2020 15:15:30 -0400 Subject: [PATCH 14/16] Fix type signatures; add more signatures --- manticore/platforms/linux.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 7cdd7fdd0..7e004388e 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1198,7 +1198,7 @@ def _to_signed_dword(self, dword): raise EnvironmentError(f"Corrupted internal CPU state (arch width is {arch_width})") return sdword - def _open(self, f): + def _open(self, f) -> int: """ Adds a file descriptor to the current file descriptor list @@ -1214,7 +1214,7 @@ def _open(self, f): self.files.append(f) return fd - def _close(self, fd): + def _close(self, fd: int) -> None: """ Removes a file descriptor from the file descriptor list :rtype: int @@ -1230,7 +1230,7 @@ def _close(self, fd): except IndexError: raise FdError(f"Bad file descriptor ({fd})", os.EBADF) - def _dup(self, fd): + def _dup(self, fd: int) -> int: """ Duplicates a file descriptor :rtype: int @@ -1239,14 +1239,14 @@ def _dup(self, fd): """ return self._open(self.files[fd]) - def _is_fd_open(self, fd): + def _is_fd_open(self, fd: int) -> bool: """ Determines if the fd is within range and in the file descr. list :param fd: the file descriptor to check. """ return fd >= 0 and fd < len(self.files) and self.files[fd] is not None - def _get_fd(self, fd): + def _get_fd(self, fd: int) -> File: if not self._is_fd_open(fd): raise FdError(f"File descriptor is not open", errno.EBADF) else: @@ -1320,7 +1320,7 @@ def sys_getcwd(self, buf, size): except OSError as e: return -e.errno - def sys_lseek(self, fd: File, offset: int, whence: int) -> int: + def sys_lseek(self, fd: int, offset: int, whence: int) -> int: """ lseek - reposition read/write file offset @@ -1346,7 +1346,7 @@ def sys_lseek(self, fd: File, offset: int, whence: int) -> int: return -e.err def sys_llseek( - self, fd: File, offset_high: int, offset_low: int, resultp: int, whence: int + self, fd: int, offset_high: int, offset_low: int, resultp: int, whence: int ) -> int: """ _llseek - reposition read/write file offset From 238b6c51b73d223bc8b27a22f115b5e8e40822eb Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Mon, 30 Mar 2020 15:16:28 -0400 Subject: [PATCH 15/16] Fix a bad variable reference --- manticore/platforms/linux.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 7e004388e..93a9da03b 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1228,7 +1228,7 @@ def _close(self, fd: int) -> None: ) # Keep track for SymbolicFile testcase generation self.files[fd] = None except IndexError: - raise FdError(f"Bad file descriptor ({fd})", os.EBADF) + raise FdError(f"Bad file descriptor ({fd})", errno.EBADF) def _dup(self, fd: int) -> int: """ From bca0ff2f6aa2201c2f724b70b7e3643a5e31b33b Mon Sep 17 00:00:00 2001 From: Brad Larsen Date: Mon, 30 Mar 2020 15:32:37 -0400 Subject: [PATCH 16/16] More type signatures; fix typing issues with linux `sys_write()` --- manticore/native/cpu/cpufactory.py | 7 ++-- manticore/platforms/linux.py | 52 ++++++++++++++++-------------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/manticore/native/cpu/cpufactory.py b/manticore/native/cpu/cpufactory.py index 16a6d81b2..cd2f85172 100644 --- a/manticore/native/cpu/cpufactory.py +++ b/manticore/native/cpu/cpufactory.py @@ -8,6 +8,7 @@ I386CdeclAbi, SystemVAbi, ) +from .abstractcpu import Abi, Cpu, SyscallAbi class CpuFactory: @@ -28,20 +29,20 @@ class CpuFactory: } @staticmethod - def get_cpu(mem, machine): + def get_cpu(mem, machine: str) -> Cpu: cpu = CpuFactory._cpus[machine](mem) mem.cpu = cpu return cpu @staticmethod - def get_function_abi(cpu, os, machine): + def get_function_abi(cpu: Cpu, os: str, machine: str) -> Abi: if os != "linux" or machine not in CpuFactory._linux_abis: raise NotImplementedError(f"OS and machine combination not supported: {os}/{machine}") return CpuFactory._linux_abis[machine](cpu) @staticmethod - def get_syscall_abi(cpu, os, machine): + def get_syscall_abi(cpu: Cpu, os: str, machine: str) -> SyscallAbi: if os != "linux" or machine not in CpuFactory._linux_syscalls_abis: raise NotImplementedError(f"OS and machine combination not supported: {os}/{machine}") diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 93a9da03b..3b0d7d693 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -25,11 +25,13 @@ from ..core.smtlib import ConstraintSet, Operators, Expression, issymbolic from ..core.smtlib.solver import Z3Solver from ..exceptions import SolverError -from ..native.cpu.abstractcpu import Syscall, ConcretizeArgument, Interruption +from ..native.cpu.abstractcpu import Cpu, Syscall, ConcretizeArgument, Interruption from ..native.cpu.cpufactory import CpuFactory from ..native.memory import SMemory32, SMemory64, Memory32, Memory64, LazySMemory32, LazySMemory64 from ..platforms.platform import Platform, SyscallNotImplemented, unimplemented +from typing import List, Set + logger = logging.getLogger(__name__) MixedSymbolicBuffer = Union[List[Union[bytes, Expression]], bytes] @@ -404,7 +406,7 @@ def write(self, buf): assert self.is_connected() return self.peer._transmit(buf) - def _transmit(self, buf): + def _transmit(self, buf) -> int: for c in buf: self.buffer.append(c) return len(buf) @@ -508,7 +510,7 @@ def empty_platform(cls, arch): platform._init_std_fds() return platform - def _init_std_fds(self): + def _init_std_fds(self) -> None: # open standard files stdin, stdout, stderr logger.debug("Opening file descriptors (0,1,2) (STDIN, STDOUT, STDERR)") self.input = Socket() @@ -532,7 +534,7 @@ def _init_std_fds(self): assert (in_fd, out_fd, err_fd) == (0, 1, 2) - def _init_cpu(self, arch): + def _init_cpu(self, arch) -> None: # create memory and CPU cpu = self._mk_proc(arch) self.procs = [cpu] @@ -540,7 +542,7 @@ def _init_cpu(self, arch): self._function_abi = CpuFactory.get_function_abi(cpu, "linux", arch) self._syscall_abi = CpuFactory.get_syscall_abi(cpu, "linux", arch) - def _find_symbol(self, name): + def _find_symbol(self, name: str): symbol_tables = (s for s in self.elf.iter_sections() if isinstance(s, SymbolTableSection)) for section in symbol_tables: @@ -554,7 +556,7 @@ def _find_symbol(self, name): return None - def _execve(self, program, argv, envp): + def _execve(self, program: str, argv: List[str], envp: List[str]) -> None: """ Load `program` and establish program state, such as stack and arguments. @@ -581,14 +583,14 @@ def _execve(self, program, argv, envp): # Each process can wait for one timeout self.timers = [None] * nprocs # each fd has a waitlist - self.rwait = [set() for _ in range(nfiles)] - self.twait = [set() for _ in range(nfiles)] + self.rwait: List[Set] = [set() for _ in range(nfiles)] + self.twait: List[Set] = [set() for _ in range(nfiles)] # Install event forwarders for proc in self.procs: self.forward_events_from(proc) - def _mk_proc(self, arch): + def _mk_proc(self, arch: str) -> Cpu: mem = Memory32() if arch in {"i386", "armv7"} else Memory64() cpu = CpuFactory.get_cpu(mem, arch) return cpu @@ -1252,21 +1254,21 @@ def _get_fd(self, fd: int) -> File: else: return self.files[fd] - def _transform_write_data(self, data: bytes) -> bytes: + def _transform_write_data(self, data) -> bytes: """ Implement in subclass to transform data written by write(2)/writev(2) Nop by default. """ return data - def _exit(self, message): + def _exit(self, message) -> None: procid = self.procs.index(self.current) self.sched() self.running.remove(procid) if len(self.running) == 0: raise TerminateState(message, testcase=True) - def sys_umask(self, mask): + def sys_umask(self, mask: int) -> int: """ umask - Set file creation mode mask :param int mask: New mask @@ -1277,7 +1279,7 @@ def sys_umask(self, mask): except OSError as e: return -e.errno - def sys_chdir(self, path): + def sys_chdir(self, path) -> int: """ chdir - Change current working directory :param int path: Pointer to path @@ -1290,7 +1292,7 @@ def sys_chdir(self, path): except OSError as e: return -e.errno - def sys_getcwd(self, buf, size): + def sys_getcwd(self, buf, size) -> int: """ getcwd - Get the current working directory :param int buf: Pointer to dest array @@ -1386,7 +1388,7 @@ def sys_llseek( ) return -e.err - def sys_read(self, fd, buf, count): + def sys_read(self, fd: int, buf, count) -> int: data: bytes = bytes() if count != 0: # TODO check count bytes from buf @@ -1405,7 +1407,7 @@ def sys_read(self, fd, buf, count): return len(data) - def sys_write(self, fd, buf, count): + def sys_write(self, fd: int, buf, count) -> int: """ write - send bytes through a file descriptor The write system call writes up to count bytes from the buffer pointed to by buf to the file descriptor fd. If count is zero, write returns 0 @@ -1437,27 +1439,27 @@ def sys_write(self, fd, buf, count): self.wait([], [fd], None) raise RestartSyscall() - data: MixedSymbolicBuffer = cpu.read_bytes(buf, count) - data: bytes = self._transform_write_data(data) + data_sym: MixedSymbolicBuffer = cpu.read_bytes(buf, count) + data = self._transform_write_data(data_sym) write_fd.write(data) for line in data.split(b"\n"): - line = line.decode( + line_str = line.decode( "latin-1" ) # latin-1 encoding will happily decode any byte (0x00-0xff) - logger.debug(f"WRITE({fd}, 0x{buf:08x}, {count}) -> <{repr(line):48s}>") + logger.debug(f"WRITE({fd}, 0x{buf:08x}, {count}) -> <{repr(line_str):48s}>") self.syscall_trace.append(("_write", fd, data)) self.signal_transmit(fd) return len(data) - def sys_fork(self): + def sys_fork(self) -> int: """ We don't support forking, but do return a valid error code to client binary. """ return -errno.ENOSYS - def sys_access(self, buf, mode): + def sys_access(self, buf, mode) -> int: """ Checks real user's permissions for a file :rtype: int @@ -1594,7 +1596,7 @@ def sys_open(self, buf, flags, mode): return self._open(f) - def sys_openat(self, dirfd, buf, flags, mode): + def sys_openat(self, dirfd, buf, flags, mode) -> int: """ Openat SystemCall - Similar to open system call except dirfd argument when path contained in buf is relative, dirfd is referred to set the relative path @@ -1634,7 +1636,7 @@ def sys_openat(self, dirfd, buf, flags, mode): return self._open(f) - def sys_rename(self, oldnamep, newnamep): + def sys_rename(self, oldnamep, newnamep) -> int: """ Rename filename `oldnamep` to `newnamep`. @@ -1652,7 +1654,7 @@ def sys_rename(self, oldnamep, newnamep): return ret - def sys_fsync(self, fd): + def sys_fsync(self, fd: int) -> int: """ Synchronize a file's in-core state with that on disk. """