Skip to content
Permalink
Browse files

Keep mmaps coherent with writes performed to separately opened files

  • Loading branch information...
rocallahan committed Apr 26, 2019
1 parent dec6bd5 commit f1843c474b27517f7f3b6240621f4f4a9e369e96
@@ -774,6 +774,7 @@ set(BASIC_TESTS
mmap_shared_write_fork
mmap_short_file
mmap_tmpfs
mmap_write_complex
mmap_zero_size_fd
modify_ldt
mount_ns_exec
@@ -160,6 +160,13 @@ class KernelMapping : public MemoryRange {
// The kernel's name for the mapping, as per /proc/<pid>/maps. This must
// be exactly correct.
const std::string fsname_;
// Note that btrfs has weird behavior and /proc/.../maps can show a different
// device number to the device from stat()ing the file that was mapped.
// https://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg57667.html
// We store here the device number obtained from fstat()ing the file.
// This also seems to be consistent with what we read from populate_address_space
// for the initial post-exec mappings. It is NOT consistent with what we get
// from reading /proc/.../maps for non-initial mappings.
dev_t device_;
ino_t inode_;
const int prot_;
@@ -151,6 +151,15 @@ EmuFile::shr_ptr EmuFs::get_or_create(const KernelMapping& recorded_km) {
return vf;
}

EmuFile::shr_ptr EmuFs::find(dev_t device, ino_t inode) {
FileId id(device, inode);
auto it = files.find(id);
if (it == files.end()) {
return EmuFile::shr_ptr();
}
return it->second.lock();
}

void EmuFs::log() const {
LOG(error) << "EmuFs " << this << " with " << files.size() << " files:";
for (auto& kv : files) {
@@ -149,6 +149,12 @@ class EmuFs {
*/
EmuFile::shr_ptr get_or_create(const KernelMapping& recorded_km);

/**
* Return an already-existing emulated file for the given device/inode.
* Returns null if not found.
*/
EmuFile::shr_ptr find(dev_t device, ino_t inode);

/**
* Dump information about this emufs to the "error" log.
*/
@@ -168,6 +174,8 @@ class EmuFs {
FileId(const KernelMapping& recorded_map);
FileId(const EmuFile& emu_file)
: device(emu_file.device()), inode(emu_file.inode()) {}
FileId(dev_t device, ino_t inode)
: device(device), inode(inode) {}
bool operator<(const FileId& other) const {
return device < other.device ||
(device == other.device && inode < other.inode);
@@ -187,6 +187,8 @@ enum SyscallState {
struct OpenedFd {
std::string path;
int fd;
dev_t device;
ino_t inode;
};

struct SyscallEvent {
@@ -204,6 +204,7 @@ static remote_ptr<uint8_t> allocate_extended_jump(
&recorded);
t->vm()->mapping_flags_of(addr) |= AddressSpace::Mapping::IS_PATCH_STUBS;
t->trace_writer().write_mapped_region(t, recorded, recorded.fake_stat(),
vector<TraceRemoteFd>(),
TraceWriter::PATCH_MAPPING);
}

@@ -1358,9 +1358,9 @@ void RecordSession::signal_state_changed(RecordTask* t, StepState* step_state) {
}

// We record this data even if sigframe_size is zero to simplify replay.
// Stop recording data if we run off the end of a writeable mapping.
// Stop recording data if we run off the end of a writable mapping.
// Our sigframe size is conservative so we need to do this.
t->record_remote_writeable(t->regs().sp(), sigframe_size);
t->record_remote_writable(t->regs().sp(), sigframe_size);

// This event is used by the replayer to set up the signal handler frame.
// But if we don't have a handler, we don't want to record the event
@@ -467,6 +467,7 @@ template <typename Arch> void RecordTask::init_buffers_arch() {

auto record_in_trace = trace_writer().write_mapped_region(
this, syscallbuf_km, syscallbuf_km.fake_stat(),
vector<TraceRemoteFd>(),
TraceWriter::RR_BUFFER_MAPPING);
ASSERT(this, record_in_trace == TraceWriter::DONT_RECORD_IN_TRACE);

@@ -1507,8 +1508,8 @@ void RecordTask::record_remote(remote_ptr<void> addr, ssize_t num_bytes) {
trace_writer().write_raw(rec_tid, buf.data(), num_bytes, addr);
}

void RecordTask::record_remote_writeable(remote_ptr<void> addr,
ssize_t num_bytes) {
void RecordTask::record_remote_writable(remote_ptr<void> addr,
ssize_t num_bytes) {
ASSERT(this, num_bytes >= 0);

remote_ptr<void> p = addr;
@@ -362,7 +362,7 @@ class RecordTask : public Task {
ssize_t record_remote_fallible(remote_ptr<void> addr, ssize_t num_bytes);
// Record as much as we can of the bytes in this range. Will record only
// contiguous mapped-writable data starting at `addr`.
void record_remote_writeable(remote_ptr<void> addr, ssize_t num_bytes);
void record_remote_writable(remote_ptr<void> addr, ssize_t num_bytes);

// Simple helper that attempts to use the local mapping to record if one
// exists
@@ -175,6 +175,15 @@ struct stat Task::stat_fd(int fd) {
return result;
}

struct stat Task::lstat_fd(int fd) {
char path[PATH_MAX];
snprintf(path, sizeof(path) - 1, "/proc/%d/fd/%d", tid, fd);
struct stat result;
auto ret = ::lstat(path, &result);
ASSERT(this, ret == 0);
return result;
}

ScopedFd Task::open_fd(int fd, int flags) {
char path[PATH_MAX];
snprintf(path, sizeof(path) - 1, "/proc/%d/fd/%d", tid, fd);
@@ -177,6 +177,10 @@ class Task {
* Stat |fd| in the context of this task's fd table.
*/
struct stat stat_fd(int fd);
/**
* Lstat |fd| in the context of this task's fd table.
*/
struct stat lstat_fd(int fd);
/**
* Open |fd| in the context of this task's fd table.
*/
@@ -481,6 +481,8 @@ void TraceWriter::write_frame(RecordTask* t, const Event& ev,
auto opened = e.opened[i];
o.setFd(opened.fd);
o.setPath(str_to_data(opened.path));
o.setDevice(opened.device);
o.setInode(opened.inode);
}
}
break;
@@ -617,8 +619,12 @@ TraceFrame TraceReader::read_frame() {
auto open = data.getOpenedFds();
syscall_ev.opened.resize(open.size());
for (size_t i = 0; i < open.size(); ++i) {
syscall_ev.opened[i].fd = check_fd(open[i].getFd());
syscall_ev.opened[i].path = data_to_str(open[i].getPath());
auto& opened = syscall_ev.opened[i];
const auto& o = open[i];
opened.fd = check_fd(o.getFd());
opened.path = data_to_str(o.getPath());
opened.device = o.getDevice();
opened.inode = o.getInode();
}
break;
}
@@ -834,7 +840,7 @@ static bool starts_with(const string& s, const string& with) {

TraceWriter::RecordInTrace TraceWriter::write_mapped_region(
RecordTask* t, const KernelMapping& km, const struct stat& stat,
MappingOrigin origin) {
const vector<TraceRemoteFd>& extra_fds, MappingOrigin origin) {
MallocMessageBuilder map_msg;
trace::MMap::Builder map = map_msg.initRoot<trace::MMap>();
map.setFrameTime(global_time);
@@ -851,6 +857,13 @@ TraceWriter::RecordInTrace TraceWriter::write_mapped_region(
map.setStatGid(stat.st_gid);
map.setStatSize(stat.st_size);
map.setStatMTime(stat.st_mtime);
auto fds = map.initExtraFds(extra_fds.size());
for (size_t i = 0; i < extra_fds.size(); ++i) {
auto e = fds[i];
auto& r = extra_fds[i];
e.setTid(r.tid);
e.setFd(r.fd);
}
auto src = map.getSource();
string backing_file_name;

@@ -967,7 +980,8 @@ void TraceWriter::write_mapped_region_to_alternative_stream(

KernelMapping TraceReader::read_mapped_region(MappedData* data, bool* found,
ValidateSourceFile validate,
TimeConstraint time_constraint) {
TimeConstraint time_constraint,
vector<TraceRemoteFd>* extra_fds) {
if (found) {
*found = false;
}
@@ -998,6 +1012,13 @@ KernelMapping TraceReader::read_mapped_region(MappedData* data, bool* found,
}
data->data_offset_bytes = 0;
data->file_size_bytes = map.getStatSize();
if (extra_fds) {
const auto& fds = map.getExtraFds();
for (size_t i = 0; i < fds.size(); ++i) {
const auto& f = fds[i];
extra_fds->push_back({ f.getTid(), f.getFd() });
}
}
auto src = map.getSource();
switch (src.which()) {
case trace::MMap::Source::Which::ZERO:
@@ -133,6 +133,11 @@ class TraceStream {
FrameTime global_time;
};

struct TraceRemoteFd {
pid_t tid;
int fd;
};

/**
* Trace writing takes the trace directory through a defined set of states.
* These states can be usefully observed by external programs.
@@ -193,6 +198,7 @@ class TraceWriter : public TraceStream {
*/
RecordInTrace write_mapped_region(RecordTask* t, const KernelMapping& map,
const struct stat& stat,
const std::vector<TraceRemoteFd>& extra_fds,
MappingOrigin origin = SYSCALL_MAPPING);

static void write_mapped_region_to_alternative_stream(
@@ -318,7 +324,8 @@ class TraceReader : public TraceStream {
KernelMapping read_mapped_region(
MappedData* data = nullptr, bool* found = nullptr,
ValidateSourceFile validate = VALIDATE,
TimeConstraint time_constraint = CURRENT_TIME_ONLY);
TimeConstraint time_constraint = CURRENT_TIME_ONLY,
std::vector<TraceRemoteFd>* extra_fds = nullptr);

/**
* Read a task event (clone or exec record) from the trace.
@@ -205,7 +205,7 @@ static bool try_grow_map(RecordTask* t, siginfo_t* si) {
t->vm()->map(t, new_start, it->map.start() - new_start, it->map.prot(),
it->map.flags() | MAP_ANONYMOUS, 0, string(),
KernelMapping::NO_DEVICE, KernelMapping::NO_INODE);
t->trace_writer().write_mapped_region(t, km, km.fake_stat());
t->trace_writer().write_mapped_region(t, km, km.fake_stat(), vector<TraceRemoteFd>());
// No need to flush syscallbuf here. It's safe to map these pages "early"
// before they're really needed.
t->record_event(Event::grow_map(), RecordTask::DONT_FLUSH_SYSCALLBUF);
Oops, something went wrong.

0 comments on commit f1843c4

Please sign in to comment.
You can’t perform that action at this time.