From cd929187fc7e8d251dede2e0099ae0a49cb964ff Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 15 Oct 2022 17:31:58 -0400 Subject: [PATCH 1/5] Get rid of some unwrap()s. --- .changelog/433.bugfix | 1 + memapi/src/oom.rs | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) create mode 100644 .changelog/433.bugfix diff --git a/.changelog/433.bugfix b/.changelog/433.bugfix new file mode 100644 index 00000000..9a3002ff --- /dev/null +++ b/.changelog/433.bugfix @@ -0,0 +1 @@ +If /proc is in unexpected format, try to keep running anyway. This can happen, for example, on very old versions of Linux. \ No newline at end of file diff --git a/memapi/src/oom.rs b/memapi/src/oom.rs index e42d9d87..64cb926c 100644 --- a/memapi/src/oom.rs +++ b/memapi/src/oom.rs @@ -132,18 +132,18 @@ impl OutOfMemoryEstimator { } #[cfg(target_os = "linux")] -fn get_cgroup_paths<'a>(proc_cgroups: &'a str) -> Vec<&'a str> { +fn get_cgroup_paths<'a>(proc_cgroups: &'a str) -> Option> { let mut result = vec![]; for line in proc_cgroups.lines() { // TODO better error handling? let mut parts = line.splitn(3, ":"); - let subsystems = parts.nth(1).unwrap(); + let subsystems = parts.nth(1)?; if (subsystems == "") || subsystems.split(",").any(|s| s == "memory") { - let cgroup_path = parts.nth(0).unwrap().strip_prefix("/").unwrap(); + let cgroup_path = parts.nth(0)?.strip_prefix("/")?; result.push(cgroup_path); } } - result + Some(result) } /// Real system information. @@ -167,14 +167,14 @@ impl RealMemoryInfo { return None; } }; - let cgroup_paths = get_cgroup_paths(&contents); - for path in cgroup_paths { + let cgroup_paths = get_cgroup_paths(&contents)?; + if let Some(path) = cgroup_paths.into_iter().next() { let h = cgroups_rs::hierarchies::auto(); let cgroup = cgroups_rs::Cgroup::load(h, path); // Make sure memory_stat() works. Sometimes it doesn't // (https://github.com/pythonspeed/filprofiler/issues/147). If // it doesn't, this'll panic. - let mem: &cgroups_rs::memory::MemController = cgroup.controller_of().unwrap(); + let mem: &cgroups_rs::memory::MemController = cgroup.controller_of()?; let _mem = mem.memory_stat(); return Some(cgroup); } @@ -190,7 +190,7 @@ impl RealMemoryInfo { } }; Self { - cgroup: cgroup, + cgroup, process: psutil::process::Process::current().ok(), } } @@ -231,14 +231,18 @@ impl RealMemoryInfo { impl MemoryInfo for RealMemoryInfo { fn total_memory(&self) -> usize { - psutil::memory::virtual_memory().unwrap().total() as usize + psutil::memory::virtual_memory() + .map(|vm| vm.total() as usize) + .unwrap_or(0) } /// Return how much free memory we have, as bytes. fn get_available_memory(&self) -> usize { // This will include memory that can become available by syncing // filesystem buffers to disk, which is probably what we want. - let available = psutil::memory::virtual_memory().unwrap().available() as usize; + let available = psutil::memory::virtual_memory() + .map(|vm| vm.available() as usize) + .unwrap_or(std::usize::MAX); let cgroup_available = self.get_cgroup_available_memory(); std::cmp::min(available, cgroup_available) } @@ -261,8 +265,11 @@ impl MemoryInfo for RealMemoryInfo { eprintln!( "=fil-profile= cgroup (e.g. container) memory info: {:?}", if let Some(cgroup) = &self.cgroup { - let mem: &cgroups_rs::memory::MemController = cgroup.controller_of().unwrap(); - Some(mem.memory_stat()) + if let Some(mem) = &cgroup.controller_of::() { + Some(mem.memory_stat()) + } else { + None + } } else { None } From a0ed6878009c3a5cd991fb8cd158b6996bda273f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 15 Oct 2022 18:34:04 -0400 Subject: [PATCH 2/5] Clippy lints. --- filpreload/src/lib.rs | 2 +- memapi/src/oom.rs | 27 ++++++++++++++------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/filpreload/src/lib.rs b/filpreload/src/lib.rs index 26a9fc2b..aa98d81f 100644 --- a/filpreload/src/lib.rs +++ b/filpreload/src/lib.rs @@ -32,7 +32,7 @@ lazy_static! { if std::env::var("__FIL_DISABLE_OOM_DETECTION") == Ok("1".to_string()) { Box::new(InfiniteMemory {}) } else { - Box::new(RealMemoryInfo::new()) + Box::new(RealMemoryInfo::default()) } ), }); diff --git a/memapi/src/oom.rs b/memapi/src/oom.rs index 64cb926c..5700abf2 100644 --- a/memapi/src/oom.rs +++ b/memapi/src/oom.rs @@ -132,14 +132,14 @@ impl OutOfMemoryEstimator { } #[cfg(target_os = "linux")] -fn get_cgroup_paths<'a>(proc_cgroups: &'a str) -> Option> { +fn get_cgroup_paths(proc_cgroups: &str) -> Option> { let mut result = vec![]; for line in proc_cgroups.lines() { // TODO better error handling? - let mut parts = line.splitn(3, ":"); + let mut parts = line.splitn(3, ':'); let subsystems = parts.nth(1)?; - if (subsystems == "") || subsystems.split(",").any(|s| s == "memory") { - let cgroup_path = parts.nth(0)?.strip_prefix("/")?; + if subsystems.is_empty() || subsystems.split(',').any(|s| s == "memory") { + let cgroup_path = parts.next()?.strip_prefix('/')?; result.push(cgroup_path); } } @@ -156,9 +156,9 @@ pub struct RealMemoryInfo { cgroup: Option, } -impl RealMemoryInfo { +impl Default for RealMemoryInfo { #[cfg(target_os = "linux")] - pub fn new() -> Self { + fn default() -> Self { let get_cgroup = || { let contents = match read_to_string("/proc/self/cgroup") { Ok(contents) => contents, @@ -196,12 +196,14 @@ impl RealMemoryInfo { } #[cfg(target_os = "macos")] - pub fn new() -> Self { + fn default() -> Self { Self { process: psutil::process::Process::current().ok(), } } +} +impl RealMemoryInfo { #[cfg(target_os = "linux")] pub fn get_cgroup_available_memory(&self) -> usize { let mut result = std::usize::MAX; @@ -265,11 +267,10 @@ impl MemoryInfo for RealMemoryInfo { eprintln!( "=fil-profile= cgroup (e.g. container) memory info: {:?}", if let Some(cgroup) = &self.cgroup { - if let Some(mem) = &cgroup.controller_of::() { - Some(mem.memory_stat()) - } else { - None - } + cgroup + .controller_of::() + .as_ref() + .map(|mem| mem.memory_stat()) } else { None } @@ -376,7 +377,7 @@ mod tests { proptest! { // Random allocations don't break invariants #[test] - fn not_oom(allocated_sizes in prop::collection::vec(1..1000 as usize, 10..2000)) { + fn not_oom(allocated_sizes in prop::collection::vec(1..1000usize, 10..2000)) { let (mut estimator, memory_info) = setup_estimator(); let mut allocated = 0; for size in allocated_sizes { From f08f0de497361c0395df7b92a7b066177a941836 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 15 Oct 2022 18:40:41 -0400 Subject: [PATCH 3/5] Stricter tests. --- tests/test_endtoend.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/test_endtoend.py b/tests/test_endtoend.py index 2884ce01..1498cfd2 100644 --- a/tests/test_endtoend.py +++ b/tests/test_endtoend.py @@ -326,7 +326,7 @@ def test_out_of_memory_detection_disabled(): assert False, "process succeeded?!" -def get_systemd_run_args(available_memory): +def get_systemd_run_args(memory_limit): """ Figure out if we're on system with cgroups v2, or not, and return appropriate systemd-run args. @@ -340,7 +340,7 @@ def get_systemd_run_args(available_memory): "--gid", str(os.getegid()), "-p", - f"MemoryLimit={available_memory // 4}B", + f"MemoryLimit={memory_limit}B", "--scope", "--same-dir", ] @@ -364,10 +364,11 @@ def test_out_of_memory_slow_leak_cgroups(): """ available_memory = psutil.virtual_memory().available script = TEST_SCRIPTS / "oom-slow.py" + memory_limit = available_memory // 4 output_dir = profile( script, expect_exit_code=53, - argv_prefix=get_systemd_run_args(available_memory), + argv_prefix=get_systemd_run_args(memory_limit), ) time.sleep(10) # wait for child process to finish allocations = get_allocations( @@ -385,7 +386,9 @@ def test_out_of_memory_slow_leak_cgroups(): # Should've allocated at least a little before running out, unless testing # environment is _really_ restricted, in which case other tests would've # failed. - assert match(allocations, {expected_alloc: big}, as_mb) > 100 + failed_alloc_size = match(allocations, {expected_alloc: big}, lambda kb: kb * 1024) + assert failed_alloc_size > 0.7 * memory_limit + assert failed_alloc_size < 1.3 * memory_limit def test_external_behavior(): @@ -506,7 +509,7 @@ def test_jupyter(tmpdir): continue else: actual_path = key - assert actual_path != None + assert actual_path is not None assert actual_path[0][0] != actual_path[1][0] # code is in different cells path2 = ( (re.compile(".*ipy.*"), "__magic_run_with_fil", 2), From cbe3cb64f2ed169c81465f5e35ed1c7c273a0010 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Sat, 15 Oct 2022 19:05:14 -0400 Subject: [PATCH 4/5] More lenient limit. --- tests/test_endtoend.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_endtoend.py b/tests/test_endtoend.py index 1498cfd2..392c3ca2 100644 --- a/tests/test_endtoend.py +++ b/tests/test_endtoend.py @@ -383,12 +383,13 @@ def test_out_of_memory_slow_leak_cgroups(): expected_alloc = ((str(script), "", 3),) + failed_alloc_size = match(allocations, {expected_alloc: big}, lambda kb: kb * 1024) # Should've allocated at least a little before running out, unless testing # environment is _really_ restricted, in which case other tests would've # failed. - failed_alloc_size = match(allocations, {expected_alloc: big}, lambda kb: kb * 1024) - assert failed_alloc_size > 0.7 * memory_limit - assert failed_alloc_size < 1.3 * memory_limit + assert failed_alloc_size > 100 * 1024 * 1024 + # Shouldn't have allocated much beyond the limit. + assert failed_alloc_size < 1.1 * memory_limit def test_external_behavior(): From 0effe601b33a1bdc6dbd0771ce83736c58fbc68c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 19 Oct 2022 08:08:29 -0400 Subject: [PATCH 5/5] Too much memory isn't a problem; so long as we did OOM detection and didn't too it soon, that's what we care about. --- tests/test_endtoend.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/test_endtoend.py b/tests/test_endtoend.py index 392c3ca2..423c1126 100644 --- a/tests/test_endtoend.py +++ b/tests/test_endtoend.py @@ -254,6 +254,7 @@ def test_out_of_memory(): written out. """ script = TEST_SCRIPTS / "oom.py" + # Exit code 53 means out-of-memory detection was triggered output_dir = profile(script, expect_exit_code=53) time.sleep(10) # wait for child process to finish allocations = get_allocations( @@ -285,6 +286,7 @@ def test_out_of_memory_slow_leak(): written out. """ script = TEST_SCRIPTS / "oom-slow.py" + # Exit code 53 means out-of-memory detection was triggered output_dir = profile(script, expect_exit_code=53) time.sleep(10) # wait for child process to finish allocations = get_allocations( @@ -367,6 +369,7 @@ def test_out_of_memory_slow_leak_cgroups(): memory_limit = available_memory // 4 output_dir = profile( script, + # Exit code 53 means out-of-memory detection was triggered expect_exit_code=53, argv_prefix=get_systemd_run_args(memory_limit), ) @@ -384,12 +387,8 @@ def test_out_of_memory_slow_leak_cgroups(): expected_alloc = ((str(script), "", 3),) failed_alloc_size = match(allocations, {expected_alloc: big}, lambda kb: kb * 1024) - # Should've allocated at least a little before running out, unless testing - # environment is _really_ restricted, in which case other tests would've - # failed. - assert failed_alloc_size > 100 * 1024 * 1024 - # Shouldn't have allocated much beyond the limit. - assert failed_alloc_size < 1.1 * memory_limit + # We shouldn't trigger OOM detection too soon. + assert failed_alloc_size > 0.7 * memory_limit def test_external_behavior():