Skip to content

Commit

Permalink
chunking: Bin packing algorithm that minimizes
Browse files Browse the repository at this point in the history
layer deltas using historical builds

Revamp basic_packing to follow the prior packing structure
if the --prior-build flag exists. This simply modifies existing
layers with upgrades, downgrades, and removals of packages. The last layer
contains any newly added packages.
When the --prior-build flag is not provided, each package's update
frequency (frequencyinfo) and size are used to
segment packages into different partitions (all combinations of
low, medium, and high frequency with low, medium, and high size). The partition
that each package falls into is decided by its deviation from the mean.
Then the packages are allotted to different layers to ensure
1) low-frequency packages don't mix with high-frequency packages
2) High-sized packages are allotted separate bins
3) Low-sized packages can be put together in the same bin
This problem is also known as the multi-objective bin packing problem with
constraints, or equivalently the multiple knapsack problem. The objectives
conflict under our constraints, so a compromise is made to minimize layer deltas
while respecting the hard limit on overlayfs layers that the kernel can handle.
  • Loading branch information
RishabhSaini committed May 8, 2023
1 parent b6747d0 commit 5807950
Show file tree
Hide file tree
Showing 8 changed files with 535 additions and 74 deletions.
469 changes: 409 additions & 60 deletions lib/src/chunking.rs

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions lib/src/container/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,10 @@ pub struct PreparedImport {
}

impl PreparedImport {
/// Iterate over all layers; the ostree split object layers, the commit layer, and any non-ostree layers.
/// Iterate over all layers; the commit layer, the ostree split object layers, and any non-ostree layers.
pub fn all_layers(&self) -> impl Iterator<Item = &ManifestLayerState> {
self.ostree_layers
.iter()
.chain(std::iter::once(&self.ostree_commit_layer))
std::iter::once(&self.ostree_commit_layer)
.chain(self.ostree_layers.iter())
.chain(self.layers.iter())
}

Expand Down
3 changes: 2 additions & 1 deletion lib/src/fixture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ d tmp
"## };
pub const CONTENTS_CHECKSUM_V0: &str =
"5e41de82f9f861fa51e53ce6dd640a260e4fb29b7657f5a3f14157e93d2c0659";
pub static CONTENTS_V0_LEN: Lazy<usize> = Lazy::new(|| OWNERS.len().checked_sub(1).unwrap());
// 1 for the ostree commit, 2 for max-frequency packages, 3 as the empty layer
pub const LAYERS_V0_LEN: usize = 3usize;

#[derive(Debug, PartialEq, Eq)]
enum SeLabel {
Expand Down
Binary file modified lib/src/fixtures/fedora-coreos-contentmeta.json.gz
Binary file not shown.
1 change: 1 addition & 0 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub mod objectsource;
pub(crate) mod objgv;
#[cfg(feature = "internal-testing-api")]
pub mod ostree_manual;
pub(crate) mod statistics;

#[cfg(feature = "docgen")]
mod docgen;
Expand Down
3 changes: 1 addition & 2 deletions lib/src/objectsource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ pub struct ObjectSourceMeta {
/// Unique identifier, does not need to be human readable, but can be.
#[serde(with = "rcstr_serialize")]
pub identifier: ContentID,
/// Identifier for this source (e.g. package name-version, git repo).
/// Unlike the [`ContentID`], this should be human readable.
/// Just the name of the package (no version), needs to be human readable.
#[serde(with = "rcstr_serialize")]
pub name: Rc<str>,
/// Identifier for the *source* of this content; for example, if multiple binary
Expand Down
109 changes: 109 additions & 0 deletions lib/src/statistics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
//! This module holds implementations of some basic statistical properties, such as mean and standard deviation.

/// Arithmetic mean of `data`, or `None` if `data` is empty.
///
/// Values are accumulated as `f64` so that the total cannot overflow: a
/// `sum::<u64>()` over very large values would wrap (and panic in debug
/// builds), whereas the `f64` accumulation only loses precision.
pub(crate) fn mean(data: &[u64]) -> Option<f64> {
    if data.is_empty() {
        None
    } else {
        Some(data.iter().map(|&v| v as f64).sum::<f64>() / data.len() as f64)
    }
}

/// Population standard deviation of `data`, or `None` if `data` is empty.
pub(crate) fn std_deviation(data: &[u64]) -> Option<f64> {
    // `mean` returns `None` exactly when the slice is empty, so `?` doubles
    // as the emptiness guard here.
    let avg = mean(data)?;
    // Sum of squared deviations from the mean.
    let sum_sq: f64 = data
        .iter()
        .map(|&value| {
            let diff = value as f64 - avg;
            diff * diff
        })
        .sum();
    // Variance is the mean squared deviation; its square root is the
    // (population) standard deviation.
    Some((sum_sq / data.len() as f64).sqrt())
}

/// Compute the median and the median absolute deviation (MAD) of `data`.
///
/// Precondition: `data` must already be sorted in ascending order — the
/// median is read directly from the middle indices without re-sorting.
///
/// Returns `None` for an empty slice, otherwise `Some((median, mad))`.
// NOTE(review): `data` is never mutated; the `&mut` could be relaxed to
// `&[u64]`, but the signature is kept for API compatibility.
pub(crate) fn median_absolute_deviation(data: &mut [u64]) -> Option<(f64, f64)> {
    if data.is_empty() {
        return None;
    }

    // Median of the (sorted) input. Convert both middle elements to f64
    // *before* adding so the sum of two large u64 values cannot overflow.
    let n = data.len();
    let median_data: f64 = if n % 2 == 1 {
        data[n / 2] as f64
    } else {
        0.5 * (data[n / 2 - 1] as f64 + data[n / 2] as f64)
    };

    // Absolute deviation of every element from the median.
    let mut absolute_deviations: Vec<f64> = data
        .iter()
        .map(|&size| (size as f64 - median_data).abs())
        .collect();
    absolute_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap());

    // The MAD is the median of the deviations (same even/odd handling; the
    // deviations are already f64, so no overflow is possible here).
    let l = absolute_deviations.len();
    let mad: f64 = if l % 2 == 1 {
        absolute_deviations[l / 2]
    } else {
        0.5 * (absolute_deviations[l / 2 - 1] + absolute_deviations[l / 2])
    };

    Some((median_data, mad))
}

#[test]
fn test_mean() {
    // No mean exists for an empty slice.
    assert_eq!(mean(&[]), None);
    // The mean of a single value is that value itself.
    [0u64, 1, 5, 100]
        .iter()
        .copied()
        .for_each(|v| assert_eq!(mean(&[v]), Some(v as f64)));
    assert_eq!(mean(&[0, 1]), Some(0.5));
    assert_eq!(mean(&[0, 5, 100]), Some(35.0));
    assert_eq!(mean(&[7, 4, 30, 14]), Some(13.75));
}

#[test]
fn test_std_deviation() {
    // Empty input has no deviation.
    assert_eq!(std_deviation(&[]), None);
    // A single observation has zero spread.
    [0u64, 1, 5, 100]
        .iter()
        .copied()
        .for_each(|v| assert_eq!(std_deviation(&[v]), Some(0.0)));
    assert_eq!(std_deviation(&[1, 4]), Some(1.5));
    assert_eq!(std_deviation(&[2, 2, 2, 2]), Some(0.0));
    assert_eq!(
        std_deviation(&[1, 20, 300, 4000, 50000, 600000, 7000000, 80000000]),
        Some(26193874.56387471)
    );
}

#[test]
fn test_median_absolute_deviation() {
    // The implementation assumes its input is already sorted.
    assert_eq!(median_absolute_deviation(&mut []), None);
    // A singleton's median is itself and its deviation is zero.
    [0u64, 1, 5, 100]
        .iter()
        .copied()
        .for_each(|v| assert_eq!(median_absolute_deviation(&mut [v]), Some((v as f64, 0.0))));
    assert_eq!(median_absolute_deviation(&mut [1, 4]), Some((2.5, 1.5)));
    assert_eq!(
        median_absolute_deviation(&mut [2, 2, 2, 2]),
        Some((2.0, 0.0))
    );
    let mut spread = [
        1u64, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 9, 12, 52, 90,
    ];
    assert_eq!(median_absolute_deviation(&mut spread), Some((6.0, 2.0)));

    // When more than half of the data shares one value, MAD collapses to 0,
    // so any value different from the residual median classifies as an outlier.
    assert_eq!(
        median_absolute_deviation(&mut [0, 1, 1, 1, 1, 1, 1, 1, 0]),
        Some((1.0, 0.0))
    );
}
17 changes: 10 additions & 7 deletions lib/tests/it/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use std::os::unix::fs::DirBuilderExt;
use std::process::Command;
use std::time::SystemTime;

use ostree_ext::fixture::{FileDef, Fixture, CONTENTS_CHECKSUM_V0, CONTENTS_V0_LEN};
use ostree_ext::fixture::{FileDef, Fixture, CONTENTS_CHECKSUM_V0, LAYERS_V0_LEN};

const EXAMPLE_TAR_LAYER: &[u8] = include_bytes!("fixtures/hlinks.tar.gz");
const TEST_REGISTRY_DEFAULT: &str = "localhost:5000";
Expand Down Expand Up @@ -514,7 +514,7 @@ async fn impl_test_container_import_export(chunked: bool) -> Result<()> {
"/usr/bin/bash"
);

let n_chunks = if chunked { *CONTENTS_V0_LEN } else { 1 };
let n_chunks = if chunked { LAYERS_V0_LEN } else { 1 };
assert_eq!(cfg.rootfs().diff_ids().len(), n_chunks);
assert_eq!(cfg.history().len(), n_chunks);

Expand Down Expand Up @@ -617,7 +617,7 @@ fn validate_chunked_structure(oci_path: &Utf8Path) -> Result<()> {
let d = Dir::open_ambient_dir(oci_path, cap_std::ambient_authority())?;
let d = ocidir::OciDir::open(&d)?;
let manifest = d.read_manifest()?;
assert_eq!(manifest.layers().len(), *CONTENTS_V0_LEN);
assert_eq!(manifest.layers().len(), LAYERS_V0_LEN);
let ostree_layer = manifest.layers().first().unwrap();
let mut ostree_layer_blob = d
.read_blob(ostree_layer)
Expand Down Expand Up @@ -650,7 +650,7 @@ fn validate_chunked_structure(oci_path: &Utf8Path) -> Result<()> {

#[tokio::test]
async fn test_container_chunked() -> Result<()> {
let nlayers = *CONTENTS_V0_LEN - 1;
let nlayers = LAYERS_V0_LEN - 1;
let mut fixture = Fixture::new_v1()?;

let (imgref, expected_digest) = fixture.export_container().await.unwrap();
Expand Down Expand Up @@ -717,8 +717,11 @@ r usr/bin/bash bash-v0
let (first, second) = (to_fetch[0], to_fetch[1]);
assert!(first.0.commit.is_none());
assert!(second.0.commit.is_none());
assert_eq!(first.1, "testlink");
assert_eq!(second.1, "bash");
assert_eq!(
first.1,
"ostree export of commit 38ab1f9da373a0184b0b48db6e280076ab4b5d4691773475ae24825aae2272d4"
);
assert_eq!(second.1, "7 components");

assert_eq!(store::list_images(fixture.destrepo()).unwrap().len(), 1);
let n = store::count_layer_references(fixture.destrepo())? as i64;
Expand Down Expand Up @@ -792,7 +795,7 @@ r usr/bin/bash bash-v0
store::remove_images(fixture.destrepo(), [&derived_imgref.imgref]).unwrap();
assert_eq!(store::list_images(fixture.destrepo()).unwrap().len(), 0);
let n_removed = store::gc_image_layers(fixture.destrepo())?;
assert_eq!(n_removed, (*CONTENTS_V0_LEN + 1) as u32);
assert_eq!(n_removed, (LAYERS_V0_LEN + 1) as u32);

// Repo should be clean now
assert_eq!(store::count_layer_references(fixture.destrepo())?, 0);
Expand Down

0 comments on commit 5807950

Please sign in to comment.