Skip to content

Commit

Permalink
Auto merge of #718 - Mark-Simulacrum:shrink-html-report, r=Mark-Simul…
Browse files Browse the repository at this point in the history
…acrum

zstd-compress archives

Archives are smaller, faster to compress, and faster to decompress. Installing zstd is pretty easy on most platforms as well; on a couple of my systems the default system tar is able to read .tar.zst files just fine too.

It's a win for the duration of the compression, though: zstd-compressing the all.tar tarball takes ~12 seconds locally vs. 2 minutes with gzip (at default settings), and zstd produces a ~500 MB smaller compressed result (1.2 GB -> 705 MB).
  • Loading branch information
bors committed Dec 22, 2023
2 parents 6bf2017 + 48d50f0 commit 2474dbf
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 24 deletions.
29 changes: 29 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ docsrs-metadata = { git = "https://github.com/rust-lang/docs.rs/" }
dotenv = "0.15"
failure = "0.1.3"
flate2 = "1"
zstd = "0.13.0"
http = "0.2"
hyper = "0.14"
lazy_static = "1.0"
Expand Down
45 changes: 22 additions & 23 deletions src/report/archives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use crate::experiments::Experiment;
use crate::prelude::*;
use crate::report::{compare, Comparison, ReportWriter};
use crate::results::{EncodedLog, EncodingType, ReadResults};
use flate2::{write::GzEncoder, Compression};
use indexmap::IndexMap;
use tar::{Builder as TarBuilder, Header as TarHeader};
use tempfile::tempfile;
Expand Down Expand Up @@ -156,7 +155,7 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(
// writes to S3 (requiring buffer management etc) while avoiding keeping the blob entirely
// in memory.
let backing = tempfile()?;
let mut all = TarBuilder::new(GzEncoder::new(backing, Compression::default()));
let mut all = TarBuilder::new(zstd::stream::Encoder::new(backing, 0)?);
for entry in iterate(db, ex, crates, config) {
let entry = entry?;
let mut header = entry.header();
Expand All @@ -180,9 +179,9 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(
view = &buffer[..];
}
match dest.write_bytes(
"logs-archives/all.tar.gz",
"logs-archives/all.tar.zst",
view,
&"application/gzip".parse().unwrap(),
&"application/zstd".parse().unwrap(),
EncodingType::Plain,
) {
Ok(()) => break,
Expand All @@ -192,7 +191,7 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(
} else {
std::thread::sleep(std::time::Duration::from_secs(2));
warn!(
"retry ({}/{}) writing logs-archives/all.tar.gz ({} bytes) (error: {:?})",
"retry ({}/{}) writing logs-archives/all.tar.zst ({} bytes) (error: {:?})",
i,
RETRIES,
view.len(),
Expand All @@ -206,7 +205,7 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(

Ok(Archive {
name: "All the crates".to_string(),
path: "logs-archives/all.tar.gz".to_string(),
path: "logs-archives/all.tar.zst".to_string(),
})
}

Expand All @@ -229,22 +228,22 @@ pub fn write_logs_archives<DB: ReadResults, W: ReportWriter>(

by_comparison
.entry(entry.comparison)
.or_insert_with(|| TarBuilder::new(GzEncoder::new(Vec::new(), Compression::default())))
.or_insert_with(|| TarBuilder::new(zstd::stream::Encoder::new(Vec::new(), 3).unwrap()))
.append_data(&mut entry.header(), &entry.path, &entry.log_bytes[..])?;
}

for (comparison, archive) in by_comparison.drain(..) {
let data = archive.into_inner()?.finish()?;
dest.write_bytes(
format!("logs-archives/{comparison}.tar.gz"),
format!("logs-archives/{comparison}.tar.zst"),
&data,
&"application/gzip".parse().unwrap(),
EncodingType::Plain,
)?;

archives.push(Archive {
name: format!("{comparison} crates"),
path: format!("logs-archives/{comparison}.tar.gz"),
path: format!("logs-archives/{comparison}.tar.zst"),
});
}

Expand All @@ -261,11 +260,11 @@ mod tests {
use crate::prelude::*;
use crate::report::DummyWriter;
use crate::results::{DatabaseDB, EncodingType, FailureReason, TestResult, WriteResults};
use flate2::read::GzDecoder;
use mime::Mime;
use rustwide::logging::LogStorage;
use std::io::Read;
use tar::Archive;
use zstd::stream::Decoder;

#[test]
fn test_logs_archives_generation() {
Expand Down Expand Up @@ -355,20 +354,20 @@ mod tests {
assert_eq!(
&archives_paths,
&[
"logs-archives/all.tar.gz",
"logs-archives/regressed.tar.gz",
"logs-archives/test-pass.tar.gz",
"logs-archives/all.tar.zst",
"logs-archives/regressed.tar.zst",
"logs-archives/test-pass.tar.zst",
]
);

// Load the content of all the archives
let mime: Mime = "application/gzip".parse().unwrap();
let all_content = writer.get("logs-archives/all.tar.gz", &mime);
let mut all = Archive::new(GzDecoder::new(all_content.as_slice()));
let regressed_content = writer.get("logs-archives/regressed.tar.gz", &mime);
let mut regressed = Archive::new(GzDecoder::new(regressed_content.as_slice()));
let test_pass_content = writer.get("logs-archives/test-pass.tar.gz", &mime);
let mut test_pass = Archive::new(GzDecoder::new(test_pass_content.as_slice()));
let mime: Mime = "application/zstd".parse().unwrap();
let all_content = writer.get("logs-archives/all.tar.zst", &mime);
let mut all = Archive::new(Decoder::new(all_content.as_slice()).unwrap());
let regressed_content = writer.get("logs-archives/regressed.tar.zst", &mime);
let mut regressed = Archive::new(Decoder::new(regressed_content.as_slice()).unwrap());
let test_pass_content = writer.get("logs-archives/test-pass.tar.zst", &mime);
let mut test_pass = Archive::new(Decoder::new(test_pass_content.as_slice()).unwrap());

macro_rules! check_content {
($archive:ident: { $($file:expr => $match:expr,)* }) => {{
Expand Down Expand Up @@ -401,21 +400,21 @@ mod tests {
}}
}

// Check all.tar.gz
// Check all.tar.zst
check_content!(all: {
format!("regressed/{}/{}.txt", crate1.id(), ex.toolchains[0]) => "tc1 crate1",
format!("regressed/{}/{}.txt", crate1.id(), ex.toolchains[1]) => "tc2 crate1",
format!("test-pass/{}/{}.txt", crate2.id(), ex.toolchains[0]) => "tc1 crate2",
format!("test-pass/{}/{}.txt", crate2.id(), ex.toolchains[1]) => "tc2 crate2",
});

// Check regressed.tar.gz
// Check regressed.tar.zst
check_content!(regressed: {
format!("regressed/{}/{}.txt", crate1.id(), ex.toolchains[0]) => "tc1 crate1",
format!("regressed/{}/{}.txt", crate1.id(), ex.toolchains[1]) => "tc2 crate1",
});

// Check test-pass.tar.gz
// Check test-pass.tar.zst
check_content!(test_pass: {
format!("test-pass/{}/{}.txt", crate2.id(), ex.toolchains[0]) => "tc1 crate2",
format!("test-pass/{}/{}.txt", crate2.id(), ex.toolchains[1]) => "tc2 crate2",
Expand Down
2 changes: 1 addition & 1 deletion templates/report/downloads.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

<div class="category">
<div class="header header-background toggle" data-toggle="#downloads-archives">
Build logs (tar.gz)
Build logs (tar.zst)
</div>
<div class="crates" id="downloads-archives">
{% for archive in available_archives %}
Expand Down

0 comments on commit 2474dbf

Please sign in to comment.