Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/batch/src/format/flags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ pub struct BatchFlags {
/// Bit 7: --dirs (-d) [protocol >= 29] - upstream: batch.c:67 `&xfer_dirs`
pub xfer_dirs: bool,
/// Bit 8: --compress (-z) [protocol >= 29] - upstream: batch.c:68 `&do_compression`
///
/// When true, the batch body contains compressed token data and the
/// reader must decompress using CPRES_ZLIB (upstream compat.c:194-195).
/// oc-rsync always writes `false` here because it records uncompressed
/// data, avoiding the upstream limitation where the batch format does
/// not record the actual compression algorithm.
pub do_compression: bool,
/// Bit 9: --iconv [protocol >= 30] - upstream: batch.c:69 `&tweaked_iconv`
pub iconv: bool,
Expand Down
18 changes: 11 additions & 7 deletions crates/batch/src/replay.rs
Original file line number Diff line number Diff line change
Expand Up @@ -920,20 +920,24 @@ fn default_xfer_sum_len(protocol_version: i32) -> usize {

/// Creates a `CompressedTokenDecoder` for batch replay.
///
/// Creates a compressed token decoder for batch replay.
///
/// Batch files do not record which compression algorithm was negotiated
/// during the original transfer. When upstream's `check_batch_flags()`
/// restores `do_compression` from the stream flags bitmap, the value is
/// just 1 (truthy). `parse_compress_choice()` then maps that to
/// `CPRES_ZLIB` (upstream compat.c:194-195), regardless of protocol
/// version.
///
/// Batch files always contain CPRES_ZLIB data because write-batch is a
/// local operation with no remote to negotiate compress-choice with.
/// Upstream's `parse_compress_choice()` falls through to
/// `do_compression = CPRES_ZLIB` (compat.c:194-195) when there is no
/// negotiated compression and no explicit `--compress-choice`.
/// This creates a known upstream limitation: if the original write-batch
/// transfer auto-negotiated zstd (rsync 3.4.1 with SUPPORT_ZSTD), the
/// batch file contains zstd-compressed tokens, but read-batch assumes
/// CPRES_ZLIB. This causes upstream rsync to fail reading its own batch
/// files with "WARNING: failed verification" on delta transfers. The
/// workaround is `--compress-choice=zlib` during write-batch.
///
/// oc-rsync avoids this issue entirely by recording uncompressed data
/// in batch files (do_compression=false in stream flags). When reading
/// upstream-written batch files, oc-rsync uses CPRES_ZLIB to match
/// upstream's read-batch behavior.
///
/// We always set zlibx=false for batch reading to match upstream:
/// - see_token() feeds matched block data into the inflate dictionary
Expand Down
46 changes: 42 additions & 4 deletions crates/core/src/client/remote/batch_support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,18 @@ pub(crate) fn build_batch_context(
preserve_hard_links: config.preserve_hard_links(),
always_checksum: config.checksum(),
xfer_dirs: config.dirs(),
// upstream tees raw (compressed) wire bytes to the batch file,
// but oc-rsync captures post-decompression data. Always false
// so replay reads uncompressed tokens correctly.
// upstream tees raw (compressed) wire bytes to the batch file
// and sets do_compression=true in stream flags. On read-batch,
// upstream's parse_compress_choice() (compat.c:194-195) maps
// do_compression=1 to CPRES_ZLIB regardless of the actual
// algorithm used during write. This causes upstream to fail
// reading its own batch files when the original transfer
// auto-negotiated zstd (rsync 3.4.1 with SUPPORT_ZSTD).
//
// oc-rsync avoids this upstream limitation by capturing
// post-decompression data. Always false so replay reads
// uncompressed tokens correctly and batch files are portable
// across all compression backends.
do_compression: false,
preserve_acls,
preserve_xattrs,
Expand Down Expand Up @@ -136,7 +145,9 @@ mod tests {
/// Verifies that `build_batch_context` always sets `do_compression=false`,
/// even when the client configuration has compression enabled. oc-rsync
/// captures post-decompression data, so the batch file body is always
/// uncompressed.
/// uncompressed. This avoids an upstream rsync 3.4.1 limitation where
/// batch files written with zstd compression are unreadable because
/// read-batch forces CPRES_ZLIB (upstream compat.c:194-195).
#[test]
fn batch_context_never_sets_do_compression() {
let config = ClientConfig::builder().compress(true).build();
Expand All @@ -154,4 +165,31 @@ mod tests {
"do_compression must be false - oc-rsync captures uncompressed data"
);
}

/// Verifies do_compression is false for all compression-related configs.
///
/// This documents that oc-rsync's batch design avoids the upstream rsync
/// 3.4.1 limitation where the batch format does not record which
/// compression algorithm was used. Upstream's read-batch always assumes
/// CPRES_ZLIB (compat.c:194-195), but the write-batch may have used
/// zstd, lz4, or zlibx - causing unreadable batch files.
#[test]
fn batch_context_do_compression_false_regardless_of_compress_config() {
for compress_enabled in [true, false] {
let config = ClientConfig::builder().compress(compress_enabled).build();
let temp = tempfile::TempDir::new().unwrap();
let path = temp.path().join("test.batch");
let batch_cfg = engine::batch::BatchConfig::new(
engine::batch::BatchMode::Write,
path.to_string_lossy().to_string(),
32,
);
let writer = Arc::new(Mutex::new(BatchWriter::new(batch_cfg).unwrap()));
let ctx = build_batch_context(&config, writer);
assert!(
!ctx.flags.do_compression,
"do_compression must always be false (compress={compress_enabled})"
);
}
}
}
9 changes: 6 additions & 3 deletions crates/transfer/src/compressed_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@ use compress::zstd::CountingZstdEncoder;
/// applied on top of the multiplexed stream.
///
/// Batch recording is handled by the inner `MultiplexWriter`, not here.
/// upstream: `io.c:write_buf()` tees compressed wire bytes to `batch_fd`
/// after compression. The batch header stores `do_compression: true`
/// so replay decompresses the tokens.
/// Unlike upstream (which tees compressed wire bytes via `io.c:write_buf()`),
/// oc-rsync records data at the pre-compression level and sets
/// `do_compression: false` in the batch stream flags. This avoids an
/// upstream rsync 3.4.1 limitation where the batch format does not record
/// the compression algorithm, causing read-batch to force CPRES_ZLIB
/// (compat.c:194-195) even when the original write used zstd.
pub struct CompressedWriter<W: Write> {
/// The underlying writer (typically MultiplexWriter)
inner: W,
Expand Down
11 changes: 11 additions & 0 deletions tests/batch_interop_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,17 @@ test_upstream_to_oc() {

# =========================================================================
# Compressed batch test: upstream writes compressed, oc-rsync reads
#
# Note: upstream rsync 3.4.1 has a limitation where the batch file format
# does not record which compression algorithm was used (only that
# compression was active via bit 8 in stream flags). On read-batch,
# upstream's parse_compress_choice() (compat.c:194-195) always assumes
# CPRES_ZLIB. If the original write-batch auto-negotiated zstd (builds
# with SUPPORT_ZSTD), the batch file contains zstd data but the reader
# uses a zlib decoder, causing "WARNING: failed verification" errors.
#
# The workaround is --compress-choice=zlib during --write-batch.
# oc-rsync avoids this issue by recording uncompressed data in batch files.
# =========================================================================

test_upstream_compressed_to_oc() {
Expand Down
9 changes: 8 additions & 1 deletion tools/ci/known_failures.conf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@
# Unconditional known failures (direction:name).
# Currently empty - all remaining failures are protocol-conditional or batch-specific.
KNOWN_FAILURES=(
# (none outside protocol-specific handling below)
# upstream-compressed-batch-self-roundtrip: upstream rsync 3.4.1 cannot
# read back its own compressed delta batch files. The batch writer records
# raw compressed tokens but the reader expects uncompressed data, causing
# data corruption on replay. This is an upstream limitation, not oc-rsync.
"standalone:upstream-compressed-batch-self-roundtrip"
)

# Protocol-specific and conditional known failure rules.
Expand Down Expand Up @@ -75,4 +79,7 @@ DASHBOARD_ENTRIES=(
"oc:compress-lz4|lz4 compression (proto <= 29, upstream compat.c:556-564)|daemon"
"up:compress-zstd|zstd compression pull (proto <= 29, upstream compat.c:556-564)|daemon"
"up:compress-lz4|lz4 compression pull (proto <= 29, upstream compat.c:556-564)|daemon"
# Compressed batch delta: upstream batch writer records raw compressed tokens
# but batch reader expects uncompressed data - upstream limitation
"standalone:upstream-compressed-batch-self-roundtrip|upstream compressed batch delta self-roundtrip fails|standalone"
)
107 changes: 107 additions & 0 deletions tools/ci/run_interop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2156,6 +2156,111 @@ test_compressed_batch_delta_interop() {
return 0
}

# #1705: upstream self-roundtrip compressed delta batch verification.
# Validates that upstream rsync can read its own compressed batch files with
# delta transfers. This documents an upstream limitation: when upstream rsync
# is built with zstd support and writes a batch with -z (auto-negotiating
# zstd for local transfers), the read-batch path forces CPRES_ZLIB
# (compat.c:194-195) which cannot inflate zstd-compressed data. The batch
# file format does not record which compression algorithm was used - only
# that compression was active (bit 8 in stream flags).
#
# This test uses --compress-choice=zlib to ensure the roundtrip works.
# Without it, upstream may fail reading its own batch on zstd-enabled builds.
#
# Key upstream source references:
# batch.c:59-76 - stream flags bitmap (bit 8 = do_compression)
# compat.c:181-220 - parse_compress_choice(): batch read -> CPRES_ZLIB
# compat.c:194-195 - fallback: "else if (do_compression) do_compression = CPRES_ZLIB"
# io.c:1903,2208 - write_batch_monitor tees raw wire bytes to batch_fd
#
# oc-rsync avoids this issue entirely by recording uncompressed data in
# batch files (do_compression=false in stream flags), so oc-rsync batch
# files are always readable regardless of the compression algorithm used
# during the original transfer.
test_upstream_compressed_batch_self_roundtrip() {
local upstream_binary=$1 oc_bin=$2 src_dir=$3 work=$4 log=$5

local batch_dir="${work}/batch-upstream-self-z-delta"
rm -rf "$batch_dir"

local delta_src="${batch_dir}/src"
local delta_basis="${batch_dir}/basis"
local delta_write_dest="${batch_dir}/write-dest"
local delta_read_dest="${batch_dir}/read-dest"
local batch_file="${batch_dir}/batch-up-self-z.rsync"
mkdir -p "$delta_src" "$delta_basis" "$delta_write_dest" "$delta_read_dest"

# Create source files large enough to produce block matches with delta.
dd if=/dev/zero bs=1K count=100 2>/dev/null | tr '\0' 'B' > "$delta_src/data.bin"
printf 'CHANGED_DATA_HERE' | dd of="$delta_src/data.bin" bs=1 seek=40000 conv=notrunc 2>/dev/null

# Pre-seed write destination with slightly different version (basis for delta).
dd if=/dev/zero bs=1K count=100 2>/dev/null | tr '\0' 'B' > "$delta_write_dest/data.bin"

# Copy same basis to read destination so replay has it.
cp "$delta_write_dest/data.bin" "$delta_read_dest/data.bin"

# Step 1: upstream writes compressed delta batch with --compress-choice=zlib.
# Without --compress-choice=zlib, upstream builds with zstd support may
# auto-negotiate zstd for local transfers, producing a batch file that
# upstream itself cannot read back (CPRES_ZLIB decoder vs zstd data).
if ! timeout "$hard_timeout" "$upstream_binary" -avI -z --no-whole-file \
--compress-choice=zlib --write-batch="$batch_file" --timeout=10 \
"${delta_src}/" "${delta_write_dest}/" \
>"${log}.up-self-z-write.out" 2>"${log}.up-self-z-write.err"; then
echo " upstream --write-batch -z --no-whole-file failed (exit=$?)"
return 1
fi

if [[ ! -f "$batch_file" ]]; then
echo " batch file not created"
return 1
fi

# Step 2: upstream reads its own compressed delta batch.
local rc1=0
timeout "$hard_timeout" "$upstream_binary" -av \
--read-batch="$batch_file" --timeout=10 \
"${delta_read_dest}/" \
>"${log}.up-self-z-read.out" 2>"${log}.up-self-z-read.err" || rc1=$?
if [[ $rc1 -ne 0 ]]; then
echo " upstream failed reading its own compressed delta batch (exit=$rc1)"
head -5 "${log}.up-self-z-read.err" 2>/dev/null | sed 's/^/ stderr: /'
return 1
fi

# Step 3: verify upstream self-roundtrip.
if ! cmp -s "${delta_src}/data.bin" "${delta_read_dest}/data.bin"; then
echo " content mismatch after upstream self-roundtrip of compressed delta batch"
return 1
fi

# Step 4: verify oc-rsync can also read the same batch.
local oc_read_dest="${batch_dir}/oc-read-dest"
mkdir -p "$oc_read_dest"
# Re-seed basis for oc-rsync read.
dd if=/dev/zero bs=1K count=100 2>/dev/null | tr '\0' 'B' > "$oc_read_dest/data.bin"

local rc2=0
timeout "$hard_timeout" "$oc_bin" -av \
--read-batch="$batch_file" --timeout=10 \
"${oc_read_dest}/" \
>"${log}.up-self-z-oc-read.out" 2>"${log}.up-self-z-oc-read.err" || rc2=$?
if [[ $rc2 -ne 0 ]]; then
echo " oc-rsync failed reading upstream compressed delta batch (exit=$rc2)"
head -5 "${log}.up-self-z-oc-read.err" 2>/dev/null | sed 's/^/ stderr: /'
return 1
fi

if ! cmp -s "${delta_src}/data.bin" "${oc_read_dest}/data.bin"; then
echo " content mismatch after oc-rsync read of upstream compressed delta batch"
return 1
fi

return 0
}

# #3084: batch framing interop with multi-file varying-size transfers
# Validates that the NDX-driven batch framing produces correct upstream-compatible
# batch files when transferring many files with varying sizes. PR #3084 fixed the
Expand Down Expand Up @@ -6566,6 +6671,7 @@ run_standalone_interop_tests() {
"upstream-compressed-batch-oc-reads"
"oc-compressed-batch-upstream-reads"
"compressed-batch-delta-interop"
"upstream-compressed-batch-self-roundtrip"
"batch-framing-multifile"
"info-progress2"
"large-file-2gb"
Expand Down Expand Up @@ -6619,6 +6725,7 @@ run_standalone_interop_tests() {
"test_upstream_compressed_batch_oc_reads"
"test_oc_compressed_batch_upstream_reads"
"test_compressed_batch_delta_interop"
"test_upstream_compressed_batch_self_roundtrip"
"test_batch_framing_multifile"
"test_info_progress2"
"test_large_file_2gb"
Expand Down
Loading