From 71a8daecd9ad10ba8aa0d66131e326722f1d78ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josef=20=C5=A0im=C3=A1nek?= Date: Tue, 27 Jun 2023 02:36:59 +0200 Subject: [PATCH] Opaque Etags for compact index requests This changes the CompactIndexClient to store etags received from the compact index in separate files rather than relying on the MD5 checksum of the file as the etag. Smoothes the upgrade from md5 etags to opaque by generating them when no etag file exists. This should reduce the initial impact of changing the caching behavior by reducing cache misses when the MD5 etag is the same. Eventually, the MD5 behavior should be retired and the etag should be considered completely opaque with no assumption that MD5 would match. --- lib/bundler/compact_index_client.rb | 21 +- lib/bundler/compact_index_client/cache.rb | 30 ++- .../compact_index_client/cache_file.rb | 153 ++++++++++++++ lib/bundler/compact_index_client/updater.rb | 165 +++++++-------- lib/bundler/fetcher/compact_index.rb | 4 +- lib/bundler/shared_helpers.rb | 15 ++ .../compact_index_client/updater_spec.rb | 199 +++++++++++++++++- .../install/gems/compact_index_spec.rb | 125 +++++++++-- .../compact_index_checksum_mismatch.rb | 4 +- .../compact_index_concurrent_download.rb | 7 +- .../artifice/compact_index_etag_match.rb | 16 ++ .../artifice/compact_index_partial_update.rb | 2 +- ...compact_index_partial_update_bad_digest.rb | 40 ++++ ...rtial_update_no_digest_not_incremental.rb} | 12 +- .../artifice/compact_index_range_ignored.rb | 40 ++++ .../support/artifice/helpers/compact_index.rb | 14 +- 16 files changed, 702 insertions(+), 145 deletions(-) create mode 100644 lib/bundler/compact_index_client/cache_file.rb create mode 100644 spec/bundler/support/artifice/compact_index_etag_match.rb create mode 100644 spec/bundler/support/artifice/compact_index_partial_update_bad_digest.rb rename spec/bundler/support/artifice/{compact_index_partial_update_no_etag_not_incremental.rb => compact_index_partial_update_no_digest_not_incremental.rb} (72%) create mode 100644 spec/bundler/support/artifice/compact_index_range_ignored.rb diff --git a/lib/bundler/compact_index_client.rb b/lib/bundler/compact_index_client.rb index 127a50e81080aa..68e0d7e0d51ffc 100644 --- a/lib/bundler/compact_index_client.rb +++ b/lib/bundler/compact_index_client.rb @@ -5,7 +5,13 @@ module Bundler class CompactIndexClient + # NOTE: MD5 is here not because we expect a server to respond with it, but + # because we use it to generate the etag on first request during the upgrade + # to the compact index client that uses opaque etags saved to files. + # Remove once 2.5.0 has been out for a while. 
+ SUPPORTED_DIGESTS = { "sha-256" => :SHA256, "md5" => :MD5 }.freeze DEBUG_MUTEX = Thread::Mutex.new + def self.debug return unless ENV["DEBUG_COMPACT_INDEX"] DEBUG_MUTEX.synchronize { warn("[#{self}] #{yield}") } @@ -14,6 +20,7 @@ def self.debug class Error < StandardError; end require_relative "compact_index_client/cache" + require_relative "compact_index_client/cache_file" require_relative "compact_index_client/updater" attr_reader :directory @@ -54,13 +61,13 @@ def sequentially def names Bundler::CompactIndexClient.debug { "/names" } - update(@cache.names_path, "names") + update("names", @cache.names_path, @cache.names_etag_path) @cache.names end def versions Bundler::CompactIndexClient.debug { "/versions" } - update(@cache.versions_path, "versions") + update("versions", @cache.versions_path, @cache.versions_etag_path) versions, @info_checksums_by_name = @cache.versions versions end @@ -76,36 +83,36 @@ def dependencies(names) def update_and_parse_checksums! Bundler::CompactIndexClient.debug { "update_and_parse_checksums!" } return @info_checksums_by_name if @parsed_checksums - update(@cache.versions_path, "versions") + update("versions", @cache.versions_path, @cache.versions_etag_path) @info_checksums_by_name = @cache.checksums @parsed_checksums = true end private - def update(local_path, remote_path) + def update(remote_path, local_path, local_etag_path) Bundler::CompactIndexClient.debug { "update(#{local_path}, #{remote_path})" } unless synchronize { @endpoints.add?(remote_path) } Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" } return end - @updater.update(local_path, url(remote_path)) + @updater.update(url(remote_path), local_path, local_etag_path) end def update_info(name) Bundler::CompactIndexClient.debug { "update_info(#{name})" } path = @cache.info_path(name) - checksum = @updater.checksum_for_file(path) unless existing = @info_checksums_by_name[name] Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since it is missing from versions" } return end + checksum = SharedHelpers.checksum_for_file(path, :MD5) if checksum == existing Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since the versions checksum matches the local checksum" } return end Bundler::CompactIndexClient.debug { "updating info for #{name} since the versions checksum #{existing} != the local checksum #{checksum}" } - update(path, "info/#{name}") + update("info/#{name}", path, @cache.info_etag_path(name)) end def url(path) diff --git a/lib/bundler/compact_index_client/cache.rb b/lib/bundler/compact_index_client/cache.rb index b5607c875139af..5efdf18eba5516 100644 --- a/lib/bundler/compact_index_client/cache.rb +++ b/lib/bundler/compact_index_client/cache.rb @@ -9,11 +9,8 @@ class Cache def initialize(directory) @directory = Pathname.new(directory).expand_path - info_roots.each do |dir| - SharedHelpers.filesystem_access(dir) do - FileUtils.mkdir_p(dir) - end - end + info_roots.each {|dir| mkdir(dir) } + mkdir(info_etag_root) end def names @@ -24,6 +21,10 @@ def names_path directory.join("names") end + def names_etag_path + directory.join("names.etag") + end + def versions versions_by_name = Hash.new {|hash, key| hash[key] = [] } info_checksums_by_name = {} @@ -49,6 +50,10 @@ def versions_path directory.join("versions") end + def versions_etag_path + directory.join("versions.etag") + end + def checksums checksums = {} @@ -76,8 +81,19 @@ def info_path(name) end end + def info_etag_path(name) + name = name.to_s + 
info_etag_root.join("#{name}-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}") + end + private + def mkdir(dir) + SharedHelpers.filesystem_access(dir) do + FileUtils.mkdir_p(dir) + end + end + def lines(path) return [] unless path.file? lines = SharedHelpers.filesystem_access(path, :read, &:read).split("\n") @@ -96,6 +112,10 @@ def info_roots directory.join("info-special-characters"), ] end + + def info_etag_root + directory.join("info-etags") + end end end end diff --git a/lib/bundler/compact_index_client/cache_file.rb b/lib/bundler/compact_index_client/cache_file.rb new file mode 100644 index 00000000000000..5988bc91b3bac4 --- /dev/null +++ b/lib/bundler/compact_index_client/cache_file.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +require_relative "../vendored_fileutils" +require "rubygems/package" + +module Bundler + class CompactIndexClient + # write cache files in a way that is robust to concurrent modifications + # if digests are given, the checksums will be verified + class CacheFile + DEFAULT_FILE_MODE = 0o644 + private_constant :DEFAULT_FILE_MODE + + class Error < RuntimeError; end + class ClosedError < Error; end + + class DigestMismatchError < Error + def initialize(digests, expected_digests) + super "Calculated checksums #{digests.inspect} did not match expected #{expected_digests.inspect}." + end + end + + # Initialize with a copy of the original file, then yield the instance. + def self.copy(path, &block) + new(path) do |file| + file.initialize_digests + + SharedHelpers.filesystem_access(path, :read) do + path.open("rb") do |s| + file.open {|f| IO.copy_stream(s, f) } + end + end + + yield file + end + end + + # Write data to a temp file, then replace the original file with it verifying the digests if given. + def self.write(path, data, digests = nil) + return unless data + new(path) do |file| + file.digests = digests + file.write(data) + end + end + + attr_reader :original_path, :path + + def initialize(original_path, &block) + @original_path = original_path + @perm = original_path.file? ? original_path.stat.mode : DEFAULT_FILE_MODE + @path = original_path.sub(/$/, ".#{$$}.tmp") + return unless block_given? + begin + yield self + ensure + close + end + end + + def size + path.size + end + + # initialize the digests using CompactIndexClient::SUPPORTED_DIGESTS, or a subset based on keys. + def initialize_digests(keys = nil) + @digests = keys ? SUPPORTED_DIGESTS.slice(*keys) : SUPPORTED_DIGESTS.dup + @digests.transform_values! {|algo_class| SharedHelpers.digest(algo_class).new } + end + + # reset the digests so they don't contain any previously read data + def reset_digests + @digests&.each_value(&:reset) + end + + # set the digests that will be verified at the end + def digests=(expected_digests) + @expected_digests = expected_digests + + if @expected_digests.nil? + @digests = nil + elsif @digests + @digests = @digests.slice(*@expected_digests.keys) + else + initialize_digests(@expected_digests.keys) + end + end + + # remove this method when we stop generating md5 digests for legacy etags + def md5 + @digests && @digests["md5"] + end + + def digests? + @digests&.any? + end + + # Open the temp file for writing, reusing original permissions, yielding the IO object. + def open(write_mode = "wb", perm = @perm, &block) + raise ClosedError, "Cannot reopen closed file" if @closed + SharedHelpers.filesystem_access(path, :write) do + path.open(write_mode, perm) do |f| + yield digests? ? 
Gem::Package::DigestIO.new(f, @digests) : f + end + end + end + + # Returns false without appending when no digests since appending is too error prone to do without digests. + def append(data) + return false unless digests? + open("a") {|f| f.write data } + verify && commit + end + + def write(data) + reset_digests + open {|f| f.write data } + commit! + end + + def commit! + verify || raise(DigestMismatchError.new(@base64digests, @expected_digests)) + commit + end + + # Verify the digests, returning true on match, false on mismatch. + def verify + return true unless @expected_digests && digests? + @base64digests = @digests.transform_values!(&:base64digest) + @digests = nil + @base64digests.all? {|algo, digest| @expected_digests[algo] == digest } + end + + # Replace the original file with the temp file without verifying digests. + # The file is permanently closed. + def commit + raise ClosedError, "Cannot commit closed file" if @closed + SharedHelpers.filesystem_access(original_path, :write) do + FileUtils.mv(path, original_path) + end + @closed = true + end + + # Remove the temp file without replacing the original file. + # The file is permanently closed. + def close + return if @closed + FileUtils.remove_file(path) if @path&.file? + @closed = true + end + end + end +end diff --git a/lib/bundler/compact_index_client/updater.rb b/lib/bundler/compact_index_client/updater.rb index 3b75d5c129b121..c4686fad7d74df 100644 --- a/lib/bundler/compact_index_client/updater.rb +++ b/lib/bundler/compact_index_client/updater.rb @@ -1,20 +1,11 @@ # frozen_string_literal: true -require_relative "../vendored_fileutils" - module Bundler class CompactIndexClient class Updater - class MisMatchedChecksumError < Error - def initialize(path, server_checksum, local_checksum) - @path = path - @server_checksum = server_checksum - @local_checksum = local_checksum - end - - def message - "The checksum of /#{@path} does not match the checksum provided by the server! Something is wrong " \ - "(local checksum is #{@local_checksum.inspect}, was expecting #{@server_checksum.inspect})." + class MismatchedChecksumError < Error + def initialize(path, message) + super "The checksum of /#{path} does not match the checksum provided by the server! Something is wrong. #{message}" end end @@ -22,100 +13,102 @@ def initialize(fetcher) @fetcher = fetcher end - def update(local_path, remote_path, retrying = nil) - headers = {} - - local_temp_path = local_path.sub(/$/, ".#{$$}") - local_temp_path = local_temp_path.sub(/$/, ".retrying") if retrying - local_temp_path = local_temp_path.sub(/$/, ".tmp") - - # first try to fetch any new bytes on the existing file - if retrying.nil? && local_path.file? - copy_file local_path, local_temp_path + def update(remote_path, local_path, etag_path) + append(remote_path, local_path, etag_path) || replace(remote_path, local_path, etag_path) + rescue CacheFile::DigestMismatchError => e + raise MismatchedChecksumError.new(remote_path, e.message) + rescue Zlib::GzipFile::Error + raise Bundler::HTTPError + end - headers["If-None-Match"] = etag_for(local_temp_path) - headers["Range"] = - if local_temp_path.size.nonzero? - # Subtract a byte to ensure the range won't be empty. - # Avoids 416 (Range Not Satisfiable) responses. 
- "bytes=#{local_temp_path.size - 1}-" - else - "bytes=#{local_temp_path.size}-" - end - end + private - response = @fetcher.call(remote_path, headers) - return nil if response.is_a?(Net::HTTPNotModified) + def append(remote_path, local_path, etag_path) + return false unless local_path.file? && local_path.size.nonzero? - content = response.body + CacheFile.copy(local_path) do |file| + etag = etag_path.read.tap(&:chomp!) if etag_path.file? + etag ||= generate_etag(etag_path, file) # Remove this after 2.5.0 has been out for a while. - etag = (response["ETag"] || "").gsub(%r{\AW/}, "") - correct_response = SharedHelpers.filesystem_access(local_temp_path) do - if response.is_a?(Net::HTTPPartialContent) && local_temp_path.size.nonzero? - local_temp_path.open("a") {|f| f << slice_body(content, 1..-1) } + # Subtract a byte to ensure the range won't be empty. + # Avoids 416 (Range Not Satisfiable) responses. + response = @fetcher.call(remote_path, request_headers(etag, file.size - 1)) + break true if response.is_a?(Net::HTTPNotModified) - etag_for(local_temp_path) == etag + file.digests = parse_digests(response) + # server may ignore Range and return the full response + if response.is_a?(Net::HTTPPartialContent) + break false unless file.append(response.body.byteslice(1..-1)) else - local_temp_path.open("wb") {|f| f << content } - - etag.length.zero? || etag_for(local_temp_path) == etag + file.write(response.body) end + CacheFile.write(etag_path, etag(response)) + true end + end - if correct_response - SharedHelpers.filesystem_access(local_path) do - FileUtils.mv(local_temp_path, local_path) - end - return nil - end + # request without range header to get the full file or a 304 Not Modified + def replace(remote_path, local_path, etag_path) + etag = etag_path.read.tap(&:chomp!) if etag_path.file? + response = @fetcher.call(remote_path, request_headers(etag)) + return true if response.is_a?(Net::HTTPNotModified) + CacheFile.write(local_path, response.body, parse_digests(response)) + CacheFile.write(etag_path, etag(response)) + end - if retrying - raise MisMatchedChecksumError.new(remote_path, etag, etag_for(local_temp_path)) - end + def request_headers(etag, range_start = nil) + headers = {} + headers["Range"] = "bytes=#{range_start}-" if range_start + headers["If-None-Match"] = etag if etag + headers + end - update(local_path, remote_path, :retrying) - rescue Zlib::GzipFile::Error - raise Bundler::HTTPError - ensure - FileUtils.remove_file(local_temp_path) if File.exist?(local_temp_path) + def etag_for_request(etag_path) + etag_path.read.tap(&:chomp!) if etag_path.file? end - def etag_for(path) - sum = checksum_for_file(path) - sum ? %("#{sum}") : nil + # When first releasing this opaque etag feature, we want to generate the old MD5 etag + # based on the content of the file. After that it will always use the saved opaque etag. + # This transparently saves existing users with good caches from updating a bunch of files. + # Remove this behavior after 2.5.0 has been out for a while. + def generate_etag(etag_path, file) + etag = file.md5.hexdigest + CacheFile.write(etag_path, etag) + etag end - def slice_body(body, range) - body.byteslice(range) + def etag(response) + return unless response["ETag"] + etag = response["ETag"].delete_prefix("W/") + return if etag.delete_prefix!('"') && !etag.delete_suffix!('"') + etag end - def checksum_for_file(path) - return nil unless path.file? 
- # This must use File.read instead of Digest.file().hexdigest - # because we need to preserve \n line endings on windows when calculating - # the checksum - SharedHelpers.filesystem_access(path, :read) do - File.open(path, "rb") do |f| - digest = SharedHelpers.digest(:MD5).new - buf = String.new(:capacity => 16_384, :encoding => Encoding::BINARY) - digest << buf while f.read(16_384, buf) - digest.hexdigest - end + # Unwraps and returns a Hash of digest algorithms and base64 values + # according to RFC 8941 Structured Field Values for HTTP. + # https://www.rfc-editor.org/rfc/rfc8941#name-parsing-a-byte-sequence + # Ignores unsupported algorithms. + def parse_digests(response) + return unless header = response["Repr-Digest"] || response["Digest"] + digests = {} + header.split(",") do |param| + algorithm, value = param.split("=", 2) + algorithm.strip! + algorithm.downcase! + next unless SUPPORTED_DIGESTS.key?(algorithm) + next unless value = byte_sequence(value) + digests[algorithm] = value end + digests.empty? ? nil : digests end - private - - def copy_file(source, dest) - SharedHelpers.filesystem_access(source, :read) do - File.open(source, "r") do |s| - SharedHelpers.filesystem_access(dest, :write) do - File.open(dest, "wb", s.stat.mode) do |f| - IO.copy_stream(s, f) - end - end - end - end + # Unwrap surrounding colons (byte sequence) + # The wrapping characters must be matched or we return nil. + # Also handles quotes because right now rubygems.org sends them. + def byte_sequence(value) + return if value.delete_prefix!(":") && !value.delete_suffix!(":") + return if value.delete_prefix!('"') && !value.delete_suffix!('"') + value end end end diff --git a/lib/bundler/fetcher/compact_index.rb b/lib/bundler/fetcher/compact_index.rb index dc30443e27a542..f0ba30c7ca687b 100644 --- a/lib/bundler/fetcher/compact_index.rb +++ b/lib/bundler/fetcher/compact_index.rb @@ -13,7 +13,7 @@ def self.compact_index_request(method_name) undef_method(method_name) define_method(method_name) do |*args, &blk| method.bind(self).call(*args, &blk) - rescue NetworkDownError, CompactIndexClient::Updater::MisMatchedChecksumError => e + rescue NetworkDownError, CompactIndexClient::Updater::MismatchedChecksumError => e raise HTTPError, e.message rescue AuthenticationRequiredError, BadAuthenticationError # Fail since we got a 401 from the server. @@ -62,7 +62,7 @@ def available? end # Read info file checksums out of /versions, so we can know if gems are up to date compact_index_client.update_and_parse_checksums! - rescue CompactIndexClient::Updater::MisMatchedChecksumError => e + rescue CompactIndexClient::Updater::MismatchedChecksumError => e Bundler.ui.debug(e.message) nil end diff --git a/lib/bundler/shared_helpers.rb b/lib/bundler/shared_helpers.rb index cccc2b63d9ee65..fa7db1c09d42d8 100644 --- a/lib/bundler/shared_helpers.rb +++ b/lib/bundler/shared_helpers.rb @@ -193,6 +193,21 @@ def digest(name) Digest(name) end + def checksum_for_file(path, digest) + return unless path.file? 
+ # This must use File.read instead of Digest.file().hexdigest + # because we need to preserve \n line endings on windows when calculating + # the checksum + SharedHelpers.filesystem_access(path, :read) do + File.open(path, "rb") do |f| + digest = SharedHelpers.digest(digest).new + buf = String.new(:capacity => 16_384, :encoding => Encoding::BINARY) + digest << buf while f.read(16_384, buf) + digest.hexdigest + end + end + end + def write_to_gemfile(gemfile_path, contents) filesystem_access(gemfile_path) {|g| File.open(g, "w") {|file| file.puts contents } } end diff --git a/spec/bundler/bundler/compact_index_client/updater_spec.rb b/spec/bundler/bundler/compact_index_client/updater_spec.rb index fe417e392067c9..430e536ac0dfdb 100644 --- a/spec/bundler/bundler/compact_index_client/updater_spec.rb +++ b/spec/bundler/bundler/compact_index_client/updater_spec.rb @@ -6,21 +6,202 @@ require "tmpdir" RSpec.describe Bundler::CompactIndexClient::Updater do + subject(:updater) { described_class.new(fetcher) } + let(:fetcher) { double(:fetcher) } - let(:local_path) { Pathname.new Dir.mktmpdir("localpath") } + let(:local_path) { Pathname.new(Dir.mktmpdir("localpath")).join("versions") } + let(:etag_path) { Pathname.new(Dir.mktmpdir("localpath-etags")).join("versions.etag") } let(:remote_path) { double(:remote_path) } - let!(:updater) { described_class.new(fetcher) } + let(:full_body) { "abc123" } + let(:response) { double(:response, :body => full_body, :is_a? => false) } + let(:digest) { Digest::SHA256.base64digest(full_body) } + + context "when the local path does not exist" do + before do + allow(response).to receive(:[]).with("Repr-Digest") { nil } + allow(response).to receive(:[]).with("Digest") { nil } + allow(response).to receive(:[]).with("ETag") { "thisisanetag" } + end + + it "downloads the file without attempting append" do + expect(fetcher).to receive(:call).once.with(remote_path, {}) { response } + + updater.update(remote_path, local_path, etag_path) + + expect(local_path.read).to eq(full_body) + expect(etag_path.read).to eq("thisisanetag") + end + + it "fails immediately on bad checksum" do + expect(fetcher).to receive(:call).once.with(remote_path, {}) { response } + allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:baddigest:" } + + expect do + updater.update(remote_path, local_path, etag_path) + end.to raise_error(Bundler::CompactIndexClient::Updater::MismatchedChecksumError) + end + end + + context "when the local path exists" do + let(:local_body) { "abc" } + + before do + local_path.open("w") {|f| f.write(local_body) } + end + + context "with an etag" do + before do + etag_path.open("w") {|f| f.write("LocalEtag") } + end + + let(:headers) do + { + "If-None-Match" => "LocalEtag", + "Range" => "bytes=2-", + } + end + + it "does nothing if etags match" do + expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response) + allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { false } + allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { true } + + updater.update(remote_path, local_path, etag_path) + + expect(local_path.read).to eq("abc") + expect(etag_path.read).to eq("LocalEtag") + end + + it "appends the file if etags do not match" do + expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response) + allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" } + allow(response).to receive(:[]).with("ETag") { "NewEtag" } + allow(response).to 
receive(:is_a?).with(Net::HTTPPartialContent) { true } + allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { false } + allow(response).to receive(:body) { "c123" } + + updater.update(remote_path, local_path, etag_path) + + expect(local_path.read).to eq(full_body) + expect(etag_path.read).to eq("NewEtag") + end + + it "replaces the file if response ignores range" do + expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response) + allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" } + allow(response).to receive(:[]).with("ETag") { "NewEtag" } + allow(response).to receive(:body) { full_body } + + updater.update(remote_path, local_path, etag_path) + + expect(local_path.read).to eq(full_body) + expect(etag_path.read).to eq("NewEtag") + end + + it "tries the request again if the partial response fails digest check" do + allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:baddigest:" } + allow(response).to receive(:body) { "the beginning of the file changed" } + allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { true } + expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response) + + full_response = double(:full_response, :body => full_body, :is_a? => false) + allow(full_response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" } + allow(full_response).to receive(:[]).with("ETag") { "NewEtag" } + expect(fetcher).to receive(:call).once.with(remote_path, { "If-None-Match" => "LocalEtag" }).and_return(full_response) + + updater.update(remote_path, local_path, etag_path) + + expect(local_path.read).to eq(full_body) + expect(etag_path.read).to eq("NewEtag") + end + end + + context "without an etag file" do + let(:headers) do + { + "Range" => "bytes=2-", + # This MD5 feature should be deleted after sufficient time has passed since release. + # From then on, requests that still don't have a saved etag will be made without this header. 
+ "If-None-Match" => Digest::MD5.hexdigest(local_body), + } + end + + it "saves only the etag_path if generated etag matches" do + expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response) + allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { false } + allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { true } + + updater.update(remote_path, local_path, etag_path) + + expect(local_path.read).to eq("abc") + expect(etag_path.read).to eq(headers["If-None-Match"]) + end + + it "appends the file" do + expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response) + allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" } + allow(response).to receive(:[]).with("ETag") { "OpaqueEtag" } + allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { true } + allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { false } + allow(response).to receive(:body) { "c123" } + + updater.update(remote_path, local_path, etag_path) + + expect(local_path.read).to eq(full_body) + expect(etag_path.read).to eq("OpaqueEtag") + end + + it "replaces the file on full file response that ignores range request" do + expect(fetcher).to receive(:call).once.with(remote_path, headers).and_return(response) + allow(response).to receive(:[]).with("Repr-Digest") { nil } + allow(response).to receive(:[]).with("Digest") { nil } + allow(response).to receive(:[]).with("ETag") { "OpaqueEtag" } + allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { false } + allow(response).to receive(:is_a?).with(Net::HTTPNotModified) { false } + allow(response).to receive(:body) { full_body } + + updater.update(remote_path, local_path, etag_path) + + expect(local_path.read).to eq(full_body) + expect(etag_path.read).to eq("OpaqueEtag") + end + + it "tries the request again if the partial response fails digest check" do + allow(response).to receive(:[]).with("Repr-Digest") { "sha-256=:baddigest:" } + allow(response).to receive(:body) { "the beginning of the file changed" } + allow(response).to receive(:is_a?).with(Net::HTTPPartialContent) { true } + expect(fetcher).to receive(:call).once.with(remote_path, headers) do + # During the failed first request, we simulate another process writing the etag. + # This ensures the second request doesn't generate the md5 etag again but just uses whatever is written. + etag_path.open("w") {|f| f.write("LocalEtag") } + response + end + + full_response = double(:full_response, :body => full_body, :is_a? 
=> false) + allow(full_response).to receive(:[]).with("Repr-Digest") { "sha-256=:#{digest}:" } + allow(full_response).to receive(:[]).with("ETag") { "NewEtag" } + expect(fetcher).to receive(:call).once.with(remote_path, { "If-None-Match" => "LocalEtag" }).and_return(full_response) + + updater.update(remote_path, local_path, etag_path) + + expect(local_path.read).to eq(full_body) + expect(etag_path.read).to eq("NewEtag") + end + end + end context "when the ETag header is missing" do # Regression test for https://github.com/rubygems/bundler/issues/5463 - let(:response) { double(:response, :body => "abc123") } + let(:response) { double(:response, :body => full_body) } it "treats the response as an update" do - expect(response).to receive(:[]).with("ETag") { nil } + allow(response).to receive(:[]).with("Repr-Digest") { nil } + allow(response).to receive(:[]).with("Digest") { nil } + allow(response).to receive(:[]).with("ETag") { nil } expect(fetcher).to receive(:call) { response } - updater.update(local_path, remote_path) + updater.update(remote_path, local_path, etag_path) end end @@ -31,7 +212,7 @@ expect(fetcher).to receive(:call).and_raise(Zlib::GzipFile::Error) expect do - updater.update(local_path, remote_path) + updater.update(remote_path, local_path, etag_path) end.to raise_error(Bundler::HTTPError) end end @@ -46,10 +227,12 @@ begin $VERBOSE = false Encoding.default_internal = "ASCII" - expect(response).to receive(:[]).with("ETag") { nil } + allow(response).to receive(:[]).with("Repr-Digest") { nil } + allow(response).to receive(:[]).with("Digest") { nil } + allow(response).to receive(:[]).with("ETag") { nil } expect(fetcher).to receive(:call) { response } - updater.update(local_path, remote_path) + updater.update(remote_path, local_path, etag_path) ensure Encoding.default_internal = previous_internal_encoding $VERBOSE = old_verbose diff --git a/spec/bundler/install/gems/compact_index_spec.rb b/spec/bundler/install/gems/compact_index_spec.rb index b2ed9565a42fe0..b383614410b5c7 100644 --- a/spec/bundler/install/gems/compact_index_spec.rb +++ b/spec/bundler/install/gems/compact_index_spec.rb @@ -157,9 +157,8 @@ bundle :install, :verbose => true, :artifice => "compact_index_checksum_mismatch" expect(out).to include("Fetching gem metadata from #{source_uri}") - expect(out).to include <<-'WARN' -The checksum of /versions does not match the checksum provided by the server! Something is wrong (local checksum is "\"d41d8cd98f00b204e9800998ecf8427e\"", was expecting "\"123\""). 
- WARN + expect(out).to include("The checksum of /versions does not match the checksum provided by the server!") + expect(out).to include('Calculated checksums {"sha-256"=>"8KfZiM/fszVkqhP/m5s9lvE6M9xKu4I1bU4Izddp5Ms="} did not match expected {"sha-256"=>"ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0="}') expect(the_bundle).to include_gems "rack 1.0.0" end @@ -169,10 +168,12 @@ gem "rack" G - versions = File.join(Bundler.rubygems.user_home, ".bundle", "cache", "compact_index", - "localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions") - FileUtils.mkdir_p(File.dirname(versions)) - FileUtils.touch(versions) + versions = Pathname.new(Bundler.rubygems.user_home).join( + ".bundle", "cache", "compact_index", + "localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions" + ) + versions.dirname.mkpath + versions.write("created_at") FileUtils.chmod("-r", versions) bundle :install, :artifice => "compact_index", :raise_on_error => false @@ -772,23 +773,83 @@ def start end end - it "performs partial update with a non-empty range" do + it "performs update with etag not-modified" do + versions_etag = Pathname.new(Bundler.rubygems.user_home).join( + ".bundle", "cache", "compact_index", + "localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions.etag" + ) + expect(versions_etag.file?).to eq(false) + gemfile <<-G source "#{source_uri}" gem 'rack', '0.9.1' G - # Initial install creates the cached versions file + # Initial install creates the cached versions file and etag file bundle :install, :artifice => "compact_index" + expect(versions_etag.file?).to eq(true) + previous_content = versions_etag.binread + # Update the Gemfile so we can check subsequent install was successful gemfile <<-G source "#{source_uri}" gem 'rack', '1.0.0' G - # Second install should make only a partial request to /versions - bundle :install, :artifice => "compact_index_partial_update" + # Second install should match etag + bundle :install, :artifice => "compact_index_etag_match" + + expect(versions_etag.binread).to eq(previous_content) + expect(the_bundle).to include_gems "rack 1.0.0" + end + + it "performs full update when range is ignored" do + gemfile <<-G + source "#{source_uri}" + gem 'rack', '0.9.1' + G + + # Initial install creates the cached versions file and etag file + bundle :install, :artifice => "compact_index" + + gemfile <<-G + source "#{source_uri}" + gem 'rack', '1.0.0' + G + + versions = Pathname.new(Bundler.rubygems.user_home).join( + ".bundle", "cache", "compact_index", + "localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions" + ) + # Modify the cached file. The ranged request will be based on this but, + # in this test, the range is ignored so this gets overwritten, allowing install. 
+ versions.write "ruining this file" + + bundle :install, :artifice => "compact_index_range_ignored" + + expect(the_bundle).to include_gems "rack 1.0.0" + end + + it "performs partial update with a non-empty range" do + build_repo4 do + build_gem "rack", "0.9.1" + end + + # Initial install creates the cached versions file + install_gemfile <<-G, :artifice => "compact_index", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s } + source "#{source_uri}" + gem 'rack', '0.9.1' + G + + update_repo4 do + build_gem "rack", "1.0.0" + end + + install_gemfile <<-G, :artifice => "compact_index_partial_update", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s } + source "#{source_uri}" + gem 'rack', '1.0.0' + G expect(the_bundle).to include_gems "rack 1.0.0" end @@ -799,19 +860,43 @@ def start gem 'rack' G - # Create an empty file to trigger a partial download - versions = File.join(Bundler.rubygems.user_home, ".bundle", "cache", "compact_index", - "localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions") - FileUtils.mkdir_p(File.dirname(versions)) - FileUtils.touch(versions) + # Create a partial cache versions file + versions = Pathname.new(Bundler.rubygems.user_home).join( + ".bundle", "cache", "compact_index", + "localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions" + ) + versions.dirname.mkpath + versions.write("created_at") bundle :install, :artifice => "compact_index_concurrent_download" - expect(File.read(versions)).to start_with("created_at") + expect(versions.read).to start_with("created_at") + expect(the_bundle).to include_gems "rack 1.0.0" + end + + it "performs a partial update that fails digest check, then a full update" do + build_repo4 do + build_gem "rack", "0.9.1" + end + + install_gemfile <<-G, :artifice => "compact_index", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s } + source "#{source_uri}" + gem 'rack', '0.9.1' + G + + update_repo4 do + build_gem "rack", "1.0.0" + end + + install_gemfile <<-G, :artifice => "compact_index_partial_update_bad_digest", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s } + source "#{source_uri}" + gem 'rack', '1.0.0' + G + expect(the_bundle).to include_gems "rack 1.0.0" end - it "performs full update if server endpoints serve partial content responses but don't have incremental content and provide no Etag" do + it "performs full update if server endpoints serve partial content responses but don't have incremental content and provide no digest" do build_repo4 do build_gem "rack", "0.9.1" end @@ -825,7 +910,7 @@ def start build_gem "rack", "1.0.0" end - install_gemfile <<-G, :artifice => "compact_index_partial_update_no_etag_not_incremental", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s } + install_gemfile <<-G, :artifice => "compact_index_partial_update_no_digest_not_incremental", :env => { "BUNDLER_SPEC_GEM_REPO" => gem_repo4.to_s } source "#{source_uri}" gem 'rack', '1.0.0' G @@ -847,7 +932,7 @@ def start expected_rack_info_content = File.read(rake_info_path) # Modify the cache files. 
We expect them to be reset to the normal ones when we re-run :install - File.open(rake_info_path, "w") {|f| f << (expected_rack_info_content + "this is different") } + File.open(rake_info_path, "a") {|f| f << "this is different" } # Update the Gemfile so the next install does its normal things gemfile <<-G diff --git a/spec/bundler/support/artifice/compact_index_checksum_mismatch.rb b/spec/bundler/support/artifice/compact_index_checksum_mismatch.rb index a6545b9ee4b574..83b147d2aecc96 100644 --- a/spec/bundler/support/artifice/compact_index_checksum_mismatch.rb +++ b/spec/bundler/support/artifice/compact_index_checksum_mismatch.rb @@ -4,10 +4,10 @@ class CompactIndexChecksumMismatch < CompactIndexAPI get "/versions" do - headers "ETag" => quote("123") + headers "Repr-Digest" => "sha-256=:ungWv48Bz+pBQUDeXa4iI7ADYaOWF3qctBD/YfIAFa0=:" headers "Surrogate-Control" => "max-age=2592000, stale-while-revalidate=60" content_type "text/plain" - body "" + body "content does not match the checksum" end end diff --git a/spec/bundler/support/artifice/compact_index_concurrent_download.rb b/spec/bundler/support/artifice/compact_index_concurrent_download.rb index 35548f278c8232..5d55b8a72b0d8c 100644 --- a/spec/bundler/support/artifice/compact_index_concurrent_download.rb +++ b/spec/bundler/support/artifice/compact_index_concurrent_download.rb @@ -7,11 +7,12 @@ class CompactIndexConcurrentDownload < CompactIndexAPI versions = File.join(Bundler.rubygems.user_home, ".bundle", "cache", "compact_index", "localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions") - # Verify the original (empty) content hasn't been deleted, e.g. on a retry - File.binread(versions) == "" || raise("Original file should be present and empty") + # Verify the original content hasn't been deleted, e.g. on a retry + data = File.binread(versions) + data == "created_at" || raise("Original file should be present with expected content") # Verify this is only requested once for a partial download - env["HTTP_RANGE"] || raise("Missing Range header for expected partial download") + env["HTTP_RANGE"] == "bytes=#{data.bytesize - 1}-" || raise("Missing Range header for expected partial download") # Overwrite the file in parallel, which should be then overwritten # after a successful download to prevent corruption diff --git a/spec/bundler/support/artifice/compact_index_etag_match.rb b/spec/bundler/support/artifice/compact_index_etag_match.rb new file mode 100644 index 00000000000000..08d7b5ec539129 --- /dev/null +++ b/spec/bundler/support/artifice/compact_index_etag_match.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require_relative "helpers/compact_index" + +class CompactIndexEtagMatch < CompactIndexAPI + get "/versions" do + raise "ETag header should be present" unless env["HTTP_IF_NONE_MATCH"] + headers "ETag" => env["HTTP_IF_NONE_MATCH"] + status 304 + body "" + end +end + +require_relative "helpers/artifice" + +Artifice.activate_with(CompactIndexEtagMatch) diff --git a/spec/bundler/support/artifice/compact_index_partial_update.rb b/spec/bundler/support/artifice/compact_index_partial_update.rb index 8c73011346b2ce..f111d91ef94ba8 100644 --- a/spec/bundler/support/artifice/compact_index_partial_update.rb +++ b/spec/bundler/support/artifice/compact_index_partial_update.rb @@ -23,7 +23,7 @@ def not_modified?(_checksum) # Verify that a partial request is made, starting from the index of the # final byte of the cached file. 
unless env["HTTP_RANGE"] == "bytes=#{File.binread(cached_versions_path).bytesize - 1}-" - raise("Range header should be present, and start from the index of the final byte of the cache.") + raise("Range header should be present, and start from the index of the final byte of the cache. #{env["HTTP_RANGE"].inspect}") end etag_response do diff --git a/spec/bundler/support/artifice/compact_index_partial_update_bad_digest.rb b/spec/bundler/support/artifice/compact_index_partial_update_bad_digest.rb new file mode 100644 index 00000000000000..72cb579ad4cac4 --- /dev/null +++ b/spec/bundler/support/artifice/compact_index_partial_update_bad_digest.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +require_relative "helpers/compact_index" + +# The purpose of this Artifice is to test that an incremental response is invalidated +# and a second request is issued for the full content. +class CompactIndexPartialUpdateBadDigest < CompactIndexAPI + def partial_update_bad_digest + response_body = yield + if request.env["HTTP_RANGE"] + headers "Repr-Digest" => "sha-256=:#{Digest::SHA256.base64digest("wrong digest on ranged request")}:" + else + headers "Repr-Digest" => "sha-256=:#{Digest::SHA256.base64digest(response_body)}:" + end + headers "Surrogate-Control" => "max-age=2592000, stale-while-revalidate=60" + content_type "text/plain" + requested_range_for(response_body) + end + + get "/versions" do + partial_update_bad_digest do + file = tmp("versions.list") + FileUtils.rm_f(file) + file = CompactIndex::VersionsFile.new(file.to_s) + file.create(gems) + file.contents([], :calculate_info_checksums => true) + end + end + + get "/info/:name" do + partial_update_bad_digest do + gem = gems.find {|g| g.name == params[:name] } + CompactIndex.info(gem ? gem.versions : []) + end + end +end + +require_relative "helpers/artifice" + +Artifice.activate_with(CompactIndexPartialUpdateBadDigest) diff --git a/spec/bundler/support/artifice/compact_index_partial_update_no_etag_not_incremental.rb b/spec/bundler/support/artifice/compact_index_partial_update_no_digest_not_incremental.rb similarity index 72% rename from spec/bundler/support/artifice/compact_index_partial_update_no_etag_not_incremental.rb rename to spec/bundler/support/artifice/compact_index_partial_update_no_digest_not_incremental.rb index 20546ba4c3a80b..ce275037417122 100644 --- a/spec/bundler/support/artifice/compact_index_partial_update_no_etag_not_incremental.rb +++ b/spec/bundler/support/artifice/compact_index_partial_update_no_digest_not_incremental.rb @@ -2,8 +2,10 @@ require_relative "helpers/compact_index" -class CompactIndexPartialUpdateNoEtagNotIncremental < CompactIndexAPI - def partial_update_no_etag +# The purpose of this Artifice is to test that an incremental response is ignored +# when the digest is not present to verify that the partial response is valid. +class CompactIndexPartialUpdateNoDigestNotIncremental < CompactIndexAPI + def partial_update_no_digest response_body = yield headers "Surrogate-Control" => "max-age=2592000, stale-while-revalidate=60" content_type "text/plain" @@ -11,7 +13,7 @@ def partial_update_no_etag end get "/versions" do - partial_update_no_etag do + partial_update_no_digest do file = tmp("versions.list") FileUtils.rm_f(file) file = CompactIndex::VersionsFile.new(file.to_s) @@ -25,7 +27,7 @@ def partial_update_no_etag end get "/info/:name" do - partial_update_no_etag do + partial_update_no_digest do gem = gems.find {|g| g.name == params[:name] } lines = CompactIndex.info(gem ? 
gem.versions : []).split("\n") @@ -37,4 +39,4 @@ def partial_update_no_etag require_relative "helpers/artifice" -Artifice.activate_with(CompactIndexPartialUpdateNoEtagNotIncremental) +Artifice.activate_with(CompactIndexPartialUpdateNoDigestNotIncremental) diff --git a/spec/bundler/support/artifice/compact_index_range_ignored.rb b/spec/bundler/support/artifice/compact_index_range_ignored.rb new file mode 100644 index 00000000000000..2303682c1f21f7 --- /dev/null +++ b/spec/bundler/support/artifice/compact_index_range_ignored.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +require_relative "helpers/compact_index" + +class CompactIndexRangeIgnored < CompactIndexAPI + # Stub the server to not return 304 so that we don't bypass all the logic + def not_modified?(_checksum) + false + end + + get "/versions" do + cached_versions_path = File.join( + Bundler.rubygems.user_home, ".bundle", "cache", "compact_index", + "localgemserver.test.80.dd34752a738ee965a2a4298dc16db6c5", "versions" + ) + + # Verify a cached copy of the versions file exists + unless File.binread(cached_versions_path).size > 0 + raise("Cached versions file should be present and have content") + end + + # Verify that a partial request is made, starting from the index of the + # final byte of the cached file. + unless env.delete("HTTP_RANGE") + raise("Expected client to write the full response on the first try") + end + + etag_response do + file = tmp("versions.list") + FileUtils.rm_f(file) + file = CompactIndex::VersionsFile.new(file.to_s) + file.create(gems) + file.contents + end + end +end + +require_relative "helpers/artifice" + +Artifice.activate_with(CompactIndexRangeIgnored) diff --git a/spec/bundler/support/artifice/helpers/compact_index.rb b/spec/bundler/support/artifice/helpers/compact_index.rb index dd9e94ef9b1ef8..8e7acb41a90442 100644 --- a/spec/bundler/support/artifice/helpers/compact_index.rb +++ b/spec/bundler/support/artifice/helpers/compact_index.rb @@ -4,6 +4,7 @@ $LOAD_PATH.unshift Dir[Spec::Path.base_system_gem_path.join("gems/compact_index*/lib")].first.to_s require "compact_index" +require "digest" class CompactIndexAPI < Endpoint helpers do @@ -17,9 +18,10 @@ def load_spec(name, version, platform, gem_repo) def etag_response response_body = yield - checksum = Digest(:MD5).hexdigest(response_body) - return if not_modified?(checksum) - headers "ETag" => quote(checksum) + etag = Digest::MD5.hexdigest(response_body) + return if not_modified?(etag) + headers "ETag" => quote(etag) + headers "Repr-Digest" => "sha-256=:#{Digest::SHA256.base64digest(response_body)}:" headers "Surrogate-Control" => "max-age=2592000, stale-while-revalidate=60" content_type "text/plain" requested_range_for(response_body) @@ -29,11 +31,11 @@ def etag_response raise end - def not_modified?(checksum) + def not_modified?(etag) etags = parse_etags(request.env["HTTP_IF_NONE_MATCH"]) - return unless etags.include?(checksum) - headers "ETag" => quote(checksum) + return unless etags.include?(etag) + headers "ETag" => quote(etag) status 304 body "" end
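
Taken together, the updater now leans on standard HTTP conditional and ranged requests plus RFC 8941 digest headers instead of MD5-derived etags. As a rough illustration of that request cycle — a standalone sketch with plain Net::HTTP and hypothetical `url`, `local_path`, and `etag_path` inputs, not the Bundler code from the diff above:

    require "net/http"
    require "uri"
    require "digest"

    # Illustrative only: fetch a compact-index style file using a saved opaque etag
    # and a byte range, then verify the cached result against a sha-256 Repr-Digest.
    def conditional_fetch(url, local_path, etag_path)
      headers = {}
      headers["If-None-Match"] = File.read(etag_path).chomp if File.exist?(etag_path)
      if File.exist?(local_path) && File.size(local_path) > 0
        # Start one byte early so the range is never empty (avoids 416 responses).
        headers["Range"] = "bytes=#{File.size(local_path) - 1}-"
      end

      uri = URI(url)
      request = Net::HTTP::Get.new(uri)
      headers.each {|key, value| request[key] = value }
      response = Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == "https") do |http|
        http.request(request)
      end

      return :not_modified if response.is_a?(Net::HTTPNotModified)

      if response.is_a?(Net::HTTPPartialContent)
        # Drop the overlapping first byte and append the rest to the cached copy.
        File.open(local_path, "ab") {|f| f.write(response.body.byteslice(1..-1)) }
      else
        File.binwrite(local_path, response.body)
      end

      # Verify the whole representation against "sha-256=:<base64>:" when provided.
      digest_header = response["Repr-Digest"] || response["Digest"]
      if digest_header && (expected = digest_header[/sha-256=:([^:]+):/, 1])
        actual = Digest::SHA256.base64digest(File.binread(local_path))
        raise "checksum mismatch for #{url}" unless actual == expected
      end

      # Persist the opaque etag (minus weak prefix and quotes) for the next request.
      if (etag = response["ETag"])
        File.write(etag_path, etag.delete_prefix("W/").delete('"'))
      end

      :updated
    end

Starting the range one byte before the end of the cache mirrors the comment in the diff: the range is never empty, so servers never answer 416 Range Not Satisfiable, and the overlapping byte is simply discarded before appending.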