Skip to content

Commit

Permalink
Opaque Etags for compact index requests
Browse files Browse the repository at this point in the history
This changes the CompactIndexClient to store etags received from the
compact index in separate files rather than relying on the MD5 checksum
of the file as the etag.

Smooths the upgrade from MD5 etags to opaque etags by generating an MD5
etag when no etag file exists. This should reduce the initial impact of
changing the caching behavior by avoiding cache misses whenever the
server's etag still matches the MD5 checksum.

Eventually, the MD5 behavior should be retired and the etag should be
considered completely opaque with no assumption that MD5 would match.
  • Loading branch information
simi authored and hsbt committed Nov 27, 2023
1 parent 794c879 commit 71a8dae
Show file tree
Hide file tree
Showing 16 changed files with 702 additions and 145 deletions.
21 changes: 14 additions & 7 deletions lib/bundler/compact_index_client.rb
Expand Up @@ -5,7 +5,13 @@

module Bundler
class CompactIndexClient
# NOTE: MD5 is here not because we expect a server to respond with it, but
# because we use it to generate the etag on first request during the upgrade
# to the compact index client that uses opaque etags saved to files.
# Remove once 2.5.0 has been out for a while.
SUPPORTED_DIGESTS = { "sha-256" => :SHA256, "md5" => :MD5 }.freeze
DEBUG_MUTEX = Thread::Mutex.new

def self.debug
return unless ENV["DEBUG_COMPACT_INDEX"]
DEBUG_MUTEX.synchronize { warn("[#{self}] #{yield}") }
Expand All @@ -14,6 +20,7 @@ def self.debug
class Error < StandardError; end

require_relative "compact_index_client/cache"
require_relative "compact_index_client/cache_file"
require_relative "compact_index_client/updater"

attr_reader :directory
Expand Down Expand Up @@ -54,13 +61,13 @@ def sequentially

def names
Bundler::CompactIndexClient.debug { "/names" }
update(@cache.names_path, "names")
update("names", @cache.names_path, @cache.names_etag_path)
@cache.names
end

def versions
Bundler::CompactIndexClient.debug { "/versions" }
update(@cache.versions_path, "versions")
update("versions", @cache.versions_path, @cache.versions_etag_path)
versions, @info_checksums_by_name = @cache.versions
versions
end
Expand All @@ -76,36 +83,36 @@ def dependencies(names)
def update_and_parse_checksums!
Bundler::CompactIndexClient.debug { "update_and_parse_checksums!" }
return @info_checksums_by_name if @parsed_checksums
update(@cache.versions_path, "versions")
update("versions", @cache.versions_path, @cache.versions_etag_path)
@info_checksums_by_name = @cache.checksums
@parsed_checksums = true
end

private

def update(local_path, remote_path)
def update(remote_path, local_path, local_etag_path)
Bundler::CompactIndexClient.debug { "update(#{local_path}, #{remote_path})" }
unless synchronize { @endpoints.add?(remote_path) }
Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" }
return
end
@updater.update(local_path, url(remote_path))
@updater.update(url(remote_path), local_path, local_etag_path)
end

def update_info(name)
Bundler::CompactIndexClient.debug { "update_info(#{name})" }
path = @cache.info_path(name)
checksum = @updater.checksum_for_file(path)
unless existing = @info_checksums_by_name[name]
Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since it is missing from versions" }
return
end
checksum = SharedHelpers.checksum_for_file(path, :MD5)
if checksum == existing
Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since the versions checksum matches the local checksum" }
return
end
Bundler::CompactIndexClient.debug { "updating info for #{name} since the versions checksum #{existing} != the local checksum #{checksum}" }
update(path, "info/#{name}")
update("info/#{name}", path, @cache.info_etag_path(name))
end

def url(path)
Expand Down
30 changes: 25 additions & 5 deletions lib/bundler/compact_index_client/cache.rb
Expand Up @@ -9,11 +9,8 @@ class Cache

def initialize(directory)
@directory = Pathname.new(directory).expand_path
info_roots.each do |dir|
SharedHelpers.filesystem_access(dir) do
FileUtils.mkdir_p(dir)
end
end
info_roots.each {|dir| mkdir(dir) }
mkdir(info_etag_root)
end

def names
Expand All @@ -24,6 +21,10 @@ def names_path
directory.join("names")
end

# Location on disk of the stored ETag for the /names endpoint response.
def names_etag_path
  directory + "names.etag"
end

def versions
versions_by_name = Hash.new {|hash, key| hash[key] = [] }
info_checksums_by_name = {}
Expand All @@ -49,6 +50,10 @@ def versions_path
directory.join("versions")
end

# Location on disk of the stored ETag for the /versions endpoint response.
def versions_etag_path
  directory + "versions.etag"
end

def checksums
checksums = {}

Expand Down Expand Up @@ -76,8 +81,19 @@ def info_path(name)
end
end

# Path of the ETag file for a gem's /info endpoint response. The file
# name embeds a lowercase MD5 hex digest of the gem name so that names
# which are unsafe for the filesystem still map to a unique, valid file.
def info_etag_path(name)
  name = name.to_s
  fingerprint = SharedHelpers.digest(:MD5).hexdigest(name).downcase
  info_etag_root.join("#{name}-#{fingerprint}")
end

private

# Create +dir+ (and any missing parent directories), routing the call
# through SharedHelpers.filesystem_access so permission problems raise
# Bundler-friendly errors.
def mkdir(dir)
  SharedHelpers.filesystem_access(dir) { FileUtils.mkdir_p(dir) }
end

def lines(path)
return [] unless path.file?
lines = SharedHelpers.filesystem_access(path, :read, &:read).split("\n")
Expand All @@ -96,6 +112,10 @@ def info_roots
directory.join("info-special-characters"),
]
end

# Directory that holds the per-gem info ETag files.
def info_etag_root
  directory + "info-etags"
end
end
end
end
153 changes: 153 additions & 0 deletions lib/bundler/compact_index_client/cache_file.rb
@@ -0,0 +1,153 @@
# frozen_string_literal: true

require_relative "../vendored_fileutils"
require "rubygems/package"

module Bundler
class CompactIndexClient
# Writes cache files in a way that is robust to concurrent modifications:
# content is written to a temp file unique to this process, which then
# replaces the original on commit. If digests are given, the checksums
# will be verified before the original file is replaced.
class CacheFile
  # Permissions used when the original file does not exist yet.
  DEFAULT_FILE_MODE = 0o644
  private_constant :DEFAULT_FILE_MODE

  class Error < RuntimeError; end
  # Raised when opening/committing after the instance was permanently closed.
  class ClosedError < Error; end

  # Raised when the calculated checksums do not match the expected digests.
  class DigestMismatchError < Error
    def initialize(digests, expected_digests)
      super "Calculated checksums #{digests.inspect} did not match expected #{expected_digests.inspect}."
    end
  end

  # Initialize with a copy of the original file, then yield the instance.
  # Digests are initialized before copying so the existing bytes are folded
  # in, letting a later #append verify the combined (old + new) content.
  def self.copy(path, &block)
    new(path) do |file|
      file.initialize_digests

      SharedHelpers.filesystem_access(path, :read) do
        path.open("rb") do |s|
          file.open {|f| IO.copy_stream(s, f) }
        end
      end

      yield file
    end
  end

  # Write data to a temp file, then replace the original file with it verifying the digests if given.
  # No-op when data is nil.
  def self.write(path, data, digests = nil)
    return unless data
    new(path) do |file|
      file.digests = digests
      file.write(data)
    end
  end

  attr_reader :original_path, :path

  # original_path is the file this instance will ultimately replace.
  # When a block is given, the instance is yielded and then closed
  # (discarding the temp file unless committed), even if the block raises.
  def initialize(original_path, &block)
    @original_path = original_path
    # Reuse the original file's permissions when it already exists.
    @perm = original_path.file? ? original_path.stat.mode : DEFAULT_FILE_MODE
    # $$ (the current PID) keeps concurrent processes from sharing a temp file.
    @path = original_path.sub(/$/, ".#{$$}.tmp")
    return unless block_given?
    begin
      yield self
    ensure
      close
    end
  end

  # Size of the temp file (not the original file).
  def size
    path.size
  end

  # initialize the digests using CompactIndexClient::SUPPORTED_DIGESTS, or a subset based on keys.
  def initialize_digests(keys = nil)
    @digests = keys ? SUPPORTED_DIGESTS.slice(*keys) : SUPPORTED_DIGESTS.dup
    @digests.transform_values! {|algo_class| SharedHelpers.digest(algo_class).new }
  end

  # reset the digests so they don't contain any previously read data
  def reset_digests
    @digests&.each_value(&:reset)
  end

  # set the digests that will be verified at the end
  def digests=(expected_digests)
    @expected_digests = expected_digests

    if @expected_digests.nil?
      @digests = nil
    elsif @digests
      # Only keep running digests for algorithms we will actually verify.
      @digests = @digests.slice(*@expected_digests.keys)
    else
      initialize_digests(@expected_digests.keys)
    end
  end

  # remove this method when we stop generating md5 digests for legacy etags
  def md5
    @digests && @digests["md5"]
  end

  def digests?
    @digests&.any?
  end

  # Open the temp file for writing, reusing original permissions, yielding the IO object.
  # When digests are active, writes are routed through DigestIO so the
  # checksums are updated as a side effect of writing.
  def open(write_mode = "wb", perm = @perm, &block)
    raise ClosedError, "Cannot reopen closed file" if @closed
    SharedHelpers.filesystem_access(path, :write) do
      path.open(write_mode, perm) do |f|
        yield digests? ? Gem::Package::DigestIO.new(f, @digests) : f
      end
    end
  end

  # Returns false without appending when no digests since appending is too error prone to do without digests.
  # On a digest match the file is committed (and permanently closed).
  def append(data)
    return false unless digests?
    open("a") {|f| f.write data }
    verify && commit
  end

  # Overwrite the temp file with data, then verify and commit.
  # Digests are reset first so they reflect only the new content.
  def write(data)
    reset_digests
    open {|f| f.write data }
    commit!
  end

  # Commit, raising DigestMismatchError when verification fails.
  def commit!
    verify || raise(DigestMismatchError.new(@base64digests, @expected_digests))
    commit
  end

  # Verify the digests, returning true on match, false on mismatch.
  # Trivially true when there is nothing to verify. The running digests
  # are finalized (base64) and discarded by this call, so verification
  # can only happen once per write.
  def verify
    return true unless @expected_digests && digests?
    @base64digests = @digests.transform_values!(&:base64digest)
    @digests = nil
    @base64digests.all? {|algo, digest| @expected_digests[algo] == digest }
  end

  # Replace the original file with the temp file without verifying digests.
  # The file is permanently closed.
  def commit
    raise ClosedError, "Cannot commit closed file" if @closed
    SharedHelpers.filesystem_access(original_path, :write) do
      FileUtils.mv(path, original_path)
    end
    @closed = true
  end

  # Remove the temp file without replacing the original file.
  # The file is permanently closed. Safe to call multiple times.
  def close
    return if @closed
    FileUtils.remove_file(path) if @path&.file?
    @closed = true
  end
end
end
end

0 comments on commit 71a8dae

Please sign in to comment.