Skip to content

Commit

Permalink
cache bucket HEAD requests
Browse files Browse the repository at this point in the history
  • Loading branch information
BenTheElder committed Apr 26, 2022
1 parent b4351c2 commit e9bb6fe
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 14 deletions.
63 changes: 55 additions & 8 deletions cmd/archeio/app/buckets.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ limitations under the License.

package app

import "net/http"
import (
"net/http"
"sync"
)

// awsRegionToS3URL returns the base S3 bucket URL for an OCI layer blob given the AWS region
//
Expand All @@ -31,26 +34,70 @@ func awsRegionToS3URL(region string) string {

// blobChecker is used to check if a blob exists, possibly with caching.
type blobChecker interface {
	// BlobExists should check that blobURL exists.
	// bucket and layerHash may be used for caching purposes.
	BlobExists(blobURL, bucket, layerHash string) bool
}

// simpleBlobChecker just performs an HTTP HEAD check against the blob
// cachedBlobChecker just performs an HTTP HEAD check against the blob
//
// TODO: potentially replace with a caching implementation
// should be plenty fast for now, HTTP HEAD on s3 is cheap
type simpleBlobChecker struct {
type cachedBlobChecker struct {
http.Client
blobCache
}

func (s *simpleBlobChecker) BlobExists(blobURL, layerHash string) bool {
r, err := s.Client.Head(blobURL)
func newCachedBlobChecker() *cachedBlobChecker {
return &cachedBlobChecker{
blobCache: blobCache{
cache: make(map[string]map[string]struct{}),
},
}
}

// blobCache records which bucket:key pairs have been observed to exist.
//
// Entries are never evicted. That is acceptable here: the key space is
// small enough to hold in memory, and the Cloud Run container is spun
// down after an idle period, which discards the cache anyway.
type blobCache struct {
	lock  sync.RWMutex
	cache map[string]map[string]struct{}
}

// Get reports whether layerHash has previously been recorded for bucket.
func (b *blobCache) Get(bucket, layerHash string) bool {
	b.lock.RLock()
	defer b.lock.RUnlock()
	keys, ok := b.cache[bucket]
	if !ok {
		return false
	}
	_, ok = keys[layerHash]
	return ok
}

// Put records that layerHash exists in bucket.
func (b *blobCache) Put(bucket, layerHash string) {
	b.lock.Lock()
	defer b.lock.Unlock()
	keys, ok := b.cache[bucket]
	if !ok {
		keys = make(map[string]struct{})
		b.cache[bucket] = keys
	}
	keys[layerHash] = struct{}{}
}

// BlobExists reports whether blobURL exists.
//
// Positive results are cached per (bucket, layerHash); negative results
// are not cached, so unavailable blobs are re-checked on every request.
func (c *cachedBlobChecker) BlobExists(blobURL, bucket, layerHash string) bool {
	if c.blobCache.Get(bucket, layerHash) {
		return true
	}
	r, err := c.Client.Head(blobURL)
	// fall back to assuming the blob is unavailable on errors
	if err != nil {
		return false
	}
	defer r.Body.Close()
	// if the blob exists its HEAD should return 200 OK
	// this is true for S3 and for OCI registries
	if r.StatusCode != http.StatusOK {
		return false
	}
	c.blobCache.Put(bucket, layerHash)
	return true
}
24 changes: 21 additions & 3 deletions cmd/archeio/app/buckets_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,42 +23,60 @@ import (
"testing"
)

func TestSimpleBlobChecker(t *testing.T) {
func TestCachedBlobChecker(t *testing.T) {
bucket := awsRegionToS3URL("us-east-1")
blobs := &simpleBlobChecker{}
blobs := newCachedBlobChecker()
testCases := []struct {
Name string
BlobURL string
Bucket string
HashKey string
ExpectExists bool
}{
{
Name: "known bucket entry",
BlobURL: bucket + "/containers/images/sha256%3Ada86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
Bucket: bucket,
HashKey: "3Ada86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
ExpectExists: true,
},
{
Name: "known bucket, bad entry",
Bucket: bucket,
BlobURL: bucket + "/c0ntainers/images/sha256%3Ada86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
ExpectExists: false,
},
{
Name: "bogus bucket on domain without webserver",
Bucket: "http://bogus.k8s.io/",
BlobURL: "http://bogus.k8s.io/foo",
HashKey: "b0guS",
ExpectExists: false,
},
}
// run test cases in parallel and then serial
// this populates the cache on the first run while doing parallel testing
// and allows us to check cached behavior on the second run
for i := range testCases {
tc := testCases[i]
t.Run(tc.Name, func(t *testing.T) {
t.Parallel()
url := tc.BlobURL
exists := blobs.BlobExists(url, tc.HashKey)
exists := blobs.BlobExists(url, tc.Bucket, tc.HashKey)
if exists != tc.ExpectExists {
t.Fatalf("expected: %v but got: %v", tc.ExpectExists, exists)
}
})
}
for i := range testCases {
tc := testCases[i]
t.Run(tc.Name, func(t *testing.T) {
url := tc.BlobURL
exists := blobs.BlobExists(url, tc.Bucket, tc.HashKey)
if exists != tc.ExpectExists {
t.Fatalf("expected: %v but got: %v", tc.ExpectExists, exists)
}
})
}

}
4 changes: 2 additions & 2 deletions cmd/archeio/app/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const (
// upstream registry should be the url to the primary registry
// archeio is fronting.
func MakeHandler(upstreamRegistry string) http.Handler {
blobs := &simpleBlobChecker{}
blobs := newCachedBlobChecker()
doV2 := makeV2Handler(upstreamRegistry, blobs)
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// all valid registry requests should be at /v2/
Expand Down Expand Up @@ -95,7 +95,7 @@ func makeV2Handler(upstreamRegistry string, blobs blobChecker) func(w http.Respo
hash := matches[1]
// this matches GCR's GCS layout, which we will use for other buckets
blobURL := bucketURL + "/containers/images/sha256%3A" + hash
if blobs.BlobExists(blobURL, hash) {
if blobs.BlobExists(blobURL, bucketURL, hash) {
// blob known to be available in S3, redirect client there
klog.V(2).InfoS("redirecting blob request to S3", "path", path)
http.Redirect(w, r, blobURL, http.StatusPermanentRedirect)
Expand Down
2 changes: 1 addition & 1 deletion cmd/archeio/app/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ type fakeBlobsChecker struct {
knownURLs map[string]bool
}

func (f *fakeBlobsChecker) BlobExists(blobURL, hashKey string) bool {
func (f *fakeBlobsChecker) BlobExists(blobURL, bucket, hashKey string) bool {
return f.knownURLs[blobURL]
}

Expand Down

0 comments on commit e9bb6fe

Please sign in to comment.