Skip to content

Commit

Permalink
Simple non-image redirect server
Browse files Browse the repository at this point in the history
Copying the archeio design for non-image artifacts.  We serve hash
files (and gpg files) directly, everything else can be redirected to a
mirror.
  • Loading branch information
justinsb committed Nov 3, 2022
1 parent 81e35ad commit 4e751bd
Show file tree
Hide file tree
Showing 24 changed files with 2,340 additions and 16 deletions.
7 changes: 5 additions & 2 deletions Makefile
Expand Up @@ -56,8 +56,11 @@ all: build
# builds archeio, outputs to $(OUT_DIR)
archeio:
go build -v -o "$(OUT_DIR)/$(ARCHEIO_BINARY_NAME)" $(ARCHEIO_BUILD_FLAGS) ./cmd/archeio
# alias for building archeio
build: archeio
redirectserver:
go build -v -o "$(OUT_DIR)/redirectserver" $(ARCHEIO_BUILD_FLAGS) ./cmd/redirectserver
# alias for building archeio and redirectserver
build: archeio redirectserver

# build images to local tarball
images:
hack/make-rules/images.sh
Expand Down
11 changes: 6 additions & 5 deletions cmd/archeio/app/buckets.go
Expand Up @@ -78,8 +78,8 @@ func awsRegionToS3URL(region string) string {
}
}

// blobChecker are used to check if a blob exists, possibly with caching
type blobChecker interface {
// BlobChecker is used to check if a blob exists, possibly with caching
type BlobChecker interface {
// BlobExists should check that blobURL exists
// bucket and layerHash may be used for caching purposes
BlobExists(blobURL, bucket, layerHash string) bool
Expand All @@ -90,12 +90,13 @@ type blobChecker interface {
// TODO: potentially replace with a caching implementation
// should be plenty fast for now, HTTP HEAD on s3 is cheap
type cachedBlobChecker struct {
http.Client
httpClient *http.Client
blobCache
}

func newCachedBlobChecker() *cachedBlobChecker {
func NewCachedBlobChecker(httpClient *http.Client) BlobChecker {
return &cachedBlobChecker{
httpClient: httpClient,
blobCache: blobCache{
cache: make(map[string]map[string]struct{}),
},
Expand Down Expand Up @@ -135,7 +136,7 @@ func (c *cachedBlobChecker) BlobExists(blobURL, bucket, layerHash string) bool {
return true
}
klog.V(3).InfoS("blob existence cache miss", "url", blobURL)
r, err := c.Client.Head(blobURL)
r, err := c.httpClient.Head(blobURL)
// fallback to assuming blob is unavailable on errors
if err != nil {
return false
Expand Down
3 changes: 2 additions & 1 deletion cmd/archeio/app/buckets_integration_test.go
Expand Up @@ -20,12 +20,13 @@ limitations under the License.
package app

import (
"net/http"
"testing"
)

func TestCachedBlobChecker(t *testing.T) {
bucket := awsRegionToS3URL("us-east-1")
blobs := newCachedBlobChecker()
blobs := NewCachedBlobChecker(http.DefaultClient)
testCases := []struct {
Name string
BlobURL string
Expand Down
2 changes: 1 addition & 1 deletion cmd/archeio/app/clientip.go
Expand Up @@ -31,7 +31,7 @@ import (
// 2. behind Google Cloud LoadBalancer
//
// At this time we have no need to complicate it further
func getClientIP(r *http.Request) (netip.Addr, error) {
func GetClientIP(r *http.Request) (netip.Addr, error) {
// Upstream docs:
// https://cloud.google.com/load-balancing/docs/https#x-forwarded-for_header
//
Expand Down
2 changes: 1 addition & 1 deletion cmd/archeio/app/clientip_test.go
Expand Up @@ -88,7 +88,7 @@ func TestGetClientIP(t *testing.T) {
tc := testCases[i]
t.Run(tc.Name, func(t *testing.T) {
//t.Parallel()
ip, err := getClientIP(&tc.Request)
ip, err := GetClientIP(&tc.Request)
if err != nil {
if !tc.ExpectError {
t.Fatalf("unexpted error: %v", err)
Expand Down
6 changes: 3 additions & 3 deletions cmd/archeio/app/handlers.go
Expand Up @@ -41,7 +41,7 @@ type RegistryConfig struct {
//
// Exact behavior should be documented in docs/request-handling.md
func MakeHandler(rc RegistryConfig) http.Handler {
blobs := newCachedBlobChecker()
blobs := NewCachedBlobChecker(http.DefaultClient)
doV2 := makeV2Handler(rc, blobs)
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// only allow GET, HEAD
Expand All @@ -68,7 +68,7 @@ func MakeHandler(rc RegistryConfig) http.Handler {
})
}

func makeV2Handler(rc RegistryConfig, blobs blobChecker) func(w http.ResponseWriter, r *http.Request) {
func makeV2Handler(rc RegistryConfig, blobs BlobChecker) func(w http.ResponseWriter, r *http.Request) {
// matches blob requests, captures the requested blob hash
reBlob := regexp.MustCompile("^/v2/.*/blobs/sha256:([0-9a-f]{64})$")
// initialize map of clientIP to AWS region
Expand Down Expand Up @@ -111,7 +111,7 @@ func makeV2Handler(rc RegistryConfig, blobs blobChecker) func(w http.ResponseWri
hash := matches[1]

// for blob requests, check the client IP and determine the best backend
clientIP, err := getClientIP(r)
clientIP, err := GetClientIP(r)
if err != nil {
// this should not happen
klog.ErrorS(err, "failed to get client IP")
Expand Down
2 changes: 1 addition & 1 deletion cmd/archeio/main_test.go
Expand Up @@ -67,7 +67,7 @@ func TestIntegrationMain(t *testing.T) {
// wait for server to be up and running
startErr := <-serverErrChan
if startErr != nil {
t.Fatalf("Failed to start archeio: %v", err)
t.Fatalf("Failed to start archeio: %v", startErr)
}
if !tryUntil(time.Now().Add(time.Second), func() bool {
_, err := http.Get("http://" + testAddr + "/v2/")
Expand Down
53 changes: 53 additions & 0 deletions cmd/redirectserver/README.md
@@ -0,0 +1,53 @@
# Archeio

αρχείο (archeío) is roughly? Greek for "registry"

This binary is a custom redirect/alias server for the Kubernetes project's
OCI artifact ("docker image") hosting.

Current design details will be documented here as they mature.

The original design doc is shared with members of
[dev@kubernetes.io](https://groups.google.com/a/kubernetes.io/g/dev),
anyone can join this list and gain access to read
[the document](https://docs.google.com/document/d/1yNQ7DaDE5LbDJf9ku82YtlKZK0tcg5Wpk9L72-x2S2k/).
It is not accessible to accounts that are not members of the Kubernetes mailing list
due to organization constraints, and joining the list is the most reliable way to gain
access. See https://git.k8s.io/community/community-membership.md

For more current details see also: https://github.com/kubernetes/k8s.io/wiki/New-Registry-url-for-Kubernetes-(registry.k8s.io)

**NOTE**: The code in this binary is **not** intended to be fully reusable,
it is the most efficient and expedient implementation of
Kubernetes SIG K8s-Infra's needs. However, some of the packages under
[`pkg/`](./../../pkg/) may be useful if you have a similar problem,
and they should be pretty generalized and re-usable.

We are looking towards making this application easily re-usable by other projects
in the future, if you're interested please reach out to SIG K8s Infra to discuss.

-----

For a rough TLDR of the current design:

- Images are hosted primarily in the existing Kubernetes [GCR](https://gcr.io/) registry
- Mirrors *of content* blobs are hosted in S3 buckets in AWS
- AWS clients are detected by client IP address and redirected to a local S3 bucket copy
*only* when requesting content blobs, *not* manifests, manifest lists, tags etc.
- All other requests are redirected to the original upstream registry

For more detail see:
- [docs/request-handling.md](./docs/request-handling.md)
- [docs/testing.md](./docs/testing.md)

In addition, in order to get the registry.k8s.io domain in place, initially this
binary is *only* serving the trivial redirect to the existing registry
(https://k8s.gcr.io), so we can safely move users / clients to the new domain
that will eventually serve the more complex version.

Development is at https://registry-sandbox.k8s.io which is *not* supported for
any usage outside of the development of this project and may or may not be
working at any given time. Changes will be deployed there before we deploy
to production, and be exercised by a subset of Kubernetes' own CI.

For AWS client-IP matching, see [`pkg/net/cidrs/aws`](./../../pkg/net/cidrs/aws)
72 changes: 72 additions & 0 deletions cmd/redirectserver/app/buckets.go
@@ -0,0 +1,72 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package app

// awsRegionToS3URL maps an AWS region name to the base URL of the S3 bucket
// that serves that region.
//
// Blobs in the buckets should be stored at /containers/images/sha256:$hash
//
// The empty string is returned for any region that has no case below.
func awsRegionToS3URL(region string) string {
	// bucketURL keeps its zero value ("") when no case matches.
	//
	// TestRegionToAWSRegionToS3URL checks that we return a non-empty result
	// for all regions that this app knows about, so an empty string here
	// means we have failed to account for a region. We will not attempt to
	// route to a region we do not know about; we want to precompute the
	// mapping for all regions.
	var bucketURL string

	// Each case lists first the region in which we have a bucket, followed
	// by the additional regions we map onto that bucket, chosen roughly by
	// physical adjacency (and therefore _presumed_ latency).
	//
	// When adding a bucket, add a case for the region it lives in, and
	// consider moving over other regions that do not have their own bucket.
	switch region {
	case "us-east-1", "sa-east-1", "us-gov-east-1", "GLOBAL":
		// US East (N. Virginia)
		bucketURL = "https://prod-artifacts-k8s-io-us-east-1.s3.dualstack.us-east-1.amazonaws.com"
	case "us-east-2", "ca-central-1":
		// US East (Ohio)
		bucketURL = "https://prod-artifacts-k8s-io-us-east-2.s3.dualstack.us-east-2.amazonaws.com"
	case "us-west-1", "us-gov-west-1":
		// US West (N. California)
		bucketURL = "https://prod-artifacts-k8s-io-us-west-1.s3.dualstack.us-west-1.amazonaws.com"
	case "us-west-2", "ca-west-1":
		// US West (Oregon)
		bucketURL = "https://prod-artifacts-k8s-io-us-west-2.s3.dualstack.us-west-2.amazonaws.com"
	case "ap-south-1", "ap-south-2", "me-south-1", "me-central-1":
		// Asia Pacific (Mumbai)
		bucketURL = "https://prod-artifacts-k8s-io-ap-south-1.s3.dualstack.ap-south-1.amazonaws.com"
	case "ap-northeast-1", "ap-northeast-2", "ap-northeast-3":
		// Asia Pacific (Tokyo)
		bucketURL = "https://prod-artifacts-k8s-io-ap-northeast-1.s3.dualstack.ap-northeast-1.amazonaws.com"
	case "ap-southeast-1", "ap-southeast-2", "ap-southeast-3", "ap-southeast-4", "ap-southeast-6", "ap-east-1", "cn-northwest-1", "cn-north-1":
		// Asia Pacific (Singapore)
		bucketURL = "https://prod-artifacts-k8s-io-ap-southeast-1.s3.dualstack.ap-southeast-1.amazonaws.com"
	case "eu-central-1", "eu-central-2", "eu-south-1", "eu-south-2", "il-central-1":
		// Europe (Frankfurt)
		bucketURL = "https://prod-artifacts-k8s-io-eu-central-1.s3.dualstack.eu-central-1.amazonaws.com"
	case "eu-west-1", "af-south-1":
		// Europe (Ireland)
		bucketURL = "https://prod-artifacts-k8s-io-eu-west-1.s3.dualstack.eu-west-1.amazonaws.com"
	case "eu-west-2", "eu-west-3", "eu-north-1":
		// Europe (London)
		bucketURL = "https://prod-artifacts-k8s-io-eu-west-2.s3.dualstack.eu-west-2.amazonaws.com"
	}
	return bucketURL
}
38 changes: 38 additions & 0 deletions cmd/redirectserver/app/buckets_test.go
@@ -0,0 +1,38 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package app

import (
"testing"

"k8s.io/registry.k8s.io/pkg/net/cidrs/aws"
)

// TestRegionToAWSRegionToS3URL verifies that awsRegionToS3URL yields a bucket
// URL for every region returned by aws.Regions(), and yields "" for a region
// name that does not exist.
func TestRegionToAWSRegionToS3URL(t *testing.T) {
	// every known region must map to a configured (non-empty) bucket URL
	for region := range aws.Regions() {
		if awsRegionToS3URL(region) == "" {
			t.Fatalf("received empty string for known region %q url", region)
		}
	}

	// a made-up region must map to "", otherwise the loop above would pass
	// trivially and prove nothing
	bogusURL := awsRegionToS3URL("nonsensical-region")
	if bogusURL != "" {
		t.Fatalf("received non-empty URL string for made up region \"nonsensical-region\": %q", bogusURL)
	}
}

0 comments on commit 4e751bd

Please sign in to comment.