Skip to content

Commit

Permalink
Support for local kind cluster (#190)
Browse files Browse the repository at this point in the history
  • Loading branch information
nstogner committed Aug 14, 2023
1 parent 581b6e2 commit 75467f6
Show file tree
Hide file tree
Showing 45 changed files with 2,691 additions and 1,723 deletions.
27 changes: 27 additions & 0 deletions Dockerfile.sci-kind
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Build stage: compile the sci-kind binary using the pinned Go 1.19 toolchain image.
FROM golang:1.19 AS builder
# Target platform passed to the Go compiler below; defaults to linux/amd64.
ARG TARGETOS=linux
ARG TARGETARCH=amd64

WORKDIR /workspace
# Copy only the module files and download dependencies before copying sources,
# so the download layer can be reused from cache when only source files change.
COPY go.mod go.sum ./
RUN go mod download

COPY cmd/sci-kind/main.go cmd/sci-kind/main.go
COPY internal/ internal/

# Build the app with cgo disabled for the requested GOOS/GOARCH.
RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
go build -a -o main cmd/sci-kind/main.go

# Runtime stage: distroless static image that carries only the compiled binary.
FROM gcr.io/distroless/static:nonroot
WORKDIR /

# Copy the pre-built binary file from the builder stage.
COPY --from=builder /workspace/main .
# NOTE(review): this switches back to root even though the ":nonroot" base image
# was chosen; presumably root is needed to write to directories mounted from the
# kind node — confirm, otherwise drop this line.
USER root
# 10080 and 8080 are the defaults of the -port and -signed-url-port flags in
# cmd/sci-kind/main.go.
EXPOSE 10080
EXPOSE 8080

# Run the executable.
CMD ["/main"]
49 changes: 44 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
VERSION ?= v0.8.1
IMG ?= docker.io/substratusai/controller-manager:${VERSION}
IMG_GCPMANAGER ?= docker.io/substratusai/gcp-manager:${VERSION}
IMG_SCI_KIND ?= docker.io/substratusai/sci-kind:${VERSION}

# Set to false if you don't want GPU nodepools created
ATTACH_GPU_NODEPOOLS=true
Expand Down Expand Up @@ -36,7 +37,11 @@ ifeq ($(UNAME_M),arm64)
SKAFFOLD_ARCH := arm64
else
PROTOC_ARCH := $(UNAME_M)
SKAFFOLD_ARCH := $(UNAME_M)
ifeq ($(UNAME_M),x86_64)
SKAFFOLD_ARCH := amd64
else
SKAFFOLD_ARCH := $(UNAME_M)
endif
endif

PROTOC_PLATFORM := $(PROTOC_OS)-$(PROTOC_ARCH)
Expand Down Expand Up @@ -142,6 +147,34 @@ dev-down-gcp: build-installer
substratus-installer gcp-down.sh
rm ./secrets/gcp-manager-key.json

# Bring up the local kind environment by running kind-up.sh from inside
# install/scripts (the "cd" and the script share one shell line on purpose).
.PHONY: dev-up-kind
dev-up-kind:
cd install/scripts && ./kind-up.sh

#
# TODO(nstogner): Running outside of cluster is tricky to support b/c of how substratus
# Pods need to mount the same directories as the SCI.
#
# .PHONY: dev-run-kind
# dev-run-kind:
# ...
#

# Development loop against the kind cluster: deploy the "registry" module of
# skaffold.kind.yaml once with "skaffold run", then start "skaffold dev" on the
# "install" module. Depends on the "skaffold" target so the binary is present.
.PHONY: dev-skaffold-kind
dev-skaffold-kind: skaffold
# NOTE: Installing the registry restarts containerd which causes
# skaffold to lose its connections to the Pods. To fix this, the registry is
# installed before running "skaffold dev".
$(SKAFFOLD) run -f skaffold.kind.yaml -m registry
$(SKAFFOLD) dev -f skaffold.kind.yaml -m install \
--cache-artifacts=true \
--tolerate-failures-until-deadline=true

# Counterpart of dev-up-kind: runs kind-down.sh from inside install/scripts.
.PHONY: dev-down-kind
dev-down-kind:
cd install/scripts && ./kind-down.sh


.PHONY: dev-up-aws
dev-up-aws: build-installer
docker run -it \
Expand Down Expand Up @@ -257,7 +290,10 @@ installation-scripts:
installation-manifests: manifests kustomize
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
cd config/gcpmanager && $(KUSTOMIZE) edit set image gcp-manager=${IMG_GCPMANAGER}
$(KUSTOMIZE) build config/default > install/kubernetes/system.yaml
cd config/sci-kind && $(KUSTOMIZE) edit set image sci=${IMG_SCI_KIND}
# TODO: Fix in another PR:
#$(KUSTOMIZE) build config/install-gcp > install/kubernetes/system.yaml
$(KUSTOMIZE) build config/install-kind > install/kubernetes/kind/system.yaml

.PHONY: prepare-release
prepare-release: installation-scripts installation-manifests docs
Expand All @@ -273,6 +309,7 @@ $(LOCALBIN):
KUSTOMIZE ?= $(LOCALBIN)/kustomize
CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
ENVTEST ?= $(LOCALBIN)/setup-envtest
SKAFFOLD ?= $(LOCALBIN)/skaffold
EMBEDMD ?= $(LOCALBIN)/embedmd
CRD_REF_DOCS ?= $(LOCALBIN)/crd-ref-docs
PROTOC ?= $(LOCALBIN)/protoc
Expand All @@ -284,6 +321,7 @@ CRD_REF_DOCS_VERSION ?= v0.0.9
PROTOC_VERSION ?= 23.4
PROTOC_GEN_GO_GRPC_VERSION ?= v1.1.0
PROTOC_GEN_GO_VERSION ?= v1.31.0
SKAFFOLD_VERSION ?= v2.6.3

KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh"
.PHONY: kustomize
Expand Down Expand Up @@ -332,11 +370,12 @@ $(PROTOC): $(LOCALBIN)
fi

.PHONY: skaffold
skaffold:
skaffold: $(SKAFFOLD)
$(SKAFFOLD): $(LOCALBIN)
@ test -s $(LOCALBIN)/skaffold || \
( curl -Lo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-$(SKAFFOLD_PLATFORM) && \
curl -Lo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-$(SKAFFOLD_PLATFORM) && \
chmod +x skaffold && \
mv skaffold $(LOCALBIN)/skaffold )
mv skaffold $(LOCALBIN)/skaffold

.PHONY: envsubst
envsubst:
Expand Down
2 changes: 1 addition & 1 deletion cmd/controllermanager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func main() {
var sciAddr string
flag.StringVar(&configDumpPath, "config-dump-path", "", "The filepath to dump the running config to.")
// TODO: Change SCI Service name to be cloud-agnostic.
flag.StringVar(&sciAddr, "sci-address", "gcp-manager.substratus.svc.cluster.local:10080", "The address of the Substratus Cloud Interface server.")
flag.StringVar(&sciAddr, "sci-address", "sci.substratus.svc.cluster.local:10080", "The address of the Substratus Cloud Interface server.")
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
Expand Down
59 changes: 59 additions & 0 deletions cmd/sci-kind/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package main

import (
"flag"
"fmt"
"log"
"net"
"net/http"

"github.com/substratusai/substratus/internal/sci"
scikind "github.com/substratusai/substratus/internal/sci/kind"
"google.golang.org/grpc"
"google.golang.org/grpc/health"
hv1 "google.golang.org/grpc/health/grpc_health_v1"
)

// main starts the kind implementation of the Substratus Cloud Interface (SCI).
//
// A single scikind.Server value backs two listeners:
//   - an HTTP server on -signed-url-port (default 8080) for signed URL traffic;
//   - a gRPC server on -port (default 10080) that registers the SCI controller
//     service and the standard gRPC health service.
//
// The process terminates through log.Fatal/log.Fatalf as soon as either
// listener fails.
func main() {
	var cfg struct {
		port                 int
		signedURLPort        int
		hostSignedURLAddress string
	}
	flag.IntVar(&cfg.port, "port", 10080, "port number to listen on")
	flag.IntVar(&cfg.signedURLPort, "signed-url-port", 8080, "port to listen for signed url traffic")
	flag.StringVar(&cfg.hostSignedURLAddress, "host-signed-url-address", "http://localhost:30080",
		"host address that port forwards to the signed url port within the cluster. this should be set in kind config.yaml.")
	flag.Parse()

	s := &scikind.Server{
		SignedURLAddress: cfg.hostSignedURLAddress,
	}

	// Compute the listen address once so the server and the log line agree.
	// Previously the log said "address" but printed only the bare port number.
	signedURLAddr := fmt.Sprintf(":%v", cfg.signedURLPort)
	signedURLServer := &http.Server{
		Addr:    signedURLAddr,
		Handler: s,
	}
	// The HTTP server runs in the background; ListenAndServe always returns a
	// non-nil error, so any exit of the HTTP server brings the process down.
	go func() {
		log.Printf("Listening for signed URL traffic on address: %v", signedURLAddr)
		log.Fatal(signedURLServer.ListenAndServe())
	}()

	gs := grpc.NewServer()
	sci.RegisterControllerServer(gs, s)

	// Setup Health Check: the empty service name reports overall server status.
	hs := health.NewServer()
	hs.SetServingStatus("", hv1.HealthCheckResponse_SERVING)
	hv1.RegisterHealthServer(gs, hs)

	addr := fmt.Sprintf(":%v", cfg.port)
	log.Printf("Listening for gRPC traffic on address: %v", addr)
	lis, err := net.Listen("tcp", addr)
	if err != nil {
		log.Fatalf("failed to listen: %v", err)
	}

	// Serve blocks for the lifetime of the process.
	if err := gs.Serve(lis); err != nil {
		log.Fatalf("failed to serve: %v", err)
	}
}
File renamed without changes.
58 changes: 58 additions & 0 deletions config/install-gcp/manager_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# This patch injects a sidecar container that acts as an HTTP proxy for the
# controller manager; it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews.
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
namespace: system
spec:
template:
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values:
- amd64
- arm64
- ppc64le
- s390x
- key: kubernetes.io/os
operator: In
values:
- linux
containers:
- name: kube-rbac-proxy
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- "ALL"
image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1
args:
- "--secure-listen-address=0.0.0.0:8443"
- "--upstream=http://127.0.0.1:8080/"
- "--logtostderr=true"
- "--v=0"
ports:
- containerPort: 8443
protocol: TCP
name: https
resources:
limits:
cpu: 500m
memory: 128Mi
requests:
cpu: 5m
memory: 64Mi
- name: manager
envFrom:
- configMapRef:
name: system
args:
- "--health-probe-bind-address=:8081"
- "--metrics-bind-address=127.0.0.1:8080"
- "--leader-elect"
9 changes: 9 additions & 0 deletions config/install-kind/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: system
namespace: substratus
data:
CLOUD: kind
CLUSTER_NAME: substratus
PRINCIPAL: unused
32 changes: 32 additions & 0 deletions config/install-kind/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Adds namespace to all resources.
namespace: substratus

# Labels to add to all resources and selectors.
#labels:
#- includeSelectors: true
# pairs:
# someName: someValue

resources:
- ./namespace.yaml
- ./config.yaml
- ../crd
- ../rbac
- ../manager
- ../registry-kind
- ../sci-kind
# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
# crd/kustomization.yaml
#- ../webhook
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
#- ../certmanager
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
#- ../prometheus

# Protect the /metrics endpoint by putting it behind auth.
# If you want your controller-manager to expose the /metrics
# endpoint w/o any authn/z, please comment the following line.
patches:
- path: manager_patch.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
File renamed without changes.
12 changes: 12 additions & 0 deletions config/install-kind/namespace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: v1
kind: Namespace
metadata:
name: substratus
labels:
control-plane: controller-manager
app.kubernetes.io/name: namespace
app.kubernetes.io/instance: system
app.kubernetes.io/component: manager
app.kubernetes.io/created-by: substratus
app.kubernetes.io/part-of: substratus
app.kubernetes.io/managed-by: kustomize
3 changes: 0 additions & 3 deletions config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,3 @@ images:
- name: controller
newName: docker.io/substratusai/controller-manager
newTag: v0.8.1
- name: gcp-manager
newName: docker.io/substratusai/gcp-manager
newTag: v0.6.5-alpha
34 changes: 34 additions & 0 deletions config/registry-kind/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: configure-cri
data:
configure-cri.sh: |
#!/usr/bin/env bash
set -x
# Exit on non-existent variable.
set -u
# Exit on error.
set -e
export IMAGE_REGISTRY=$REGISTRY_PORT_5000_TCP_ADDR:5000
if ! grep -q $IMAGE_REGISTRY /mnt/etc/containerd/config.toml; then
containerd_version=$(nsenter --target 1 --mount bash -c "containerd --version | awk '{ print substr(\$3,0,4) }'")
if [ "$containerd_version" = "1.3." ] || [ "$containerd_version" = "1.4." ]; then
cat <<EOF >> /mnt/etc/containerd/config.toml
[plugins.cri.registry.configs."$IMAGE_REGISTRY"]
endpoint = ["http://$IMAGE_REGISTRY"]
EOF
else
# Correct config for containerd 1.5 and above
cat <<EOF >> /mnt/etc/containerd/config.toml
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."$IMAGE_REGISTRY"]
endpoint = ["http://$IMAGE_REGISTRY"]
EOF
fi
nsenter --target 1 --mount bash -c "systemctl is-active --quiet containerd && echo 'Restarting containerd' && systemctl restart containerd"
# Wait for containerd to be ready so that skaffold doesn't fail.
nsenter --target 1 --mount bash -c "while ! ctr -n k8s.io containers ls; do sleep 1; done"
fi
41 changes: 41 additions & 0 deletions config/registry-kind/daemonset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# DaemonSet that runs the configure-cri.sh script in a privileged init container
# with the host's /etc mounted at /mnt/etc. The script is provided by the
# "configure-cri" ConfigMap.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: configure-cri
labels:
app: configure-cri
spec:
selector:
matchLabels:
app: configure-cri
template:
metadata:
labels:
app: configure-cri
spec:
# Share the host PID namespace; the mounted script calls "nsenter --target 1".
hostPID: true
initContainers:
- name: configure-cri
image: ubuntu:22.04
command: ["/scripts/configure-cri.sh"]
volumeMounts:
- name: etc
mountPath: "/mnt/etc"
- mountPath: /scripts
name: scripts
securityContext:
privileged: true
volumes:
# Host /etc, exposed to the init container at /mnt/etc.
- name: etc
hostPath:
path: /etc
# Script volume; mode 0744 makes the mounted script executable by its owner.
- name: scripts
configMap:
name: configure-cri
defaultMode: 0744
containers:
# Placeholder container; presumably keeps the Pod running once the init
# container has finished — confirm.
# NOTE(review): the image has no tag pinned.
- name: pause
image: gcr.io/google_containers/pause
# Tolerate every taint that has the NoSchedule effect.
tolerations:
- effect: NoSchedule
operator: Exists

0 comments on commit 75467f6

Please sign in to comment.