Skip to content

Commit

Permalink
GCP: Updated install flow and managed infra (#185)
Browse files Browse the repository at this point in the history
  • Loading branch information
samos123 committed Aug 16, 2023
1 parent 75467f6 commit 4eb846e
Show file tree
Hide file tree
Showing 48 changed files with 2,388 additions and 1,071 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/docker-build-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta-installer.outputs.tags }}
labels: ${{ steps.meta-installer.outputs.labels }}
gcp-manager:
sci-gcp:
runs-on: ubuntu-latest
steps:
- name: Checkout
Expand All @@ -82,17 +82,17 @@ jobs:
username: "${{ secrets.DOCKERHUB_USERNAME }}"
password: "${{ secrets.DOCKERHUB_TOKEN }}"
- name: Docker meta
id: meta-gcp-manager
id: meta
uses: docker/metadata-action@v4
with:
images: substratusai/gcp-manager
images: substratusai/sci-gcp
- name: Build and push
id: build-and-push-gcp-manager
id: build-and-push-sci-gcp
uses: docker/build-push-action@v4
with:
context: .
file: Dockerfile.gcpmanager
file: Dockerfile.sci-gcp
platforms: "linux/amd64,linux/arm64"
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta-gcp-manager.outputs.tags }}
labels: ${{ steps.meta-gcp-manager.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
8 changes: 4 additions & 4 deletions Dockerfile.gcpmanager → Dockerfile.sci-gcp
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,21 @@ WORKDIR /workspace
COPY go.mod go.sum ./
RUN go mod download

COPY cmd/gcpmanager/main.go cmd/gcpmanager/main.go
COPY cmd/sci-gcp/main.go cmd/sci-gcp/main.go
COPY internal/ internal/

# Build the app
RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
go build -a -o main cmd/gcpmanager/main.go
go build -a -o sci-gcp cmd/sci-gcp/main.go

FROM gcr.io/distroless/static:nonroot
WORKDIR /

# Copy the Pre-built binary file from the previous stage
COPY --from=builder /workspace/main .
COPY --from=builder /workspace/sci-gcp .
# use nobody:nogroup
USER 65532:65532
EXPOSE 10080

# run the executable
CMD ["/main"]
CMD ["/sci-gcp"]
43 changes: 15 additions & 28 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,8 @@
# Image URL to use all building/pushing image targets
VERSION ?= v0.8.1
IMG ?= docker.io/substratusai/controller-manager:${VERSION}
IMG_GCPMANAGER ?= docker.io/substratusai/gcp-manager:${VERSION}
IMG_SCI_KIND ?= docker.io/substratusai/sci-kind:${VERSION}

# Set to false if you don't want GPU nodepools created
ATTACH_GPU_NODEPOOLS=true

IMG_SCI_GCP ?= docker.io/substratusai/sci-gcp:${VERSION}

# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.26.1
Expand Down Expand Up @@ -106,21 +102,7 @@ test: manifests generate protogen fmt vet envtest ## Run tests.
test-kubectl: manifests fmt vet envtest ## Run tests.
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./kubectl/internal/commands -v

.PHONY: render-skaffold-manifests
render-skaffold-manifests: envsubst ## Run envsubst against the skaffold manifest templates.
@ if [ -n ${PROJECT_ID} ]; then export PROJECT_ID=$(shell gcloud config get-value project); fi && \
envsubst < config/skaffold-dependencies.sh.tpl > config/skaffold-dependencies.sh && \
chmod +x config/skaffold-dependencies.sh && \
envsubst < config/gcpmanager/gcpmanager-dependencies.yaml.tpl > config/gcpmanager/gcpmanager-dependencies.yaml && \
envsubst < config/gcpmanager/gcpmanager-skaffold.yaml.tpl > config/gcpmanager/gcpmanager-skaffold.yaml

.PHONY: skaffold-dev-gcpmanager
skaffold-dev-gcpmanager: protoc skaffold protogen render-skaffold-manifests ## Run skaffold dev against gcpmanager
config/skaffold-dependencies.sh && \
skaffold dev -f config/gcpmanager/gcpmanager-skaffold.yaml

##@ Build

.PHONY: build
build: manifests generate fmt vet ## Build manager binary.
go build -o bin/manager cmd/controllermanager/main.go
Expand All @@ -131,11 +113,12 @@ dev-up-gcp: build-installer
-v ${HOME}/.kube:/root/.kube \
-e PROJECT=$(shell gcloud config get project) \
-e TOKEN=$(shell gcloud auth print-access-token) \
-e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \
-e INSTALL_OPERATOR=false \
substratus-installer gcp-up.sh
mkdir -p secrets
gcloud iam service-accounts keys create --iam-account=substratus-gcp-manager@$(shell gcloud config get project).iam.gserviceaccount.com ./secrets/gcp-manager-key.json
gcloud iam service-accounts keys create \
--iam-account=substratus@$(shell gcloud config get project).iam.gserviceaccount.com \
./secrets/substratus-sa.json

.PHONY: dev-down-gcp
dev-down-gcp: build-installer
Expand All @@ -145,7 +128,7 @@ dev-down-gcp: build-installer
-e TOKEN=$(shell gcloud auth print-access-token) \
-e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \
substratus-installer gcp-down.sh
rm ./secrets/gcp-manager-key.json
rm ./secrets/substratus-sa.json

.PHONY: dev-up-kind
dev-up-kind:
Expand Down Expand Up @@ -174,6 +157,11 @@ dev-skaffold-kind: skaffold
dev-down-kind:
cd install/scripts && ./kind-down.sh

.PHONY: dev-skaffold-gcp
# PROJECT_ID is read from the active gcloud configuration; it is exported so
# that both SKAFFOLD_DEFAULT_REPO below and the skaffold process can see it.
dev-skaffold-gcp: export PROJECT_ID=$(shell gcloud config get project)
# Images built by skaffold are pushed under the project's gcr.io registry.
dev-skaffold-gcp: export SKAFFOLD_DEFAULT_REPO=gcr.io/${PROJECT_ID}
# Depend on the skaffold tool target, as dev-skaffold-kind does.
dev-skaffold-gcp: skaffold ## Run skaffold dev using skaffold.gcp.yaml.
	skaffold dev -f skaffold.gcp.yaml

.PHONY: dev-up-aws
dev-up-aws: build-installer
Expand All @@ -199,15 +187,15 @@ dev-down-aws: build-installer
.PHONY: dev-run-gcp
# Controller manager configuration #
dev-run-gcp: export CLOUD=gcp
dev-run-gcp: export GPU_TYPE=nvidia-l4
dev-run-gcp: export PROJECT_ID=$(shell gcloud config get project)
dev-run-gcp: export CLUSTER_NAME=substratus
dev-run-gcp: export CLUSTER_LOCATION=us-central1
dev-run-gcp: export PRINCIPAL=substratus@${PROJECT_ID}.iam.gserviceaccount.com
# Cloud manager configuration #
dev-run-gcp: export GOOGLE_APPLICATION_CREDENTIALS=./secrets/gcp-manager-key.json
dev-run-gcp: export GOOGLE_APPLICATION_CREDENTIALS=./secrets/substratus-sa.json
# Run the controller manager and the cloud manager.
dev-run-gcp: manifests kustomize install-crds
go run ./cmd/gcpmanager & \
go run ./cmd/sci-gcp & \
go run ./cmd/controllermanager/main.go \
--sci-address=localhost:10080 \
--config-dump-path=/tmp/substratus-config.yaml
Expand Down Expand Up @@ -289,11 +277,10 @@ installation-scripts:
.PHONY: installation-manifests
installation-manifests: manifests kustomize
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
cd config/gcpmanager && $(KUSTOMIZE) edit set image gcp-manager=${IMG_GCPMANAGER}
cd config/sci-kind && $(KUSTOMIZE) edit set image sci=${IMG_SCI_KIND}
# TODO: Fix in another PR:
#$(KUSTOMIZE) build config/install-gcp > install/kubernetes/system.yaml
$(KUSTOMIZE) build config/install-kind > install/kubernetes/kind/system.yaml
cd config/sci-gcp && $(KUSTOMIZE) edit set image sci=${IMG_SCI_GCP}
$(KUSTOMIZE) build config/install-gcp > install/kubernetes/gcp/system.yaml

.PHONY: prepare-release
prepare-release: installation-scripts installation-manifests docs
Expand Down
11 changes: 11 additions & 0 deletions cmd/controllermanager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"gopkg.in/yaml.v2"
"k8s.io/client-go/kubernetes"
_ "k8s.io/client-go/plugin/pkg/client/auth"

"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -116,6 +117,16 @@ func main() {
// Create a client using the connection
sciClient := sci.NewControllerClient(conn)

kubernetesClient, err := kubernetes.NewForConfig(mgr.GetConfig())
if err != nil {
setupLog.Error(err, "error creating K8s client-go client")
}
err = controller.AssociatePrincipalSCIServiceAccount(context.Background(), kubernetesClient, cld)
if err != nil {
setupLog.Error(err, "error associating principal to SCI K8s ServiceAccount")
os.Exit(1)
}

if err = (&controller.ModelReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Expand Down
73 changes: 0 additions & 73 deletions cmd/gcpmanager/main.go

This file was deleted.

103 changes: 103 additions & 0 deletions cmd/sci-gcp/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package main

import (
"context"
"flag"
"fmt"
"net"
"net/http"
"os"
"strconv"

ctrl "sigs.k8s.io/controller-runtime"

"cloud.google.com/go/compute/metadata"
credentials "cloud.google.com/go/iam/credentials/apiv1"
"cloud.google.com/go/storage"
"github.com/substratusai/substratus/internal/sci"
"github.com/substratusai/substratus/internal/sci/gcp"
"google.golang.org/api/iam/v1"
"google.golang.org/grpc"
"google.golang.org/grpc/health"
hv1 "google.golang.org/grpc/health/grpc_health_v1"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

var setupLog = ctrl.Log.WithName("setup")

// main starts the GCP implementation of the SCI controller gRPC service.
//
// It parses the -port flag (default 10080) together with the zap logging
// flags, builds the Google Cloud clients (IAM credentials, IAM, Cloud Storage
// and the metadata client), hands them to a gcp.Server, lets the server
// auto-configure and validate itself, and then serves the SCI controller API
// plus the standard gRPC health service on the requested TCP port.
//
// Every setup failure is logged through setupLog and terminates the process
// with exit code 1.
func main() {
	// serve by default on port 10080
	var port int
	flag.IntVar(&port, "port", 10080, "port number to listen on")

	opts := zap.Options{
		Development: true,
	}
	opts.BindFlags(flag.CommandLine)
	flag.Parse()

	ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

	// A single root context is shared by all client constructors.
	ctx := context.Background()

	iamCredClient, err := credentials.NewIamCredentialsClient(ctx)
	if err != nil {
		setupLog.Error(err, "failed to create iam credentials client")
		os.Exit(1)
	}

	iamService, err := iam.NewService(ctx)
	if err != nil {
		setupLog.Error(err, "failed to create iam client")
		os.Exit(1)
	}

	storageClient, err := storage.NewClient(ctx)
	if err != nil {
		setupLog.Error(err, "failed to create storage client")
		os.Exit(1)
	}

	// The same HTTP client backs the metadata client and is also handed to
	// the server directly.
	hc := &http.Client{}
	mc := metadata.NewClient(hc)

	s, err := gcp.NewServer()
	if err != nil {
		setupLog.Error(err, "failed to create server")
		os.Exit(1)
	}
	s.Clients = gcp.Clients{
		IAMCredentialsClient: iamCredClient,
		IAM:                  iamService,
		Metadata:             mc,
		Storage:              storageClient,
		HTTP:                 hc,
	}
	if err := s.AutoConfigure(mc); err != nil {
		setupLog.Error(err, "failed to AutoConfigure server")
		os.Exit(1)
	}

	if err := s.Validate(); err != nil {
		setupLog.Error(err, "failed to validate server")
		os.Exit(1)
	}

	gs := grpc.NewServer()
	sci.RegisterControllerServer(gs, s)

	// Setup Health Check
	hs := health.NewServer()
	hs.SetServingStatus("", hv1.HealthCheckResponse_SERVING)
	hv1.RegisterHealthServer(gs, hs)

	lis, err := net.Listen("tcp", ":"+strconv.Itoa(port))
	if err != nil {
		setupLog.Error(err, "failed to listen", "port", port)
		os.Exit(1)
	}
	// Announce the port only once the socket is actually bound, and end the
	// line so subsequent log output does not run into this message.
	fmt.Printf("sci.gcp server listening on port %v...\n", port)

	if err := gs.Serve(lis); err != nil {
		setupLog.Error(err, "failed to serve", "port", port)
		os.Exit(1)
	}
}
4 changes: 2 additions & 2 deletions cmd/gcpmanager/main_test.go → cmd/sci-gcp/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ package main_test
// --upload-file the-file.tar.gz \
// $URL

// the following function was successfully used to exercise gcpmanager.Server.CreateSignedURL()
// the following function was successfully used to exercise sci.gcp.Server.CreateSignedURL()
// func invokeManually(storageClient *storage.Client) {
// payload := sci.CreateSignedURLRequest{
// BucketName: "substratus-ai-001-substratus-notebooks",
// ObjectName: "notebook.tar.gz",
// ExpirationSeconds: 300,
// }
// serv := gcpmanager.Server{
// serv := gcp.Server{
// StorageClient: storageClient,
// }
// fmt.Println("calling CreateSignedURL with payload:")
Expand Down

0 comments on commit 4eb846e

Please sign in to comment.