From 4da3427eddda82113c19aac6b06f1be211ecd555 Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Fri, 6 Apr 2018 12:30:48 -0700 Subject: [PATCH 1/4] Initial ksonnet package for Pachyderm. * Packages were created based on the helm chart https://github.com/kubernetes/charts/tree/master/stable/pachyderm/templates * Instructions for adding new packages are being added in #609. * I've confirmed that the resulting components can successfully be deployed, but I don't know if Pachyderm is working yet. * We most likely need to expose more options, e.g. to control things like the GCS bucket. * Related to #151 --- kubeflow/pachyderm/all.libsonnet | 446 ++++++++++++++++++ kubeflow/pachyderm/parts.yaml | 22 + .../pachyderm/prototypes/pachyderm.jsonnet | 11 + kubeflow/registry.yaml | 8 +- 4 files changed, 486 insertions(+), 1 deletion(-) create mode 100644 kubeflow/pachyderm/all.libsonnet create mode 100644 kubeflow/pachyderm/parts.yaml create mode 100644 kubeflow/pachyderm/prototypes/pachyderm.jsonnet diff --git a/kubeflow/pachyderm/all.libsonnet b/kubeflow/pachyderm/all.libsonnet new file mode 100644 index 00000000000..43e25e136bc --- /dev/null +++ b/kubeflow/pachyderm/all.libsonnet @@ -0,0 +1,446 @@ +{ + // Various prototypes we want to support. + + // TODO(jlewi): Need to configure the storage backend, e.g. to use GCS. I think it's + // using LOCAL right now. + // + // Also, the docs at http://pachyderm.readthedocs.io/en/latest/deployment/google_cloud_platform.html + // say it should be creating a storage class, but I don't see that.
+ + + // All pachyderm components + all(params, env):: [ + $.parts(params, env).secret, + $.parts(params, env).serviceAccount, + $.parts(params, env).role, + $.parts(params, env).roleBinding, + $.parts(params, env).etcdService, + $.parts(params, env).pachydService, + $.parts(params, env).etcd, + $.parts(params, env).pachyd, + ], + + // Parts should be a dictionary containing jsonnet representations of the various + // K8s resources used to construct the prototypes listed above. + parts(params, env):: { + // All ksonnet environments are associated with a namespace and we + // generally want to use that namespace for a component. + // However, in some cases an application may use multiple namespaces in which + // case the namespace for a particular component will be a parameter. + local namespace = if std.objectHas(params, "namespace") then params.namespace else env.namespace, + + secret:: { + apiVersion: "v1", + data: null, + kind: "Secret", + metadata: { + labels: { + app: params.name + "-pachyderm", + }, + name: "pachyderm-storage-secret", + namespace: namespace, + }, + }, + + serviceAccount:: { + apiVersion: "v1", + kind: "ServiceAccount", + metadata: { + labels: { + app: params.name + "-pachyderm", + chart: "pachyderm-0.1.6", + suite: "pachyderm", + }, + name: "pachyderm", + namespace: namespace, + }, + }, + + role:: { + apiVersion: "rbac.authorization.k8s.io/v1beta1", + kind: "Role", + metadata: { + creationTimestamp: null, + labels: { + app: "", + suite: "pachyderm", + }, + name: "pachyderm", + namespace: namespace, + }, + rules: [ + { + apiGroups: [ + "", + ], + resources: [ + "nodes", + "pods", + "pods/log", + "endpoints", + ], + verbs: [ + "get", + "list", + "watch", + ], + }, + { + apiGroups: [ + "", + ], + resources: [ + "replicationcontrollers", + "services", + ], + verbs: [ + "get", + "list", + "watch", + "create", + "update", + "delete", + ], + }, + { + apiGroups: [ + "", + ], + resourceNames: [ + "pachyderm-storage-secret", + ], + resources: [ + 
"secrets", + ], + verbs: [ + "get", + "list", + "watch", + "create", + "update", + "delete", + ], + }, + ], + }, + + roleBinding:: { + apiVersion: "rbac.authorization.k8s.io/v1beta1", + kind: "RoleBinding", + metadata: { + name: "pachyderm", + namespace: namespace, + }, + roleRef: { + apiGroup: "", + kind: "Role", + name: "pachyderm", + }, + subjects: [ + { + kind: "ServiceAccount", + name: "pachyderm", + namespace: "kubeflow", + }, + ], + }, + + etcdService:: { + apiVersion: "v1", + kind: "Service", + metadata: { + labels: { + app: params.name + "-etcd", + suite: "pachyderm", + }, + name: "etcd", + namespace: namespace, + }, + spec: { + ports: [ + { + name: "client-port", + port: 2379, + targetPort: 0, + }, + ], + selector: { + app: params.name + "-etcd", + }, + type: "NodePort", + }, + }, + + pachydService:: { + apiVersion: "v1", + kind: "Service", + metadata: { + labels: { + app: params.name + "-pachd", + chart: "pachyderm-0.1.6", + suite: "pachyderm", + }, + name: "pachd", + namespace: namespace, + }, + spec: { + ports: [ + { + name: "api-grpc-port", + nodePort: 30650, + port: 650, + targetPort: 650, + }, + { + name: "trace-port", + nodePort: 30651, + port: 651, + targetPort: 651, + }, + { + name: "api-http-port", + nodePort: 30652, + port: 652, + targetPort: 652, + }, + ], + selector: { + app: "pachd", + }, + type: "NodePort", + }, + }, + + etcd:: { + apiVersion: "extensions/v1beta1", + kind: "Deployment", + metadata: { + labels: { + app: params.name + "-etcd", + release: "RELEASE-NAME", + suite: "pachyderm", + }, + name: "etcd", + namespace: namespace, + }, + spec: { + replicas: 1, + selector: { + matchLabels: { + app: params.name + "-etcd", + suite: "pachyderm", + }, + }, + template: { + metadata: { + labels: { + app: params.name + "-etcd", + suite: "pachyderm", + }, + name: "etcd", + }, + spec: { + containers: [ + { + command: [ + "/usr/local/bin/etcd", + "--listen-client-urls=http://0.0.0.0:2379", + "--advertise-client-urls=http://0.0.0.0:2379", + 
"--data-dir=/var/data/etcd", + "--auto-compaction-retention=1", + ], + image: "pachyderm/etcd:v3.2.7", + imagePullPolicy: "IfNotPresent", + name: "etcd", + ports: [ + { + containerPort: 2379, + name: "client-port", + }, + { + containerPort: 2380, + name: "peer-port", + }, + ], + resources: { + requests: { + cpu: "250m", + memory: "256M", + }, + }, + volumeMounts: [ + { + mountPath: "/var/data/etcd", + name: "etcdvol", + }, + ], + }, + ], + volumes: [ + { + hostPath: { + path: "/var/pachyderm/etcd", + }, + name: "etcdvol", + }, + ], + }, + }, + }, + }, + + pachyd:: { + apiVersion: "extensions/v1beta1", + kind: "Deployment", + metadata: { + labels: { + app: params.name + "-pachd", + release: params.name, + suite: "pachyderm", + }, + name: "pachd", + namespace: namespace, + }, + spec: { + replicas: 1, + selector: { + matchLabels: { + app: "pachd", + suite: "pachyderm", + }, + }, + strategy: {}, + template: { + metadata: { + labels: { + app: "pachd", + suite: "pachyderm", + }, + name: "pachd", + }, + spec: { + containers: [ + { + env: [ + { + name: "PACH_ROOT", + value: "/pach", + }, + { + name: "NUM_SHARDS", + value: "16", + }, + { + name: "STORAGE_BACKEND", + value: "LOCAL", + }, + { + name: "STORAGE_HOST_PATH", + value: "/var/pachyderm/pachd", + }, + { + name: "PACHD_POD_NAMESPACE", + valueFrom: { + fieldRef: { + apiVersion: "v1", + fieldPath: "metadata.namespace", + }, + }, + }, + { + name: "WORKER_IMAGE", + value: "pachyderm/worker:1.7.0", + }, + { + name: "WORKER_SIDECAR_IMAGE", + value: "pachyderm/pachd:1.7.0", + }, + { + name: "WORKER_IMAGE_PULL_POLICY", + value: "IfNotPresent", + }, + { + name: "PACHD_VERSION", + value: "1.7.0", + }, + { + name: "METRICS", + value: "true", + }, + { + name: "LOG_LEVEL", + value: "info", + }, + { + name: "BLOCK_CACHE_BYTES", + value: "0G", + }, + { + name: "IAM_ROLE", + }, + { + name: "PACHYDERM_AUTHENTICATION_DISABLED_FOR_TESTING", + value: "false", + }, + ], + image: "pachyderm/pachd:1.7.0", + imagePullPolicy: "Always", + 
name: "pachd", + ports: [ + { + containerPort: 650, + name: "api-grpc-port", + protocol: "TCP", + }, + { + containerPort: 651, + name: "trace-port", + }, + { + containerPort: 652, + name: "api-http-port", + }, + ], + resources: { + requests: { + cpu: "250m", + memory: "512M", + }, + }, + securityContext: { + privileged: true, + }, + volumeMounts: [ + { + mountPath: "/pach", + name: "pachdvol", + }, + { + mountPath: "/pachyderm-storage-secret", + name: "pachyderm-storage-secret", + }, + ], + }, + ], + serviceAccountName: "pachyderm", + volumes: [ + { + hostPath: { + path: "/var/pachyderm/pachd", + }, + name: "pachdvol", + }, + { + name: "pachyderm-storage-secret", + secret: { + secretName: "pachyderm-storage-secret", + }, + }, + ], + }, + }, + }, + }, // pachd + + }, +} diff --git a/kubeflow/pachyderm/parts.yaml b/kubeflow/pachyderm/parts.yaml new file mode 100644 index 00000000000..2dfc1449edc --- /dev/null +++ b/kubeflow/pachyderm/parts.yaml @@ -0,0 +1,22 @@ +{ + "name": "pachyderm", + "apiVersion": "0.0.1", + "kind": "ksonnet.io/parts", + "description": "Pachyderm http://www.pachyderm.io/.\n", + "author": "kubeflow-team ", + "contributors": [ + ], + "repository": { + "type": "git", + "url": "https://github.com/kubeflow/kubeflow" + }, + "bugs": { + "url": "https://github.com/kubeflow/kubeflow/issues" + }, + "keywords": [ + "kubernetes", + "kubeflow", + "machine learning" + ], + "license": "Apache 2.0", +} diff --git a/kubeflow/pachyderm/prototypes/pachyderm.jsonnet b/kubeflow/pachyderm/prototypes/pachyderm.jsonnet new file mode 100644 index 00000000000..bad55b41806 --- /dev/null +++ b/kubeflow/pachyderm/prototypes/pachyderm.jsonnet @@ -0,0 +1,11 @@ +// @apiVersion 0.1 +// @name io.ksonnet.pkg.pachyderm +// @description Pachyderm enables reproducible data science. +// @shortDescription Pachyderm components.
+// @param name string Name to give to each of the components + +local k = import "k.libsonnet"; + +local all = import "kubeflow/pachyderm/all.libsonnet"; + +std.prune(k.core.v1.list.new(all.all(params, env))) diff --git a/kubeflow/registry.yaml b/kubeflow/registry.yaml index 8d00dc29185..4a5cb6cdfd8 100644 --- a/kubeflow/registry.yaml +++ b/kubeflow/registry.yaml @@ -12,4 +12,10 @@ libraries: path: tf-serving argo: version: master - path: argo \ No newline at end of file + path: argo + pachyderm: + version: master + path: pachyderm + new-package-stub: + version: master + path: new-package-stub \ No newline at end of file From 44b75a1f8624ef9e46af56867839e8ee7588e1af Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Fri, 6 Apr 2018 19:01:06 -0700 Subject: [PATCH 2/4] Added dash. --- kubeflow/pachyderm/all.libsonnet | 90 +++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 13 deletions(-) diff --git a/kubeflow/pachyderm/all.libsonnet b/kubeflow/pachyderm/all.libsonnet index 43e25e136bc..2b6f58960b7 100644 --- a/kubeflow/pachyderm/all.libsonnet +++ b/kubeflow/pachyderm/all.libsonnet @@ -12,12 +12,13 @@ all(params, env):: [ $.parts(params, env).secret, $.parts(params, env).serviceAccount, - $.parts(params, env).role, + $.parts(params, env).role, $.parts(params, env).roleBinding, $.parts(params, env).etcdService, $.parts(params, env).pachydService, $.parts(params, env).etcd, $.parts(params, env).pachyd, + $.parts(params, env).dash, ], // Parts should be a dictionary containing jsonnet representations of the various @@ -28,7 +29,7 @@ // However, in some cases an application may use multiple namespaces in which // case the namespace for a particular component will be a parameter. 
local namespace = if std.objectHas(params, "namespace") then params.namespace else env.namespace, - + secret:: { apiVersion: "v1", data: null, @@ -37,7 +38,7 @@ labels: { app: params.name + "-pachyderm", }, - name: "pachyderm-storage-secret", + name: "pachyderm-storage-secret", namespace: namespace, }, }, @@ -51,7 +52,7 @@ chart: "pachyderm-0.1.6", suite: "pachyderm", }, - name: "pachyderm", + name: "pachyderm", namespace: namespace, }, }, @@ -65,7 +66,7 @@ app: "", suite: "pachyderm", }, - name: "pachyderm", + name: "pachyderm", namespace: namespace, }, rules: [ @@ -128,7 +129,7 @@ apiVersion: "rbac.authorization.k8s.io/v1beta1", kind: "RoleBinding", metadata: { - name: "pachyderm", + name: "pachyderm", namespace: namespace, }, roleRef: { @@ -150,10 +151,10 @@ kind: "Service", metadata: { labels: { - app: params.name + "-etcd", + app: params.name + "-etcd", suite: "pachyderm", }, - name: "etcd", + name: "etcd", namespace: namespace, }, spec: { @@ -177,10 +178,10 @@ metadata: { labels: { app: params.name + "-pachd", - chart: "pachyderm-0.1.6", + chart: "pachyderm-0.1.6", suite: "pachyderm", }, - name: "pachd", + name: "pachd", namespace: namespace, }, spec: { @@ -216,7 +217,7 @@ kind: "Deployment", metadata: { labels: { - app: params.name + "-etcd", + app: params.name + "-etcd", release: "RELEASE-NAME", suite: "pachyderm", }, @@ -317,6 +318,7 @@ suite: "pachyderm", }, name: "pachd", + namespace: namespace, }, spec: { containers: [ @@ -440,7 +442,69 @@ }, }, }, - }, // pachd + }, // pachd + + dash:: { + kind: "Deployment", + apiVersion: "apps/v1beta1", + + metadata: { + labels: { + app: "dash", + suite: "pachyderm", + }, + name: "dash", + namespace: namespace, + }, + + spec: { + replicas: 1, + selector: { + matchLabels: { + app: "dash", + suite: "pachyderm", + }, + }, + strategy: {}, + template: { + metadata: { + labels: { + app: "dash", + suite: "pachyderm", + }, + name: "dash", + namespace: namespace, + }, + spec: { + containers: + [ + { + name: "dash", + image: 
"pachyderm/dash:1.7-preview-8", + ports: [ + { + ContainerPort: 8080, + Name: "dash-http", + }, + ], + imagePullPolicy: "IfNotPresent", + }, + { + name: "grpc-proxy", + image: "pachyderm/grpc-proxy:0.4.2", + ports: [ + { + ContainerPort: 8081, + Name: "grpc-proxy-http", + }, + ], + ImagePullPolicy: "IfNotPresent", + }, + ], // containers + }, // spec + }, // template + }, // spec + }, // dash - }, + }, // parts } From b59f2bbc803aa0350fb5361954c8386612d9f0f6 Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Fri, 6 Apr 2018 19:04:10 -0700 Subject: [PATCH 3/4] Fix the dashboard; need to set DASH_SERVICE_HOST. --- kubeflow/pachyderm/all.libsonnet | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kubeflow/pachyderm/all.libsonnet b/kubeflow/pachyderm/all.libsonnet index 2b6f58960b7..c0786b49f18 100644 --- a/kubeflow/pachyderm/all.libsonnet +++ b/kubeflow/pachyderm/all.libsonnet @@ -481,6 +481,16 @@ { name: "dash", image: "pachyderm/dash:1.7-preview-8", + env: [ + { + name: "DASH_SERVICE_HOST", + valueFrom: { + fieldRef: { + fieldPath: "metadata.name", + }, + }, + }, + ], ports: [ { ContainerPort: 8080, From cd7ac940d10b2ce9356fc65cb7e0ae26c17a3328 Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Mon, 9 Apr 2018 05:18:22 -0700 Subject: [PATCH 4/4] Remove the dashboard because its behind a paywall. 
--- kubeflow/pachyderm/all.libsonnet | 74 -------------------------------- 1 file changed, 74 deletions(-) diff --git a/kubeflow/pachyderm/all.libsonnet b/kubeflow/pachyderm/all.libsonnet index c0786b49f18..73f880220e6 100644 --- a/kubeflow/pachyderm/all.libsonnet +++ b/kubeflow/pachyderm/all.libsonnet @@ -18,7 +18,6 @@ $.parts(params, env).pachydService, $.parts(params, env).etcd, $.parts(params, env).pachyd, - $.parts(params, env).dash, ], // Parts should be a dictionary containing jsonnet representations of the various @@ -443,78 +442,5 @@ }, }, }, // pachd - - dash:: { - kind: "Deployment", - apiVersion: "apps/v1beta1", - - metadata: { - labels: { - app: "dash", - suite: "pachyderm", - }, - name: "dash", - namespace: namespace, - }, - - spec: { - replicas: 1, - selector: { - matchLabels: { - app: "dash", - suite: "pachyderm", - }, - }, - strategy: {}, - template: { - metadata: { - labels: { - app: "dash", - suite: "pachyderm", - }, - name: "dash", - namespace: namespace, - }, - spec: { - containers: - [ - { - name: "dash", - image: "pachyderm/dash:1.7-preview-8", - env: [ - { - name: "DASH_SERVICE_HOST", - valueFrom: { - fieldRef: { - fieldPath: "metadata.name", - }, - }, - }, - ], - ports: [ - { - ContainerPort: 8080, - Name: "dash-http", - }, - ], - imagePullPolicy: "IfNotPresent", - }, - { - name: "grpc-proxy", - image: "pachyderm/grpc-proxy:0.4.2", - ports: [ - { - ContainerPort: 8081, - Name: "grpc-proxy-http", - }, - ], - ImagePullPolicy: "IfNotPresent", - }, - ], // containers - }, // spec - }, // template - }, // spec - }, // dash - }, // parts }