Skip to content
This repository has been archived by the owner on Sep 19, 2022. It is now read-only.

Commit

Permalink
Adding examples for v1beta1 (#98)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnugeorge authored and k8s-ci-robot committed Nov 9, 2018
1 parent 33e7390 commit 9f03606
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 0 deletions.
22 changes: 22 additions & 0 deletions examples/ddp/mnist/cpu/v1beta1/job_mnist_ddp_cpu.yaml
@@ -0,0 +1,22 @@
# PyTorchJob "pytorch-mnist-ddp-cpu": one Master replica and three Worker
# replicas, all running the same container image and restarted on failure.
apiVersion: "kubeflow.org/v1beta1"
kind: "PyTorchJob"
metadata:
  name: "pytorch-mnist-ddp-cpu"
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              # NOTE(review): no explicit image tag is given; consider pinning
              # one so runs are reproducible.
              image: docker.io/andreyvelichkevich/pytorch-mnist-ddp-cpu
    Worker:
      replicas: 3
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: docker.io/andreyvelichkevich/pytorch-mnist-ddp-cpu
29 changes: 29 additions & 0 deletions examples/ddp/mnist/gpu/v1beta1/job_mnist_ddp_gpu.yaml
@@ -0,0 +1,29 @@
# PyTorchJob "pytorch-mnist-ddp-gpu": one Master replica and three Worker
# replicas, all running the same image, each container limited to one
# nvidia.com/gpu.
apiVersion: "kubeflow.org/v1beta1"
kind: "PyTorchJob"
metadata:
  name: "pytorch-mnist-ddp-gpu"
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              # NOTE(review): no explicit image tag is given; consider pinning
              # one so runs are reproducible.
              image: docker.io/deepermind/pytorch-mnist-ddp-gpu
              resources:
                limits:
                  nvidia.com/gpu: 1
    Worker:
      replicas: 3
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: docker.io/deepermind/pytorch-mnist-ddp-gpu
              resources:
                limits:
                  nvidia.com/gpu: 1

23 changes: 23 additions & 0 deletions examples/gloo-dist/mnist/v1beta1/pytorch_job_mnist.yaml
@@ -0,0 +1,23 @@
# PyTorchJob "pytorch-gloo-dist-mnist": one Master replica and three Worker
# replicas, all running the same container image and restarted on failure.
apiVersion: "kubeflow.org/v1beta1"
kind: "PyTorchJob"
metadata:
  name: "pytorch-gloo-dist-mnist"
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              # NOTE(review): no explicit image tag is given; consider pinning
              # one so runs are reproducible.
              image: docker.io/anugarg/pytorch-dist
    Worker:
      replicas: 3
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: docker.io/anugarg/pytorch-dist

22 changes: 22 additions & 0 deletions examples/mpi-dist/mnist/cpu/v1beta1/mpi_mnist_job_cpu.yaml
@@ -0,0 +1,22 @@
# PyTorchJob "pytorch-mpi-mnist-cpu": one Master replica and three Worker
# replicas, all running the same image (tag 1.0) and restarted on failure.
apiVersion: "kubeflow.org/v1beta1"
kind: "PyTorchJob"
metadata:
  name: "pytorch-mpi-mnist-cpu"
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-mpi-mnist-cpu:1.0
    Worker:
      replicas: 3
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-mpi-mnist-cpu:1.0
30 changes: 30 additions & 0 deletions examples/mpi-dist/mnist/gpu/v1beta1/mpi_mnist_job_gpu.yaml
@@ -0,0 +1,30 @@
# PyTorchJob "pytorch-mpi-mnist-gpu": one Master replica and one Worker
# replica, both running the same image, each container limited to one
# nvidia.com/gpu.
apiVersion: "kubeflow.org/v1beta1"
kind: "PyTorchJob"
metadata:
  name: "pytorch-mpi-mnist-gpu"
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              # The image carries no explicit tag; imagePullPolicy: Always
              # makes every pod start pull it again.
              image: docker.io/akado2009/mpi-mnist-gpu
              imagePullPolicy: Always
              resources:
                limits:
                  nvidia.com/gpu: 1
    Worker:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: docker.io/akado2009/mpi-mnist-gpu
              imagePullPolicy: Always
              resources:
                limits:
                  nvidia.com/gpu: 1
22 changes: 22 additions & 0 deletions examples/smoke-dist/v1beta1/pytorch_job_sendrecv.yaml
@@ -0,0 +1,22 @@
# PyTorchJob "pytorch-dist-basic-sendrecv": one Master replica and three
# Worker replicas, all running the same image (tag 1.0) and restarted on
# failure.
apiVersion: "kubeflow.org/v1beta1"
kind: "PyTorchJob"
metadata:
  name: "pytorch-dist-basic-sendrecv"
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-dist-sendrecv-test:1.0
    Worker:
      replicas: 3
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-dist-sendrecv-test:1.0
22 changes: 22 additions & 0 deletions examples/tcp-dist/cifar10/v1beta1/pytorch_job_cifar.yaml
@@ -0,0 +1,22 @@
# PyTorchJob "pytorch-dist-cifar": one Master replica and three Worker
# replicas, all running the same image (tag 1.0) and restarted on failure.
apiVersion: "kubeflow.org/v1beta1"
kind: "PyTorchJob"
metadata:
  name: "pytorch-dist-cifar"
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-dist-cifar-test:1.0
    Worker:
      replicas: 3
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-dist-cifar-test:1.0
22 changes: 22 additions & 0 deletions examples/tcp-dist/mnist/v1beta1/pytorch_job_mnist.yaml
@@ -0,0 +1,22 @@
# PyTorchJob "pytorch-tcp-dist-mnist": one Master replica and three Worker
# replicas, all running the same image (tag 1.0) and restarted on failure.
apiVersion: "kubeflow.org/v1beta1"
kind: "PyTorchJob"
metadata:
  name: "pytorch-tcp-dist-mnist"
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0
    Worker:
      replicas: 3
      restartPolicy: OnFailure
      template:
        spec:
          containers:
            - name: pytorch
              image: gcr.io/kubeflow-ci/pytorch-dist-mnist_test:1.0

0 comments on commit 9f03606

Please sign in to comment.