Skip to content

Commit

Permalink
chore: add more cuda version cache (#953)
Browse files Browse the repository at this point in the history
* chore: add more cuda version

rebase main for envd linter

Signed-off-by: Keming <kemingyang@tensorchord.ai>

* fix hash

Signed-off-by: Keming <kemingyang@tensorchord.ai>

* fix cache ref

Signed-off-by: Keming <kemingyang@tensorchord.ai>

* fix envd linter ci name

Signed-off-by: Keming <kemingyang@tensorchord.ai>

Signed-off-by: Keming <kemingyang@tensorchord.ai>
  • Loading branch information
kemingy committed Sep 29, 2022
1 parent 0b73548 commit 60708ba
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/envd-lint.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: CI
name: envd lint

on:
push:
Expand Down
4 changes: 3 additions & 1 deletion base-images/remote-cache/build-and-push-remote-cache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ DOCKER_HUB_ORG="${DOCKER_HUB_ORG:-tensorchord}"
cd ${ROOT_DIR}

envd build --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION} --force
envd build -f build.envd:build_gpu --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION} --force
envd build -f build.envd:build_gpu_11_2 --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION}-cuda-11.2.0-cudnn-8 --force
envd build -f build.envd:build_gpu_11_3 --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION}-cuda-11.3.0-cudnn-8 --force
envd build -f build.envd:build_gpu_11_6 --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION}-cuda-11.6.0-cudnn-8 --force

cd - > /dev/null
15 changes: 14 additions & 1 deletion base-images/remote-cache/build.envd
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@ def build():
base(os="ubuntu20.04", language="python3")


def build_gpu():
def build_gpu_11_2():
"""tensorflow

Cache build target: python3 on ubuntu20.04 with CUDA 11.2.0 and cuDNN 8.
The "tensorflow" tag above is the author's note; presumably this is the
CUDA version TensorFlow users are expected to request (TODO confirm).
"""
base(os="ubuntu20.04", language="python3")
install.cuda(version="11.2.0", cudnn="8")


def build_gpu_11_3():
"""pytorch

Cache build target: python3 on ubuntu20.04 with CUDA 11.3.0 and cuDNN 8.
The "pytorch" tag above is the author's note; presumably this is one of the
CUDA versions PyTorch users are expected to request (TODO confirm).
"""
base(os="ubuntu20.04", language="python3")
install.cuda(version="11.3.0", cudnn="8")


def build_gpu_11_6():
"""pytorch

Cache build target: python3 on ubuntu20.04 with CUDA 11.6.0 and cuDNN 8.
The "pytorch" tag above is the author's note; presumably this is one of the
CUDA versions PyTorch users are expected to request (TODO confirm).
"""
base(os="ubuntu20.04", language="python3")
install.cuda(version="11.6.0", cudnn="8")
2 changes: 1 addition & 1 deletion pkg/lang/frontend/starlark/interpreter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ var _ = Describe("Starlark", func() {
filename := "testdata/test.envd"
hash, err := GetEnvdProgramHash(filename)
Expect(err).NotTo(HaveOccurred())
Expect(hash).To(Equal("6beb7c4921722246"))
Expect(hash).To(Equal("cff1c81818116d42"))
})
})
3 changes: 1 addition & 2 deletions pkg/lang/ir/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ import (
)

func (g Graph) CacheID(filename string) string {
gpu := g.CUDA != nil || g.CUDNN != nil
var cacheID string
if gpu {
if g.CUDA != nil {
cacheID = fmt.Sprintf("%s/%s-gpu", filename, g.EnvironmentName)
} else {
cacheID = fmt.Sprintf("%s/%s-cpu", filename, g.EnvironmentName)
Expand Down
22 changes: 14 additions & 8 deletions pkg/lang/ir/compile.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func NewGraph() *Graph {
Version: &langVersion,
},
CUDA: nil,
CUDNN: nil,
CUDNN: "8", // default version
NumGPUs: -1,

PyPIPackages: []string{},
Expand Down Expand Up @@ -136,9 +136,7 @@ func (g Graph) Labels() (map[string]string, error) {
if g.GPUEnabled() {
labels[types.ImageLabelGPU] = "true"
labels[types.ImageLabelCUDA] = *g.CUDA
if g.CUDNN != nil {
labels[types.ImageLabelCUDNN] = *g.CUDNN
}
labels[types.ImageLabelCUDNN] = g.CUDNN
}
labels[types.ImageLabelVendor] = types.ImageVendorEnvd
code, err := g.RuntimeGraph.Dump()
Expand Down Expand Up @@ -185,10 +183,18 @@ func (g Graph) EnvString() []string {

// DefaultCacheImporter returns the default remote build-cache import
// reference as a "type=registry,ref=docker.io/<org>/python-cache:envd-<version>"
// string, where <org> comes from the docker-organization flag (read via viper)
// and <version> from version.GetVersionForImageTag().
//
// When g.CUDA is set, the tag is extended with "-cuda-<CUDA>-cudnn-<CUDNN>" so
// that a CUDA-specific cache image is selected; otherwise the plain tag is used.
// The returned error is always nil in this implementation.
func (g Graph) DefaultCacheImporter() (*string, error) {
// The base remote cache should work for all languages.
res := fmt.Sprintf(
"type=registry,ref=docker.io/%s/python-cache:envd-%s",
viper.GetString(flag.FlagDockerOrganization),
version.GetVersionForImageTag())
var res string
if g.CUDA != nil {
// GPU build: pick the cache tagged with the requested CUDA/cuDNN versions.
res = fmt.Sprintf(
"type=registry,ref=docker.io/%s/python-cache:envd-%s-cuda-%s-cudnn-%s",
viper.GetString(flag.FlagDockerOrganization),
version.GetVersionForImageTag(), *g.CUDA, g.CUDNN)
} else {
// CPU build: use the untagged base cache.
res = fmt.Sprintf(
"type=registry,ref=docker.io/%s/python-cache:envd-%s",
viper.GetString(flag.FlagDockerOrganization),
version.GetVersionForImageTag())
}
return &res, nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/lang/ir/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func GPU(numGPUs int) {

// CUDA records the requested CUDA version and cuDNN version on DefaultGraph.
// Setting DefaultGraph.CUDA to a non-nil pointer is what other code in this
// package tests (g.CUDA != nil) to decide that a CUDA build was requested.
func CUDA(version, cudnn string) {
DefaultGraph.CUDA = &version
DefaultGraph.CUDNN = &cudnn
DefaultGraph.CUDNN = cudnn
}

func VSCodePlugins(plugins []string) error {
Expand Down
4 changes: 2 additions & 2 deletions pkg/lang/ir/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func (g Graph) compileCopy(root llb.State) llb.State {
// compileCUDAPackages builds the base state from the image
// "docker.io/<org>:<CUDA>-cudnn<CUDNN>-devel-<OS>" and hands it to
// preparePythonBase. g.CUDA is dereferenced here, so callers must only invoke
// this when g.CUDA is non-nil.
func (g *Graph) compileCUDAPackages(org string) llb.State {
return g.preparePythonBase(llb.Image(fmt.Sprintf(
"docker.io/%s:%s-cudnn%s-devel-%s",
org, *g.CUDA, *g.CUDNN, g.OS)))
org, *g.CUDA, g.CUDNN, g.OS)))
}

func (g Graph) compileSystemPackages(root llb.State) llb.State {
Expand Down Expand Up @@ -194,7 +194,7 @@ func (g *Graph) compileBase() (llb.State, error) {
if g.Image != nil {
logger.WithField("image", *g.Image).Debugf("using custom base image")
return llb.Image(*g.Image), nil
} else if g.CUDA == nil && g.CUDNN == nil {
} else if g.CUDA == nil {
switch g.Language.Name {
case "r":
base = llb.Image(fmt.Sprintf("docker.io/%s/r-base:4.2-envd-%s", org, v))
Expand Down
2 changes: 1 addition & 1 deletion pkg/lang/ir/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ type Graph struct {

Shell string
CUDA *string
CUDNN *string
CUDNN string
NumGPUs int

UbuntuAPTSource *string
Expand Down

0 comments on commit 60708ba

Please sign in to comment.