diff --git a/.github/workflows/envd-lint.yml b/.github/workflows/envd-lint.yml index 8a02974a9..45fe3bc0d 100644 --- a/.github/workflows/envd-lint.yml +++ b/.github/workflows/envd-lint.yml @@ -1,4 +1,4 @@ -name: CI +name: envd lint on: push: diff --git a/base-images/remote-cache/build-and-push-remote-cache.sh b/base-images/remote-cache/build-and-push-remote-cache.sh index 57abc4d1f..66c57f388 100755 --- a/base-images/remote-cache/build-and-push-remote-cache.sh +++ b/base-images/remote-cache/build-and-push-remote-cache.sh @@ -11,6 +11,8 @@ DOCKER_HUB_ORG="${DOCKER_HUB_ORG:-tensorchord}" cd ${ROOT_DIR} envd build --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION} --force -envd build -f build.envd:build_gpu --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION} --force +envd build -f build.envd:build_gpu_11_2 --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION}-cuda-11.2.0-cudnn-8 --force +envd build -f build.envd:build_gpu_11_3 --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION}-cuda-11.3.0-cudnn-8 --force +envd build -f build.envd:build_gpu_11_6 --export-cache type=registry,ref=docker.io/${DOCKER_HUB_ORG}/python-cache:envd-v${ENVD_VERSION}-cuda-11.6.0-cudnn-8 --force cd - > /dev/null diff --git a/base-images/remote-cache/build.envd b/base-images/remote-cache/build.envd index 0b12a9fa5..35a62cccd 100644 --- a/base-images/remote-cache/build.envd +++ b/base-images/remote-cache/build.envd @@ -2,6 +2,19 @@ def build(): base(os="ubuntu20.04", language="python3") -def build_gpu(): +def build_gpu_11_2(): + """tensorflow""" base(os="ubuntu20.04", language="python3") install.cuda(version="11.2.0", cudnn="8") + + +def build_gpu_11_3(): + """pytorch""" + base(os="ubuntu20.04", language="python3") + install.cuda(version="11.3.0", cudnn="8") + + +def build_gpu_11_6(): + """pytorch""" + base(os="ubuntu20.04", language="python3") + install.cuda(version="11.6.0", cudnn="8") diff --git a/pkg/lang/frontend/starlark/interpreter_test.go b/pkg/lang/frontend/starlark/interpreter_test.go index 446c33e37..48359edd4 100644 --- a/pkg/lang/frontend/starlark/interpreter_test.go +++ b/pkg/lang/frontend/starlark/interpreter_test.go @@ -24,6 +24,6 @@ var _ = Describe("Starlark", func() { filename := "testdata/test.envd" hash, err := GetEnvdProgramHash(filename) Expect(err).NotTo(HaveOccurred()) - Expect(hash).To(Equal("6beb7c4921722246")) + Expect(hash).To(Equal("cff1c81818116d42")) }) }) diff --git a/pkg/lang/ir/cache.go b/pkg/lang/ir/cache.go index 5848978dd..3e2271c63 100644 --- a/pkg/lang/ir/cache.go +++ b/pkg/lang/ir/cache.go @@ -21,9 +21,8 @@ import ( ) func (g Graph) CacheID(filename string) string { - gpu := g.CUDA != nil || g.CUDNN != nil var cacheID string - if gpu { + if g.CUDA != nil { cacheID = fmt.Sprintf("%s/%s-gpu", filename, g.EnvironmentName) } else { cacheID = fmt.Sprintf("%s/%s-cpu", filename, g.EnvironmentName) diff --git a/pkg/lang/ir/compile.go b/pkg/lang/ir/compile.go index 09a7f14e8..7b5081a44 100644 --- a/pkg/lang/ir/compile.go +++ b/pkg/lang/ir/compile.go @@ -48,7 +48,7 @@ func NewGraph() *Graph { Version: &langVersion, }, CUDA: nil, - CUDNN: nil, + CUDNN: "8", // default version NumGPUs: -1, PyPIPackages: []string{}, @@ -136,9 +136,7 @@ func (g Graph) Labels() (map[string]string, error) { if g.GPUEnabled() { labels[types.ImageLabelGPU] = "true" labels[types.ImageLabelCUDA] = *g.CUDA - if g.CUDNN != nil { - labels[types.ImageLabelCUDNN] = *g.CUDNN - } + labels[types.ImageLabelCUDNN] = g.CUDNN } labels[types.ImageLabelVendor] = types.ImageVendorEnvd code, err := g.RuntimeGraph.Dump() @@ -185,10 +183,18 @@ func (g Graph) EnvString() []string { func (g Graph) DefaultCacheImporter() (*string, error) { // The base remote cache should work for all languages. - res := fmt.Sprintf( - "type=registry,ref=docker.io/%s/python-cache:envd-%s", - viper.GetString(flag.FlagDockerOrganization), - version.GetVersionForImageTag()) + var res string + if g.CUDA != nil { + res = fmt.Sprintf( + "type=registry,ref=docker.io/%s/python-cache:envd-%s-cuda-%s-cudnn-%s", + viper.GetString(flag.FlagDockerOrganization), + version.GetVersionForImageTag(), *g.CUDA, g.CUDNN) + } else { + res = fmt.Sprintf( + "type=registry,ref=docker.io/%s/python-cache:envd-%s", + viper.GetString(flag.FlagDockerOrganization), + version.GetVersionForImageTag()) + } return &res, nil } diff --git a/pkg/lang/ir/interface.go b/pkg/lang/ir/interface.go index bc4c629f8..dda34f0f5 100644 --- a/pkg/lang/ir/interface.go +++ b/pkg/lang/ir/interface.go @@ -66,7 +66,7 @@ func GPU(numGPUs int) { func CUDA(version, cudnn string) { DefaultGraph.CUDA = &version - DefaultGraph.CUDNN = &cudnn + DefaultGraph.CUDNN = cudnn } func VSCodePlugins(plugins []string) error { diff --git a/pkg/lang/ir/system.go b/pkg/lang/ir/system.go index a5efb3382..9f1d63ad1 100644 --- a/pkg/lang/ir/system.go +++ b/pkg/lang/ir/system.go @@ -94,7 +94,7 @@ func (g Graph) compileCopy(root llb.State) llb.State { func (g *Graph) compileCUDAPackages(org string) llb.State { return g.preparePythonBase(llb.Image(fmt.Sprintf( "docker.io/%s:%s-cudnn%s-devel-%s", - org, *g.CUDA, *g.CUDNN, g.OS))) + org, *g.CUDA, g.CUDNN, g.OS))) } func (g Graph) compileSystemPackages(root llb.State) llb.State { @@ -194,7 +194,7 @@ func (g *Graph) compileBase() (llb.State, error) { if g.Image != nil { logger.WithField("image", *g.Image).Debugf("using custom base image") return llb.Image(*g.Image), nil - } else if g.CUDA == nil && g.CUDNN == nil { + } else if g.CUDA == nil { switch g.Language.Name { case "r": base = llb.Image(fmt.Sprintf("docker.io/%s/r-base:4.2-envd-%s", org, v)) diff --git a/pkg/lang/ir/types.go b/pkg/lang/ir/types.go index bda42c6cc..05d2659ca 100644 --- a/pkg/lang/ir/types.go +++ b/pkg/lang/ir/types.go @@ -34,7 +34,7 @@ type Graph struct { Shell string CUDA *string - CUDNN *string + CUDNN string NumGPUs int UbuntuAPTSource *string