Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ ARGS:
[tags.{index}] List of tags to apply to the deployment
[min-size] Defines the minimum size of the pool
[max-size] Defines the maximum size of the pool
[endpoints.{index}.is-public=true] Will configure your public endpoint if true
[endpoints.{index}.private-network.private-network-id]
[endpoints.{index}.disable-auth=false] Disable the authentication on the endpoint.
[quantization.bits] The number of bits each model parameter should be quantized to. The quantization method is chosen based on this value.
[region=fr-par] Region to target. If none is passed will use default region from the config (fr-par)

FLAGS:
-h, --help help for create
-w, --wait wait until the deployment is ready

GLOBAL FLAGS:
-c, --config string The path to the config file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ ARGS:

FLAGS:
-h, --help help for delete
-w, --wait wait until the deployment is ready

GLOBAL FLAGS:
-c, --config string The path to the config file
Expand Down
1 change: 1 addition & 0 deletions docs/commands/inference.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ scw inference deployment create [arg=value ...]
| tags.{index} | | List of tags to apply to the deployment |
| min-size | | Defines the minimum size of the pool |
| max-size | | Defines the maximum size of the pool |
| endpoints.{index}.is-public | Default: `true` | Will configure your public endpoint if true |
| endpoints.{index}.private-network.private-network-id | | |
| endpoints.{index}.disable-auth | Default: `false` | Disable the authentication on the endpoint. |
| quantization.bits | | The number of bits each model parameter should be quantized to. The quantization method is chosen based on this value. |
Expand Down
16 changes: 15 additions & 1 deletion internal/namespaces/inference/v1/custom.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
package inference

import "github.com/scaleway/scaleway-cli/v2/core"
import (
"github.com/scaleway/scaleway-cli/v2/core"
"github.com/scaleway/scaleway-cli/v2/core/human"
"github.com/scaleway/scaleway-sdk-go/api/inference/v1"
)

// GetCommands returns the generated inference commands with this package's
// customizations applied: the root "inference" command is placed in the "ai"
// group, human marshalers are registered for deployment statuses and
// deployments, and the deployment create/delete commands are overridden with
// their custom builders.
func GetCommands() *core.Commands {
	commands := GetGeneratedCommands()

	rootCmd := commands.MustFind("inference")
	rootCmd.Groups = []string{"ai"}

	// Deployment statuses are rendered through the color specs declared in
	// deploymentStateMarshalSpecs.
	statusMarshaler := human.EnumMarshalFunc(deploymentStateMarshalSpecs)
	human.RegisterMarshalerFunc(inference.DeploymentStatus(""), statusMarshaler)

	// Deployments get a dedicated marshaler for their human-readable layout.
	human.RegisterMarshalerFunc(inference.Deployment{}, DeploymentMarshalerFunc)

	createCmd := commands.MustFind("inference", "deployment", "create")
	createCmd.Override(deploymentCreateBuilder)

	deleteCmd := commands.MustFind("inference", "deployment", "delete")
	deleteCmd.Override(deploymentDeleteBuilder)

	return commands
}
153 changes: 153 additions & 0 deletions internal/namespaces/inference/v1/custom_deployment.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package inference

import (
"context"
"errors"
"fmt"
"net/http"
"reflect"
"time"

"github.com/fatih/color"
"github.com/scaleway/scaleway-cli/v2/core"
"github.com/scaleway/scaleway-cli/v2/core/human"
"github.com/scaleway/scaleway-sdk-go/api/inference/v1"
"github.com/scaleway/scaleway-sdk-go/scw"
)

// deploymentActionTimeout is the timeout handed to the SDK when waiting for a
// deployment action to finish.
const deploymentActionTimeout = 60 * time.Minute

// Action selectors accepted by waitForDeploymentFunc. Values start at 1.
const (
	deploymentActionCreate = iota + 1
	deploymentActionDelete
)

// deploymentStateMarshalSpecs maps each deployment status to the color
// attribute used when that status is rendered for human output: blue for
// creating, deploying and deleting, red for error and locked, green for ready.
var deploymentStateMarshalSpecs = human.EnumMarshalSpecs{
inference.DeploymentStatusCreating: &human.EnumMarshalSpec{Attribute: color.FgBlue},
inference.DeploymentStatusDeploying: &human.EnumMarshalSpec{Attribute: color.FgBlue},
inference.DeploymentStatusDeleting: &human.EnumMarshalSpec{Attribute: color.FgBlue},
inference.DeploymentStatusError: &human.EnumMarshalSpec{Attribute: color.FgRed},
inference.DeploymentStatusReady: &human.EnumMarshalSpec{Attribute: color.FgGreen},
inference.DeploymentStatusLocked: &human.EnumMarshalSpec{Attribute: color.FgRed},
}

// DeploymentMarshalerFunc renders an inference.Deployment for human output,
// with the Endpoints field displayed in its own "Endpoints" section.
//
// i must hold an inference.Deployment value (not a pointer); any other dynamic
// type makes the type assertion panic. opt.Sections is overwritten.
func DeploymentMarshalerFunc(i any, opt *human.MarshalOpt) (string, error) {
	// NOTE(review): the local defined type presumably keeps human.Marshal from
	// dispatching back to this marshaler — confirm against core/human.
	type tmp inference.Deployment

	view := tmp(i.(inference.Deployment))

	endpointsSection := &human.MarshalSection{
		FieldName: "Endpoints",
		Title:     "Endpoints",
	}
	opt.Sections = []*human.MarshalSection{endpointsSection}

	rendered, err := human.Marshal(view, opt)
	if err != nil {
		return "", err
	}

	return rendered, nil
}

// deploymentDeleteBuilder customizes the generated "deployment delete" command
// by setting its WaitFunc to the wait function built for the delete action.
// The command is modified in place and returned.
func deploymentDeleteBuilder(c *core.Command) *core.Command {
	waitForDeletion := waitForDeploymentFunc(deploymentActionDelete)
	c.WaitFunc = waitForDeletion

	return c
}

// deploymentCreateBuilder customizes the generated "deployment create" command.
//
// It adds an "endpoints.{index}.is-public" argument (default "true") ahead of
// the private-network-id argument, swaps the command's argument type for a
// wrapper carrying that extra flag, sets the WaitFunc for the create action,
// and installs an interceptor that translates the wrapper back into the SDK
// request:
//   - when no endpoint is given at all, a single public endpoint with
//     authentication enabled is requested;
//   - otherwise each given endpoint yields a public endpoint spec when
//     is-public is true, and a private endpoint spec when a private network is
//     set (one argument entry can therefore yield two specs, or none).
//
// The command is modified in place and returned.
func deploymentCreateBuilder(c *core.Command) *core.Command {
	// Endpoint spec extended with the CLI-only is-public flag.
	type llmInferenceEndpointSpecCustom struct {
		*inference.EndpointSpec
		IsPublic bool `json:"is-public"`
	}

	// Create request whose Endpoints field shadows the SDK one with the
	// extended endpoint spec above.
	type llmInferenceCreateDeploymentRequestCustom struct {
		*inference.CreateDeploymentRequest
		Endpoints []*llmInferenceEndpointSpecCustom `json:"endpoints"`
	}

	isPublicArg := &core.ArgSpec{
		Name:     "endpoints.{index}.is-public",
		Short:    "Will configure your public endpoint if true",
		Required: false,
		Default:  core.DefaultValueSetter("true"),
	}
	c.ArgSpecs.AddBefore("endpoints.{index}.private-network.private-network-id", isPublicArg)

	c.ArgsType = reflect.TypeOf(llmInferenceCreateDeploymentRequestCustom{})

	c.WaitFunc = waitForDeploymentFunc(deploymentActionCreate)

	c.Interceptor = func(ctx context.Context, argsI any, runner core.CommandRunner) (any, error) {
		customReq := argsI.(*llmInferenceCreateDeploymentRequestCustom)
		sdkReq := customReq.CreateDeploymentRequest

		// No endpoint argument at all: request one public endpoint, auth enabled.
		if customReq.Endpoints == nil {
			sdkReq.Endpoints = append(sdkReq.Endpoints, &inference.EndpointSpec{
				PublicNetwork: &inference.EndpointPublicNetworkDetails{},
			})

			return runner(ctx, sdkReq)
		}

		for _, spec := range customReq.Endpoints {
			if spec.IsPublic {
				publicSpec := &inference.EndpointSpec{
					PublicNetwork: &inference.EndpointPublicNetworkDetails{},
					DisableAuth:   spec.DisableAuth,
				}
				sdkReq.Endpoints = append(sdkReq.Endpoints, publicSpec)
			}

			if spec.PrivateNetwork == nil {
				continue
			}

			privateSpec := &inference.EndpointSpec{
				PrivateNetwork: &inference.EndpointPrivateNetworkDetails{
					PrivateNetworkID: spec.PrivateNetwork.PrivateNetworkID,
				},
				DisableAuth: spec.DisableAuth,
			}
			sdkReq.Endpoints = append(sdkReq.Endpoints, privateSpec)
		}

		return runner(ctx, sdkReq)
	}

	return c
}

// waitForDeploymentFunc builds the core.WaitFunc installed on the deployment
// create and delete commands.
//
// The returned function expects respI to be the *inference.Deployment returned
// by the command and waits on it through the SDK's WaitForDeployment, using
// deploymentActionTimeout and core.DefaultRetryInterval.
//
// The outcome depends on action:
//   - deploymentActionCreate: the awaited deployment and the wait error are
//     returned as-is.
//   - deploymentActionDelete: a not-found error (HTTP 404 response error or
//     scw.ResourceNotFoundError) means the deployment is gone, so a success
//     message is returned; any other outcome returns (nil, err).
//   - any other value: (nil, err).
//
// An unexpected or nil respI yields an error instead of a panic.
func waitForDeploymentFunc(action int) core.WaitFunc {
	return func(ctx context.Context, _, respI any) (any, error) {
		target, ok := respI.(*inference.Deployment)
		if !ok || target == nil {
			return nil, fmt.Errorf(
				"unexpected response type %T, expected a non-nil *inference.Deployment",
				respI,
			)
		}

		deployment, err := inference.NewAPI(core.ExtractClient(ctx)).
			WaitForDeployment(&inference.WaitForDeploymentRequest{
				DeploymentID:  target.ID,
				Region:        target.Region,
				Timeout:       scw.TimeDurationPtr(deploymentActionTimeout),
				RetryInterval: core.DefaultRetryInterval,
			})

		switch action {
		case deploymentActionCreate:
			return deployment, err
		case deploymentActionDelete:
			if err != nil {
				// if we get a 404 here, it means the resource was successfully deleted
				notFoundError := &scw.ResourceNotFoundError{}
				responseError := &scw.ResponseError{}
				isHTTPNotFound := errors.As(err, &responseError) &&
					responseError.StatusCode == http.StatusNotFound
				if isHTTPNotFound || errors.As(err, &notFoundError) {
					return fmt.Sprintf(
						"Deployment %s successfully deleted.",
						target.ID,
					), nil
				}
			}
		}

		return nil, err
	}
}
86 changes: 86 additions & 0 deletions internal/namespaces/inference/v1/custom_deployment_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package inference_test

import (
"fmt"
"testing"

"github.com/scaleway/scaleway-cli/v2/core"
inference "github.com/scaleway/scaleway-cli/v2/internal/namespaces/inference/v1"
"github.com/scaleway/scaleway-cli/v2/internal/namespaces/vpc/v2"
)

// Shared fixture values for the inference deployment tests.
const (
// ModelID is the value substituted for model-id in the "deployment create"
// commands built by these tests.
ModelID = "739d51ae-4f1e-4193-a4bf-f7380c090d46"
// NodeTypeName is the value substituted for node-type-name in the
// "deployment create" commands that do not hard-code a node type.
NodeTypeName = "H100-2"
)

// Test_DeploymentCreate runs "scw inference deployment create" in three
// scenarios, each checked against its golden file: a plain create, a create
// with --wait, and a create whose only endpoint has is-public=false (named as
// a case that must fail). The first two delete the created deployment
// afterwards.
func Test_DeploymentCreate(t *testing.T) {
	commands := inference.GetCommands()

	// Cleanup command shared by the subtests that create a deployment.
	const cleanupCmd = "scw inference deployment delete {{ .CmdResult.ID }}"

	simpleCmd := fmt.Sprintf(
		"scw inference deployment create node-type-name=%s model-id=%s",
		NodeTypeName,
		ModelID,
	)
	t.Run("Simple deployment", core.Test(&core.TestConfig{
		Commands:  commands,
		Cmd:       simpleCmd,
		Check:     core.TestCheckGolden(),
		AfterFunc: core.ExecAfterCmd(cleanupCmd),
	}))

	waitCmd := fmt.Sprintf(
		"scw inference deployment create node-type-name=%s model-id=%s accept-eula=true --wait",
		NodeTypeName,
		ModelID,
	)
	t.Run("Deployment with wait flag", core.Test(&core.TestConfig{
		Commands:  commands,
		Cmd:       waitCmd,
		Check:     core.TestCheckGolden(),
		AfterFunc: core.ExecAfterCmd(cleanupCmd),
	}))

	// No AfterFunc here: this subtest registers no cleanup.
	noEndpointCmd := fmt.Sprintf(
		"scw inference deployment create node-type-name=%s model-id=%s endpoints.0.is-public=false",
		NodeTypeName,
		ModelID,
	)
	t.Run("Deployment with no endpoints must fail", core.Test(&core.TestConfig{
		Commands: commands,
		Cmd:      noEndpointCmd,
		Check:    core.TestCheckGolden(),
	}))
}

// Test_CreateDeploymentPrivateEndpoint creates a private network, then creates
// a deployment whose single endpoint is attached to that network, and checks
// the output against the golden file. Afterwards it deletes the deployment
// (with --wait) and then the private network.
func Test_CreateDeploymentPrivateEndpoint(t *testing.T) {
	commands := inference.GetCommands()
	// The vpc commands are needed by the private-network setup and teardown.
	commands.Merge(vpc.GetCommands())

	createCmd := fmt.Sprintf(
		"scw inference deployment create model-id=%s node-type-name=H100-SXM-2 accept-eula=true endpoints.0.private-network.private-network-id={{ .PN.ID }}",
		ModelID,
	)

	config := &core.TestConfig{
		Commands:   commands,
		BeforeFunc: CreatePN(),
		Cmd:        createCmd,
		Check: core.TestCheckCombine(
			core.TestCheckGolden(),
		),
		AfterFunc: core.AfterFuncCombine(
			core.ExecAfterCmd("scw inference deployment delete {{ .CmdResult.ID }} --wait"),
			DeletePrivateNetwork(),
		),
	}

	t.Run("Create Deployment Private Endpoint", core.Test(config))
}

// Test_DeploymentDelete creates a deployment beforehand (stored under the
// DEPLOYMENT key), deletes it with --wait, and checks the output against the
// golden file.
func Test_DeploymentDelete(t *testing.T) {
	commands := inference.GetCommands()

	config := &core.TestConfig{
		Commands:   commands,
		BeforeFunc: CreateDeploymentPublicEndpoint(),
		Cmd:        "scw inference deployment delete {{ .DEPLOYMENT.ID }} --wait",
		Check:      core.TestCheckGolden(),
	}

	t.Run("Delete deployment with wait flag", core.Test(config))
}
29 changes: 29 additions & 0 deletions internal/namespaces/inference/v1/helper_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package inference_test

import (
"fmt"

"github.com/scaleway/scaleway-cli/v2/core"
)

// CreateDeploymentPublicEndpoint returns a BeforeFunc that runs
// "scw inference deployment create" with the shared node type and model, the
// -w flag, and no endpoint arguments, storing the result under the
// "DEPLOYMENT" key.
func CreateDeploymentPublicEndpoint() core.BeforeFunc {
	createCmd := fmt.Sprintf(
		"scw inference deployment create node-type-name=%s model-id=%s -w",
		NodeTypeName,
		ModelID,
	)

	return core.ExecStoreBeforeCmd("DEPLOYMENT", createCmd)
}

// CreatePN returns a BeforeFunc that runs "scw vpc private-network create"
// and stores the result under the "PN" key.
func CreatePN() core.BeforeFunc {
	const createCmd = "scw vpc private-network create"

	return core.ExecStoreBeforeCmd("PN", createCmd)
}

// DeletePrivateNetwork returns an AfterFunc that deletes the private network
// stored under the "PN" key.
func DeletePrivateNetwork() core.AfterFunc {
	const deleteCmd = "scw vpc private-network delete {{ .PN.ID }}"

	return core.ExecAfterCmd(deleteCmd)
}
Loading
Loading