scaleway · remyleone · Nov 21, 2025 · Nov 20, 2025 · Nov 20, 2025 · Nov 20, 2025
@@ -14,13 +14,15 @@ ARGS:
   [tags.{index}]                                           List of tags to apply to the deployment
   [min-size]                                               Defines the minimum size of the pool
   [max-size]                                               Defines the maximum size of the pool
+  [endpoints.{index}.is-public=true]                       Will configure your public endpoint if true
   [endpoints.{index}.private-network.private-network-id]   
   [endpoints.{index}.disable-auth=false]                   Disable the authentication on the endpoint.
   [quantization.bits]                                      The number of bits each model parameter should be quantized to. The quantization method is chosen based on this value.
   [region=fr-par]                                          Region to target. If none is passed will use default region from the config (fr-par)
 
 FLAGS:
   -h, --help   help for create
+  -w, --wait   wait until the deployment is ready
 
 GLOBAL FLAGS:
   -c, --config string    The path to the config file

@@ -11,6 +11,7 @@ ARGS:
 
 FLAGS:
   -h, --help   help for delete
+  -w, --wait   wait until the deployment is ready
 
 GLOBAL FLAGS:
   -c, --config string    The path to the config file

@@ -50,6 +50,7 @@ scw inference deployment create [arg=value ...]
 | tags.{index} |  | List of tags to apply to the deployment |
 | min-size |  | Defines the minimum size of the pool |
 | max-size |  | Defines the maximum size of the pool |
+| endpoints.{index}.is-public | Default: `true` | Will configure your public endpoint if true |
 | endpoints.{index}.private-network.private-network-id |  |  |
 | endpoints.{index}.disable-auth | Default: `false` | Disable the authentication on the endpoint. |
 | quantization.bits |  | The number of bits each model parameter should be quantized to. The quantization method is chosen based on this value. |

@@ -1,11 +1,25 @@
 package inference
 
-import "github.com/scaleway/scaleway-cli/v2/core"
+import (
+	"github.com/scaleway/scaleway-cli/v2/core"
+	"github.com/scaleway/scaleway-cli/v2/core/human"
+	"github.com/scaleway/scaleway-sdk-go/api/inference/v1"
+)
 
 func GetCommands() *core.Commands {
 	cmds := GetGeneratedCommands()
 
 	cmds.MustFind("inference").Groups = []string{"ai"}
 
+	human.RegisterMarshalerFunc(
+		inference.DeploymentStatus(""),
+		human.EnumMarshalFunc(deploymentStateMarshalSpecs),
+	)
+
+	human.RegisterMarshalerFunc(inference.Deployment{}, DeploymentMarshalerFunc)
+
+	cmds.MustFind("inference", "deployment", "create").Override(deploymentCreateBuilder)
+	cmds.MustFind("inference", "deployment", "delete").Override(deploymentDeleteBuilder)
+
 	return cmds
 }
@@ -0,0 +1,153 @@
+package inference
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net/http"
+	"reflect"
+	"time"
+
+	"github.com/fatih/color"
+	"github.com/scaleway/scaleway-cli/v2/core"
+	"github.com/scaleway/scaleway-cli/v2/core/human"
+	"github.com/scaleway/scaleway-sdk-go/api/inference/v1"
+	"github.com/scaleway/scaleway-sdk-go/scw"
+)
+
+// deploymentActionTimeout is the timeout handed to WaitForDeployment when a
+// command waits on a deployment.
+const deploymentActionTimeout = 60 * time.Minute
+
+// Actions accepted by waitForDeploymentFunc; they select how the result of
+// the wait is interpreted.
+const (
+	deploymentActionCreate = iota + 1
+	deploymentActionDelete
+)
+
+// deploymentStateMarshalSpecs maps each deployment status to the colour
+// attribute used when the status is rendered for humans.
+var deploymentStateMarshalSpecs = human.EnumMarshalSpecs{
+	// Blue: creating, deploying, deleting.
+	inference.DeploymentStatusCreating:  &human.EnumMarshalSpec{Attribute: color.FgBlue},
+	inference.DeploymentStatusDeploying: &human.EnumMarshalSpec{Attribute: color.FgBlue},
+	inference.DeploymentStatusDeleting:  &human.EnumMarshalSpec{Attribute: color.FgBlue},
+
+	// Green: ready.
+	inference.DeploymentStatusReady: &human.EnumMarshalSpec{Attribute: color.FgGreen},
+
+	// Red: error, locked.
+	inference.DeploymentStatusError:  &human.EnumMarshalSpec{Attribute: color.FgRed},
+	inference.DeploymentStatusLocked: &human.EnumMarshalSpec{Attribute: color.FgRed},
+}
+
+// DeploymentMarshalerFunc renders an inference.Deployment for human output,
+// placing its Endpoints field in a dedicated "Endpoints" section.
+//
+// i must hold an inference.Deployment value (the type assertion panics
+// otherwise). opt is modified: its Sections are replaced.
+func DeploymentMarshalerFunc(i any, opt *human.MarshalOpt) (string, error) {
+	// The value is converted to a local defined type before marshaling;
+	// presumably this keeps human.Marshal from dispatching back to the
+	// marshaler registered for inference.Deployment (to be confirmed against
+	// the human package).
+	type deploymentView inference.Deployment
+	view := deploymentView(i.(inference.Deployment))
+
+	opt.Sections = []*human.MarshalSection{
+		{FieldName: "Endpoints", Title: "Endpoints"},
+	}
+
+	rendered, err := human.Marshal(view, opt)
+	if err != nil {
+		return "", err
+	}
+
+	return rendered, nil
+}
+
+// deploymentDeleteBuilder sets the delete command's WaitFunc to the
+// deployment wait function configured for the delete action, and returns the
+// same command.
+func deploymentDeleteBuilder(c *core.Command) *core.Command {
+	waitForDeletion := waitForDeploymentFunc(deploymentActionDelete)
+	c.WaitFunc = waitForDeletion
+
+	return c
+}
+
+// deploymentCreateBuilder customises the generated "deployment create"
+// command:
+//   - adds the endpoints.{index}.is-public argument (default "true");
+//   - swaps the argument type for one carrying that extra field;
+//   - sets the WaitFunc for the create action;
+//   - installs an interceptor that turns the custom endpoint arguments into
+//     SDK endpoint specs before running the command.
+func deploymentCreateBuilder(c *core.Command) *core.Command {
+	// Endpoint arguments: the SDK endpoint spec plus the is-public boolean.
+	type llmInferenceEndpointSpecCustom struct {
+		*inference.EndpointSpec
+		IsPublic bool `json:"is-public"`
+	}
+
+	// Create arguments: the SDK request with an "endpoints" list of the
+	// custom endpoint type declared above.
+	type llmInferenceCreateDeploymentRequestCustom struct {
+		*inference.CreateDeploymentRequest
+		Endpoints []*llmInferenceEndpointSpecCustom `json:"endpoints"`
+	}
+
+	isPublicSpec := &core.ArgSpec{
+		Name:     "endpoints.{index}.is-public",
+		Short:    "Will configure your public endpoint if true",
+		Required: false,
+		Default:  core.DefaultValueSetter("true"),
+	}
+	c.ArgSpecs.AddBefore("endpoints.{index}.private-network.private-network-id", isPublicSpec)
+
+	c.ArgsType = reflect.TypeOf(llmInferenceCreateDeploymentRequestCustom{})
+
+	c.WaitFunc = waitForDeploymentFunc(deploymentActionCreate)
+
+	c.Interceptor = func(ctx context.Context, argsI any, runner core.CommandRunner) (any, error) {
+		args := argsI.(*llmInferenceCreateDeploymentRequestCustom)
+		req := args.CreateDeploymentRequest
+
+		// No endpoints argument at all: request a single public endpoint with
+		// authentication left enabled.
+		if args.Endpoints == nil {
+			defaultEndpoint := &inference.EndpointSpec{
+				PublicNetwork: &inference.EndpointPublicNetworkDetails{},
+			}
+			req.Endpoints = append(req.Endpoints, defaultEndpoint)
+
+			return runner(ctx, req)
+		}
+
+		// Each custom endpoint may yield a public spec, a private spec, both
+		// or neither; disable-auth is copied onto every spec it yields.
+		for _, ep := range args.Endpoints {
+			if ep.IsPublic {
+				publicSpec := &inference.EndpointSpec{
+					PublicNetwork: &inference.EndpointPublicNetworkDetails{},
+					DisableAuth:   ep.DisableAuth,
+				}
+				req.Endpoints = append(req.Endpoints, publicSpec)
+			}
+
+			if ep.PrivateNetwork != nil {
+				privateSpec := &inference.EndpointSpec{
+					PrivateNetwork: &inference.EndpointPrivateNetworkDetails{
+						PrivateNetworkID: ep.PrivateNetwork.PrivateNetworkID,
+					},
+					DisableAuth: ep.DisableAuth,
+				}
+				req.Endpoints = append(req.Endpoints, privateSpec)
+			}
+		}
+
+		return runner(ctx, req)
+	}
+
+	return c
+}
+
+// waitForDeploymentFunc returns the core.WaitFunc used when waiting on a
+// deployment. The returned function calls WaitForDeployment for the
+// deployment found in the command response, then interprets the outcome
+// according to action:
+//
+//   - deploymentActionCreate: the waited deployment and any error are
+//     returned as is.
+//   - deploymentActionDelete: a not-found error (HTTP 404 response or
+//     scw.ResourceNotFoundError) means the deployment is gone and is reported
+//     as a success message; any other outcome returns nil with the error.
+//
+// For any other action the result is nil together with the wait error.
+// An error is returned (instead of panicking) when the command response is
+// not a non-nil *inference.Deployment.
+func waitForDeploymentFunc(action int) core.WaitFunc {
+	return func(ctx context.Context, _, respI any) (any, error) {
+		target, ok := respI.(*inference.Deployment)
+		if !ok || target == nil {
+			return nil, fmt.Errorf(
+				"cannot wait for deployment: unexpected response type %T",
+				respI,
+			)
+		}
+
+		deployment, err := inference.NewAPI(core.ExtractClient(ctx)).
+			WaitForDeployment(&inference.WaitForDeploymentRequest{
+				DeploymentID:  target.ID,
+				Region:        target.Region,
+				Timeout:       scw.TimeDurationPtr(deploymentActionTimeout),
+				RetryInterval: core.DefaultRetryInterval,
+			})
+
+		switch action {
+		case deploymentActionCreate:
+			return deployment, err
+		case deploymentActionDelete:
+			if err != nil {
+				// if we get a 404 here, it means the resource was successfully deleted
+				notFoundError := &scw.ResourceNotFoundError{}
+				responseError := &scw.ResponseError{}
+				isHTTPNotFound := errors.As(err, &responseError) &&
+					responseError.StatusCode == http.StatusNotFound
+				if isHTTPNotFound || errors.As(err, &notFoundError) {
+					return fmt.Sprintf(
+						"Deployment %s successfully deleted.",
+						target.ID,
+					), nil
+				}
+			}
+		}
+
+		return nil, err
+	}
+}
@@ -0,0 +1,86 @@
+package inference_test
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/scaleway/scaleway-cli/v2/core"
+	inference "github.com/scaleway/scaleway-cli/v2/internal/namespaces/inference/v1"
+	"github.com/scaleway/scaleway-cli/v2/internal/namespaces/vpc/v2"
+)
+
+// Fixtures shared by the inference deployment tests.
+const (
+	// NodeTypeName is the node type passed as node-type-name.
+	NodeTypeName = "H100-2"
+	// ModelID is the model passed as model-id.
+	ModelID = "739d51ae-4f1e-4193-a4bf-f7380c090d46"
+)
+
+// Test_DeploymentCreate runs "deployment create" in three variants (plain,
+// with --wait, and with the only endpoint marked non-public) and compares
+// each result against its golden file. The first two variants delete the
+// created deployment afterwards.
+func Test_DeploymentCreate(t *testing.T) {
+	cmds := inference.GetCommands()
+
+	simpleCmd := fmt.Sprintf(
+		"scw inference deployment create node-type-name=%s model-id=%s",
+		NodeTypeName,
+		ModelID,
+	)
+	t.Run("Simple deployment", core.Test(&core.TestConfig{
+		Commands:  cmds,
+		Cmd:       simpleCmd,
+		Check:     core.TestCheckGolden(),
+		AfterFunc: core.ExecAfterCmd("scw inference deployment delete {{ .CmdResult.ID }}"),
+	}))
+
+	waitCmd := fmt.Sprintf(
+		"scw inference deployment create node-type-name=%s model-id=%s accept-eula=true --wait",
+		NodeTypeName,
+		ModelID,
+	)
+	t.Run("Deployment with wait flag", core.Test(&core.TestConfig{
+		Commands:  cmds,
+		Cmd:       waitCmd,
+		Check:     core.TestCheckGolden(),
+		AfterFunc: core.ExecAfterCmd("scw inference deployment delete {{ .CmdResult.ID }}"),
+	}))
+
+	noEndpointCmd := fmt.Sprintf(
+		"scw inference deployment create node-type-name=%s model-id=%s endpoints.0.is-public=false",
+		NodeTypeName,
+		ModelID,
+	)
+	t.Run("Deployment with no endpoints must fail", core.Test(&core.TestConfig{
+		Commands: cmds,
+		Cmd:      noEndpointCmd,
+		Check:    core.TestCheckGolden(),
+	}))
+}
+
+// Test_CreateDeploymentPrivateEndpoint creates a private network, creates a
+// deployment with an endpoint attached to it, checks the result against the
+// golden file, then deletes the deployment (with --wait) and the private
+// network.
+func Test_CreateDeploymentPrivateEndpoint(t *testing.T) {
+	cmds := inference.GetCommands()
+	cmds.Merge(vpc.GetCommands())
+
+	createCmd := fmt.Sprintf(
+		"scw inference deployment create model-id=%s node-type-name=H100-SXM-2 accept-eula=true endpoints.0.private-network.private-network-id={{ .PN.ID }}",
+		ModelID,
+	)
+
+	cfg := &core.TestConfig{
+		Commands:   cmds,
+		BeforeFunc: CreatePN(),
+		Cmd:        createCmd,
+		Check: core.TestCheckCombine(
+			core.TestCheckGolden(),
+		),
+		AfterFunc: core.AfterFuncCombine(
+			core.ExecAfterCmd("scw inference deployment delete {{ .CmdResult.ID }} --wait"),
+			DeletePrivateNetwork(),
+		),
+	}
+
+	t.Run("Create Deployment Private Endpoint", core.Test(cfg))
+}
+
+// Test_DeploymentDelete creates a deployment in the before hook, deletes it
+// with --wait and compares the output against the golden file.
+func Test_DeploymentDelete(t *testing.T) {
+	cmds := inference.GetCommands()
+
+	cfg := &core.TestConfig{
+		Commands:   cmds,
+		BeforeFunc: CreateDeploymentPublicEndpoint(),
+		Cmd:        "scw inference deployment delete {{ .DEPLOYMENT.ID }} --wait",
+		Check:      core.TestCheckGolden(),
+	}
+
+	t.Run("Delete deployment with wait flag", core.Test(cfg))
+}
@@ -0,0 +1,29 @@
+package inference_test
+
+import (
+	"fmt"
+
+	"github.com/scaleway/scaleway-cli/v2/core"
+)
+
+// CreateDeploymentPublicEndpoint returns a before hook that runs
+// "deployment create" with -w for the shared node type and model, and stores
+// the result under the "DEPLOYMENT" key.
+func CreateDeploymentPublicEndpoint() core.BeforeFunc {
+	createCmd := fmt.Sprintf(
+		"scw inference deployment create node-type-name=%s model-id=%s -w",
+		NodeTypeName,
+		ModelID,
+	)
+
+	return core.ExecStoreBeforeCmd("DEPLOYMENT", createCmd)
+}
+
+func CreatePN() core.BeforeFunc {
+	return core.ExecStoreBeforeCmd(
+		"PN",
+		"scw vpc private-network create",
+	)
+}
+
+func DeletePrivateNetwork() core.AfterFunc {
+	return core.ExecAfterCmd("scw vpc private-network delete {{ .PN.ID }}")
+}