feat: add production mode to k8s #1963

Merged Dec 15, 2023 (40 commits)

Commits
614013d - cascading storage class changes (h4ck3rk3y, Dec 12, 2023)
fdfe9f7 - added some docs (h4ck3rk3y, Dec 12, 2023)
3c840fa - apic has storage class stuff now (h4ck3rk3y, Dec 12, 2023)
2ee34a4 - remove some dupes (h4ck3rk3y, Dec 12, 2023)
bd2117c - remove apic launcher change (h4ck3rk3y, Dec 12, 2023)
592755c - get rid of some more functions (h4ck3rk3y, Dec 12, 2023)
2ea0a01 - fix missing struct value (h4ck3rk3y, Dec 12, 2023)
e9e31d1 - fix changelog image building pr (h4ck3rk3y, Dec 12, 2023)
8980073 - Merge branch 'main' into gyani/storage-class (h4ck3rk3y, Dec 13, 2023)
5e8e7a3 - feat: allow specifying size of persistent directories (#1939) (h4ck3rk3y, Dec 13, 2023)
9d91c26 - claim -> volume (h4ck3rk3y, Dec 13, 2023)
03e436a - fix ci (h4ck3rk3y, Dec 13, 2023)
a59a366 - dev stuff, do revert (h4ck3rk3y, Dec 13, 2023)
3e912df - fix constant volume szie (h4ck3rk3y, Dec 13, 2023)
c260fa7 - added back funciton (h4ck3rk3y, Dec 14, 2023)
f45cde3 - --amend (h4ck3rk3y, Dec 14, 2023)
438c5a1 - Revert "dev stuff, do revert" (h4ck3rk3y, Dec 14, 2023)
acb5a1f - Merge branch 'main' into gyani/persistent-volumes (h4ck3rk3y, Dec 14, 2023)
55db506 - Revert "Revert "dev stuff, do revert"" (h4ck3rk3y, Dec 14, 2023)
ae64ac5 - Revert "Revert "Revert "dev stuff, do revert""" (h4ck3rk3y, Dec 14, 2023)
5709553 - better k3s instructions (h4ck3rk3y, Dec 14, 2023)
62e4d63 - added a note on deletion (h4ck3rk3y, Dec 14, 2023)
722f7ca - remove unused code (h4ck3rk3y, Dec 14, 2023)
3bcb536 - remove unused variable (h4ck3rk3y, Dec 14, 2023)
a8b338e - added a restart policy (h4ck3rk3y, Dec 14, 2023)
d435280 - added restart policy to a few more places (h4ck3rk3y, Dec 14, 2023)
0f7e441 - remove unused label (h4ck3rk3y, Dec 14, 2023)
a6abe07 - this should build (h4ck3rk3y, Dec 14, 2023)
8447820 - Revert "Revert "Revert "Revert "dev stuff, do revert"""" (h4ck3rk3y, Dec 14, 2023)
b48d44c - this should build (h4ck3rk3y, Dec 14, 2023)
8faccc0 - fix docs (h4ck3rk3y, Dec 14, 2023)
7cbc65a - Revert "Revert "Revert "Revert "Revert "dev stuff, do revert""""" (h4ck3rk3y, Dec 15, 2023)
c75d49f - fix linter (h4ck3rk3y, Dec 15, 2023)
1d918c0 - production mode governs whether APIC has persistent data or not (h4ck3rk3y, Dec 15, 2023)
54001cf - work based on fragment + enclave-id (h4ck3rk3y, Dec 15, 2023)
4692e9a - Revert "production mode governs whether APIC has persistent data or not" (h4ck3rk3y, Dec 15, 2023)
7f4ceb4 - make restart policy on failure always as volume is always persistent (h4ck3rk3y, Dec 15, 2023)
80a30de - Merge branch 'main' into gyani/production (h4ck3rk3y, Dec 15, 2023)
981554c - Merge branch 'main' into gyani/production (h4ck3rk3y, Dec 15, 2023)
08e74bb - fix config (h4ck3rk3y, Dec 15, 2023)
Changes from all commits:

@@ -14,7 +14,6 @@ const (

containerTypeLabelKeyStr = labelNamespaceStr + "container-type"
volumeTypeLabelKeyStr = labelNamespaceStr + "volume-type"
enclaveTypeLabelKeyStr = labelNamespaceStr + "enclave-type"

// A label to identify a Kurtosis resource (e.g. network, container, etc.) by its id
idLabelKeyStr = labelNamespaceStr + "id"
@@ -434,6 +434,8 @@ func createEnginePod(
engineContainers,
engineVolumes,
serviceAccountName,
// The engine doesn't auto-restart
apiv1.RestartPolicyNever,
)
if err != nil {
return nil, nil, stacktrace.Propagate(err, "An error occurred while creating the pod with name '%s' in namespace '%s' with image '%s'", enginePodName, namespace, containerImageAndTag)
@@ -3,6 +3,7 @@ package kubernetes_kurtosis_backend
import (
"context"
"io"
apiv1 "k8s.io/api/core/v1"

"github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_kurtosis_backend/engine_functions"
"github.com/kurtosis-tech/kurtosis/container-engine-lib/lib/backend_impls/kubernetes/kubernetes_kurtosis_backend/shared_helpers"
@@ -26,6 +27,7 @@ import (

const (
isResourceInformationComplete = false
noProductionMode = false
)

type KubernetesKurtosisBackend struct {
@@ -39,6 +41,9 @@ type KubernetesKurtosisBackend struct {

// Will only be filled out for the API container
apiContainerModeArgs *shared_helpers.ApiContainerModeArgs

// Whether user services should be restarted on failure
productionMode bool
}

func (backend *KubernetesKurtosisBackend) DumpKurtosis(ctx context.Context, outputDirpath string) error {
@@ -52,6 +57,7 @@ func newKubernetesKurtosisBackend(
cliModeArgs *shared_helpers.CliModeArgs,
engineServerModeArgs *shared_helpers.EngineServerModeArgs,
apiContainerModeArgs *shared_helpers.ApiContainerModeArgs,
productionMode bool,
) *KubernetesKurtosisBackend {
objAttrsProvider := object_attributes_provider.GetKubernetesObjectAttributesProvider()
return &KubernetesKurtosisBackend{
@@ -60,6 +66,7 @@
cliModeArgs: cliModeArgs,
engineServerModeArgs: engineServerModeArgs,
apiContainerModeArgs: apiContainerModeArgs,
productionMode: productionMode,
}
}

@@ -68,13 +75,15 @@ func NewAPIContainerKubernetesKurtosisBackend(
ownEnclaveUuid enclave.EnclaveUUID,
ownNamespaceName string,
storageClassName string,
productionMode bool,
) *KubernetesKurtosisBackend {
modeArgs := shared_helpers.NewApiContainerModeArgs(ownEnclaveUuid, ownNamespaceName, storageClassName)
return newKubernetesKurtosisBackend(
kubernetesManager,
nil,
nil,
modeArgs,
productionMode,
)
}

@@ -87,6 +96,7 @@ func NewEngineServerKubernetesKurtosisBackend(
nil,
modeArgs,
nil,
noProductionMode,
)
}

@@ -99,28 +109,10 @@ func NewCLIModeKubernetesKurtosisBackend(
modeArgs,
nil,
nil,
noProductionMode,
)
}

func NewKubernetesKurtosisBackend(
kubernetesManager *kubernetes_manager.KubernetesManager,
// TODO Remove the necessity for these different args by splitting the *KubernetesKurtosisBackend into multiple
// backends per consumer, e.g. APIContainerKurtosisBackend, CLIKurtosisBackend, EngineKurtosisBackend, etc.
// This can only happen once the CLI no longer uses the same functionality as API container, engine, etc. though
cliModeArgs *shared_helpers.CliModeArgs,
engineServerModeArgs *shared_helpers.EngineServerModeArgs,
apiContainerModeargs *shared_helpers.ApiContainerModeArgs,
) *KubernetesKurtosisBackend {
objAttrsProvider := object_attributes_provider.GetKubernetesObjectAttributesProvider()
return &KubernetesKurtosisBackend{
kubernetesManager: kubernetesManager,
objAttrsProvider: objAttrsProvider,
cliModeArgs: cliModeArgs,
engineServerModeArgs: engineServerModeArgs,
apiContainerModeArgs: apiContainerModeargs,
}
}

func (backend *KubernetesKurtosisBackend) FetchImage(ctx context.Context, image string, downloadMode image_download_mode.ImageDownloadMode) (bool, string, error) {
logrus.Warnf("FetchImage isn't implemented for Kubernetes yet")
return false, "", nil
@@ -261,14 +253,20 @@ func (backend *KubernetesKurtosisBackend) StartRegisteredUserServices(
map[service.ServiceUUID]error,
error,
) {
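// Production mode decides the Kubernetes restart policy for user services: OnFailure in production mode, Never otherwise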
restartPolicy := apiv1.RestartPolicyNever
if backend.productionMode {
restartPolicy = apiv1.RestartPolicyOnFailure
}

successfullyStartedServices, failedServices, err := user_services_functions.StartRegisteredUserServices(
ctx,
enclaveUuid,
services,
backend.cliModeArgs,
backend.apiContainerModeArgs,
backend.engineServerModeArgs,
backend.kubernetesManager)
backend.kubernetesManager,
restartPolicy)
if err != nil {
var serviceUuids []service.ServiceUUID
for serviceUuid := range services {
@@ -36,6 +36,8 @@ const (
timeBetweenWaitForApiContainerContainerAvailabilityRetries = 1 * time.Second

enclaveDataDirVolumeName = "enclave-data"

enclaveDataDirVolumeSize int64 = 1 * 1024 * 1024 * 1024 // 1 GiB minimum size on Kubernetes
)

var noWait *port_spec.Wait = nil
@@ -412,13 +414,44 @@ func (backend *KubernetesKurtosisBackend) CreateAPIContainer(
return nil, stacktrace.Propagate(err, "An error occurred getting container ports from the API container's private port specs")
}

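// Back the enclave data dir with a persistent volume claim so the APIC's data survives pod restarts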
volumeAttrs, err := enclaveAttributesProvider.ForEnclaveDataDirVolume()
if err != nil {
return nil, stacktrace.Propagate(err, "An error occurred creating the labels for enclave data dir volume")
}

volumeLabelsStrs := map[string]string{}
for key, value := range volumeAttrs.GetLabels() {
volumeLabelsStrs[key.GetString()] = value.GetString()
}
if _, err = backend.kubernetesManager.CreatePersistentVolumeClaim(ctx, enclaveNamespaceName, enclaveDataDirVolumeName, volumeLabelsStrs, enclaveDataDirVolumeSize); err != nil {
return nil, stacktrace.Propagate(err, "An error occurred creating the persistent volume claim '%s' for the enclave data dir volume", enclaveDataDirVolumeName)
}
shouldDeleteVolumeClaim := true

defer func() {
if !shouldDeleteVolumeClaim {
return
}
if err := backend.kubernetesManager.RemovePersistentVolumeClaim(context.Background(), enclaveNamespaceName, enclaveDataDirVolumeName); err != nil {
logrus.Warnf(
"Creating pod didn't finish successfully - we tried removing the PVC %v but failed with error %v",
enclaveDataDirVolumeName,
err,
)
logrus.Warnf("You'll need to clean up volume claim '%v' manually!", enclaveDataDirVolumeName)
}
}()

apiContainerContainers, apiContainerVolumes, err := getApiContainerContainersAndVolumes(image, containerPorts, envVarsWithOwnIp, enclaveDataVolumeDirpath)
if err != nil {
return nil, stacktrace.Propagate(err, "An error occurred getting API containers and volumes")
}

apiContainerInitContainers := []apiv1.Container{}

// Data is always persistent, so we can always restart on failure, as Docker does
apiContainerRestartPolicy := apiv1.RestartPolicyOnFailure

Review comment (Contributor Author):
@leoporoli Questions here: this is very similar to what Docker does. Docker always has a persistent volume for /kurtosis-data, but on Kubernetes that adds extra time on DO/AWS; on k3s no extra time seems to be added.

Review reply (Contributor):
we talked about this on Slack and I agree with adding this for all enclaves so far.

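For context, a minimal standalone sketch of the restart-policy selection discussed in this thread; restartPolicyForMode and main are illustrative names, not part of this PR, and the logic mirrors the change to StartRegisteredUserServices above.

// Sketch only: mirrors the selection logic this PR adds; not part of the diff.
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

func restartPolicyForMode(productionMode bool) apiv1.RestartPolicy {
	if productionMode {
		// Production enclaves: ask Kubernetes to restart user-service pods on failure
		return apiv1.RestartPolicyOnFailure
	}
	// Non-production enclaves keep the previous behaviour: never restart
	return apiv1.RestartPolicyNever
}

func main() {
	fmt.Println(restartPolicyForMode(true))  // OnFailure
	fmt.Println(restartPolicyForMode(false)) // Never
}
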
// Create pods with api container containers and volumes in Kubernetes
apiContainerPod, err := backend.kubernetesManager.CreatePod(
ctx,
@@ -430,6 +463,7 @@
apiContainerContainers,
apiContainerVolumes,
apiContainerServiceAccountName,
apiContainerRestartPolicy,
)
if err != nil {
return nil, stacktrace.Propagate(err, "An error occurred while creating the pod with name '%s' in namespace '%s' with image '%s'", apiContainerPodName, enclaveNamespaceName, image)
@@ -481,6 +515,7 @@ func (backend *KubernetesKurtosisBackend) CreateAPIContainer(
shouldRemoveServiceAccount = false
shouldRemovePod = false
shouldRemoveService = false
shouldDeleteVolumeClaim = false
return resultApiContainer, nil
}

@@ -995,7 +1030,7 @@ func getApiContainerContainersAndVolumes(
enclaveDataVolumeDirpath string,
) (
resultContainers []apiv1.Container,
resultPodVolumes []apiv1.Volume,
resultVolumes []apiv1.Volume,
resultErr error,
) {
if _, found := envVars[ApiContainerOwnNamespaceNameEnvVar]; found {
@@ -1047,38 +1082,38 @@
{
Name: enclaveDataDirVolumeName,
VolumeSource: apiv1.VolumeSource{
HostPath: nil,
EmptyDir: &apiv1.EmptyDirVolumeSource{
Medium: "",
SizeLimit: nil,
HostPath: nil,
EmptyDir: nil,
GCEPersistentDisk: nil,
AWSElasticBlockStore: nil,
GitRepo: nil,
Secret: nil,
NFS: nil,
ISCSI: nil,
Glusterfs: nil,
PersistentVolumeClaim: &apiv1.PersistentVolumeClaimVolumeSource{
ClaimName: enclaveDataDirVolumeName,
ReadOnly: false,
},
GCEPersistentDisk: nil,
AWSElasticBlockStore: nil,
GitRepo: nil,
Secret: nil,
NFS: nil,
ISCSI: nil,
Glusterfs: nil,
PersistentVolumeClaim: nil,
RBD: nil,
FlexVolume: nil,
Cinder: nil,
CephFS: nil,
Flocker: nil,
DownwardAPI: nil,
FC: nil,
AzureFile: nil,
ConfigMap: nil,
VsphereVolume: nil,
Quobyte: nil,
AzureDisk: nil,
PhotonPersistentDisk: nil,
Projected: nil,
PortworxVolume: nil,
ScaleIO: nil,
StorageOS: nil,
CSI: nil,
Ephemeral: nil,
RBD: nil,
FlexVolume: nil,
Cinder: nil,
CephFS: nil,
Flocker: nil,
DownwardAPI: nil,
FC: nil,
AzureFile: nil,
ConfigMap: nil,
VsphereVolume: nil,
Quobyte: nil,
AzureDisk: nil,
PhotonPersistentDisk: nil,
Projected: nil,
PortworxVolume: nil,
ScaleIO: nil,
StorageOS: nil,
CSI: nil,
Ephemeral: nil,
},
},
}
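
Condensed, the hunk above swaps the enclave data dir's EmptyDir volume source for a reference to the PVC created earlier in CreateAPIContainer. A standalone sketch of the resulting volume, with zero-valued fields omitted (illustrative only, not part of this diff):

// Sketch only: the PVC-backed volume the APIC pod now mounts for its data dir.
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

// Mirrors the "enclave-data" constant defined earlier in this file.
const enclaveDataDirVolumeName = "enclave-data"

func main() {
	vol := apiv1.Volume{
		Name: enclaveDataDirVolumeName,
		VolumeSource: apiv1.VolumeSource{
			PersistentVolumeClaim: &apiv1.PersistentVolumeClaimVolumeSource{
				ClaimName: enclaveDataDirVolumeName,
				ReadOnly:  false,
			},
		},
	}
	fmt.Printf("volume %s -> claim %s\n", vol.Name, vol.PersistentVolumeClaim.ClaimName)
}
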
@@ -74,6 +74,7 @@ func GetEngineServerBackend(
func GetApiContainerBackend(
ctx context.Context,
storageClass string,
productionMode bool,
) (backend_interface.KurtosisBackend, error) {
kubernetesConfig, err := rest.InClusterConfig()
if err != nil {
@@ -107,6 +108,7 @@
enclaveId,
namespaceName,
storageClass,
productionMode,
), nil
}

@@ -74,7 +74,7 @@ func preparePersistentDirectoriesResources(
serviceMountpointsToPersistentKey map[string]service_directory.PersistentDirectory,
kubernetesManager *kubernetes_manager.KubernetesManager,
) (map[string]*kubernetesVolumeWithClaim, error) {
shouldDeleteVolumesAndClaimsCreated := true
shouldDeleteVolumeClaims := true
volumeClaimsCreated := map[string]*apiv1.PersistentVolumeClaim{}

persistentVolumesAndClaims := map[string]*kubernetesVolumeWithClaim{}
@@ -109,7 +109,7 @@
}

defer func() {
if !shouldDeleteVolumesAndClaimsCreated {
if !shouldDeleteVolumeClaims {
return
}
for volumeClaimNameStr := range volumeClaimsCreated {
@@ -125,6 +125,6 @@
}
}()

shouldDeleteVolumesAndClaimsCreated = false
shouldDeleteVolumeClaims = false
return persistentVolumesAndClaims, nil
}
@@ -125,6 +125,7 @@ func StartRegisteredUserServices(
apiContainerModeArgs *shared_helpers.ApiContainerModeArgs,
engineServerModeArgs *shared_helpers.EngineServerModeArgs,
kubernetesManager *kubernetes_manager.KubernetesManager,
restartPolicy apiv1.RestartPolicy,
) (
map[service.ServiceUUID]*service.Service,
map[service.ServiceUUID]error,
@@ -184,7 +185,8 @@
enclaveUuid,
serviceRegisteredThatCanBeStarted,
existingObjectsAndResources,
kubernetesManager)
kubernetesManager,
restartPolicy)
if err != nil {
return nil, nil, stacktrace.Propagate(err, "An error occurred while trying to start services in parallel.")
}
@@ -246,6 +248,7 @@ func runStartServiceOperationsInParallel(
services map[service.ServiceUUID]*service.ServiceConfig,
servicesObjectsAndResources map[service.ServiceUUID]*shared_helpers.UserServiceObjectsAndKubernetesResources,
kubernetesManager *kubernetes_manager.KubernetesManager,
restartPolicy apiv1.RestartPolicy,
) (
map[service.ServiceUUID]*service.Service,
map[service.ServiceUUID]error,
@@ -259,7 +262,8 @@
config,
servicesObjectsAndResources,
enclaveUUID,
kubernetesManager)
kubernetesManager,
restartPolicy)
}

successfulServiceObjs, failedOperations := operation_parallelizer.RunOperationsInParallel(startServiceOperations)
@@ -292,7 +296,8 @@ func createStartServiceOperation(
serviceConfig *service.ServiceConfig,
servicesObjectsAndResources map[service.ServiceUUID]*shared_helpers.UserServiceObjectsAndKubernetesResources,
enclaveUuid enclave.EnclaveUUID,
kubernetesManager *kubernetes_manager.KubernetesManager) operation_parallelizer.Operation {
kubernetesManager *kubernetes_manager.KubernetesManager,
restartPolicy apiv1.RestartPolicy) operation_parallelizer.Operation {

return func() (interface{}, error) {
filesArtifactsExpansion := serviceConfig.GetFilesArtifactsExpansion()
@@ -413,6 +418,7 @@
podContainers,
podVolumes,
userServiceServiceAccountName,
restartPolicy,
)
if err != nil {
return nil, stacktrace.Propagate(err, "An error occurred creating pod '%v' using image '%v'", podName, containerImageName)