[cherrypick for 1.13.2] (#317)
* chore(aut-check): skip autStatus check if appinfo not provided (#304)

Signed-off-by: shubhamchaudhary <shubham@chaosnative.com>

* chore(n/w-chaos): Handling the unknown hosts (#302)

Signed-off-by: shubhamchaudhary <shubham.chaudhary@mayadata.io>

* chore(signal): Adding signal for the crio/containerd runtime (#306)

Signed-off-by: shubhamchaudhary <shubham@chaosnative.com>

* chore(disk-fill): Adding option to specify ephemeral storage (MiB) explicitly via env (#313)

Signed-off-by: shubhamchaudhary <shubham@chaosnative.com>

* refactor(kafka-broker-pod-failure): Refactor the kafka broker pod failure (#309)

Signed-off-by: shubhamchaudhary <shubham@chaosnative.com>

* Remove init container for changing permission of the container runtime socket (#315)

* Removed init container from container-kill and network-chaos experiments

Signed-off-by: Radu Domnu <rdomnu@redhat.com>

* Running crictl/docker commands with sudo

Signed-off-by: Radu Domnu <rdomnu@redhat.com>

* Removed init container for network experiments

Signed-off-by: Radu Domnu <rdomnu@redhat.com>

* chore(k8sProbe): Updating the k8s probe schema (#308)

Signed-off-by: shubhamchaudhary <shubham@chaosnative.com>

* API response time updated for HTTP probe (#307)

Signed-off-by: oumkale <oum.kale@mayadata.io>

* chore(container-status): checking only target container status (#303)

Signed-off-by: shubhamchaudhary <shubham@chaosnative.com>

* enhance(aws-ec2): Add support for terminating a node of a cluster with a self-managed nodegroup (#298)

* enhance(aws-ec2): Add support for terminating a node of a cluster with a self-managed nodegroup

Signed-off-by: udit <udit@chaosnative.com>

Co-authored-by: Radu Domnu <39598837+radudd@users.noreply.github.com>
Co-authored-by: OUM NIVRATHI KALE <oum.kale@mayadata.io>
Co-authored-by: Udit Gaurav <35391335+uditgaurav@users.noreply.github.com>
4 people committed Mar 15, 2021
1 parent 32438d8 commit 7e0b0cf
Showing 83 changed files with 652 additions and 658 deletions.
16 changes: 12 additions & 4 deletions chaoslib/litmus/container-kill/helper/container-kill.go
@@ -89,7 +89,7 @@ func KillContainer(experimentsDetails *experimentTypes.ExperimentDetails, client
return err
}
case "containerd", "crio":
if err := StopContainerdContainer(containerID, experimentsDetails.SocketPath); err != nil {
if err := StopContainerdContainer(containerID, experimentsDetails.SocketPath, experimentsDetails.Signal); err != nil {
return err
}
default:
@@ -152,10 +152,18 @@ func GetContainerID(experimentsDetails *experimentTypes.ExperimentDetails, clien
}

//StopContainerdContainer kill the application container
func StopContainerdContainer(containerID, socketPath string) error {
func StopContainerdContainer(containerID, socketPath, signal string) error {
var errOut bytes.Buffer
var cmd *exec.Cmd
endpoint := "unix://" + socketPath
cmd := exec.Command("crictl", "-i", endpoint, "-r", endpoint, "stop", string(containerID))
switch signal {
case "SIGKILL":
cmd = exec.Command("sudo", "crictl", "-i", endpoint, "-r", endpoint, "stop", "--timeout=0", string(containerID))
case "SIGTERM":
cmd = exec.Command("sudo", "crictl", "-i", endpoint, "-r", endpoint, "stop", string(containerID))
default:
return errors.Errorf("{%v} signal not supported, use either SIGTERM or SIGKILL", signal)
}
cmd.Stderr = &errOut
if err := cmd.Run(); err != nil {
return errors.Errorf("Unable to run command, err: %v; error output: %v", err, errOut.String())
@@ -167,7 +175,7 @@ func StopContainerdContainer(containerID, socketPath string) error {
func StopDockerContainer(containerID, socketPath, signal string) error {
var errOut bytes.Buffer
host := "unix://" + socketPath
cmd := exec.Command("docker", "--host", host, "kill", string(containerID), "--signal", signal)
cmd := exec.Command("sudo", "docker", "--host", host, "kill", string(containerID), "--signal", signal)
cmd.Stderr = &errOut
if err := cmd.Run(); err != nil {
return errors.Errorf("Unable to run command, err: %v; error output: %v", err, errOut.String())
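Two things change in this helper: the init container that previously ran chmod 777 on the container runtime socket is gone, so the crictl/docker commands are now prefixed with sudo, and the containerd/CRI-O path honours the signal passed to the helper by mapping SIGKILL to an immediate crictl stop with --timeout=0 and SIGTERM to crictl's default graceful stop. The sketch below reproduces that command construction outside the experiment so the resulting invocations can be inspected without killing anything; buildStopCommand, the container ID, and the socket path are illustrative, not code or values from the repository.

```go
package main

import (
	"fmt"
	"os/exec"
)

// buildStopCommand mirrors the switch added to StopContainerdContainer:
// SIGKILL maps to an immediate stop (--timeout=0), SIGTERM to crictl's
// default graceful stop, anything else is rejected.
func buildStopCommand(containerID, socketPath, signal string) (*exec.Cmd, error) {
	endpoint := "unix://" + socketPath
	switch signal {
	case "SIGKILL":
		return exec.Command("sudo", "crictl", "-i", endpoint, "-r", endpoint, "stop", "--timeout=0", containerID), nil
	case "SIGTERM":
		return exec.Command("sudo", "crictl", "-i", endpoint, "-r", endpoint, "stop", containerID), nil
	default:
		return nil, fmt.Errorf("{%v} signal not supported, use either SIGTERM or SIGKILL", signal)
	}
}

func main() {
	// Illustrative container ID and socket path only.
	for _, sig := range []string{"SIGTERM", "SIGKILL", "SIGHUP"} {
		cmd, err := buildStopCommand("deadbeef1234", "/run/containerd/containerd.sock", sig)
		if err != nil {
			fmt.Println(sig, "->", err)
			continue
		}
		// Print the command that would be executed instead of running it.
		fmt.Println(sig, "->", cmd.Args)
	}
}
```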
20 changes: 0 additions & 20 deletions chaoslib/litmus/container-kill/lib/container-kill.go
@@ -272,26 +272,6 @@ func CreateHelperPod(experimentsDetails *experimentTypes.ExperimentDetails, clie
},
},
},
InitContainers: []apiv1.Container{
{
Name: "setup-" + experimentsDetails.ExperimentName,
Image: experimentsDetails.LIBImage,
ImagePullPolicy: apiv1.PullPolicy(experimentsDetails.LIBImagePullPolicy),
Command: []string{
"/bin/bash",
"-c",
"sudo chmod 777 " + experimentsDetails.SocketPath,
},
Resources: experimentsDetails.Resources,
Env: GetPodEnv(experimentsDetails, podName),
VolumeMounts: []apiv1.VolumeMount{
{
Name: "cri-socket",
MountPath: experimentsDetails.SocketPath,
},
},
},
},
Containers: []apiv1.Container{
{
Name: experimentsDetails.ExperimentName,
59 changes: 33 additions & 26 deletions chaoslib/litmus/disk-fill/helper/disk-fill.go
@@ -59,26 +59,13 @@ func main() {

//DiskFill contains steps to inject disk-fill chaos
func DiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails) error {
// GetEphemeralStorageAttributes derive the ephemeral storage attributes from the target container
ephemeralStorageLimit, ephemeralStorageRequest, err := GetEphemeralStorageAttributes(experimentsDetails, clients)
if err != nil {
return err
}

// Derive the container id of the target container
containerID, err := GetContainerID(experimentsDetails, clients)
if err != nil {
return err
}

log.InfoWithValues("[Info]: Details of application under chaos injection", logrus.Fields{
"PodName": experimentsDetails.TargetPods,
"ContainerName": experimentsDetails.TargetContainer,
"ephemeralStorageLimit": ephemeralStorageLimit,
"ephemeralStorageRequest": ephemeralStorageRequest,
"ContainerID": containerID,
})

// derive the used ephemeral storage size from the target container
du := fmt.Sprintf("sudo du /diskfill/%v", containerID)
cmd := exec.Command("/bin/bash", "-c", du)
@@ -94,12 +81,29 @@ func DiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clients cli
if err != nil {
return errors.Errorf("Unable to filter used ephemeral storage size, err: %v", err)
}
log.Infof("used ephemeral storage space: %v", strconv.Itoa(usedEphemeralStorageSize))
log.Infof("used ephemeral storage space: %vKB", strconv.Itoa(usedEphemeralStorageSize))

// GetEphemeralStorageAttributes derive the ephemeral storage attributes from the target container
ephemeralStorageLimit, err := GetEphemeralStorageAttributes(experimentsDetails, clients)
if err != nil {
return err
}

if ephemeralStorageLimit == 0 && experimentsDetails.EphemeralStorageMebibytes == 0 {
return errors.Errorf("Either provide ephemeral storage limit inside target container or define EPHEMERAL_STORAGE_MEBIBYTES ENV")
}

// deriving the ephemeral storage size to be filled
sizeTobeFilled := GetSizeToBeFilled(experimentsDetails, usedEphemeralStorageSize, int(ephemeralStorageLimit))

log.Infof("ephemeral storage size to be filled: %v", strconv.Itoa(sizeTobeFilled))
log.InfoWithValues("[Info]: Details of application under chaos injection", logrus.Fields{
"PodName": experimentsDetails.TargetPods,
"ContainerName": experimentsDetails.TargetContainer,
"ephemeralStorageLimit(KB)": ephemeralStorageLimit,
"ContainerID": containerID,
})

log.Infof("ephemeral storage size to be filled: %vKB", strconv.Itoa(sizeTobeFilled))

// record the event inside chaosengine
if experimentsDetails.EngineName != "" {
@@ -174,32 +178,27 @@ func DiskFill(experimentsDetails *experimentTypes.ExperimentDetails, clients cli
}

// GetEphemeralStorageAttributes derive the ephemeral storage attributes from the target pod
func GetEphemeralStorageAttributes(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) (int64, int64, error) {
func GetEphemeralStorageAttributes(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets) (int64, error) {

pod, err := clients.KubeClient.CoreV1().Pods(experimentsDetails.AppNS).Get(experimentsDetails.TargetPods, v1.GetOptions{})

if err != nil {
return 0, 0, err
return 0, err
}

var ephemeralStorageLimit, ephemeralStorageRequest int64
var ephemeralStorageLimit int64
containers := pod.Spec.Containers

// Extracting ephemeral storage limit & requested value from the target container
// It will be in the form of Kb
for _, container := range containers {
if container.Name == experimentsDetails.TargetContainer {
ephemeralStorageLimit = container.Resources.Limits.StorageEphemeral().ToDec().ScaledValue(resource.Kilo)
ephemeralStorageRequest = container.Resources.Requests.StorageEphemeral().ToDec().ScaledValue(resource.Kilo)
break
}
}

if ephemeralStorageRequest == 0 || ephemeralStorageLimit == 0 {
return 0, 0, fmt.Errorf("No Ephemeral storage details found inside %v container", experimentsDetails.TargetContainer)
}

return ephemeralStorageLimit, ephemeralStorageRequest, nil
return ephemeralStorageLimit, nil
}

// GetContainerID derive the container id of the target container
@@ -242,9 +241,16 @@ func FilterUsedEphemeralStorage(ephemeralStorageDetails string) (int, error) {

// GetSizeToBeFilled generate the ephemeral storage size need to be filled
func GetSizeToBeFilled(experimentsDetails *experimentTypes.ExperimentDetails, usedEphemeralStorageSize int, ephemeralStorageLimit int) int {
var requirementToBeFill int

switch ephemeralStorageLimit {
case 0:
requirementToBeFill = experimentsDetails.EphemeralStorageMebibytes * 1024
default:
// deriving size need to be filled from the used size & requirement size to fill
requirementToBeFill = (ephemeralStorageLimit * experimentsDetails.FillPercentage) / 100
}

// deriving size need to be filled from the used size & requirement size to fill
requirementToBeFill := (ephemeralStorageLimit * experimentsDetails.FillPercentage) / 100
needToBeFilled := requirementToBeFill - usedEphemeralStorageSize
return needToBeFilled
}
Expand Down Expand Up @@ -288,6 +294,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails, name string) {
experimentDetails.ChaosUID = clientTypes.UID(Getenv("CHAOS_UID", ""))
experimentDetails.ChaosPodName = Getenv("POD_NAME", "")
experimentDetails.FillPercentage, _ = strconv.Atoi(Getenv("FILL_PERCENTAGE", ""))
experimentDetails.EphemeralStorageMebibytes, _ = strconv.Atoi(Getenv("EPHEMERAL_STORAGE_MEBIBYTES", ""))
}

// Getenv fetch the env and set the default value, if any
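To make the new sizing rule concrete: when the target container declares an ephemeral-storage limit, the helper fills up to FILL_PERCENTAGE of that limit; when the limit is absent, it falls back to EPHEMERAL_STORAGE_MEBIBYTES converted to KB (an unset env parses to 0, which is why the experiment now errors out only when both values are zero), and in both cases the space already used is subtracted. A minimal sketch of that calculation, with made-up numbers and a hypothetical helper name, is:

```go
package main

import "fmt"

// sizeToBeFilled mirrors the branch added to GetSizeToBeFilled: with no
// ephemeral-storage limit on the target container, fall back to
// EPHEMERAL_STORAGE_MEBIBYTES (converted to KB); otherwise fill up to
// FILL_PERCENTAGE of the limit. All values are in KB except the mebibytes arg.
func sizeToBeFilled(usedKB, limitKB, fillPercentage, ephemeralStorageMebibytes int) int {
	var requirementKB int
	if limitKB == 0 {
		requirementKB = ephemeralStorageMebibytes * 1024
	} else {
		requirementKB = (limitKB * fillPercentage) / 100
	}
	return requirementKB - usedKB
}

func main() {
	// Limit-based sizing: 1 GiB limit, fill 80%, 200 MiB already used
	// -> (1048576 * 80 / 100) - 204800 = 634060 KB still to write.
	fmt.Println(sizeToBeFilled(204800, 1048576, 80, 0))

	// Env-based sizing: no limit on the container, EPHEMERAL_STORAGE_MEBIBYTES=500
	// -> 500*1024 - 204800 = 307200 KB still to write.
	fmt.Println(sizeToBeFilled(204800, 0, 80, 500))
}
```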
19 changes: 10 additions & 9 deletions chaoslib/litmus/disk-fill/lib/disk-fill.go
@@ -283,15 +283,16 @@ func GetPodEnv(experimentsDetails *experimentTypes.ExperimentDetails, podName st

var envVar []apiv1.EnvVar
ENVList := map[string]string{
"APP_NS": experimentsDetails.AppNS,
"APP_POD": podName,
"APP_CONTAINER": experimentsDetails.TargetContainer,
"TOTAL_CHAOS_DURATION": strconv.Itoa(experimentsDetails.ChaosDuration),
"CHAOS_NAMESPACE": experimentsDetails.ChaosNamespace,
"CHAOS_ENGINE": experimentsDetails.EngineName,
"CHAOS_UID": string(experimentsDetails.ChaosUID),
"EXPERIMENT_NAME": experimentsDetails.ExperimentName,
"FILL_PERCENTAGE": strconv.Itoa(experimentsDetails.FillPercentage),
"APP_NS": experimentsDetails.AppNS,
"APP_POD": podName,
"APP_CONTAINER": experimentsDetails.TargetContainer,
"TOTAL_CHAOS_DURATION": strconv.Itoa(experimentsDetails.ChaosDuration),
"CHAOS_NAMESPACE": experimentsDetails.ChaosNamespace,
"CHAOS_ENGINE": experimentsDetails.EngineName,
"CHAOS_UID": string(experimentsDetails.ChaosUID),
"EXPERIMENT_NAME": experimentsDetails.ExperimentName,
"FILL_PERCENTAGE": strconv.Itoa(experimentsDetails.FillPercentage),
"EPHEMERAL_STORAGE_MEBIBYTES": strconv.Itoa(experimentsDetails.EphemeralStorageMebibytes),
}
for key, value := range ENVList {
var perEnv apiv1.EnvVar
36 changes: 19 additions & 17 deletions chaoslib/litmus/ec2-terminate/lib/ec2-terminate.go
@@ -36,12 +36,6 @@ func InjectEC2Terminate(experimentsDetails *experimentTypes.ExperimentDetails, c
return errors.Errorf("ec2 instance failed to stop, err: %v", err)
}

//Wait for ec2 instance to completely stop
log.Info("[Wait]: Wait for EC2 instance to come in stopped state")
if err = WaitForEC2Down(experimentsDetails); err != nil {
return errors.Errorf("unable to stop the ec2 instance, err: %v", err)
}

// run the probes during chaos
if len(resultDetails.ProbeDetails) != 0 {
if err = probe.RunProbes(chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil {
@@ -54,16 +48,12 @@ func InjectEC2Terminate(experimentsDetails *experimentTypes.ExperimentDetails, c
time.Sleep(time.Duration(experimentsDetails.ChaosDuration) * time.Second)

//Starting the EC2 instance
log.Info("[Chaos]: Starting back the EC2 instance")
err = EC2Start(experimentsDetails)
if err != nil {
return errors.Errorf("ec2 instance failed to start, err: %v", err)
}

//Wait for ec2 instance to come in running state
log.Info("[Wait]: Wait for EC2 instance to get in running state")
if err = WaitForEC2Up(experimentsDetails); err != nil {
return errors.Errorf("unable to start the ec2 instance, err: %v", err)
if experimentsDetails.ManagedNodegroup != "enable" {
log.Info("[Chaos]: Starting back the EC2 instance")
err = EC2Start(experimentsDetails)
if err != nil {
return errors.Errorf("ec2 instance failed to start, err: %v", err)
}
}

//Waiting for the ramp time after chaos injection
@@ -109,6 +99,12 @@ func EC2Stop(experimentsDetails *experimentTypes.ExperimentDetails) error {
"InstanceId": *result.StoppingInstances[0].InstanceId,
})

//Wait for ec2 instance to completely stop
log.Info("[Wait]: Wait for EC2 instance to come in stopped state")
if err = WaitForEC2Down(experimentsDetails); err != nil {
return errors.Errorf("unable to stop the ec2 instance, err: %v", err)
}

return nil
}

@@ -148,6 +144,12 @@ func EC2Start(experimentsDetails *experimentTypes.ExperimentDetails) error {
"InstanceId": *result.StartingInstances[0].InstanceId,
})

//Wait for ec2 instance to come in running state
log.Info("[Wait]: Wait for EC2 instance to get in running state")
if err = WaitForEC2Up(experimentsDetails); err != nil {
return errors.Errorf("unable to start the ec2 instance, err: %v", err)
}

return nil
}

@@ -164,7 +166,7 @@ func WaitForEC2Down(experimentsDetails *experimentTypes.ExperimentDetails) error
if err != nil {
return errors.Errorf("failed to get the instance status")
}
if instanceState != "stopped" {
if (experimentsDetails.ManagedNodegroup != "enable" && instanceState != "stopped") || (experimentsDetails.ManagedNodegroup == "enable" && instanceState != "terminated") {
log.Infof("The instance state is %v", instanceState)
return errors.Errorf("instance is not yet in stopped state")
}
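With this change, when experimentsDetails.ManagedNodegroup is set to "enable" the experiment no longer starts the instance back after the chaos duration (the nodegroup is expected to bring up a replacement), and WaitForEC2Down treats "terminated" rather than "stopped" as the expected end state; the stop/start waits have also moved inside EC2Stop and EC2Start. A minimal sketch of the new state check, using an assumed helper name expectedDown, is:

```go
package main

import "fmt"

// expectedDown mirrors the condition added to WaitForEC2Down: a plain EC2
// instance should reach "stopped", while an instance in a self-managed
// nodegroup ("enable") is terminated and replaced, so "terminated" is the
// state to wait for. The values passed in main are illustrative.
func expectedDown(managedNodegroup, instanceState string) bool {
	if managedNodegroup == "enable" {
		return instanceState == "terminated"
	}
	return instanceState == "stopped"
}

func main() {
	fmt.Println(expectedDown("disable", "stopped"))   // true: plain instance reached stopped
	fmt.Println(expectedDown("enable", "stopped"))    // false: nodegroup instance should terminate
	fmt.Println(expectedDown("enable", "terminated")) // true: nodegroup replaced the node
}
```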