*: some fixes for e2e test on krane-based k8s (#443)
**change resources for `deploy/k8s/dev`**
To test Varlog in a Krane-based cluster, the cluster should have at
least 20 nodes. The recommended configuration is three MRs, three or
four SNs, and a replication factor of three.

**daemonset doesn't respect podAntiAffinity**
See kubernetes/kubernetes#29276.
We should attach the label `varlog-type=telemetry` to nodes running
jaeger, prometheus, otel-collector, and grafana. The DaemonSets for MR
and SN won't be deployed to those nodes, and the e2e testing module
ignores nodes labeled with `varlog-type=telemetry`.
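Since DaemonSet pods ignore `podAntiAffinity`, the separation is enforced with node labels plus required node affinity. A minimal sketch of the rule added to the MR and SN DaemonSets, assembled from the `mr.yaml`/`sn.yaml` hunks below (the surrounding pod-spec fields are assumed, and `<node-name>` is a placeholder):

```yaml
# Telemetry nodes are labeled out of band, e.g.
#   kubectl label node <node-name> varlog-type=telemetry
# and the MR/SN DaemonSet pod spec then refuses to schedule onto them.
spec:
  template:
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: varlog-type
                operator: NotIn
                values:
                - "telemetry"
```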

Resolves [#VARLOG-509](https://jira.daumkakao.com/browse/VARLOG-509).
ijsong authored and GitHub Enterprise committed Jul 12, 2021
1 parent 852fc4b commit 5b6248c
Showing 13 changed files with 71 additions and 46 deletions.
6 changes: 3 additions & 3 deletions deploy/k8s/base/jaeger.yaml
@@ -49,19 +49,19 @@ spec:
             port: 14250
       dnsPolicy: ClusterFirst
       affinity:
-        podAffinity:
+        podAntiAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
           - labelSelector:
               matchExpressions:
               - key: app
-                operator: NotIn
+                operator: In
                 values:
                 - "varlog-mr"
                 - "varlog-sn"
                 - "varlog-vms"
                 - "prometheus"
               - key: "component"
-                operator: NotIn
+                operator: In
                 values:
                 - "otel-collector"
             topologyKey: "kubernetes.io/hostname"
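The telemetry workloads (jaeger here, prometheus and otel-collector below) are meant not to share a host with the Varlog components. The previous `podAffinity` + `NotIn` rule only required co-location with some pod outside the list, so it never kept pods apart; `podAntiAffinity` + `In` states the exclusion directly. A condensed sketch of the resulting rule, reassembled from the hunk above with the value list abbreviated:

```yaml
affinity:
  podAntiAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
    - labelSelector:
        matchExpressions:
        - key: app
          operator: In        # repel hosts that already run any of these apps
          values:
          - "varlog-mr"
          - "varlog-sn"
          - "varlog-vms"
      topologyKey: "kubernetes.io/hostname"
```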
5 changes: 5 additions & 0 deletions deploy/k8s/base/mr.yaml
@@ -86,6 +86,11 @@ spec:
                 operator: In
                 values:
                 - varlog-mr
+              - key: varlog-type
+                operator: NotIn
+                values:
+                - "telemetry"
+        # The podAntiAffinity in this is not effective. See https://github.com/kubernetes/kubernetes/issues/29276.
         podAntiAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
           - labelSelector:
4 changes: 2 additions & 2 deletions deploy/k8s/base/otel-collector.yaml
@@ -158,12 +158,12 @@ spec:
             path: otel-collector-config.yaml
       dnsPolicy: ClusterFirst
       affinity:
-        podAffinity:
+        podAntiAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
           - labelSelector:
               matchExpressions:
               - key: app
-                operator: NotIn
+                operator: In
                 values:
                 - "varlog-mr"
                 - "varlog-sn"
7 changes: 4 additions & 3 deletions deploy/k8s/base/prometheus.yaml
@@ -76,18 +76,19 @@ spec:
       - name: prometheus-storage-volume
         emptyDir: {}
       affinity:
-        podAffinity:
+        podAntiAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
           - labelSelector:
               matchExpressions:
               - key: app
-                operator: NotIn
+                operator: In
                 values:
                 - "varlog-mr"
                 - "varlog-sn"
                 - "varlog-vms"
+                - "jaeger"
               - key: "component"
-                operator: NotIn
+                operator: In
                 values:
                 - "otel-collector"
             topologyKey: "kubernetes.io/hostname"
5 changes: 5 additions & 0 deletions deploy/k8s/base/sn.yaml
@@ -85,6 +85,11 @@ spec:
                 operator: In
                 values:
                 - varlog-sn
+              - key: varlog-type
+                operator: NotIn
+                values:
+                - "telemetry"
+        # The podAntiAffinity in this is not effective. See https://github.com/kubernetes/kubernetes/issues/29276.
         podAntiAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
           - labelSelector:
10 changes: 5 additions & 5 deletions deploy/k8s/dev/jaeger-patch.yaml
@@ -6,15 +6,15 @@ metadata:
   labels:
     app: jaeger
 spec:
-  replicas: 0
+  replicas: 1
   template:
     spec:
       containers:
       - name: jaeger
         resources:
           limits:
-            cpu: 1000m
-            memory: 1Gi
+            cpu: 4000m
+            memory: 4Gi
           requests:
-            cpu: 1000m
-            memory: 1Gi
+            cpu: 4000m
+            memory: 4Gi
18 changes: 9 additions & 9 deletions deploy/k8s/dev/otel-agent-patch.yaml
@@ -9,12 +9,12 @@ metadata:
 spec:
   template:
     spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              - key: type
-                operator: In
-                values:
-                - skip-otel-agent
+      containers:
+      - name: otel-agent
+        resources:
+          limits:
+            cpu: 1000m
+            memory: 1Gi
+          requests:
+            cpu: 1000m
+            memory: 1Gi
10 changes: 5 additions & 5 deletions deploy/k8s/dev/otel-collector-patch.yaml
@@ -7,15 +7,15 @@ metadata:
     app: opentelemetry
     component: otel-collector
 spec:
-  replicas: 0
+  replicas: 1
   template:
     spec:
       containers:
       - name: otel-collector
         resources:
           limits:
-            cpu: 1000m
-            memory: 1Gi
+            cpu: 4000m
+            memory: 4Gi
           requests:
-            cpu: 1000m
-            memory: 1Gi
+            cpu: 4000m
+            memory: 4Gi
10 changes: 5 additions & 5 deletions deploy/k8s/dev/prometheus-patch.yaml
@@ -6,15 +6,15 @@ metadata:
   labels:
     app: prometheus
 spec:
-  replicas: 0
+  replicas: 1
   template:
     spec:
       containers:
       - name: prometheus
         resources:
           limits:
-            cpu: 1000m
-            memory: 1Gi
+            cpu: 4000m
+            memory: 4Gi
           requests:
-            cpu: 1000m
-            memory: 1Gi
+            cpu: 4000m
+            memory: 4Gi
8 changes: 4 additions & 4 deletions deploy/k8s/dev/varlog-mr-patch.yaml
@@ -23,11 +23,11 @@ spec:
       - name: varlog-mr
         resources:
           limits:
-            cpu: 3000m
-            memory: 3Gi
+            cpu: 4000m
+            memory: 4Gi
           requests:
-            cpu: 3000m
-            memory: 3Gi
+            cpu: 4000m
+            memory: 4Gi
         env:
         - name: VMR_HOME
           value: "/varlog/mr"
8 changes: 4 additions & 4 deletions deploy/k8s/dev/varlog-sn-patch.yaml
@@ -23,11 +23,11 @@ spec:
       - name: varlog-sn
         resources:
           limits:
-            cpu: 3000m
-            memory: 3000Mi
+            cpu: 4000m
+            memory: 4000Mi
           requests:
-            cpu: 3000m
-            memory: 3000Mi
+            cpu: 4000m
+            memory: 4000Mi
         env:
         - name: VOLUMES
           value: "/varlog/sn"
8 changes: 4 additions & 4 deletions test/e2e/e2e_long_test.go
@@ -20,11 +20,11 @@ import (
 func TestK8sVarlogAppendLongTime(t *testing.T) {
     const (
         testTimeout = 15 * time.Minute
-        numRepFactor = 2
+        numRepFactor = 3
         numMRs = 3
-        numSNs = 2
-        numLSs = 1
-        numClients = 50
+        numSNs = 9
+        numLSs = 3
+        numClients = 10
         clusterID = types.ClusterID(1)
     )

18 changes: 16 additions & 2 deletions test/e2e/k8s_util.go
@@ -49,6 +49,9 @@ const (
     IngressNginxNamespace = "ingress-nginx"

     ENV_REP_FACTOR = "REP_FACTOR"
+
+    // telemetry
+    TelemetryLabelValue = "telemetry"
 )

 type K8sVarlogPodGetter interface {
@@ -444,7 +447,10 @@ func (k8s *K8sVarlogCluster) RemoveLabelAll() (err error) {
     }

     for _, node := range nodes.Items {
-        if _, ok := node.Labels[TypeLabelKey]; ok {
+        if labelValue, ok := node.Labels[TypeLabelKey]; ok {
+            if labelValue == TelemetryLabelValue {
+                continue
+            }
             if erri := k8s.RemoveLabel(node.GetName(), TypeLabelKey); erri != nil {
                 err = multierr.Append(err, erri)
             }
@@ -582,6 +588,14 @@ func (k8s *K8sVarlogCluster) clearMRDatas() error {
         return err
     }

+    targetNodes := 0
+    for _, node := range nodes.Items {
+        if _, ok := node.Labels[TypeLabelKey]; ok {
+            continue
+        }
+        targetNodes++
+    }
+
     for _, node := range nodes.Items {
         if _, ok := node.Labels[TypeLabelKey]; ok {
             continue
@@ -595,7 +609,7 @@

     if err := testutil.CompareWaitErrorWithRetryIntervalN(1000, 10*time.Second, func() (bool, error) {
         numPods, err := k8s.numPodsReady(VarlogNamespace, podSelector)
-        return numPods == len(nodes.Items), errors.Wrap(err, "k8s")
+        return numPods == targetNodes, errors.Wrap(err, "k8s")
     }); err != nil {
         return err
     }
