Skip to content

Commit

Permalink
Merge branch 'OWLS-109579' into 'main'
Browse files Browse the repository at this point in the history
OWLS-109579 - Potential fixes for issues observed in CAGBU environment with large K8s cluster.

See merge request weblogic-cloud/weblogic-kubernetes-operator!4271
  • Loading branch information
rjeberhard committed Jun 7, 2023
2 parents c6bb1c0 + f45b235 commit 326ec06
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ public void completeProcessing(Packet packet) {
.forEach(info -> adjustClusterResources(c, info)));
executeMakeRightForClusterEvents(dp);
getActiveDomainPresenceInfos().forEach(info -> activateDomain(dp, info));
getDomainPresenceInfoMap().values().forEach(DomainResourcesValidation.this::removeDeletedPodsFromDPI);
getDomainPresenceInfoMap().values().forEach(DomainPresenceInfo::clearServerPodNamesFromList);
}
};
}
Expand Down Expand Up @@ -134,15 +136,11 @@ private boolean isForDomain(ClusterResource clusterResource, DomainPresenceInfo
}

private void addPodList(V1PodList list) {
getDomainPresenceInfoMap().values().forEach(dpi -> removeDeletedPodsFromDPI(list, dpi));
list.getItems().forEach(this::addPod);
}

private void removeDeletedPodsFromDPI(V1PodList list, DomainPresenceInfo dpi) {
Collection<String> serverNamesFromPodList = list.getItems().stream()
.map(PodHelper::getPodServerName).collect(Collectors.toList());

dpi.getServerNames().stream().filter(s -> !serverNamesFromPodList.contains(s)).collect(Collectors.toList())
/**
 * Removes from the presence info every server whose name was not recorded while
 * processing the pod list, by reporting it as deleted with a {@code null} pod.
 *
 * @param dpi the domain presence info to reconcile against its recorded pod-list names
 */
private void removeDeletedPodsFromDPI(DomainPresenceInfo dpi) {
  // Snapshot the recorded names into a hash set once: calling contains() on the
  // synchronized list for every server is a linear, lock-acquiring scan each time.
  // Iterating a synchronized list must be guarded manually, hence the explicit monitor.
  final java.util.Set<String> listedNames;
  synchronized (dpi.getServerNamesFromPodList()) {
    listedNames = new java.util.HashSet<>(dpi.getServerNamesFromPodList());
  }

  // Collect the names first so that deleting does not happen while the
  // server-name collection is still being streamed.
  dpi.getServerNames().stream()
      .filter(name -> !listedNames.contains(name))
      .collect(Collectors.toList())
      .forEach(name -> dpi.deleteServerPodFromEvent(name, null));
}

Expand All @@ -157,11 +155,14 @@ private void addOperatorEventList(CoreV1EventList list) {
private void addPod(V1Pod pod) {
String domainUid = PodHelper.getPodDomainUid(pod);
String serverName = PodHelper.getPodServerName(pod);
DomainPresenceInfo info = getExistingDomainPresenceInfo(domainUid);
Optional.ofNullable(info).ifPresent(i -> i.addServerNameFromPodList(serverName));

if (domainUid != null && serverName != null) {
setServerPodFromEvent(getExistingDomainPresenceInfo(domainUid), serverName, pod);
setServerPodFromEvent(info, serverName, pod);
}
if (PodHelper.getPodLabel(pod, LabelConstants.JOBNAME_LABEL) != null) {
processor.updateDomainStatus(pod, getExistingDomainPresenceInfo(domainUid));
processor.updateDomainStatus(pod, info);
}
}

Expand Down Expand Up @@ -243,14 +244,14 @@ private void addClusterList(ClusterList list) {
}

private void addCluster(ClusterResource cluster) {
ClusterPresenceInfo cachedInfo = getClusterPresenceInfoMap().get(cluster.getClusterName());
ClusterPresenceInfo cachedInfo = getClusterPresenceInfoMap().get(getClusterName(cluster));
if (cachedInfo == null) {
newClusterNames.add(cluster.getClusterName());
newClusterNames.add(getClusterName(cluster));
} else if (cluster.isGenerationChanged(cachedInfo.getCluster())) {
modifiedClusterNames.add(cluster.getClusterName());
modifiedClusterNames.add(getClusterName(cluster));
}

getClusterPresenceInfoMap().put(cluster.getClusterName(), new ClusterPresenceInfo(cluster));
getClusterPresenceInfoMap().put(getClusterName(cluster), new ClusterPresenceInfo(cluster));
}

private Stream<DomainPresenceInfo> getStrandedDomainPresenceInfos(DomainProcessor dp) {
Expand Down Expand Up @@ -305,18 +306,22 @@ private EventItem getEventItem(DomainPresenceInfo info) {
}

private EventItem getEventItem(ClusterResource cluster) {
if (newClusterNames.contains(cluster.getClusterName()) || cluster.getStatus() == null) {
if (newClusterNames.contains(getClusterName(cluster)) || cluster.getStatus() == null) {
return CLUSTER_CREATED;
}
if (modifiedClusterNames.contains(cluster.getClusterName())) {
if (modifiedClusterNames.contains(getClusterName(cluster))) {
return CLUSTER_CHANGED;
}
return null;
}

/**
 * Returns the name used to key and look up a cluster resource: the name held in
 * the resource's metadata.
 *
 * @param cluster the cluster resource
 * @return the metadata name of the cluster resource
 */
private String getClusterName(ClusterResource cluster) {
  final String metadataName = cluster.getMetadata().getName();
  return metadataName;
}

private void updateCluster(DomainProcessor dp, ClusterResource cluster, EventItem eventItem) {
List<DomainPresenceInfo> list =
dp.getExistingDomainPresenceInfoForCluster(cluster.getNamespace(), cluster.getClusterName());
dp.getExistingDomainPresenceInfoForCluster(cluster.getNamespace(), getClusterName(cluster));
if (list.isEmpty()) {
createAndExecuteMakeRightOperation(dp, cluster, eventItem, null);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ public class DomainPresenceInfo extends ResourcePresenceInfo {
private String adminServerName;

private final List<String> validationWarnings = Collections.synchronizedList(new ArrayList<>());
private final List<String> serverNamesFromPodList = Collections.synchronizedList(new ArrayList<>());
private Map<String, Step.StepAndPacket> serversToRoll = Collections.emptyMap();

/**
Expand Down Expand Up @@ -850,6 +851,36 @@ public void addValidationWarning(String validationWarning) {
validationWarnings.add(validationWarning);
}

/**
 * Returns the names recorded from pod List operations since the list was last cleared.
 * The returned list is the live backing list, not a copy.
 *
 * @return the recorded names
 */
public List<String> getServerNamesFromPodList() {
  return this.serverNamesFromPodList;
}

/**
 * Records a batch of names obtained from a pod List operation.
 *
 * @param podNames names to record. NOTE(review): the visible callers pass server names
 *     derived from the pods rather than pod names; confirm the intended naming.
 */
public void addServerNamesFromPodList(Collection<String> podNames) {
  this.serverNamesFromPodList.addAll(podNames);
}

/**
 * Records a single name obtained from a pod List operation.
 *
 * @param podName name to record. NOTE(review): the visible caller passes the server name
 *     derived from the pod rather than the pod name; confirm the intended naming.
 */
public void addServerNameFromPodList(String podName) {
  this.serverNamesFromPodList.add(podName);
}

/**
 * Discards all names recorded from pod List operations.
 */
public void clearServerPodNamesFromList() {
  this.serverNamesFromPodList.clear();
}

/**
* Returns the names of the servers which are supposed to be running.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
package oracle.kubernetes.operator.makeright;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
Expand Down Expand Up @@ -480,11 +479,8 @@ public Consumer<V1PodList> getPodListProcessing() {
}

private void processList(V1PodList list) {
Collection<String> serverNamesFromPodList = list.getItems().stream()
.map(PodHelper::getPodServerName).collect(Collectors.toList());

info.getServerNames().stream().filter(s -> !serverNamesFromPodList.contains(s)).collect(Collectors.toList())
.forEach(name -> info.deleteServerPodFromEvent(name, null));
info.addServerNamesFromPodList(list.getItems().stream()
.map(PodHelper::getPodServerName).collect(Collectors.toList()));
list.getItems().forEach(this::addPod);
}

Expand All @@ -510,6 +506,14 @@ public Consumer<V1PodDisruptionBudgetList> getPodDisruptionBudgetListProcessing(
private void addPodDisruptionBudget(V1PodDisruptionBudget pdb) {
PodDisruptionBudgetHelper.addToPresence(info, pdb);
}

/**
 * Runs once list processing is finished: removes from the presence info every server
 * whose name was not recorded from the pod list (reporting it as deleted with a
 * {@code null} pod), then clears the recorded names.
 *
 * @param packet the packet supplied by the caller; not used here
 */
@Override
public void completeProcessing(Packet packet) {
  // Snapshot the recorded names into a hash set once: calling contains() on the
  // synchronized list for every server is a linear, lock-acquiring scan each time.
  // Iterating a synchronized list must be guarded manually, hence the explicit monitor.
  final java.util.Set<String> listedNames;
  synchronized (info.getServerNamesFromPodList()) {
    listedNames = new java.util.HashSet<>(info.getServerNamesFromPodList());
  }

  // Collect the names first so that deleting does not happen while the
  // server-name collection is still being streamed.
  info.getServerNames().stream()
      .filter(name -> !listedNames.contains(name))
      .collect(Collectors.toList())
      .forEach(name -> info.deleteServerPodFromEvent(name, null));
  info.clearServerPodNamesFromList();
}
};

return executor.createNamespacedResourceSteps(processor, info, delegate.getDomainNamespaces());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
import static oracle.kubernetes.operator.helpers.EventHelper.EventItem.DOMAIN_CREATED;
import static oracle.kubernetes.operator.helpers.KubernetesTestSupport.CLUSTER;
import static oracle.kubernetes.operator.helpers.KubernetesTestSupport.DOMAIN;
import static oracle.kubernetes.operator.tuning.TuningParameters.DEFAULT_CALL_LIMIT;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasKey;
Expand All @@ -81,6 +82,9 @@ class DomainPresenceTest extends ThreadFactoryTestBase {
// Call builder tuning
public static final int CALL_REQUEST_LIMIT = 10;
private static final int LAST_DOMAIN_NUM = 2 * CALL_REQUEST_LIMIT - 1;
/** More than one chunk's worth of pods. */
private static final int MULTICHUNK_LAST_POD_NUM = 2 * DEFAULT_CALL_LIMIT - 1;

public static final String CLUSTER_1 = "cluster1";
public static final String CLUSTER_2 = "cluster2";
public static final String CLUSTER_3 = "cluster3";
Expand Down Expand Up @@ -432,6 +436,23 @@ void whenK8sHasOneDomainWithPod_recordPodPresence() {
assertThat(getDomainPresenceInfo(dp, UID1).getServerPod("admin"), equalTo(pod));
}

// Verifies that server pods are recorded in the domain presence info when the number of
// pods is MULTICHUNK_LAST_POD_NUM (2 * DEFAULT_CALL_LIMIT - 1) managed servers plus an admin pod.
@Test
void whenK8sDomainWithMoreThanCallRequestLimitNumberOfPods_recordPodsPresence() {
// Arrange: one domain, an admin pod, and managed-server pods numbered 1..MULTICHUNK_LAST_POD_NUM.
addDomainResource(UID1, NS);
V1Pod pod = createPodResource(UID1, NS, "admin");
testSupport.defineResources(pod);
createPodResources(UID1, NS, MULTICHUNK_LAST_POD_NUM);

// Pre-register the presence info for the domain with the processor.
dp.domains.computeIfAbsent(NS, k -> new ConcurrentHashMap<>()).put(UID1, info);

// Act: read the existing resources of the namespace.
testSupport.addComponent("DP", DomainProcessor.class, dp);
testSupport.runSteps(domainNamespaces.readExistingResources(NS, dp));

// Assert: both the first and the last managed-server pod are recorded.
assertThat(getDomainPresenceInfo(dp, UID1).getServerPod("managed-server1"), notNullValue());
assertThat(getDomainPresenceInfo(dp, UID1).getServerPod("managed-server" + MULTICHUNK_LAST_POD_NUM),
notNullValue());
}

@Test
void whenK8sHasOneDomainWithPodButMissingInfo_dontRecordPodPresence() {
addDomainResource(UID1, NS);
Expand Down Expand Up @@ -479,6 +500,14 @@ private void addPodResource(String uid, String namespace, String serverName) {
testSupport.defineResources(createPodResource(uid, namespace, serverName));
}

/**
 * Defines pod resources named "managed-server1" through "managed-server{lastPodNum}"
 * for the given domain UID and namespace.
 */
private void createPodResources(String uid, String namespace, int lastPodNum) {
  for (int podNum = 1; podNum <= lastPodNum; podNum++) {
    final String serverName = "managed-server" + podNum;
    testSupport.defineResources(createPodResource(uid, namespace, serverName));
  }
}

@Test
void whenK8sHasOneDomainWithOtherEvent_ignoreIt() {
addDomainResource(UID1, NS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,22 @@ void whenClusterChanged_generateClusterChangedEvent() {
assertThat(getEventsForSeason(CLUSTER_CHANGED.getReason()), not(empty()));
}

// Verifies that a CLUSTER_CHANGED event is generated for a cluster resource built by
// createClusterWithDifferentMetadataAndSpecName when its generation differs from the
// one already registered with the processor.
@Test
void whenClusterResourceWithDifferentMetadataNameAndSpecNameChanged_generateClusterChangedEvent() {
// Arrange: register presence info for the first copy of the cluster resource.
ClusterStatus status = new ClusterStatus().withClusterName(CLUSTER4);
ClusterResource cluster1 = createClusterWithDifferentMetadataAndSpecName(CLUSTER4, NS).withStatus(status);
ClusterPresenceInfo info = new ClusterPresenceInfo(cluster1);
processor.registerClusterPresenceInfo(info);
// Define a second copy of the same resource with an explicit generation set on its metadata.
ClusterResource cluster2 = createClusterWithDifferentMetadataAndSpecName(CLUSTER4, NS).withStatus(status);
cluster2.getMetadata().setGeneration(1234L);
testSupport.defineResources(cluster2);

// Act: read the existing resources of the namespace.
testSupport.runSteps(domainNamespaces.readExistingResources(NS, processor));

// Assert: a CLUSTER_CHANGED event was produced.
assertThat(testSupport, hasEvent(CLUSTER_CHANGED.getReason()));
assertThat(getEventsForSeason(CLUSTER_CHANGED.getReason()), not(empty()));
}

private List<Object> getEventsForSeason(String reason) {
return testSupport.getResources(EVENT).stream()
.filter(e -> ((CoreV1Event)e).getReason().equals(reason)).collect(Collectors.toList());
Expand Down Expand Up @@ -1525,6 +1541,12 @@ private ClusterResource createClusterAlone(String clusterName, String ns) {
.spec(new ClusterSpec().withClusterName(clusterName));
}

/**
 * Builds a cluster resource whose metadata name is {@code clusterMetadataName} and whose
 * spec cluster name is that same value prefixed with "specClusterName-", so the two differ.
 */
private ClusterResource createClusterWithDifferentMetadataAndSpecName(String clusterMetadataName, String ns) {
  final V1ObjectMeta metadata = new V1ObjectMeta().name(clusterMetadataName).namespace(ns);
  final ClusterSpec spec = new ClusterSpec().withClusterName("specClusterName-" + clusterMetadataName);
  return new ClusterResource().withMetadata(metadata).spec(spec);
}

private V1Service createNonOperatorService() {
return new V1Service()
.metadata(
Expand Down

0 comments on commit 326ec06

Please sign in to comment.