-
Notifications
You must be signed in to change notification settings - Fork 16
Apply resources limits to wls 14.1.1.0 to solve the evicted pod issue. #103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1d8cda8
7dad213
5523347
d84b00c
4f37b4d
a399318
aee82e1
8cd232d
0b1e7c8
0082b26
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
# Copyright (c) 2021, Oracle Corporation and/or its affiliates. | ||
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. | ||
# This script runs on Azure Container Instance with Alpine Linux that Azure Deployment script creates. | ||
# | ||
# Temporary workaround for https://github.com/oracle/weblogic-kubernetes-operator/issues/2693 | ||
# env inputs: | ||
# AKS_CLUSTER_NAME | ||
# AKS_CLUSTER_RESOURCEGROUP_NAME | ||
# WLS_CLUSTER_NAME | ||
# WLS_DOMAIN_UID | ||
|
||
# Main script | ||
# Resolve the directory that holds this script so the helper scripts shipped
# alongside it can be sourced regardless of the current working directory.
script="${BASH_SOURCE[0]}"
scriptDir="$(cd "$(dirname "${script}")" && pwd)"
# Paths are quoted so a directory name containing spaces is not split into several words.
source "${scriptDir}/common.sh"
source "${scriptDir}/utility.sh"

# Quality of service reported in the script output; it stays "BestEffort"
# unless resource limits are applied further down.
qualityofService="BestEffort"
wlsDomainNS="${WLS_DOMAIN_UID}-ns"

echo_stdout "install kubectl"
install_kubectl

echo_stdout "Connect to AKS"
az aks get-credentials \
    --resource-group "${AKS_CLUSTER_RESOURCEGROUP_NAME}" \
    --name "${AKS_CLUSTER_NAME}" \
    --overwrite-existing
|
||
# we should not run the script in admin pod, as there is no admin pod for slim image. | ||
# Count the pods that belong to the WebLogic cluster.
podNum=$(kubectl -n "${wlsDomainNS}" get pod -l "weblogic.clusterName=${WLS_CLUSTER_NAME}" -o json |
    jq '.items | length')
# Default to 0 so that an empty result (kubectl or jq failed) is reported as
# "no pods" instead of breaking the numeric test and letting the script continue.
if [ "${podNum:-0}" -le 0 ]; then
    echo_stderr "Ensure your cluster has at least one pod."
    exit 1
fi

# Name of the first pod in the cluster; `jq -r` prints the raw string, so the
# JSON quotes do not have to be stripped afterwards.
podName=$(kubectl -n "${wlsDomainNS}" get pod -l "weblogic.clusterName=${WLS_CLUSTER_NAME}" -o json |
    jq -r '.items[0].metadata.name')
|
||
# run `source $ORACLE_HOME/wlserver/server/bin/setWLSEnv.sh > /dev/null 2>&1 && java weblogic.version` to get the version. | ||
# the command will print three lines, with WLS version in the first line. | ||
# use `grep "WebLogic Server"` to get the first line. | ||
|
||
# $ source $ORACLE_HOME/wlserver/server/bin/setWLSEnv.sh > /dev/null 2>&1 && java weblogic.version | ||
# WebLogic Server 12.2.1.4.0 Thu Sep 12 04:04:29 GMT 2019 1974621 | ||
# Use 'weblogic.version -verbose' to get subsystem information | ||
# Use 'weblogic.utils.Versions' to get version information for all modules | ||
# Run `java weblogic.version` inside the server container and keep only the
# line that carries the version. No -i/-t flags: the output is captured by a
# command substitution, so there is no terminal to attach, and a TTY would add
# carriage returns to the captured text.
# NOTE(review): wlsContainerName is not set in this script — presumably it comes from common.sh; confirm.
rawOutput=$(kubectl exec "${podName}" -n "${wlsDomainNS}" -c "${wlsContainerName}" \
    -- bash -c 'source $ORACLE_HOME/wlserver/server/bin/setWLSEnv.sh > /dev/null 2>&1 && java weblogic.version | grep "WebLogic Server"')

# get version from string like "WebLogic Server 12.2.1.4.0 Thu Sep 12 04:04:29 GMT 2019 1974621"
# Split on whitespace without pathname expansion; the third word is the version.
read -r -a stringArray <<<"${rawOutput}"
version=${stringArray[2]}
echo_stdout "WebLogic Server version: ${version}"
|
||
if [ "${version#*14.1.1.0}" != "$version" ]; then | ||
# Record the time before the domain is patched; it is passed to the restart wait below.
timestampBeforePatchingDomain=$(date +%s)
echo "timestampBeforePatchingDomain=${timestampBeforePatchingDomain}"

# We assume the customer created WebLogic Server using the offer or template,
# and specified the same resource requirements for the admin server and managed servers.
# `jq -r` prints raw strings, so no JSON quotes have to be stripped afterwards.
cpuRequest=$(kubectl get domain "${WLS_DOMAIN_UID}" -n "${wlsDomainNS}" -o json |
    jq -r '.spec.serverPod.resources.requests.cpu')
echo_stdout "Previous CPU request: ${cpuRequest}"

memoryRequest=$(kubectl get domain "${WLS_DOMAIN_UID}" -n "${wlsDomainNS}" -o json |
    jq -r '.spec.serverPod.resources.requests.memory')
echo_stdout "Previous memory request: ${memoryRequest}"

# Read the current restartVersion and increment it; the new value is written
# back to the domain as part of the patch.
restartVersion=$(kubectl -n "${wlsDomainNS}" get domain "${WLS_DOMAIN_UID}" -o json |
    jq -r '.spec.restartVersion')
restartVersion=$((restartVersion + 1))
|
||
# Normalize the CPU request to millicores (units of "m").
if [[ "${cpuRequest}" == *m ]]; then
    # Already in millicores, e.g. "250m" -> 250.
    cpu="${cpuRequest%m}"
else
    # The value is in whole or fractional cores (e.g. "2" or "0.5"). Bash
    # arithmetic is integer-only and fails on "0.5", so convert with awk.
    cpu=$(awk -v cores="${cpuRequest}" 'BEGIN { printf "%.0f", cores * 1000 }')
fi
# Make sure there is enough CPU to run the WebLogic Server:
# if the value is less than 500m, raise it to 500m.
# The domain configuration will be output after the offer deployment finishes.
if [ "${cpu}" -lt 500 ]; then
    cpu=500
fi
|
||
# create patch configuration with YAML file | ||
# keep resources.limits the same as resources.requests | ||
cat <<EOF >patch-resource-limits.yaml | ||
spec: | ||
serverPod: | ||
resources: | ||
requests: | ||
cpu: "${cpu}m" | ||
memory: "${memoryRequest}" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I can tell, we are not modifying the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hello @edburns per my observations, 1.5Gi is enough, so will not increase the memory in this workaround. |
||
limits: | ||
cpu: "${cpu}m" | ||
memory: "${memoryRequest}" | ||
configuration: | ||
introspectorJobActiveDeadlineSeconds: ${constIntrospectorJobActiveDeadlineSeconds} | ||
restartVersion: "${restartVersion}" | ||
EOF | ||
# Log the generated patch document, then merge it into the domain resource.
echo_stdout "New resource configurations: "
echo_stdout $(<patch-resource-limits.yaml)
kubectl patch domain ${WLS_DOMAIN_UID} \
    --namespace ${wlsDomainNS} \
    --type=merge \
    --patch "$(<patch-resource-limits.yaml)"
|
||
# make sure all of the pods are running correctly. | ||
# Read the replica count of each entry under spec.clusters of the domain. | ||
replicas=$(kubectl -n ${wlsDomainNS} get domain ${WLS_DOMAIN_UID} -o json | | ||
jq '. | .spec.clusters[] | .replicas') | ||
# Pod provisioning will be slower here, so double both the maximum number of attempts and the interval. | ||
# NOTE(review): checkPodStatusMaxAttemps and checkPodStatusInterval are not set in this script - presumably they come from common.sh; confirm. | ||
maxAttemps=$((checkPodStatusMaxAttemps * 2)) | ||
interval=$((checkPodStatusInterval * 2)) | ||
|
||
# NOTE(review): helper is defined outside this script (presumably utility.sh); judging by its name and arguments it waits for the pods to restart after the recorded timestamp - confirm. | ||
utility_wait_for_pod_restarted \ | ||
${timestampBeforePatchingDomain} \ | ||
${replicas} \ | ||
"${WLS_DOMAIN_UID}" \ | ||
${maxAttemps} \ | ||
${interval} | ||
|
||
qualityofService="Guaranteed" | ||
fi | ||
|
||
# Output the WebLogic Server version and quality of service.
# jq builds the JSON object, so both values are escaped correctly.
result=$(jq -n -c \
    --arg wlsVersion "$version" \
    --arg qualityofService "$qualityofService" \
    '{wlsVersion: $wlsVersion, qualityofService: $qualityofService}')
echo "result is: $result"
# Quote the value and the path: unquoted, the JSON would go through word
# splitting and globbing, and a path containing spaces would break the redirect.
echo "$result" >"$AZ_SCRIPTS_OUTPUT_PATH"
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -276,6 +276,7 @@ var const_hasStorageAccount = !createAKSCluster && reference('query-existing-sto | |
var const_identityKeyStoreType = (sslConfigurationAccessOption == const_wlsSSLCertOptionKeyVault) ? sslKeyVaultCustomIdentityKeyStoreType : sslUploadedCustomIdentityKeyStoreType | ||
var const_keyvaultNameFromTag = const_hasTags && contains(resourceGroup().tags, name_tagNameForKeyVault) ? resourceGroup().tags.wlsKeyVault : '' | ||
var const_trustKeyStoreType = (sslConfigurationAccessOption == const_wlsSSLCertOptionKeyVault) ? sslKeyVaultCustomTrustKeyStoreType : sslUploadedCustomTrustKeyStoreType | ||
var const_wlsClusterName = 'cluster-1' | ||
var const_wlsJavaOptions = wlsJavaOption == '' ? 'null' : wlsJavaOption | ||
var const_wlsSSLCertOptionKeyVault = 'keyVaultStoredConfig' | ||
var name_defaultPidDeployment = 'pid' | ||
|
@@ -646,6 +647,29 @@ module datasourceDeployment 'modules/_setupDBConnection.bicep' = if (enableDB) { | |
] | ||
} | ||
|
||
/* | ||
* Temporary workaround for https://github.com/oracle/weblogic-kubernetes-operator/issues/2693 | ||
* Apply resource limits to WebLogic Server 14.1.1.0. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Include this in the comment.
|
||
* The script will check the WebLogic Server version, and apply resource limits to 14.1.1.0. | ||
* The resource limits will be the same as the requests. | ||
*/ | ||
module applyGuaranteedQos 'modules/_deployment-scripts/_ds-apply-guaranteed-qos.bicep' = { | ||
name: 'apply-resources-limits-to-wls14' | ||
params:{ | ||
_artifactsLocation: _artifactsLocation | ||
_artifactsLocationSasToken: _artifactsLocationSasToken | ||
// The AKS cluster name and resource group are taken from the outputs of ref_wlsDomainDeployment. | ||
aksClusterRGName: ref_wlsDomainDeployment.outputs.aksClusterRGName.value | ||
aksClusterName: ref_wlsDomainDeployment.outputs.aksClusterName.value | ||
identity: identity | ||
location: location | ||
wlsClusterName: const_wlsClusterName | ||
wlsDomainUID: wlsDomainUID | ||
} | ||
// Explicit ordering: this module is deployed only after datasourceDeployment. | ||
dependsOn: [ | ||
datasourceDeployment | ||
] | ||
} | ||
|
||
/* | ||
* Check that all the applications in the WLS cluster reach the ACTIVE state after all configurations are completed. | ||
* This should be the last step. | ||
|
@@ -664,7 +688,7 @@ module validateApplciations 'modules/_deployment-scripts/_ds-validate-applicatio | |
wlsUserName: wlsUserName | ||
} | ||
dependsOn: [ | ||
datasourceDeployment | ||
applyGuaranteedQos | ||
] | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Copyright (c) 2021, Oracle Corporation and/or its affiliates. | ||
// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. | ||
|
||
/* This script applies the Guaranteed QoS class by specifying resources.limits, | ||
* to solve the evicted-pod issue in Oracle WebLogic Server 14.1.1.0. | ||
* The script raises the CPU request and limit to 500m if the CPU request is less than 500m. | ||
*/ | ||
|
||
param _artifactsLocation string = deployment().properties.templateLink.uri | ||
@secure() | ||
param _artifactsLocationSasToken string = '' | ||
|
||
param aksClusterName string = '' | ||
param aksClusterRGName string = '' | ||
|
||
param identity object | ||
param location string | ||
param utcValue string = utcNow() | ||
param wlsClusterName string = 'cluster-1' | ||
param wlsDomainUID string = 'sample-domain1' | ||
|
||
var const_azcliVersion = '2.15.0' | ||
var const_constScript = 'common.sh' | ||
var const_deploymentName = 'ds-apply-guaranteed-qos' | ||
var const_scriptLocation = uri(_artifactsLocation, 'scripts/') | ||
var const_updateQosScript = 'applyGuaranteedQos.sh' | ||
var const_utilityScript = 'utility.sh' | ||
|
||
// Deployment script resource that runs applyGuaranteedQos.sh with the Azure CLI. | ||
resource deploymentScript 'Microsoft.Resources/deploymentScripts@2020-10-01' = { | ||
name: const_deploymentName | ||
location: location | ||
kind: 'AzureCLI' | ||
identity: identity | ||
properties: { | ||
azCliVersion: const_azcliVersion | ||
// Inputs handed to the script as environment variables. | ||
environmentVariables: [ | ||
{ | ||
name: 'AKS_CLUSTER_NAME' | ||
value: aksClusterName | ||
} | ||
{ | ||
name: 'AKS_CLUSTER_RESOURCEGROUP_NAME' | ||
value: aksClusterRGName | ||
} | ||
{ | ||
name: 'WLS_CLUSTER_NAME' | ||
value: wlsClusterName | ||
} | ||
{ | ||
name: 'WLS_DOMAIN_UID' | ||
value: wlsDomainUID | ||
} | ||
] | ||
// Script URIs are built from the scripts/ folder under _artifactsLocation, with the SAS token appended. | ||
primaryScriptUri: uri(const_scriptLocation, '${const_updateQosScript}${_artifactsLocationSasToken}') | ||
// Supporting scripts: common.sh and utility.sh. | ||
supportingScriptUris: [ | ||
uri(const_scriptLocation, '${const_constScript}${_artifactsLocationSasToken}') | ||
uri(const_scriptLocation, '${const_utilityScript}${_artifactsLocationSasToken}') | ||
] | ||
cleanupPreference: 'OnSuccess' | ||
retentionInterval: 'P1D' | ||
// utcValue defaults to utcNow(), so this tag has a new value on every deployment. | ||
forceUpdateTag: utcValue | ||
} | ||
} | ||
|
||
// Expose the values reported by the deployment script. They are read through the
// resource symbol rather than reference() by name, so Bicep resolves the resource
// ID and API version and tracks the dependency on the script resource.
output wlsVersion string = string(deploymentScript.properties.outputs.wlsVersion)
output qualityofService string = string(deploymentScript.properties.outputs.qualityofService)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please include this comment: