-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
When an OSD reaches OSD_NEARFULL state, we have to manually increase the PVC volume claim or manually increase the count of OSDs in the device set Added a script auto-grow-storage.sh which will i)automatically increase claim volume ii)automatically add number of OSDs Closes: #6101 Signed-off-by: parth-gr <paarora@redhat.com>
- Loading branch information
Showing
2 changed files
with
295 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,258 @@ | ||
#!/usr/bin/env bash | ||
|
||
############# | ||
# FUNCTIONS # | ||
############# | ||
|
||
function calculateSize() { | ||
local currentsize=$2 | ||
local unit=$1 | ||
rawsizeValue=0 # rawsizeValue is a global variable | ||
|
||
if [[ "$currentsize" == *"Mi" ]] | ||
then | ||
rawSize=$(echo "${currentsize}" | sed -e 's/Mi//g') # rawSize is a global variable | ||
unitSize="Mi" | ||
rawsizeValue=$rawSize | ||
elif [[ "$currentsize" == *"Gi" ]] | ||
then | ||
rawSize=$(echo "${currentsize}" | sed -e 's/Gi//g') | ||
unitSize="Gi" | ||
rawsizeValue=$(( rawSize * 1000 )) | ||
elif [[ "$currentsize" == *"Ti" ]] | ||
then | ||
rawSize=$(echo "${currentsize}" | sed -e 's/Ti//g') | ||
unitSize="Ti" | ||
rawsizeValue=$(( rawSize * 1000000 )) | ||
else | ||
echo "Unknown unit of $unit : ${currentsize}" | ||
echo "Supported units are 'Mi','Gi','Ti'" | ||
exit 1 | ||
fi | ||
} | ||
|
||
function compareSizes() { | ||
local newsize=$1 | ||
local maxsize=$2 | ||
calculateSize newsize "${newsize}" # rawsizeValue is calculated and used for further process | ||
local newsize=$rawsizeValue | ||
calculateSize maxsize "${maxsize}" | ||
local maxsize=$rawsizeValue | ||
if [ "${newsize}" -ge "${maxsize}" ] | ||
then | ||
return "1" | ||
fi | ||
return "0" | ||
} | ||
|
||
function growVertically() { | ||
local growRate=$1 | ||
local pvc=$2 | ||
local ns=$3 | ||
local maxSize=$4 | ||
local currentSize | ||
currentSize=$(kubectl get pvc "${pvc}" -n "${ns}" -o json | jq -r '.spec.resources.requests.storage') | ||
echo "PVC(OSD) current size is ${currentSize} and will be increased by ${growRate}%." | ||
|
||
calculateSize "${pvc}" "${currentSize}" # rawSize is calculated and used for further process | ||
|
||
if ! [[ "${rawSize}" =~ ^[0-9]+$ ]] | ||
then | ||
echo "disk size should be an integer" | ||
else | ||
newSize=$(echo "${rawSize}+(${rawSize} * ${growRate})/100" | bc | cut -f1 -d'.') | ||
if [ "${newSize}" = "${rawSize}" ] | ||
then | ||
newSize=$(( rawSize + 1 )) | ||
echo "New adjusted calculated size for the PVC is ${newSize}${unitSize}" | ||
else | ||
echo "New calculated size for the PVC is ${newSize}${unitSize}" | ||
fi | ||
|
||
compareSizes ${newSize}${unitSize} "${maxSize}" | ||
if [ "1" = $? ] | ||
then | ||
newSize=${maxSize} | ||
echo "Disk has reached it's MAX capacity ${maxSize}, add a new disk to it" | ||
result=$(kubectl patch pvc "${pvc}" -n "${ns}" --type json --patch "[{ "op": "replace", "path": "/spec/resources/requests/storage", "value": ${newSize} }]") | ||
else | ||
result=$(kubectl patch pvc "${pvc}" -n "${ns}" --type json --patch "[{ "op": "replace", "path": "/spec/resources/requests/storage", "value": ""${newSize}""${unitSize}"" }]") | ||
fi | ||
echo "${result}" | ||
fi | ||
} | ||
|
||
function growHorizontally() { | ||
local increaseOSDCount=$1 | ||
local pvc=$2 | ||
local ns=$3 | ||
local maxOSDCount=$4 | ||
local deviceSetName | ||
local cluster="" | ||
local deviceSet="" | ||
local currentOSDCount=0 | ||
local clusterCount=0 | ||
local deviceSetCount=0 | ||
deviceSetName=$(kubectl get pvc "${pvc}" -n "${ns}" -o json | jq -r '.metadata.labels."ceph.rook.io/DeviceSet"') | ||
while [ "$cluster" != "null" ] | ||
do | ||
cluster=$(kubectl get CephCluster -n "${ns}" -o json | jq -r ".items[${clusterCount}]") | ||
while [ "$deviceSet" != "null" ] | ||
do | ||
deviceSet=$(kubectl get CephCluster -n "${ns}" -o json | jq -r ".items[${clusterCount}].spec.storage.storageClassDeviceSets[${deviceSetCount}].name") | ||
if [[ $deviceSet == "${deviceSetName}" ]] | ||
then | ||
currentOSDCount=$(kubectl get CephCluster -n "${ns}" -o json | jq -r ".items[${clusterCount}].spec.storage.storageClassDeviceSets[${deviceSetCount}].count") | ||
finalCount=$(( "${currentOSDCount}" + "${increaseOSDCount}" )) | ||
echo "OSD count: ${currentOSDCount}. OSD count will be increased by ${increaseOSDCount}." | ||
if [ "${finalCount}" -ge "${maxOSDCount}" ] | ||
then | ||
finalCount=${maxOSDCount} | ||
echo "DeviceSet ${deviceSet} capacity is full, cannot add more OSD to it" | ||
fi | ||
echo "Total count of OSDs for deviceset ${deviceSetName} is set to ${finalCount}." | ||
clusterName=$(kubectl get CephCluster -n "${ns}" -o json | jq -r ".items[${clusterCount}].metadata.name" ) | ||
result=$(kubectl patch CephCluster "${clusterName}" -n "${ns}" --type json --patch "[{ "op": "replace", "path": "/spec/storage/storageClassDeviceSets/${deviceSetCount}/count", "value": ${finalCount} }]") | ||
echo "${result}" | ||
break | ||
fi | ||
deviceSetCount=$((deviceSetCount+1)) | ||
deviceSet=$(kubectl get CephCluster -n "${ns}" -o json | jq -r ".items[${clusterCount}].spec.storage.storageClassDeviceSets[${deviceSetCount}].name") | ||
done | ||
clusterCount=$((clusterCount+1)) | ||
cluster=$(kubectl get CephCluster -n "${ns}" -o json | jq -r ".items[${clusterCount}]") | ||
done | ||
} | ||
|
||
function growOSD(){ | ||
itr=0 | ||
alertmanagerroute=$(kubectl -n rook-ceph -o jsonpath={.status.hostIP} get pod prometheus-rook-prometheus-0) | ||
route=${alertmanagerroute}:30900 | ||
toolbox=$(kubectl get pods -n rook-ceph | grep -i rook-ceph-tools | awk '{ print $1 }') | ||
alerts=$(kubectl exec -it "${toolbox}" -n rook-ceph -- bash -c "curl -s http://${route}/api/v1/alerts") | ||
export total_alerts | ||
total_alerts=$( jq '.data.alerts | length' <<< "${alerts}") | ||
echo "Looping at $(date +"%Y-%m-%d %H:%M:%S")" | ||
|
||
while true | ||
do | ||
if [ "${total_alerts}" == "" ] | ||
then | ||
echo "Alert manager not configured,re-run the script" | ||
exit 1 | ||
fi | ||
export entry | ||
entry=$( jq ".data.alerts[$itr]" <<< "${alerts}") | ||
thename=$(echo "${entry}" | jq -r '.labels.alertname') | ||
if [ "${thename}" = "CephOSDNearFull" ] || [ "${thename}" = "CephOSDCriticallyFull" ] | ||
then | ||
echo "${entry}" | ||
ns=$(echo "${entry}" | jq -r '.labels.namespace') | ||
osdID=$(echo "${entry}" | jq -r '.labels.ceph_daemon') | ||
osdID=${osdID/./-} | ||
pvc=$(kubectl get deployment -n "${ns}" rook-ceph-"${osdID}" -o json | jq -r '.metadata.labels."ceph.rook.io/pvc"') | ||
if [[ $pvc == null ]] | ||
then | ||
echo "PVC not found, script can only run on PVC-based cluster" | ||
exit 1 | ||
fi | ||
echo "Processing NearFull or Full alert for PVC ${pvc} in namespace ${ns}" | ||
if [[ $1 == "count" ]] | ||
then | ||
growHorizontally "$2" "${pvc}" "${ns}" "$3" | ||
else | ||
growVertically "$2" "${pvc}" "${ns}" "$3" | ||
fi | ||
fi | ||
(( itr = itr + 1 )) | ||
if [[ "${itr}" == "${total_alerts}" ]] || [[ "${total_alerts}" == "0" ]] | ||
then | ||
sleep 600 | ||
alerts=$(kubectl exec -it "${toolbox}" -n rook-ceph -- bash -c "curl -s http://${route}/api/v1/alerts") | ||
total_alerts=$( jq '.data.alerts | length' <<< "${alerts}") | ||
itr=0 | ||
echo "Looping at $(date +"%Y-%m-%d %H:%M:%S")" | ||
fi | ||
done | ||
} | ||
|
||
function creatingPrerequisites(){ | ||
echo "creating Prerequisites deployments - Prometheus Operator and Prometheus Instances" | ||
# creating Prometheus operator | ||
kubectl apply -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml | ||
# waitng for Prometheus operator to get ready | ||
timeout 30 sh -c "until [ "$(kubectl get pod -l app.kubernetes.'io/name'=prometheus-operator -o json | jq -r '.items[0].status.phase')" = "Running" ]; do echo 'waiting for prometheus-operator to get created' && sleep 1; done" | ||
# creating a service monitor that will watch the Rook cluster and collect metrics regularly | ||
kubectl create -f https://raw.githubusercontent.com/rook/rook/master/cluster/examples/kubernetes/ceph/monitoring/service-monitor.yaml | ||
# create the PrometheusRule for Rook alerts. | ||
kubectl create -f https://raw.githubusercontent.com/rook/rook/master/cluster/examples/kubernetes/ceph/monitoring/prometheus-ceph-v14-rules.yaml | ||
# create prometheus-rook-prometheus-0 pod | ||
kubectl create -f https://raw.githubusercontent.com/rook/rook/master/cluster/examples/kubernetes/ceph/monitoring/prometheus.yaml | ||
# create prometheus-service | ||
kubectl create -f https://raw.githubusercontent.com/rook/rook/master/cluster/examples/kubernetes/ceph/monitoring/prometheus-service.yaml | ||
# waitng for prometheus-rook-prometheus-0 pod to get ready | ||
timeout 60 sh -c "until [ "$(kubectl get pod -l prometheus=rook-prometheus -nrook-ceph -o json | jq -r '.items[0].status.phase')" = "Running" ]; do echo 'waiting for prometheus-rook-prometheus-0 pod to get created' && sleep 1; done" | ||
echo "Prerequisites deployments created" | ||
} | ||
|
||
function invalidCall(){ | ||
echo " $0 [command] | ||
Available Commands for normal cluster: | ||
./auto-grow-storage.sh count --max maxCount --count rate Scale horizontally by adding more OSDs to the cluster | ||
./auto-grow-storage.sh size --max maxSize --growth-rate percent Scale vertically by increasing the size of existing OSDs | ||
" >&2 | ||
} | ||
|
||
case "${1:-}" in | ||
count) | ||
if [[ $# -ne 5 ]]; then | ||
echo "incorrect command to run the script" | ||
invalidCall | ||
exit 1 | ||
fi | ||
max=$3 | ||
count=$5 | ||
if ! [[ "${max}" =~ ^[0-9]+$ ]] | ||
then | ||
echo "maxCount should be an integer" | ||
invalidCall | ||
exit 1 | ||
fi | ||
if ! [[ "${count}" =~ ^[0-9]+$ ]] | ||
then | ||
echo "rate should be an integer" | ||
invalidCall | ||
exit 1 | ||
fi | ||
creatingPrerequisites | ||
echo "Adding on nearfull and full alert and number of OSD to add is ${count}" | ||
growOSD count "${count}" "${max}" | ||
;; | ||
size) | ||
if [[ $# -ne 5 ]]; then | ||
echo "incorrect command to run the script" | ||
invalidCall | ||
exit 1 | ||
fi | ||
max=$3 | ||
growRate=$5 | ||
if [[ "${max}" =~ ^[0-9]+$ ]] | ||
then | ||
echo "maxSize should be an string" | ||
invalidCall | ||
exit 1 | ||
fi | ||
if ! [[ "${growRate}" =~ ^[0-9]+$ ]] | ||
then | ||
echo "growth-rate should be an integer" | ||
invalidCall | ||
exit 1 | ||
fi | ||
creatingPrerequisites | ||
echo "Resizing on nearfull and full alert and Expansion percentage set to ${growRate}%" | ||
growOSD size "${growRate}" "${max}" | ||
;; | ||
*) | ||
invalidCall | ||
;; | ||
esac |