Skip to content

Commit

Permalink
ovn: fix corrupted database file on start (#3112)
Browse files: browse the repository at this point in the history
  • Loading branch information
zhangzujian committed Aug 7, 2023
1 parent 02f8c63 commit cd1202c
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 39 deletions.
4 changes: 3 additions & 1 deletion dist/images/Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ RUN cd /usr/src/ && \
# Add jitter parameter patch for netem qos
curl -s https://github.com/kubeovn/ovs/commit/2eaaf89fbf3ee2172719ed10d045fd79900edc8e.patch | git apply && \
# fix memory leak in qos
curl -s https://github.com/kubeovn/ovs/commit/6a4dd2f4b9311a227cc26fef7c398ae9b241311b.patch | git apply
curl -s https://github.com/kubeovn/ovs/commit/6a4dd2f4b9311a227cc26fef7c398ae9b241311b.patch | git apply && \
# ovsdb-tool: add command fix-cluster
curl -s https://github.com/kubeovn/ovs/commit/f52c239f5ded40b503e4d217f916b46ca413da4c.patch | git apply

RUN cd /usr/src/ && git clone -b branch-22.12 --depth=1 https://github.com/ovn-org/ovn.git && \
cd ovn && \
Expand Down
79 changes: 41 additions & 38 deletions dist/images/start-db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -141,54 +141,57 @@ function ovn_db_pre_start() {

local db_file="/etc/ovn/ovn${1}_db.db"
if [ -e "$db_file" ]; then
if ovsdb-tool db-is-clustered "$db_file"; then
if ovsdb-tool db-is-clustered "$db_file"; then
local msg=$(ovsdb-tool check-cluster "$db_file" 2>&1) || true
if echo $msg | grep -q 'has not joined the cluster'; then
local birth_time=$(stat --format=%W $db_file)
local now=$(date +%s)
if [ $(($now - $birth_time)) -ge 120 ]; then
echo "ovn db file $db_file exists for more than 120s, removing it..."
echo "ovn db file $db_file exists for more than 120s, remove it."
rm -f "$db_file" || return 1
fi
return
fi

if ! ovsdb-tool check-cluster "$db_file"; then
echo "detected database corruption for file $db_file, rebuild it."
local sid=$(ovsdb-tool db-sid "$db_file")
if ! echo -n "$sid" | grep -qE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then
echo "failed to get sid from $1 db file $db_file"
return 1
fi
echo "get local server id $sid"

eval port="\$${db_eval}_CLUSTER_PORT"
local local_addr="$(gen_conn_addr $DB_CLUSTER_ADDR $port)"
echo "local address: $local_addr"

local remote_addr=()
local node_ips=$(echo -n "${NODE_IPS}" | sed 's/,/ /g')
for node_ip in ${node_ips[*]}; do
if [ ! "$node_ip" = "$DB_CLUSTER_ADDR" ]; then
remote_addr=(${remote_addr[*]} "$(gen_conn_addr $node_ip $port)")
fi
done
echo "remote addresses: ${remote_addr[*]}"

local db_new="$db_file.init-$(date +%s)-$(random_str)"
echo "generating new database file $db_new"
if [ ${#remote_addr[*]} -ne 0 ]; then
ovsdb-tool --sid $sid join-cluster "$db_new" $db $local_addr ${remote_addr[*]} || return 1

local db_bak="$db_file.backup-$(date +%s)-$(random_str)"
echo "backup $db_file to $db_bak"
mv "$db_file" "$db_bak" || return 1

echo "use new database file $db_new"
mv "$db_new" "$db_file"
fi
fi
fi
if ! ovsdb-tool check-cluster "$db_file"; then
local db_bak="$db_file.backup-$(date +%s)-$(random_str)"
echo "backup $db_file to $db_bak"
cp "$db_file" "$db_bak" || return 1

echo "detected database corruption for file $db_file, try to fix it."
if ! ovsdb-tool fix-cluster "$db_file"; then
echo "failed to fix database file $db_file, rebuild it."
local sid=$(ovsdb-tool db-sid "$db_file")
if ! echo -n "$sid" | grep -qE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then
echo "failed to get sid from db file $db_file"
return 1
fi
echo "get local server id $sid"

eval port="\$${db_eval}_CLUSTER_PORT"
local local_addr="$(gen_conn_addr $DB_CLUSTER_ADDR $port)"
echo "local address: $local_addr"

local remote_addr=()
local node_ips=$(echo -n "${NODE_IPS}" | sed 's/,/ /g')
for node_ip in ${node_ips[*]}; do
if [ ! "$node_ip" = "$DB_CLUSTER_ADDR" ]; then
remote_addr=(${remote_addr[*]} "$(gen_conn_addr $node_ip $port)")
fi
done
echo "remote addresses: ${remote_addr[*]}"

local db_new="$db_file.init-$(date +%s)-$(random_str)"
echo "generating new database file $db_new"
if [ ${#remote_addr[*]} -ne 0 ]; then
ovsdb-tool --sid $sid join-cluster "$db_new" $db $local_addr ${remote_addr[*]} || return 1

echo "use new database file $db_new"
mv "$db_new" "$db_file"
fi
fi
fi
fi
fi

# create local config
Expand Down

0 comments on commit cd1202c

Please sign in to comment.