Skip to content

Commit

Permalink
Adding auto-deletion of dynamic cluster nodes and small refactoring
Browse files Browse the repository at this point in the history
- New kernel parameter "ClusterAutodeleteInterval" to specify, when a
  dynamic cluster node is not regarded as temporarily unavailable but
  as definitely gone. The default value for this parameter is 2m (2
  minutes).
- small refactoring to reduce duplicated logic

- Bumped version numbers:
  * acs-kernel to 5.10.1b4
  * acs-tcl    to 5.10.1b3
  * acs-admin  to 5.10.1b3
  • Loading branch information
gustafn committed Dec 5, 2023
1 parent 3facedd commit 1a7a765
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 32 deletions.
8 changes: 4 additions & 4 deletions packages/acs-admin/acs-admin.info
Expand Up @@ -9,7 +9,7 @@
<implements-subsite-p>f</implements-subsite-p>
<inherit-templates-p>t</inherit-templates-p>

<version name="5.10.1b2" url="http://openacs.org/repository/download/apm/acs-admin-5.10.1b2.apm">
<version name="5.10.1b3" url="http://openacs.org/repository/download/apm/acs-admin-5.10.1b3.apm">
<owner url="mailto:dhogaza@pacifier.com">Don Baccus</owner>
<summary>An interface for Site-wide administration of an OpenACS Installation.</summary>
<release-date>2023-02-08</release-date>
Expand All @@ -20,9 +20,9 @@
<license>GPL</license>
<maturity>3</maturity>

<provides url="acs-admin" version="5.10.1b2"/>
<requires url="acs-kernel" version="5.10.1b3"/>
<requires url="acs-tcl" version="5.10.1b1"/>
<provides url="acs-admin" version="5.10.1b3"/>
<requires url="acs-kernel" version="5.10.1b4"/>
<requires url="acs-tcl" version="5.10.1b3"/>
<requires url="acs-templating" version="5.10.1b1"/>
<requires url="acs-mail-lite" version="5.10.1b1"/>
<requires url="acs-authentication" version="5.10.1b1"/>
Expand Down
15 changes: 2 additions & 13 deletions packages/acs-admin/www/cluster.tcl
Expand Up @@ -11,24 +11,13 @@ set page_title "Cluster Management"
set context [list $page_title]

set server_cluster_enabled_p [server_cluster_enabled_p]
set dynamic_cluster_nodes [lsort [parameter::get -package_id $::acs::kernel_id -parameter DynamicClusterPeers]]

set dynamic_cluster_nodes [::acs::cluster dynamic_cluster_nodes]

if {$drop_node ne ""} {
#
# Drop the provided node from DynamicClusterPeers
#
set p [lsearch $dynamic_cluster_nodes $drop_node]
if {$p != -1} {
set cluster_nodes [lreplace $dynamic_cluster_nodes $p $p]
parameter::set_value \
-package_id $::acs::kernel_id \
-parameter DynamicClusterPeers \
-value $cluster_nodes
} else {
ns_log warning "cluster: provided node '$drop_node' is not in the" \
"dynamic cluster configuration: $dynamic_cluster_nodes"
}
acs::cluster drop_dynamic_node $drop_node
set done 1
} elseif {$flush_node ne ""} {
#
Expand Down
5 changes: 3 additions & 2 deletions packages/acs-kernel/acs-kernel.info
Expand Up @@ -9,15 +9,15 @@
<implements-subsite-p>f</implements-subsite-p>
<inherit-templates-p>t</inherit-templates-p>

<version name="5.10.1b3" url="http://openacs.org/repository/download/apm/acs-kernel-5.10.1b3.apm">
<version name="5.10.1b4" url="http://openacs.org/repository/download/apm/acs-kernel-5.10.1b4.apm">
<owner url="mailto:oct@openacs.org">OpenACS Core Team</owner>
<summary>Routines and data models providing the foundation for OpenACS-based Web services.</summary>
<release-date>2023-07-10</release-date>
<vendor url="http://openacs.org">OpenACS</vendor>
<description format="text/html">The OpenACS kernel contains the core datamodel create and drop scripts for such things as objects, groups, parties and the supporting PL/SQL and PL/pgSQL procedures.</description>
<maturity>3</maturity>

<provides url="acs-kernel" version="5.10.1b3"/>
<provides url="acs-kernel" version="5.10.1b4"/>

<callbacks>
</callbacks>
Expand All @@ -33,6 +33,7 @@
<parameter scope="instance" datatype="string" min_n_values="1" max_n_values="1" name="CanonicalServer" description="The canonical (primary) server, i.e. the server running the scheduled procedures etc. The provided value should be included in the list of ClusterPeerIP? If a port is not provided, we assume port 80." section_name="server-cluster"/>
<parameter scope="instance" datatype="string" min_n_values="1" max_n_values="1" name="ClusterAuthorizedIP" description="A space separated list of machines which can issue requests (e.g., flushing) to the cluster. Can use glob matching notation (10.0.0.*)" section_name="server-cluster"/>
<parameter scope="instance" datatype="string" min_n_values="1" max_n_values="1" name="ClusterHeartbeatInterval" default="20s" description="Time duration between checks for the liveliness of cluster nodes" section_name="server-cluster"/>
<parameter scope="instance" datatype="string" min_n_values="1" max_n_values="1" name="ClusterAutodeleteInterval" default="2m" description="When a peer node is inactive longer than this duration, delete it automatically" section_name="server-cluster"/>
<parameter scope="instance" datatype="string" min_n_values="1" max_n_values="1" name="ClusterEnabledP" default="0" description="is clustering enabled?" section_name="server-cluster"/>
<parameter scope="instance" datatype="string" min_n_values="1" max_n_values="1" name="ClusterPreferredLocationRegexp" default="https://" description="When servers listen on multiple endpoints (different protocols, ip addresses, IPv4/IPv6, ...) use the specified regexp to select the preferred option. When there is no match, the first optional value is used." section_name="server-cluster"/>
<parameter scope="instance" datatype="string" min_n_values="1" max_n_values="1" name="DynamicClusterPeers" description="Do not edit here. Used for bookkeeping." section_name="server-cluster"/>
Expand Down
6 changes: 3 additions & 3 deletions packages/acs-tcl/acs-tcl.info
Expand Up @@ -9,7 +9,7 @@
<implements-subsite-p>f</implements-subsite-p>
<inherit-templates-p>t</inherit-templates-p>

<version name="5.10.1b2" url="http://openacs.org/repository/download/apm/acs-tcl-5.10.1b2.apm">
<version name="5.10.1b3" url="http://openacs.org/repository/download/apm/acs-tcl-5.10.1b3.apm">
<owner url="http://openacs.org">OpenACS</owner>
<summary>The Kernel Tcl API library.</summary>
<release-date>2023-05-15</release-date>
Expand All @@ -18,9 +18,9 @@
<license>GPL version 2</license>
<maturity>3</maturity>

<provides url="acs-tcl" version="5.10.1b2"/>
<provides url="acs-tcl" version="5.10.1b3"/>
<requires url="acs-bootstrap-installer" version="5.10.1b1"/>
<requires url="acs-kernel" version="5.10.1b3"/>
<requires url="acs-kernel" version="5.10.1b4"/>

<callbacks>
</callbacks>
Expand Down
10 changes: 10 additions & 0 deletions packages/acs-tcl/tcl/cluster-init.tcl
Expand Up @@ -28,6 +28,7 @@ if {[server_cluster_enabled_p]} {
# Update the cluster info depending on the configured
# ClusterHeartbeatInterval to detect changed cluster
# configurations (maybe induced by missing reachability).
# This has to happen on all cluster nodes.
#
ad_schedule_proc -all_servers t \
[parameter::get \
Expand All @@ -36,6 +37,15 @@ if {[server_cluster_enabled_p]} {
-default 20s] \
::acs::cluster update_node_info

#
# Liveliness manager (running on the canonical server). It checks
# e.g. whether dynamic nodes should be deleted from the dynamic
# cluster nodes list automatically after some time of being not
# reachable.
#
ad_schedule_proc 5s ::acs::cluster check_state


#
# Setup of the listening URL
#
Expand Down
75 changes: 65 additions & 10 deletions packages/acs-tcl/tcl/cluster-procs.tcl
Expand Up @@ -300,6 +300,58 @@ namespace eval ::acs {
}
}

:public method dynamic_cluster_nodes {} {
    #
    # Return the value of the kernel parameter
    # DynamicClusterPeers, i.e. the list of the currently
    # registered dynamic cluster nodes.
    #
    set peers [parameter::get \
                   -parameter DynamicClusterPeers \
                   -package_id $::acs::kernel_id]
    return $peers
}

:public method drop_dynamic_node {node} {
    #
    # Drop the provided node from the kernel parameter
    # DynamicClusterPeers. When the node is not contained in
    # this list, the parameter is left untouched and a warning
    # is written to the log.
    #
    # @param node location of the node to be dropped, spelled
    #        exactly as in DynamicClusterPeers
    #
    set dynamic_cluster_nodes [:dynamic_cluster_nodes]
    #
    # Use "-exact", since the default matching mode of lsearch
    # is "-glob". With glob matching, a location containing
    # glob metacharacters (e.g. the brackets of an IPv6
    # literal such as https://[::1]:8443) would be treated as
    # a pattern and would either not match itself or match a
    # different node.
    #
    set p [lsearch -exact $dynamic_cluster_nodes $node]
    if {$p != -1} {
        set cluster_nodes [lreplace $dynamic_cluster_nodes $p $p]
        parameter::set_value \
            -package_id $::acs::kernel_id \
            -parameter DynamicClusterPeers \
            -value $cluster_nodes
    } else {
        ns_log warning "cluster: can't drop node '$node': not in the" \
            "dynamic cluster configuration: $dynamic_cluster_nodes"
    }
}

:public method check_state {} {
    #
    # Check the liveliness of the dynamic cluster nodes and
    # drop every node for which the last recorded contact is
    # older than the kernel parameter ClusterAutodeleteInterval
    # (default 2m). This method is intended to be run on the
    # canonical server only, since it might update the
    # DynamicClusterPeers via acs::clusterwide.
    #
    set autodeleteInterval [parameter::get \
                                -package_id $::acs::kernel_id \
                                -parameter ClusterAutodeleteInterval \
                                -default 2m]
    #
    # The threshold is the same for all nodes; convert the
    # duration (e.g. "2m") only once instead of per node.
    #
    set maxInactiveSeconds [ns_baseunit -time $autodeleteInterval]

    foreach node [:dynamic_cluster_nodes] {
        set last_contact [acs::cluster last_contact $node]
        if {$last_contact eq ""} {
            #
            # No contact recorded for this node, nothing to
            # compare against.
            #
            continue
        }
        #
        # The value is divided by 1000 before it is compared
        # with "clock seconds"; presumably last_contact is
        # reported in milliseconds (to be confirmed against
        # the last_contact method).
        #
        set seconds [expr {$last_contact/1000}]
        if {[clock seconds] - $seconds > $maxInactiveSeconds} {
            ns_log notice "[self] drop dynamic node $node due to ClusterAutodeleteInterval"
            :drop_dynamic_node $node
        }
    }
}

:public method update_node_info {} {
#
# Update cluster configuration when the
Expand All @@ -310,9 +362,7 @@ namespace eval ::acs {
# every couple of seconds when clustering is enabled.
#

set dynamic_peers [parameter::get \
-package_id $::acs::kernel_id \
-parameter DynamicClusterPeers]
set dynamic_peers [:dynamic_cluster_nodes]

if {!${:current_server_is_canonical_server}} {
#
Expand All @@ -335,7 +385,7 @@ namespace eval ::acs {
}
#
# Are we a dynamic peer and not listed in
# DynamicClusterPeers? This might happen in
# dynamic cluster nodes? This might happen in
# situations, where the canonical server was
# restarted (or separated for a while).
#
Expand Down Expand Up @@ -643,13 +693,16 @@ namespace eval ::acs {
# we know that the request is trustworthy.
#
ns_log notice "Cluster join_request $peerLocation accepted from $peerLocation"
set dynamicClusterNodes [parameter::get -package_id $::acs::kernel_id -parameter DynamicClusterPeers]
set dynamicClusterNodes [:dynamic_cluster_nodes]
set dynamicClusterNodes [lsort -unique [concat $dynamicClusterNodes [:qualified_location $peerLocation]]]
#
# The parameter::set_value operation causes a
# clusterwide cache-flush for the parameters
#
parameter::set_value -package_id $::acs::kernel_id -parameter DynamicClusterPeers -value $dynamicClusterNodes
parameter::set_value \
-package_id $::acs::kernel_id \
-parameter DynamicClusterPeers \
-value $dynamicClusterNodes
ns_log notice "[self] Cluster join_request leads to DynamicClusterPeers $dynamicClusterNodes"
}
return $success
Expand Down Expand Up @@ -708,10 +761,10 @@ namespace eval ::acs {
# Configure base configuration values
#
#
set dynamic_peers [parameter::get -package_id $::acs::kernel_id -parameter DynamicClusterPeers]
set dynamic_peers [:dynamic_cluster_nodes]

# At startup, when we are running on the canonical server,
# check, whether the existing DynamicClusterPeers are
# check, whether the existing dynamic cluster nodes are
# still reachable. When the canonical server is started
# before the other cluster nodes, this parameter should be
# empty. However, when the canonical server is restarted,
Expand Down Expand Up @@ -745,8 +798,10 @@ namespace eval ::acs {
# well.
#
:log "updating DynamicClusterPeers to $new_peer_locations"
parameter::set_value -package_id $::acs::kernel_id -parameter DynamicClusterPeers \
-value $new_peer_locations
parameter::set_value \
-package_id $::acs::kernel_id \
-parameter DynamicClusterPeers \
-value [lsort $new_peer_locations]
set dynamic_peers $new_peer_locations
}
}
Expand Down

0 comments on commit 1a7a765

Please sign in to comment.