Permalink
Browse files

Merge pull request #10 from kwarter-cookbooks/datastax_config

Datastax config
  • Loading branch information...
2 parents 298988e + c6ffc9a commit 572fa7663fac645a991448b9b812ce3675e79144 Michael Klishin committed Apr 2, 2013
Showing with 197 additions and 45 deletions.
  1. +10 −3 attributes/default.rb
  2. +11 −0 recipes/default.rb
  3. +107 −15 templates/default/cassandra-env.sh.erb
  4. +69 −27 templates/default/cassandra.yaml.erb
View
@@ -3,7 +3,6 @@
default[:cassandra] = {
:cluster_name => "Test Cluster",
:initial_token => "",
- :seeds => "127.0.0.1",
:version => cassandra_version,
:tarball => {
:url => "http://www.eu.apache.org/dist/cassandra/#{cassandra_version}/apache-cassandra-#{cassandra_version}-bin.tar.gz",
@@ -24,7 +23,15 @@
:conf_dir => "/etc/cassandra/",
# commit log, data directory, saved caches and so on are all stored under the data root. MK.
:data_root_dir => "/var/lib/cassandra/",
+ :commitlog_dir => "/var/lib/cassandra/",
:log_dir => "/var/log/cassandra/",
- :listen_address => "localhost",
- :rpc_address => "localhost"
+ :listen_address => node[:ipaddress],
+ :rpc_address => node[:ipaddress],
+ :max_heap_size => nil,
+ :heap_new_size => nil,
+ :vnodes => false,
+ :seeds => [],
+ :concurrent_reads => 32,
+ :concurrent_writes => 32,
+ :snitch => 'SimpleSnitch'
}
View
@@ -0,0 +1,11 @@
# Pulls in the cassandra::datastax recipe, then renders cassandra.yaml and
# cassandra-env.sh into the directory named by node["cassandra"]["conf_dir"].
+include_recipe "cassandra::datastax"
+
# Each rendered file is owned by node["cassandra"]["user"] (the same value is
# used for owner and group), gets mode 0644, and notifies the "cassandra"
# service to restart.
+%w(cassandra.yaml cassandra-env.sh).each do |f|
+ template File.join(node["cassandra"]["conf_dir"], f) do
# NOTE(review): the source path carries a "cassandra/" prefix, while the
# templates touched by this commit are listed directly under
# templates/default/ -- confirm that the prefixed path resolves.
+ source "cassandra/#{f}.erb"
+ owner node["cassandra"]["user"]
+ group node["cassandra"]["user"]
+ mode 0644
# resources(...) looks the service resource up by name; presumably it is
# declared by the cassandra::datastax recipe included above -- TODO confirm.
+ notifies :restart, resources(:service => "cassandra")
+ end
+end
@@ -14,6 +14,78 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# calculate_heap_sizes: sets MAX_HEAP_SIZE and HEAP_NEWSIZE from the memory
# size (in MB) and the CPU core count detected for the platform reported by
# `uname`. Takes no arguments; the results and all intermediate values are
# left in plain (non-local) shell variables.
#   MAX_HEAP_SIZE = max(min(ram/2, 1024), min(ram/4, 8192)) MB
#   HEAP_NEWSIZE  = min(100 * cores, max heap / 4) MB
# Platforms other than Linux, FreeBSD, SunOS and Darwin fall back to
# 2048 MB of memory and 2 cores.
+calculate_heap_sizes()
+{
+ case "`uname`" in
+ Linux)
+ system_memory_in_mb=`free -m | awk '/Mem:/ {print $2}'`
+ system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo`
+ ;;
+ FreeBSD)
+ system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'`
+ system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
+ system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
+ ;;
+ SunOS)
+ system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'`
+ system_cpu_cores=`psrinfo | wc -l`
+ ;;
+ Darwin)
+ system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'`
+ system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
+ system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
+ ;;
+ *)
+ # assume reasonable defaults for e.g. a modern desktop or
+ # cheap server
+ system_memory_in_mb="2048"
+ system_cpu_cores="2"
+ ;;
+ esac
+
# NOTE(review): -lt needs an integer operand; if core detection produced an
# empty string this test reports an error instead of applying the fallback
# of 1 -- confirm whether that case can occur on supported platforms.
+ # some systems like the raspberry pi don't report cores, use at least 1
+ if [ "$system_cpu_cores" -lt "1" ]
+ then
+ system_cpu_cores="1"
+ fi
+
+ # set max heap size based on the following
+ # max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
+ # calculate 1/2 ram and cap to 1024MB
+ # calculate 1/4 ram and cap to 8192MB
+ # pick the max
# The quarter value is obtained by halving the already-halved value, so both
# steps use expr's integer division.
+ half_system_memory_in_mb=`expr $system_memory_in_mb / 2`
+ quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2`
+ if [ "$half_system_memory_in_mb" -gt "1024" ]
+ then
+ half_system_memory_in_mb="1024"
+ fi
+ if [ "$quarter_system_memory_in_mb" -gt "8192" ]
+ then
+ quarter_system_memory_in_mb="8192"
+ fi
+ if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ]
+ then
+ max_heap_size_in_mb="$half_system_memory_in_mb"
+ else
+ max_heap_size_in_mb="$quarter_system_memory_in_mb"
+ fi
+ MAX_HEAP_SIZE="${max_heap_size_in_mb}M"
+
+ # Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size)
+ max_sensible_yg_per_core_in_mb="100"
+ max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores`
+
+ desired_yg_in_mb=`expr $max_heap_size_in_mb / 4`
+
# Use the smaller of the per-core ceiling and a quarter of the max heap.
+ if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ]
+ then
+ HEAP_NEWSIZE="${max_sensible_yg_in_mb}M"
+ else
+ HEAP_NEWSIZE="${desired_yg_in_mb}M"
+ fi
+}
+
# Determine the sort of JVM we'll be running on.
java_ver_output=`"${JAVA:-java}" -version 2>&1`
@@ -43,11 +115,11 @@ esac
# Override these to set the amount of memory to allocate to the JVM at
-# start-up. For production use you almost certainly want to adjust
-# this for your environment. MAX_HEAP_SIZE is the total amount of
-# memory dedicated to the Java heap; HEAP_NEWSIZE refers to the size
-# of the young generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should
-# be either set or not (if you set one, set the other).
+# start-up. For production use you may wish to adjust this for your
+# environment. MAX_HEAP_SIZE is the total amount of memory dedicated
+# to the Java heap; HEAP_NEWSIZE refers to the size of the young
+# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set
+# or not (if you set one, set the other).
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The smaller it is, the more
@@ -57,8 +129,22 @@ esac
# times. If in doubt, and if you do not particularly want to tweak, go with
# 100 MB per physical CPU core.
-MAX_HEAP_SIZE="256M"
-HEAP_NEWSIZE="128M"
# Template-time choice: explicit heap settings are written only when BOTH
# node[:cassandra][:max_heap_size] and node[:cassandra][:heap_new_size] are
# set; otherwise only the commented-out examples below are written.
+<% if node[:cassandra][:max_heap_size] && node[:cassandra][:heap_new_size] %>
+MAX_HEAP_SIZE="<%=node[:cassandra][:max_heap_size]%>"
+HEAP_NEWSIZE="<%=node[:cassandra][:heap_new_size]%>"
+<% else %>
+#MAX_HEAP_SIZE="4G"
+#HEAP_NEWSIZE="800M"
+<% end %>
+
# Run-time check: with neither variable set, derive both by calling
# calculate_heap_sizes; with exactly one of them set, print an error and
# exit with status 1.
+if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" ]; then
+ calculate_heap_sizes
+else
+ if [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" ]; then
+ echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs (see cassandra-env.sh)"
+ exit 1
+ fi
+fi
# Specifies the default port over which Cassandra will be available for
# JMX connections.
@@ -101,19 +187,15 @@ if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then
fi
-startswith () [ "${1#$2}" != "$1" ]
+startswith() { [ "${1#$2}" != "$1" ]; }
if [ "`uname`" = "Linux" ] ; then
# reduce the per-thread stack size to minimize the impact of Thrift
# thread-per-client. (Best practice is for client connections to
# be pooled anyway.) Only do so on Linux where it is known to be
# supported.
- if startswith "$JVM_VERSION" '1.7.'
- then
- JVM_OPTS="$JVM_OPTS -Xss160k"
- else
- JVM_OPTS="$JVM_OPTS -Xss128k"
- fi
+ # Java 6 u34 and later need a 180k thread stack size
+ JVM_OPTS="$JVM_OPTS -Xss180k"
fi
echo "xss = $JVM_OPTS"
@@ -125,6 +207,10 @@ JVM_OPTS="$JVM_OPTS -XX:SurvivorRatio=8"
JVM_OPTS="$JVM_OPTS -XX:MaxTenuringThreshold=1"
JVM_OPTS="$JVM_OPTS -XX:CMSInitiatingOccupancyFraction=75"
JVM_OPTS="$JVM_OPTS -XX:+UseCMSInitiatingOccupancyOnly"
+# note: this is a string comparison, and bash orders '1.7.x' after '1.7', so this is really a >= 1.7 JVM check
+if [ "$JVM_VERSION" \> "1.7" ] ; then
+ JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark"
+fi
# GC logging options -- uncomment to enable
# JVM_OPTS="$JVM_OPTS -XX:+PrintGCDetails"
@@ -135,6 +221,12 @@ JVM_OPTS="$JVM_OPTS -XX:+UseCMSInitiatingOccupancyOnly"
# JVM_OPTS="$JVM_OPTS -XX:+PrintPromotionFailure"
# JVM_OPTS="$JVM_OPTS -XX:PrintFLSStatistics=1"
# JVM_OPTS="$JVM_OPTS -Xloggc:/var/log/cassandra/gc-`date +%s`.log"
+# If you are using JDK 6u34, 7u2 or later, you can enable GC log rotation.
+# Don't put the date in the log file name if rotation is on.
+# JVM_OPTS="$JVM_OPTS -Xloggc:/var/log/cassandra/gc.log"
+# JVM_OPTS="$JVM_OPTS -XX:+UseGCLogFileRotation"
+# JVM_OPTS="$JVM_OPTS -XX:NumberOfGCLogFiles=10"
+# JVM_OPTS="$JVM_OPTS -XX:GCLogFileSize=10M"
# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414
# JVM_OPTS="$JVM_OPTS -Xdebug -Xnoagent -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=1414"
@@ -156,4 +248,4 @@ JVM_OPTS="$JVM_OPTS -Djava.net.preferIPv4Stack=true"
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.port=$JMX_PORT"
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=false"
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
-JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS"
+JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS"
Oops, something went wrong.

0 comments on commit 572fa76

Please sign in to comment.