From 1420baa22e0e290e4388e3a29f2c88fdf4c41c07 Mon Sep 17 00:00:00 2001 From: DP Date: Tue, 7 Jun 2011 10:27:34 -0700 Subject: [PATCH] 0.8 update with right yaml --- resources/cassandra.yaml | 317 +++++++++++++++++++++++++++++---------- 1 file changed, 239 insertions(+), 78 deletions(-) diff --git a/resources/cassandra.yaml b/resources/cassandra.yaml index c8916c8..0f3fd72 100644 --- a/resources/cassandra.yaml +++ b/resources/cassandra.yaml @@ -1,8 +1,12 @@ -# Cassandra storage config YAML -# See http://wiki.apache.org/cassandra/StorageConfiguration for -# explanations of configuration directives. +# Cassandra storage config YAML -# name of the cluster +# NOTE: +# See http://wiki.apache.org/cassandra/StorageConfiguration for +# full explanations of configuration directives +# /NOTE + +# The name of the cluster. This is mainly used to prevent machines in +# one logical cluster from joining another. cluster_name: 'Solandra Cluster' # You should always specify InitialToken when setting up a production @@ -27,47 +31,124 @@ auto_bootstrap: false # See http://wiki.apache.org/cassandra/HintedHandoff hinted_handoff_enabled: true +# this defines the maximum amount of time a dead host will have hints +# generated. After it has been dead this long, hints will be dropped. +max_hint_window_in_ms: 3600000 # one hour +# Sleep this long after delivering each row or row fragment +hinted_handoff_throttle_delay_in_ms: 50 -# authentication backend, implementing IAuthenticator; used to limit keyspace access +# authentication backend, implementing IAuthenticator; used to identify users authenticator: org.apache.cassandra.auth.AllowAllAuthenticator -# any IPartitioner may be used, including your own as long as it is on -# the classpath. Out of the box, Cassandra provides -# org.apache.cassandra.dht.RandomPartitioner -# org.apache.cassandra.dht.OrderPreservingPartitioner, and -# org.apache.cassandra.dht.CollatingOrderPreservingPartitioner. 
-partitioner: lucandra.dht.RandomPartitioner -#partitioner: org.apache.cassandra.dht.RandomPartitioner +# authorization backend, implementing IAuthority; used to limit access/provide permissions +authority: org.apache.cassandra.auth.AllowAllAuthority + +# The partitioner is responsible for distributing rows (by key) across +# nodes in the cluster. Any IPartitioner may be used, including your +# own as long as it is on the classpath. Out of the box, Cassandra +# provides org.apache.cassandra.dht.RandomPartitioner, +# org.apache.cassandra.dht.ByteOrderedPartitioner, +# org.apache.cassandra.dht.OrderPreservingPartitioner (deprecated), +# and org.apache.cassandra.dht.CollatingOrderPreservingPartitioner +# (deprecated). +# +# - RandomPartitioner distributes rows across the cluster evenly by md5. +# When in doubt, this is the best option. +# - ByteOrderedPartitioner orders rows lexically by key bytes. BOP allows +# scanning rows in key order, but the ordering can generate hot spots +# for sequential insertion workloads. +# - OrderPreservingPartitioner is an obsolete form of BOP, that stores +# keys in a less-efficient format and only works with keys that are +# UTF8-encoded Strings. +# - CollatingOPP collates according to EN,US rules rather than lexical byte +# ordering. Use this as an example if you need custom collation. +# +# See http://wiki.apache.org/cassandra/Operations for more on +# partitioners and token selection. +partitioner: org.apache.cassandra.dht.RandomPartitioner # directories where Cassandra should store data on disk. data_file_directories: - /tmp/cassandra-data/data -# Addresses of hosts that are deemed contact points. -# Cassandra nodes use this list of hosts to find each other and learn -# the topology of the ring. You must change this if you are running -# multiple nodes! -seeds: - - 127.0.0.1 - -# Access mode. 
mmapped i/o is substantially faster, but only practical on -# a 64bit machine (which notably does not include EC2 "small" instances) -# or relatively small datasets. "auto", the safe choice, will enable -# mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only" -# (which may allow you to get part of the benefits of mmap on a 32bit -# machine by mmapping only index files) and "standard". -# (The buffer size settings that follow only apply to standard, -# non-mmapped i/o.) -disk_access_mode: auto - -# Unlike most systems, in Cassandra writes are faster than reads, so -# you can afford more of those in parallel. A good rule of thumb is 2 -# concurrent reads per processor core. Increase ConcurrentWrites to -# the number of clients writing at once if you enable CommitLogSync + -# CommitLogSyncDelay. --> +# commit log +commitlog_directory: /tmp/cassandra-data/commitlog + +# saved caches +saved_caches_directory: /tmp/cassandra-data/saved_caches + +# Size to allow commitlog to grow to before creating a new segment +commitlog_rotation_threshold_in_mb: 128 + +# commitlog_sync may be either "periodic" or "batch." +# When in batch mode, Cassandra won't ack writes until the commit log +# has been fsynced to disk. It will wait up to +# CommitLogSyncBatchWindowInMS milliseconds for other writes, before +# performing the sync. +commitlog_sync: periodic + +# the other option is "periodic" where writes may be acked immediately +# and the CommitLog is simply synced every commitlog_sync_period_in_ms +# milliseconds. +commitlog_sync_period_in_ms: 10000 + +# any class that implements the SeedProvider interface and has a constructor that takes a Map of +# parameters will do. +seed_provider: + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. You must change this if you are running + # multiple nodes! 
+ - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + # seeds is actually a comma-delimited list of addresses. + # Ex: "10.0.0.1,10.0.0.2,10.0.0.3" + - seeds: "127.0.0.1" + +# emergency pressure valve: each time heap usage after a full (CMS) +# garbage collection is above this fraction of the max, Cassandra will +# flush the largest memtables. +# +# Set to 1.0 to disable. Setting this lower than +# CMSInitiatingOccupancyFraction is not likely to be useful. +# +# RELYING ON THIS AS YOUR PRIMARY TUNING MECHANISM WILL WORK POORLY: +# it is most effective under light to moderate load, or read-heavy +# workloads; under truly massive write load, it will often be too +# little, too late. +flush_largest_memtables_at: 0.75 + +# emergency pressure valve #2: the first time heap usage after a full +# (CMS) garbage collection is above this fraction of the max, +# Cassandra will reduce cache maximum _capacity_ to the given fraction +# of the current _size_. Should usually be set substantially above +# flush_largest_memtables_at, since that will have less long-term +# impact on the system. +# +# Set to 1.0 to disable. Setting this lower than +# CMSInitiatingOccupancyFraction is not likely to be useful. +reduce_cache_sizes_at: 0.85 +reduce_cache_capacity_to: 0.6 + +# For workloads with more data than can fit in memory, Cassandra's +# bottleneck will be reads that need to fetch data from +# disk. "concurrent_reads" should be set to (16 * number_of_drives) in +# order to allow the operations to enqueue low enough in the stack +# that the OS and drives can reorder them. +# +# On the other hand, since writes are almost never IO bound, the ideal +# number of "concurrent_writes" is dependent on the number of cores in +# your system; (8 * number_of_cores) is a good rule of thumb. concurrent_reads: 32 concurrent_writes: 32 +# Total memory to use for memtables. Cassandra will flush the largest +# memtable when this much memory is used. 
Prefer using this to +# the older, per-ColumnFamily memtable flush thresholds. +# If omitted, Cassandra will set it to 1/3 of the heap. +# If set to 0, only the old flush thresholds are used. +# memtable_total_space_in_mb: 2048 + # This sets the amount of memtable flush writer threads. These will # be blocked by disk io, and each one will hold a memtable in memory # while blocked. If you have a large heap and many data directories, @@ -75,81 +156,143 @@ concurrent_writes: 32 # By default this will be set to the amount of data directories defined. #memtable_flush_writers: 1 -# Buffer size to use when performing contiguous column slices. +# the number of full memtables to allow pending flush, that is, +# waiting for a writer thread. At a minimum, this should be set to +# the maximum number of secondary indexes created on a single CF. +memtable_flush_queue_size: 4 + +# Buffer size to use when performing contiguous column slices. # Increase this to the size of the column slices you typically perform sliced_buffer_size_in_kb: 64 # TCP port, for commands and data storage_port: 7000 -# Address to bind to and tell other nodes to connect to. You _must_ -# change this if you want multiple nodes to be able to communicate! -listen_address: 127.0.0.1 - -# The address to bind the Thrift RPC service to +# Address to bind to and tell other Cassandra nodes to connect to. You +# _must_ change this if you want multiple nodes to be able to +# communicate! +# +# Leaving it blank leaves it up to InetAddress.getLocalHost(). This +# will always do the Right Thing *if* the node is properly configured +# (hostname, name resolution, etc), and the Right Thing is to use the +# address associated with the hostname (it might not be). +# +# Setting this to 0.0.0.0 is always wrong. +listen_address: localhost + +# The address to bind the Thrift RPC service to -- clients connect +# here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if +# you want Thrift to listen on all interfaces. 
+# +# Leaving this blank has the same effect it does for ListenAddress, +# (i.e. it will be based on the configured hostname of the node). rpc_address: localhost -# port for Thrift to listen on +# port for Thrift to listen for clients on rpc_port: 9160 +# enable or disable keepalive on rpc connections +rpc_keepalive: true + +# Cassandra uses thread-per-client for client RPC. This can +# be expensive in memory used for thread stack for a large +# enough number of clients. (Hence, connection pooling is +# very, very strongly recommended.) +# +# Uncomment rpc_min|max|thread to set request pool size. +# You would primarily set max as a safeguard against misbehaved +# clients; if you do hit the max, Cassandra will block until +# one disconnects before accepting more. The defaults are +# min of 16 and max unlimited. +# +# rpc_min_threads: 16 +# rpc_max_threads: 2048 + +# uncomment to set socket buffer sizes on rpc connections +# rpc_send_buff_size_in_bytes: +# rpc_recv_buff_size_in_bytes: + # Frame size for thrift (maximum field length). -# 0 disables TFramedTransport in favor of TSocket. +# 0 disables TFramedTransport in favor of TSocket. This option +# is deprecated; we strongly recommend using Framed mode. thrift_framed_transport_size_in_mb: 15 # The max length of a thrift message, including all fields and # internal thrift overhead. thrift_max_message_length_in_mb: 16 +# Set to true to have Cassandra create a hard link to each sstable +# flushed or streamed locally in a backups/ subdirectory of the +# Keyspace data. Removing these links is the operator's +# responsibility. +incremental_backups: false + # Whether or not to take a snapshot before each compaction. Be # careful using this option, since Cassandra won't clean up the # snapshots for you. Mostly useful if you're paranoid when there # is a data format change. snapshot_before_compaction: false -# The threshold size in megabytes the binary memtable must grow to, -# before it's submitted for flushing to disk. 
-binary_memtable_throughput_in_mb: 256 - +# change this to increase the compaction thread's priority. In Java, 1 is the +# lowest priority and that is our default. +# compaction_thread_priority: 1 + +# Add column indexes to a row after its contents reach this size. +# Increase if your column values are large, or if you have a very large +# number of columns. The competing causes are, Cassandra has to +# deserialize this much of the row to read a single column, so you want +# it to be small - at least if you do many partial-row reads - but all +# the index data is read for each access, so you don't want to generate +# that wastefully either. column_index_size_in_kb: 64 -in_memory_compaction_limit_in_mb: 128 - -# commit log -commitlog_directory: /tmp/cassandra-data/commitlog - -# saved caches -saved_caches_directory: /tmp/cassandra-data/saved_caches - -# Size to allow commitlog to grow to before creating a new segment -commitlog_rotation_threshold_in_mb: 128 - -# commitlog_sync may be either "periodic" or "batch." -# When in batch mode, Cassandra won't ack writes until the commit log -# has been fsynced to disk. It will wait up to -# CommitLogSyncBatchWindowInMS milliseconds for other writes, before -# performing the sync. -commitlog_sync: periodic -# the other option is "timed," where writes may be acked immediately -# and the CommitLog is simply synced every commitlog_sync_period_in_ms -# milliseconds. -commitlog_sync_period_in_ms: 10000 - -# Time to wait for a reply from other nodes before failing the command +# Size limit for rows being compacted in memory. Larger rows will spill +# over to disk and use a slower two-pass compaction process. A message +# will be logged specifying the row key. +in_memory_compaction_limit_in_mb: 64 + +# Number of compaction threads. This defaults to the number of processors, +# enabling multiple compactions to execute at once. 
Using more than one +# thread is highly recommended to preserve read performance in a mixed +# read/write workload as this prevents sstables from accumulating during long +# running compactions. The default is usually fine and if you experience +# problems with compaction running too slowly or too fast, you should look at +# compaction_throughput_mb_per_sec first. +# Uncomment to make compaction mono-threaded. +#concurrent_compactors: 1 + +# Throttles compaction to the given total throughput across the entire +# system. The faster you insert data, the faster you need to compact in +# order to keep the sstable count down, but in general, setting this to +# 16 to 32 times the rate you are inserting data is more than sufficient. +# Setting this to 0 disables throttling. +compaction_throughput_mb_per_sec: 16 + +# Track cached row keys during compaction, and re-cache their new +# positions in the compacted sstable. Disable if you use really large +# key caches. +compaction_preheat_key_cache: true + +# Time to wait for a reply from other nodes before failing the command rpc_timeout_in_ms: 10000 # phi value that must be reached for a host to be marked down. # most users should never need to adjust this. # phi_convict_threshold: 8 -# time to wait before garbage collecting tombstones (deletion markers) -#gc_grace_seconds: 864000 - # endpoint_snitch -- Set this to a class that implements # IEndpointSnitch, which will let Cassandra know enough # about your network topology to route requests efficiently. # Out of the box, Cassandra provides -# org.apache.cassandra.locator.SimpleSnitch, -# org.apache.cassandra.locator.RackInferringSnitch, and -# org.apache.cassandra.locator.PropertyFileSnitch. +# - org.apache.cassandra.locator.SimpleSnitch: +# Treats Strategy order as proximity. This improves cache locality +# when disabling read repair, which can further improve throughput. 
+# - org.apache.cassandra.locator.RackInferringSnitch: +# Proximity is determined by rack and data center, which are +# assumed to correspond to the 3rd and 2nd octet of each node's +# IP address, respectively. +# - org.apache.cassandra.locator.PropertyFileSnitch: +# Proximity is determined by rack and data center, which are +# explicitly configured in cassandra-topology.properties. endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch # dynamic_snitch -- This boolean controls whether the above snitch is @@ -159,7 +302,7 @@ endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch dynamic_snitch: true # controls how often to perform the more expensive part of host score # calculation -dynamic_snitch_update_interval_in_ms: 100 +dynamic_snitch_update_interval_in_ms: 100 # controls how often to reset all host scores, allowing a bad host to # possibly recover dynamic_snitch_reset_interval_in_ms: 600000 @@ -167,7 +310,9 @@ dynamic_snitch_reset_interval_in_ms: 600000 # 'pinning' of replicas to hosts in order to increase cache capacity. # The badness threshold will control how much worse the pinned host has to be # before the dynamic snitch will prefer other replicas over it. This is -# expressed as a double which represents a percentage. +# expressed as a double which represents a percentage. Thus, a value of +# 0.2 means Cassandra would continue to prefer the static snitch values +# until the pinned host was 20% worse than the fastest. dynamic_snitch_badness_threshold: 0.0 # request_scheduler -- Set this to a class that implements @@ -187,7 +332,7 @@ request_scheduler: org.apache.cassandra.scheduler.NoScheduler # NoScheduler - Has no options # RoundRobin # - throttle_limit -- The throttle_limit is the number of in-flight -# requests per client. Requests beyond +# requests per client. Requests beyond # that limit are queued up until # running requests can complete. 
# The value of 80 here is twice the number of @@ -215,3 +360,19 @@ request_scheduler: org.apache.cassandra.scheduler.NoScheduler # the index is at the cost of space. index_interval: 128 +# Enable or disable inter-node encryption +# Default settings are TLS v1, RSA 1024-bit keys (it is imperative that +# users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher +# suite for authentication, key exchange and encryption of the actual data transfers. +# NOTE: No custom encryption options are enabled at the moment +# The available internode options are : all, none +# +# The passwords used in these options must match the passwords used when generating +# the keystore and truststore. For instructions on generating these files, see: +# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +encryption_options: + internode_encryption: none + keystore: conf/.keystore + keystore_password: cassandra + truststore: conf/.truststore + truststore_password: cassandra