Skip to content
This repository
Browse code

February 2011 Merge

* Upgrade discount to 1.6.8
* Start reading Votes, Saves, and Hides from Cassandra (while still dual-writing all three)
* Some XSS fixes
* Significant Gold buying and gifting improvements
  - Move /api/ipn to /ipn
* Allow non-US countries to buy sponsored links
* Increase embed.ly scope
* redd.it support
* Allow postgres port number to be specified in ini file (this changes the format of the .ini file)
* Upgrade Cassandra to 0.7
  - Change g.urlcache to LinksByURL
  - Translate storage-conf.xml to cassandra.yaml
  - TTL support (and enable on Hides)
  - Move permacache keyspace to inside reddit keyspace
* The stalecache: a local memcached that contains slightly old information to speed up some lookups
* Switch to patched Paste that is hopefully download.gz-proof
* Don't store votes on things > 30 days old
* Many many bugfixes/small features
  • Loading branch information...
commit 7fff900bbeba362b607821159f6419d7762c9957 1 parent c78f27b
David King authored February 23, 2011

Showing 153 changed files with 5,286 additions and 2,263 deletions. Show diff stats Hide diff stats

  1. 1  .gitignore
  2. 479  config/cassandra/cassandra.yaml
  3. 420  config/cassandra/storage-conf.xml
  4. 52  r2/example.ini
  5. 6  r2/r2/config/middleware.py
  6. 6  r2/r2/config/rewrites.py
  7. 10  r2/r2/config/routing.py
  8. 2  r2/r2/controllers/__init__.py
  9. 305  r2/r2/controllers/api.py
  10. 35  r2/r2/controllers/embed.py
  11. 2  r2/r2/controllers/error.py
  12. 2  r2/r2/controllers/errors.py
  13. 101  r2/r2/controllers/front.py
  14. 476  r2/r2/controllers/ipn.py
  15. 20  r2/r2/controllers/listingcontroller.py
  16. 2  r2/r2/controllers/post.py
  17. 29  r2/r2/controllers/promotecontroller.py
  18. 37  r2/r2/controllers/reddit_base.py
  19. 22  r2/r2/controllers/toolbar.py
  20. 89  r2/r2/controllers/validator/validator.py
  21. 6  r2/r2/lib/amqp.py
  22. 94  r2/r2/lib/app_globals.py
  23. 3  r2/r2/lib/authorize/api.py
  24. 17  r2/r2/lib/base.py
  25. 214  r2/r2/lib/cache.py
  26. 6  r2/r2/lib/comment_tree.py
  27. 15  r2/r2/lib/contrib/discount/Makefile
  28. 7  r2/r2/lib/contrib/discount/Makefile.in
  29. 2  r2/r2/lib/contrib/discount/Plan9/mkfile
  30. 2  r2/r2/lib/contrib/discount/VERSION
  31. 2  r2/r2/lib/contrib/discount/config.cmd
  32. 4  r2/r2/lib/contrib/discount/config.h
  33. 16  r2/r2/lib/contrib/discount/config.log
  34. 2  r2/r2/lib/contrib/discount/config.md
  35. 6  r2/r2/lib/contrib/discount/config.sub
  36. 4  r2/r2/lib/contrib/discount/cstring.h
  37. 1  r2/r2/lib/contrib/discount/dumptree.c
  38. 110  r2/r2/lib/contrib/discount/generate.c
  39. 24  r2/r2/lib/contrib/discount/html5.c
  40. 13  r2/r2/lib/contrib/discount/main.c
  41. 8  r2/r2/lib/contrib/discount/markdown.1
  42. 167  r2/r2/lib/contrib/discount/markdown.c
  43. 3  r2/r2/lib/contrib/discount/markdown.h
  44. 23  r2/r2/lib/contrib/discount/mkd-extensions.7
  45. 4  r2/r2/lib/contrib/discount/mkd-functions.3
  46. 4  r2/r2/lib/contrib/discount/mkdio.c
  47. 1  r2/r2/lib/contrib/discount/mkdio.h
  48. 2  r2/r2/lib/contrib/discount/resource.c
  49. 110  r2/r2/lib/contrib/discount/tags.c
  50. 18  r2/r2/lib/contrib/discount/tags.h
  51. 4  r2/r2/lib/contrib/discount/tests/code.t
  52. 2  r2/r2/lib/contrib/discount/tests/html.t
  53. 17  r2/r2/lib/contrib/discount/tests/html5.t
  54. 1  r2/r2/lib/contrib/discount/tests/linkylinky.t
  55. 29  r2/r2/lib/contrib/discount/tests/peculiarities.t
  56. 15  r2/r2/lib/contrib/discount/tests/strikethrough.t
  57. 3  r2/r2/lib/contrib/discount/theme.c
  58. 12  r2/r2/lib/count.py
  59. 13  r2/r2/lib/cssfilter.py
  60. 238  r2/r2/lib/db/queries.py
  61. 439  r2/r2/lib/db/tdb_cassandra.py
  62. 69  r2/r2/lib/db/thing.py
  63. 14  r2/r2/lib/emailer.py
  64. 3  r2/r2/lib/filters.py
  65. 39  r2/r2/lib/indextank.py
  66. 12  r2/r2/lib/jsontemplates.py
  67. 20  r2/r2/lib/lock.py
  68. 8  r2/r2/lib/manager/db_manager.py
  69. 10  r2/r2/lib/media.py
  70. 2  r2/r2/lib/menus.py
  71. 219  r2/r2/lib/migrate/migrate.py
  72. 69  r2/r2/lib/migrate/mr_permacache.py
  73. 51  r2/r2/lib/migrate/mr_urls.py
  74. 38  r2/r2/lib/mr_account.py
  75. 233  r2/r2/lib/mr_tools.py
  76. 2  r2/r2/lib/mr_tools/__init__.py
  77. 192  r2/r2/lib/mr_tools/_mr_tools.pyx
  78. 53  r2/r2/lib/mr_tools/mr_tools.py
  79. 6  r2/r2/lib/mr_top.py
  80. 5  r2/r2/lib/pages/graph.py
  81. 251  r2/r2/lib/pages/pages.py
  82. 4  r2/r2/lib/pages/things.py
  83. 7  r2/r2/lib/promote.py
  84. 2  r2/r2/lib/queues.py
  85. 665  r2/r2/lib/scraper.py
  86. 20  r2/r2/lib/sgm.pyx
  87. 4  r2/r2/lib/sr_pops.py
  88. 44  r2/r2/lib/strings.py
  89. 25  r2/r2/lib/tracking.py
  90. 10  r2/r2/lib/translation.py
  91. 1  r2/r2/lib/utils/thing_utils.py
  92. 169  r2/r2/lib/utils/utils.py
  93. 8  r2/r2/lib/wrapped.pyx
  94. 34  r2/r2/models/_builder.pyx
  95. 8  r2/r2/models/account.py
  96. 12  r2/r2/models/admintools.py
  97. 10  r2/r2/models/builder.py
  98. 54  r2/r2/models/gold.py
  99. 192  r2/r2/models/link.py
  100. 53  r2/r2/models/subreddit.py
  101. 185  r2/r2/models/vote.py
  102. BIN  r2/r2/public/static/bestof_award.png
  103. 24  r2/r2/public/static/css/mobile.css
  104. 177  r2/r2/public/static/css/reddit.css
  105. BIN  r2/r2/public/static/giftgold.png
  106. 52  r2/r2/public/static/gpgkey
  107. 15  r2/r2/public/static/iphone/index.html
  108. 78  r2/r2/public/static/js/reddit.js
  109. 5  r2/r2/public/static/js/sponsored.js
  110. BIN  r2/r2/public/static/nsfw2.png
  111. BIN  r2/r2/public/static/reddit404d.png
  112. BIN  r2/r2/public/static/reddit_gold-70.png
  113. 8  r2/r2/templates/adminerrorlog.html
  114. 18  r2/r2/templates/base.html
  115. 19  r2/r2/templates/comment.mobile
  116. 4  r2/r2/templates/createsubreddit.html
  117. 4  r2/r2/templates/dart_ad.html
  118. 4  r2/r2/templates/frametoolbar.html
  119. 21  r2/r2/templates/giftgold.html
  120. 180  r2/r2/templates/gold.html
  121. 99  r2/r2/templates/goldpayment.html
  122. 22  r2/r2/templates/link.mobile
  123. 4  r2/r2/templates/link.xml
  124. 0  r2/r2/templates/linkcommentsep.compact
  125. 0  r2/r2/templates/linkcommentsep.html
  126. 0  r2/r2/templates/linkcommentsep.htmllite
  127. 1  r2/r2/templates/linkcommentsep.mobile
  128. 0  r2/r2/templates/linkcommentsep.xml
  129. 8  r2/r2/templates/linkinfobar.html
  130. 8  r2/r2/templates/login.html
  131. 4  r2/r2/templates/loginformwide.html
  132. 4  r2/r2/templates/messagecompose.html
  133. 2  r2/r2/templates/paymentform.html
  134. 22  r2/r2/templates/prefoptions.html
  135. 1  r2/r2/templates/printable.html
  136. 8  r2/r2/templates/profilebar.html
  137. 2  r2/r2/templates/promote_graph.html
  138. 9  r2/r2/templates/promotelinkform.html
  139. 23  r2/r2/templates/reddit.html
  140. 6  r2/r2/templates/reddit.mobile
  141. 4  r2/r2/templates/redditfooter.html
  142. 2  r2/r2/templates/reddittraffic.html
  143. 2  r2/r2/templates/searchbar.html
  144. 51  r2/r2/templates/searchform.html
  145. 2  r2/r2/templates/sidebox.html
  146. 4  r2/r2/templates/subredditinfobar.html
  147. 17  r2/r2/templates/thanks.html
  148. 8  r2/r2/templates/usertext.compact
  149. 8  r2/r2/templates/usertext.html
  150. 2  r2/r2/templates/usertext.mobile
  151. 35  r2/r2/tests/functional/cassamodels.py
  152. 20  r2/setup.py
  153. 2  r2/updateini.py
1  .gitignore
@@ -35,6 +35,7 @@ r2/_builder.egg-info/
35 35
 r2/_normalized_hot.egg-info/
36 36
 r2/_sorts.egg-info/
37 37
 r2/r2/lib/_normalized_hot.c
  38
+r2/r2/lib/mr_tools/_mr_tools.c
38 39
 r2/r2/lib/db/_sorts.c
39 40
 r2/r2/lib/sgm.c
40 41
 r2/r2/lib/utils/_utils.c
479  config/cassandra/cassandra.yaml
... ...
@@ -0,0 +1,479 @@
  1
+# Cassandra storage config YAML 
  2
+
  3
+# NOTE:
  4
+#   See http://wiki.apache.org/cassandra/StorageConfiguration for
  5
+#   full explanations of configuration directives
  6
+# /NOTE
  7
+
  8
+# The name of the cluster. This is mainly used to prevent machines in
  9
+# one logical cluster from joining another.
  10
+cluster_name: 'reddit'
  11
+
  12
+# You should always specify InitialToken when setting up a production
  13
+# cluster for the first time, and often when adding capacity later.
  14
+# The principle is that each node should be given an equal slice of
  15
+# the token ring; see http://wiki.apache.org/cassandra/Operations
  16
+# for more details.
  17
+#
  18
+# If blank, Cassandra will request a token bisecting the range of
  19
+# the heaviest-loaded existing node.  If there is no load information
  20
+# available, such as is the case with a new cluster, it will pick
  21
+# a random token, which will lead to hot spots.
  22
+initial_token:
  23
+
  24
+# Set to true to make new [non-seed] nodes automatically migrate data
  25
+# to themselves from the pre-existing nodes in the cluster.  Defaults
  26
+# to false because you can only bootstrap N machines at a time from
  27
+# an existing cluster of N, so if you are bringing up a cluster of
  28
+# 10 machines with 3 seeds you would have to do it in stages.  Leaving
  29
+# this off for the initial start simplifies that.
  30
+auto_bootstrap: false
  31
+
  32
+# See http://wiki.apache.org/cassandra/HintedHandoff
  33
+hinted_handoff_enabled: true
  34
+
  35
+# authentication backend, implementing IAuthenticator; used to identify users
  36
+authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
  37
+
  38
+# authorization backend, implementing IAuthority; used to limit access/provide permissions
  39
+authority: org.apache.cassandra.auth.AllowAllAuthority
  40
+
  41
+# The partitioner is responsible for distributing rows (by key) across
  42
+# nodes in the cluster.  Any IPartitioner may be used, including your
  43
+# own as long as it is on the classpath.  Out of the box, Cassandra
  44
+# provides org.apache.cassandra.dht.RandomPartitioner,
  45
+# org.apache.cassandra.dht.ByteOrderedPartitioner,
  46
+# org.apache.cassandra.dht.OrderPreservingPartitioner (deprecated),
  47
+# and org.apache.cassandra.dht.CollatingOrderPreservingPartitioner
  48
+# (deprecated).
  49
+# 
  50
+# - RandomPartitioner distributes rows across the cluster evenly by md5.
  51
+#   When in doubt, this is the best option.
  52
+# - ByteOrderedPartitioner orders rows lexically by key bytes.  BOP allows
  53
+#   scanning rows in key order, but the ordering can generate hot spots
  54
+#   for sequential insertion workloads.
  55
+# - OrderPreservingPartitioner is an obsolete form of BOP, that stores
  56
+#   keys in a less-efficient format and only works with keys that are
  57
+#   UTF8-encoded Strings.
  58
+# - CollatingOPP collates according to EN,US rules rather than lexical byte
  59
+#   ordering.  Use this as an example if you need custom collation.
  60
+#
  61
+# See http://wiki.apache.org/cassandra/Operations for more on
  62
+# partitioners and token selection.
  63
+partitioner: org.apache.cassandra.dht.RandomPartitioner
  64
+
  65
+# directories where Cassandra should store data on disk.
  66
+data_file_directories:
  67
+    - /cassandra/data
  68
+
  69
+# commit log
  70
+commitlog_directory: /cassandra/commitlog
  71
+
  72
+# saved caches
  73
+saved_caches_directory: /cassandra/saved_caches
  74
+
  75
+# Size to allow commitlog to grow to before creating a new segment 
  76
+commitlog_rotation_threshold_in_mb: 128
  77
+
  78
+# commitlog_sync may be either "periodic" or "batch." 
  79
+# When in batch mode, Cassandra won't ack writes until the commit log
  80
+# has been fsynced to disk.  It will wait up to
  81
+# CommitLogSyncBatchWindowInMS milliseconds for other writes, before
  82
+# performing the sync.
  83
+commitlog_sync: periodic
  84
+
  85
+# the other option is "periodic," where writes may be acked immediately
  86
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
  87
+# milliseconds.
  88
+commitlog_sync_period_in_ms: 10000
  89
+
  90
+# Addresses of hosts that are deemed contact points.
  91
+# Cassandra nodes use this list of hosts to find each other and learn
  92
+# the topology of the ring.  You must change this if you are running
  93
+# multiple nodes!
  94
+seeds:
  95
+    - pmc01
  96
+    - pmc02
  97
+    - pmc03
  98
+    - pmc04
  99
+    - pmc05
  100
+    - pmc06
  101
+    - pmc07
  102
+    - pmc08
  103
+    - pmc09
  104
+    - pmc10
  105
+    - pmc11
  106
+    - pmc12
  107
+    - pmc13
  108
+    - pmc14
  109
+    - pmc15
  110
+    - pmc16
  111
+    - pmc17
  112
+    - pmc18
  113
+    - pmc19
  114
+    - pmc20
  115
+
  116
+# Access mode.  mmapped i/o is substantially faster, but only practical on
  117
+# a 64bit machine (which notably does not include EC2 "small" instances)
  118
+# or relatively small datasets.  "auto", the safe choice, will enable
  119
+# mmapping on a 64bit JVM.  Other values are "mmap", "mmap_index_only"
  120
+# (which may allow you to get part of the benefits of mmap on a 32bit
  121
+# machine by mmapping only index files) and "standard".
  122
+# (The buffer size settings that follow only apply to standard,
  123
+# non-mmapped i/o.)
  124
+disk_access_mode: mmap_index_only
  125
+
  126
+# Unlike most systems, in Cassandra writes are faster than reads, so
  127
+# you can afford more of those in parallel.  A good rule of thumb is 2
  128
+# concurrent reads per processor core.  Increase ConcurrentWrites to
  129
+# the number of clients writing at once if you enable CommitLogSync +
  130
+# CommitLogSyncDelay.
  131
+concurrent_reads: 8
  132
+concurrent_writes: 32
  133
+
  134
+# This sets the amount of memtable flush writer threads.  These will
  135
+# be blocked by disk io, and each one will hold a memtable in memory
  136
+# while blocked. If you have a large heap and many data directories,
  137
+# you can increase this value for better flush performance.
  138
+# By default this will be set to the amount of data directories defined.
  139
+#memtable_flush_writers: 1
  140
+
  141
+# Buffer size to use when performing contiguous column slices. 
  142
+# Increase this to the size of the column slices you typically perform
  143
+sliced_buffer_size_in_kb: 64
  144
+
  145
+# TCP port, for commands and data
  146
+storage_port: 7000
  147
+
  148
+# Address to bind to and tell other Cassandra nodes to connect to. You
  149
+# _must_ change this if you want multiple nodes to be able to
  150
+# communicate!
  151
+# 
  152
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
  153
+# will always do the Right Thing *if* the node is properly configured
  154
+# (hostname, name resolution, etc), and the Right Thing is to use the
  155
+# address associated with the hostname (it might not be).
  156
+#
  157
+# Setting this to 0.0.0.0 is always wrong.
  158
+listen_address:
  159
+
  160
+# The address to bind the Thrift RPC service to -- clients connect
  161
+# here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if
  162
+# you want Thrift to listen on all interfaces.
  163
+# 
  164
+# Leaving this blank has the same effect it does for ListenAddress,
  165
+# (i.e. it will be based on the configured hostname of the node).
  166
+rpc_address: 0.0.0.0
  167
+# port for Thrift to listen for clients on
  168
+rpc_port: 9160
  169
+
  170
+# enable or disable keepalive on rpc connections
  171
+rpc_keepalive: true
  172
+
  173
+# uncomment to set socket buffer sizes on rpc connections
  174
+# rpc_send_buff_size_in_bytes:
  175
+# rpc_recv_buff_size_in_bytes:
  176
+
  177
+# Frame size for thrift (maximum field length).
  178
+# 0 disables TFramedTransport in favor of TSocket. This option
  179
+# is deprecated; we strongly recommend using Framed mode.
  180
+thrift_framed_transport_size_in_mb: 15
  181
+
  182
+# The max length of a thrift message, including all fields and
  183
+# internal thrift overhead.
  184
+thrift_max_message_length_in_mb: 16
  185
+
  186
+# Whether or not to take a snapshot before each compaction.  Be
  187
+# careful using this option, since Cassandra won't clean up the
  188
+# snapshots for you.  Mostly useful if you're paranoid when there
  189
+# is a data format change.
  190
+snapshot_before_compaction: false
  191
+
  192
+# change this to increase the compaction thread's priority.  In java, 1 is the
  193
+# lowest priority and that is our default.
  194
+# compaction_thread_priority: 1
  195
+
  196
+# The threshold size in megabytes the binary memtable must grow to,
  197
+# before it's submitted for flushing to disk.
  198
+binary_memtable_throughput_in_mb: 256
  199
+
  200
+# Add column indexes to a row after its contents reach this size.
  201
+# Increase if your column values are large, or if you have a very large
  202
+# number of columns.  The competing causes are, Cassandra has to
  203
+# deserialize this much of the row to read a single column, so you want
  204
+# it to be small - at least if you do many partial-row reads - but all
  205
+# the index data is read for each access, so you don't want to generate
  206
+# that wastefully either.
  207
+column_index_size_in_kb: 64
  208
+
  209
+# Size limit for rows being compacted in memory.  Larger rows will spill
  210
+# over to disk and use a slower two-pass compaction process.  A message
  211
+# will be logged specifying the row key.
  212
+in_memory_compaction_limit_in_mb: 64
  213
+
  214
+# Time to wait for a reply from other nodes before failing the command 
  215
+rpc_timeout_in_ms: 20000
  216
+
  217
+# phi value that must be reached for a host to be marked down.
  218
+# most users should never need to adjust this.
  219
+phi_convict_threshold: 10
  220
+
  221
+# endpoint_snitch -- Set this to a class that implements
  222
+# IEndpointSnitch, which will let Cassandra know enough
  223
+# about your network topology to route requests efficiently.
  224
+# Out of the box, Cassandra provides
  225
+#  - org.apache.cassandra.locator.SimpleSnitch:
  226
+#    Treats Strategy order as proximity. This improves cache locality
  227
+#    when disabling read repair, which can further improve throughput.
  228
+#  - org.apache.cassandra.locator.RackInferringSnitch:
  229
+#    Proximity is determined by rack and data center, which are
  230
+#    assumed to correspond to the 3rd and 2nd octet of each node's
  231
+#    IP address, respectively
  232
+#  - org.apache.cassandra.locator.PropertyFileSnitch:
  233
+#    Proximity is determined by rack and data center, which are
  234
+#    explicitly configured in cassandra-topology.properties.
  235
+endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch
  236
+
  237
+# dynamic_snitch -- This boolean controls whether the above snitch is
  238
+# wrapped with a dynamic snitch, which will monitor read latencies
  239
+# and avoid reading from hosts that have slowed (due to compaction,
  240
+# for instance)
  241
+dynamic_snitch: true
  242
+# controls how often to perform the more expensive part of host score
  243
+# calculation
  244
+dynamic_snitch_update_interval_in_ms: 100 
  245
+# controls how often to reset all host scores, allowing a bad host to
  246
+# possibly recover
  247
+dynamic_snitch_reset_interval_in_ms: 600000
  248
+# if set greater than zero and read_repair_chance is < 1.0, this will allow
  249
+# 'pinning' of replicas to hosts in order to increase cache capacity.
  250
+# The badness threshold will control how much worse the pinned host has to be
  251
+# before the dynamic snitch will prefer other replicas over it.  This is
  252
+# expressed as a double which represents a percentage.  Thus, a value of
  253
+# 0.2 means Cassandra would continue to prefer the static snitch values
  254
+# until the pinned host was 20% worse than the fastest.
  255
+dynamic_snitch_badness_threshold: 0.1
  256
+
  257
+# request_scheduler -- Set this to a class that implements
  258
+# RequestScheduler, which will schedule incoming client requests
  259
+# according to the specific policy. This is useful for multi-tenancy
  260
+# with a single Cassandra cluster.
  261
+# NOTE: This is specifically for requests from the client and does
  262
+# not affect inter node communication.
  263
+# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
  264
+# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
  265
+# client requests to a node with a separate queue for each
  266
+# request_scheduler_id. The scheduler is further customized by
  267
+# request_scheduler_options as described below.
  268
+request_scheduler: org.apache.cassandra.scheduler.NoScheduler
  269
+
  270
+# Scheduler Options vary based on the type of scheduler
  271
+# NoScheduler - Has no options
  272
+# RoundRobin
  273
+#  - throttle_limit -- The throttle_limit is the number of in-flight
  274
+#                      requests per client.  Requests beyond 
  275
+#                      that limit are queued up until
  276
+#                      running requests can complete.
  277
+#                      The value of 80 here is twice the number of
  278
+#                      concurrent_reads + concurrent_writes.
  279
+#  - default_weight -- default_weight is optional and allows for
  280
+#                      overriding the default which is 1.
  281
+#  - weights -- Weights are optional and will default to 1 or the
  282
+#               overridden default_weight. The weight translates into how
  283
+#               many requests are handled during each turn of the
  284
+#               RoundRobin, based on the scheduler id.
  285
+#
  286
+# request_scheduler_options:
  287
+#    throttle_limit: 80
  288
+#    default_weight: 5
  289
+#    weights:
  290
+#      Keyspace1: 1
  291
+#      Keyspace2: 5
  292
+
  293
+# request_scheduler_id -- An identifier based on which to perform
  294
+# the request scheduling. Currently the only valid option is keyspace.
  295
+# request_scheduler_id: keyspace
  296
+
  297
+# The Index Interval determines how large the sampling of row keys
  298
+#  is for a given SSTable. The larger the sampling, the more effective
  299
+#  the index is at the cost of space.
  300
+index_interval: 128
  301
+
  302
+# Keyspaces have ColumnFamilies.        (Usually 1 KS per application.)
  303
+# ColumnFamilies have Rows.             (Dozens of CFs per KS.)
  304
+# Rows contain Columns.                 (Many per CF.)
  305
+# Columns contain name:value:timestamp. (Many per Row.)
  306
+#
  307
+# A KS is most similar to a schema, and a CF is most similar to a relational table.
  308
+#
  309
+# Keyspaces, ColumnFamilies, and Columns may carry additional
  310
+# metadata that change their behavior. These are as follows:
  311
+#
  312
+# Keyspace required parameters:
  313
+# - name: name of the keyspace; "system" is
  314
+#   reserved for Cassandra Internals.
  315
+# - replica_placement_strategy: the class that determines how replicas
  316
+#   are distributed among nodes. Contains both the class as well as
  317
+#   configuration information.  Must extend AbstractReplicationStrategy.
  318
+#   Out of the box, Cassandra provides 
  319
+#     * org.apache.cassandra.locator.SimpleStrategy 
  320
+#     * org.apache.cassandra.locator.NetworkTopologyStrategy
  321
+#     * org.apache.cassandra.locator.OldNetworkTopologyStrategy
  322
+#
  323
+#   SimpleStrategy merely places the first
  324
+#   replica at the node whose token is closest to the key (as determined
  325
+#   by the Partitioner), and additional replicas on subsequent nodes
  326
+#   along the ring in increasing Token order.
  327
+# 
  328
+#   With NetworkTopologyStrategy,
  329
+#   for each datacenter, you can specify how many replicas you want
  330
+#   on a per-keyspace basis.  Replicas are placed on different racks
  331
+#   within each DC, if possible. This strategy also requires rack aware
  332
+#   snitch, such as RackInferringSnitch or PropertyFileSnitch.
  333
+#   An example:
  334
+#    - name: Keyspace1
  335
+#      replica_placement_strategy: org.apache.cassandra.locator.NetworkTopologyStrategy
  336
+#      strategy_options:
  337
+#        DC1 : 3
  338
+#        DC2 : 2
  339
+#        DC3 : 1
  340
+# 
  341
+#   OldNetworkTopologyStrategy [formerly RackAwareStrategy] 
  342
+#   places one replica in each of two datacenters, and the third on a
  343
+#   different rack in the first.  Additional datacenters are not
  344
+#   guaranteed to get a replica.  Additional replicas after three are placed
  345
+#   in ring order after the third without regard to rack or datacenter.
  346
+# - replication_factor: Number of replicas of each row
  347
+# Keyspace optional parameters:
  348
+# - strategy_options: Additional information for the replication strategy.
  349
+# - column_families:
  350
+#     ColumnFamily required parameters:
  351
+#     - name: name of the ColumnFamily.  Must not contain the character "-".
  352
+#     - compare_with: tells Cassandra how to sort the columns for slicing
  353
+#       operations. The default is BytesType, which is a straightforward
  354
+#       lexical comparison of the bytes in each column.  Other options are
  355
+#       AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, LongType,
  356
+#       and IntegerType (a generic variable-length integer type).
  357
+#       You can also specify the fully-qualified class name to a class of
  358
+#       your choice extending org.apache.cassandra.db.marshal.AbstractType.
  359
+#    
  360
+#     ColumnFamily optional parameters:
  361
+#     - keys_cached: specifies the number of keys per sstable whose
  362
+#        locations we keep in memory in "mostly LRU" order.  (JUST the key
  363
+#        locations, NOT any column values.) Specify a fraction (value less
  364
+#        than 1) or an absolute number of keys to cache.  Defaults to 200000
  365
+#        keys.
  366
+#     - rows_cached: specifies the number of rows whose entire contents we
  367
+#        cache in memory. Do not use this on ColumnFamilies with large rows,
  368
+#        or ColumnFamilies with high write:read ratios. Specify a fraction
  369
+#        (value less than 1) or an absolute number of rows to cache.
  370
+#        Defaults to 0. (i.e. row caching is off by default)
  371
+#     - comment: used to attach additional human-readable information about 
  372
+#        the column family to its definition.
  373
+#     - read_repair_chance: specifies the probability with which read
  374
+#        repairs should be invoked on non-quorum reads.  must be between 0
  375
+#        and 1. defaults to 1.0 (always read repair).
  376
+#     - gc_grace_seconds: specifies the time to wait before garbage
  377
+#        collecting tombstones (deletion markers). defaults to 864000 (10
  378
+#        days). See http://wiki.apache.org/cassandra/DistributedDeletes
  379
+#     - default_validation_class: specifies a validator class to use for
  380
+#        validating all the column values in the CF.
  381
+#     NOTE:
  382
+#     min_compaction_threshold must be less than max_compaction_threshold!
  383
+#     - min_compaction_threshold: the minimum number of SSTables needed
  384
+#        to start a minor compaction.  increasing this will cause minor
  385
+#        compactions to start less frequently and be more intensive. setting
  386
+#        this to 0 disables minor compactions.  defaults to 4.
  387
+#     - max_compaction_threshold: the maximum number of SSTables allowed
  388
+#        before a minor compaction is forced.  decreasing this will cause
  389
+#        minor compactions to start more frequently and be less intensive.
  390
+#        setting this to 0 disables minor compactions.  defaults to 32.
  391
+#     /NOTE
  392
+#     - row_cache_save_period_in_seconds: number of seconds between saving
  393
+#        row caches.  The row caches can be saved periodically and if one
  394
+#        exists on startup it will be loaded.
  395
+#     - key_cache_save_period_in_seconds: number of seconds between saving
  396
+#        key caches.  The key caches can be saved periodically and if one 
  397
+#        exists on startup it will be loaded.
  398
+#     - memtable_flush_after_mins: The maximum time to leave a dirty table
  399
+#        unflushed.  This should be large enough that it won't cause a flush
  400
+#        storm of all memtables during periods of inactivity.
  401
+#     - memtable_throughput_in_mb: The maximum size of the memtable before
  402
+#        it is flushed.  If undefined, 1/8 * heapsize will be used.
  403
+#     - memtable_operations_in_millions: Number of operations in millions
  404
+#        before the memtable is flushed. If undefined, throughput / 64 * 0.3
  405
+#        will be used.
  406
+#     - column_metadata:
  407
+#         Column required parameters:
  408
+#         - name: binds a validator (and optionally an indexer) to columns 
  409
+#            with this name in any row of the enclosing column family.
  410
+#         - validator: like cf.compare_with, an AbstractType that checks
  411
+#            that the value of the column is well-defined.
  412
+#         Column optional parameters:
  413
+#         NOTE:
  414
+#         index_name cannot be set if index_type is not also set!
  415
+#         - index_name: User-friendly name for the index.
  416
+#         - index_type: The type of index to be created. Currently only
  417
+#            KEYS is supported.
  418
+#         /NOTE
  419
+# 
  420
+# NOTE:
  421
+#   this keyspace definition is for demonstration purposes only.
  422
+#   Cassandra will not load these definitions during startup. See
  423
+#   http://wiki.apache.org/cassandra/FAQ#no_keyspaces for an explanation.
  424
+# /NOTE
  425
+keyspaces:
  426
+    - name: reddit
  427
+      replica_placement_strategy: org.apache.cassandra.locator.RackUnawareStrategy
  428
+      replication_factor: 3
  429
+      column_families:
  430
+      - column_type: Standard
  431
+        compare_with: BytesType
  432
+        name: permacache
  433
+        row_cache_save_period_in_seconds: 3600
  434
+        rows_cached: 100000
  435
+      - column_type: Standard
  436
+        compare_with: BytesType
  437
+        name: urls
  438
+        row_cache_save_period_in_seconds: 3600
  439
+        rows_cached: 100000
  440
+      - column_type: Standard
  441
+        compare_with: UTF8Type
  442
+        name: LinkVote
  443
+        rows_cached: 100000
  444
+      - column_type: Standard
  445
+        compare_with: UTF8Type
  446
+        name: CommentVote
  447
+        rows_cached: 100000
  448
+      - column_type: Standard
  449
+        compare_with: UTF8Type
  450
+        name: Friend
  451
+        rows_cached: 10000
  452
+      - column_type: Standard
  453
+        compare_with: UTF8Type
  454
+        name: Save
  455
+        rows_cached: 100000
  456
+      - column_type: Standard
  457
+        compare_with: UTF8Type
  458
+        name: Hide
  459
+        rows_cached: 100000
  460
+      - column_type: Standard
  461
+        compare_with: UTF8Type
  462
+        name: Click
  463
+        rows_cached: 100000
  464
+      - column_type: Standard
  465
+        compare_with: UTF8Type
  466
+        name: VotesByLink
  467
+      - column_type: Standard
  468
+        compare_with: UTF8Type
  469
+        name: VotesByDay
  470
+      - column_type: Standard
  471
+        name: FriendsByAccount
  472
+      - column_type: Standard
  473
+        compare_with: UTF8Type
  474
+        name: SavesByAccount
  475
+      - column_type: Standard
  476
+        compare_with: UTF8Type
  477
+        name: CommentSortsCache
  478
+        row_cache_save_period_in_seconds: 3600
  479
+        rows_cached: 200000
420  config/cassandra/storage-conf.xml
... ...
@@ -1,420 +0,0 @@
1  
-<!--
2  
- ~ Licensed to the Apache Software Foundation (ASF) under one
3  
- ~ or more contributor license agreements.  See the NOTICE file
4  
- ~ distributed with this work for additional information
5  
- ~ regarding copyright ownership.  The ASF licenses this file
6  
- ~ to you under the Apache License, Version 2.0 (the
7  
- ~ "License"); you may not use this file except in compliance
8  
- ~ with the License.  You may obtain a copy of the License at
9  
- ~
10  
- ~    http://www.apache.org/licenses/LICENSE-2.0
11  
- ~
12  
- ~ Unless required by applicable law or agreed to in writing,
13  
- ~ software distributed under the License is distributed on an
14  
- ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  
- ~ KIND, either express or implied.  See the License for the
16  
- ~ specific language governing permissions and limitations
17  
- ~ under the License.
18  
--->
19  
-<Storage>
20  
-  <!--======================================================================-->
21  
-  <!-- Basic Configuration                                                  -->
22  
-  <!--======================================================================-->
23  
-
24  
-  <!-- 
25  
-   ~ The name of this cluster.  This is mainly used to prevent machines in
26  
-   ~ one logical cluster from joining another.
27  
-  -->
28  
-  <ClusterName>reddit</ClusterName>
29  
-
30  
-  <!--
31  
-   ~ Turn on to make new [non-seed] nodes automatically migrate the right data 
32  
-   ~ to themselves.  (If no InitialToken is specified, they will pick one 
33  
-   ~ such that they will get half the range of the most-loaded node.)
34  
-   ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
35  
-   ~ so that you can't subsequently accidentally bootstrap a node with
36  
-   ~ data on it.  (You can reset this by wiping your data and commitlog
37  
-   ~ directories.)
38  
-   ~
39  
-   ~ Off by default so that new clusters and upgraders from 0.4 don't
40  
-   ~ bootstrap immediately.  You should turn this on when you start adding
41  
-   ~ new nodes to a cluster that already has data on it.  (If you are upgrading
42  
-   ~ from 0.4, start your cluster with it off once before changing it to true.
43  
-   ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
44  
-   ~ I/O before your cluster starts up.)
45  
-  -->
46  
-  <AutoBootstrap>false</AutoBootstrap>
47  
-
48  
-  <!--
49  
-   ~ See http://wiki.apache.org/cassandra/HintedHandoff
50  
-  -->
51  
-  <HintedHandoffEnabled>true</HintedHandoffEnabled>
52  
-
53  
-  <!--
54  
-   ~ Keyspaces and ColumnFamilies:
55  
-   ~ A ColumnFamily is the Cassandra concept closest to a relational
56  
-   ~ table.  Keyspaces are separate groups of ColumnFamilies.  Except in
57  
-   ~ very unusual circumstances you will have one Keyspace per application.
58  
-
59  
-   ~ There is an implicit keyspace named 'system' for Cassandra internals.
60  
-  -->
61  
-  <Keyspaces>
62  
-    <Keyspace Name="Keyspace1">
63  
-      <!--
64  
-       ~ ColumnFamily definitions have one required attribute (Name)
65  
-       ~ and several optional ones.
66  
-       ~
67  
-       ~ The CompareWith attribute tells Cassandra how to sort the columns
68  
-       ~ for slicing operations.  The default is BytesType, which is a
69  
-       ~ straightforward lexical comparison of the bytes in each column.
70  
-       ~ Other options are AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType,
71  
-       ~ and LongType.  You can also specify the fully-qualified class
72  
-       ~ name to a class of your choice extending
73  
-       ~ org.apache.cassandra.db.marshal.AbstractType.
74  
-       ~ 
75  
-       ~ SuperColumns have a similar CompareSubcolumnsWith attribute.
76  
-       ~ 
77  
-       ~ BytesType: Simple sort by byte value.  No validation is performed.
78  
-       ~ AsciiType: Like BytesType, but validates that the input can be 
79  
-       ~            parsed as US-ASCII.
80  
-       ~ UTF8Type: A string encoded as UTF8
81  
-       ~ LongType: A 64bit long
82  
-       ~ LexicalUUIDType: A 128bit UUID, compared lexically (by byte value)
83  
-       ~ TimeUUIDType: a 128bit version 1 UUID, compared by timestamp
84  
-       ~
85  
-       ~ (To get the closest approximation to 0.3-style supercolumns, you
86  
-       ~ would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)
87  
-       ~
88  
-       ~ An optional `Comment` attribute may be used to attach additional
89  
-       ~ human-readable information about the column family to its definition.
90  
-       ~ 
91  
-       ~ The optional KeysCached attribute specifies
92  
-       ~ the number of keys per sstable whose locations we keep in
93  
-       ~ memory in "mostly LRU" order.  (JUST the key locations, NOT any
94  
-       ~ column values.) Specify a fraction (value less than 1), a percentage
95  
-       ~ (ending in a % sign) or an absolute number of keys to cache.
96  
-       ~ KeysCached defaults to 200000 keys.
97  
-       ~
98  
-       ~ The optional RowsCached attribute specifies the number of rows
99  
-       ~ whose entire contents we cache in memory. Do not use this on
100  
-       ~ ColumnFamilies with large rows, or ColumnFamilies with high write:read
101  
-       ~ ratios. Specify a fraction (value less than 1), a percentage (ending in
102  
-       ~ a % sign) or an absolute number of rows to cache. 
103  
-       ~ RowsCached defaults to 0, i.e., row cache is off by default.
104  
-       ~
105  
-       ~ Remember, when using caches as a percentage, they WILL grow with
106  
-       ~ your data set!
107  
-      -->
108  
-      <ColumnFamily Name="Standard1" CompareWith="BytesType"/>
109  
-      <ColumnFamily Name="Standard2" 
110  
-                    CompareWith="UTF8Type"
111  
-                    KeysCached="100%"/>
112  
-      <ColumnFamily Name="StandardByUUID1" CompareWith="TimeUUIDType" />
113  
-      <ColumnFamily Name="Super1"
114  
-                    ColumnType="Super"
115  
-                    CompareWith="BytesType"
116  
-                    CompareSubcolumnsWith="BytesType" />
117  
-      <ColumnFamily Name="Super2"
118  
-                    ColumnType="Super"
119  
-                    CompareWith="UTF8Type"
120  
-                    CompareSubcolumnsWith="UTF8Type"
121  
-                    RowsCached="10000"
122  
-                    KeysCached="50%"
123  
-                    Comment="A column family with supercolumns, whose column and subcolumn names are UTF8 strings"/>
124  
-
125  
-      <!--
126  
-       ~ Strategy: Setting this to the class that implements
127  
-       ~ IReplicaPlacementStrategy will change the way the node picker works.
128  
-       ~ Out of the box, Cassandra provides
129  
-       ~ org.apache.cassandra.locator.RackUnawareStrategy and
130  
-       ~ org.apache.cassandra.locator.RackAwareStrategy (place one replica in
131  
-       ~ a different datacenter, and the others on different racks in the same
132  
-       ~ one.)
133  
-      -->
134  
-      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
135  
-
136  
-      <!-- Number of replicas of the data -->
137  
-      <ReplicationFactor>1</ReplicationFactor>
138  
-
139  
-      <!--
140  
-       ~ EndPointSnitch: Set this to the class that implements
141  
-       ~ AbstractEndpointSnitch, which lets Cassandra know enough
142  
-       ~ about your network topology to route requests efficiently.
143  
-       ~ Out of the box, Cassandra provides org.apache.cassandra.locator.EndPointSnitch,
144  
-       ~ and PropertyFileEndPointSnitch is available in contrib/.
145  
-      -->
146  
-       <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
147  
-     </Keyspace>
148  
-
149  
-    <Keyspace Name="permacache">
150  
-      <ColumnFamily CompareWith="BytesType" Name="permacache" />
151  
-      <ColumnFamily CompareWith="BytesType" Name="urls" RowsCached="100000" />
152  
-
153  
-      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
154  
-      <ReplicationFactor>3</ReplicationFactor>
155  
-      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
156  
-    </Keyspace>
157  
-
158  
-    <Keyspace Name="reddit">
159  
-      <!-- Relations -->
160  
-      <ColumnFamily CompareWith="UTF8Type" Name="LinkVote" />
161  
-      <ColumnFamily CompareWith="UTF8Type" Name="CommentVote" />
162  
-
163  
-      <!-- Views -->
164  
-      <ColumnFamily CompareWith="UTF8Type" Name="VotesByLink" />
165  
-      <ColumnFamily CompareWith="UTF8Type" Name="CommentSortsCache" RowsCached="100000" />
166  
-
167  
-      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
168  
-      <ReplicationFactor>3</ReplicationFactor>
169  
-      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
170  
-    </Keyspace>
171  
-
172  
-  </Keyspaces>
173  
-
174  
-  <!--
175  
-   ~ Authenticator: any IAuthenticator may be used, including your own as long
176  
-   ~ as it is on the classpath.  Out of the box, Cassandra provides
177  
-   ~ org.apache.cassandra.auth.AllowAllAuthenticator and
178  
-   ~ org.apache.cassandra.auth.SimpleAuthenticator 
179  
-   ~ (SimpleAuthenticator uses access.properties and passwd.properties by
180  
-   ~ default).
181  
-   ~
182  
-   ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
183  
-  -->
184  
-  <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
185  
-
186  
-  <!--
187  
-   ~ Partitioner: any IPartitioner may be used, including your own as long
188  
-   ~ as it is on the classpath.  Out of the box, Cassandra provides
189  
-   ~ org.apache.cassandra.dht.RandomPartitioner,
190  
-   ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
191  
-   ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
192  
-   ~ (CollatingOPP collates according to EN,US rules, not naive byte
193  
-   ~ ordering.  Use this as an example if you need locale-aware collation.)
194  
-   ~ Range queries require using an order-preserving partitioner.
195  
-   ~
196  
-   ~ Achtung!  Changing this parameter requires wiping your data
197  
-   ~ directories, since the partitioner can modify the sstable on-disk
198  
-   ~ format.
199  
-  -->
200  
-  <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
201  
-
202  
-  <!--
203  
-   ~ If you are using an order-preserving partitioner and you know your key
204  
-   ~ distribution, you can specify the token for this node to use. (Keys
205  
-   ~ are sent to the node with the "closest" token, so distributing your
206  
-   ~ tokens equally along the key distribution space will spread keys
207  
-   ~ evenly across your cluster.)  This setting is only checked the first
208  
-   ~ time a node is started. 
209  
-
210  
-   ~ This can also be useful with RandomPartitioner to force equal spacing
211  
-   ~ of tokens around the hash space, especially for clusters with a small
212  
-   ~ number of nodes.
213  
-  -->
214  
-  <InitialToken></InitialToken>
215  
-
216  
-  <!--
217  
-   ~ Directories: Specify where Cassandra should store different data on
218  
-   ~ disk.  Keep the data disks and the CommitLog disks separate for best
219  
-   ~ performance
220  
-  -->
221  
-  <CommitLogDirectory>/cassandra/commitlog</CommitLogDirectory>
222  
-  <DataFileDirectories>
223  
-      <DataFileDirectory>/cassandra/data</DataFileDirectory>
224  
-  </DataFileDirectories>
225  
-
226  
-  <!--
227  
-   ~ Addresses of hosts that are deemed contact points. Cassandra nodes
228  
-   ~ use this list of hosts to find each other and learn the topology of
229  
-   ~ the ring. You must change this if you are running multiple nodes!
230  
-  -->
231  
-  <Seeds>
232  
-      <Seed>pmc01</Seed>
233  
-      <Seed>pmc02</Seed>
234  
-      <Seed>pmc03</Seed>
235  
-      <Seed>pmc04</Seed>
236  
-      <Seed>pmc05</Seed>
237  
-      <Seed>pmc06</Seed>
238  
-      <Seed>pmc07</Seed>
239  
-      <Seed>pmc08</Seed>
240  
-      <Seed>pmc09</Seed>
241  
-      <Seed>pmc10</Seed>
242  
-      <Seed>pmc11</Seed>
243  
-      <Seed>pmc12</Seed>
244  
-  </Seeds>
245  
-
246  
-  <!-- Miscellaneous -->
247  
-
248  
-  <!-- Time to wait for a reply from other nodes before failing the command -->
249  
-  <RpcTimeoutInMillis>30000</RpcTimeoutInMillis>
250  
-  <!-- phi value that must be reached before a host is marked as down.
251  
-       most users should never adjust this -->
252  
-  <PhiConvictThreshold>10</PhiConvictThreshold>
253  
-  <!-- Size to allow commitlog to grow to before creating a new segment -->
254  
-  <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
255  
-
256  
-
257  
-  <!-- Local hosts and ports -->
258  
-
259  
-  <!-- 
260  
-   ~ Address to bind to and tell other nodes to connect to.  You _must_
261  
-   ~ change this if you want multiple nodes to be able to communicate!  
262  
-   ~
263  
-   ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
264  
-   ~ will always do the Right Thing *if* the node is properly configured
265  
-   ~ (hostname, name resolution, etc), and the Right Thing is to use the
266  
-   ~ address associated with the hostname (it might not be).
267  
-  -->
268  
-  <ListenAddress></ListenAddress>
269  
-  <!-- internal communications port -->
270  
-  <StoragePort>7000</StoragePort>
271  
-
272  
-  <!--
273  
-   ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
274  
-   ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
275  
-   ~ all interfaces.
276  
-   ~
277  
-   ~ Leaving this blank has the same effect it does for ListenAddress,
278  
-   ~ (i.e. it will be based on the configured hostname of the node).
279  
-  -->
280  
-  <ThriftAddress></ThriftAddress>
281  
-  <!-- Thrift RPC port (the port clients connect to). -->
282  
-  <ThriftPort>9160</ThriftPort>
283  
-  <!-- 
284  
-   ~ Whether or not to use a framed transport for Thrift. If this option
285  
-   ~ is set to true then you must also use a framed transport on the 
286  
-   ~ client-side, (framed and non-framed transports are not compatible).
287  
-  -->
288  
-  <ThriftFramedTransport>false</ThriftFramedTransport>
289  
-
290  
-
291  
-  <!--======================================================================-->
292  
-  <!-- Memory, Disk, and Performance                                        -->
293  
-  <!--======================================================================-->
294  
-
295  
-  <!--
296  
-   ~ Access mode.  mmapped i/o is substantially faster, but only practical on
297  
-   ~ a 64bit machine (which notably does not include EC2 "small" instances)
298  
-   ~ or relatively small datasets.  "auto", the safe choice, will enable
299  
-   ~ mmapping on a 64bit JVM.  Other values are "mmap", "mmap_index_only"
300  
-   ~ (which may allow you to get part of the benefits of mmap on a 32bit
301  
-   ~ machine by mmapping only index files) and "standard".
302  
-   ~ (The buffer size settings that follow only apply to standard,
303  
-   ~ non-mmapped i/o.)
304  
-   -->
305  
-  <DiskAccessMode>mmap_index_only</DiskAccessMode>
306  
-
307  
-  <!--
308  
-   ~ Size of compacted row above which to log a warning.  (If compacted
309  
-   ~ rows do not fit in memory, Cassandra will crash.  This is explained
310  
-   ~ in http://wiki.apache.org/cassandra/CassandraLimitations and is
311  
-   ~ scheduled to be fixed in 0.7.)
312  
-  -->
313  
-  <RowWarningThresholdInMB>512</RowWarningThresholdInMB>
314  
-
315  
-  <!--
316  
-   ~ Buffer size to use when performing contiguous column slices. Increase
317  
-   ~ this to the size of the column slices you typically perform. 
318  
-   ~ (Name-based queries are performed with a buffer size of 
319  
-   ~ ColumnIndexSizeInKB.)
320  
-  -->
321  
-  <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
322  
-
323  
-  <!--
324  
-   ~ Buffer size to use when flushing memtables to disk. (Only one 
325  
-   ~ memtable is ever flushed at a time.) Increase (decrease) the index
326  
-   ~ buffer size relative to the data buffer if you have few (many) 
327  
-   ~ columns per key.  Bigger is only better _if_ your memtables get large
328  
-   ~ enough to use the space. (Check in your data directory after your
329  
-   ~ app has been running long enough.) -->
330  
-  <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
331  
-  <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
332  
-
333  
-  <!--
334  
-   ~ Add column indexes to a row after its contents reach this size.
335  
-   ~ Increase if your column values are large, or if you have a very large
336  
-   ~ number of columns.  The competing causes are, Cassandra has to
337  
-   ~ deserialize this much of the row to read a single column, so you want
338  
-   ~ it to be small - at least if you do many partial-row reads - but all
339  
-   ~ the index data is read for each access, so you don't want to generate
340  
-   ~ that wastefully either.
341  
-  -->
342  
-  <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
343  
-
344  
-  <!--
345  
-   ~ Flush memtable after this much data has been inserted, including
346  
-   ~ overwritten data.  There is one memtable per column family, and 
347  
-   ~ this threshold is based solely on the amount of data stored, not
348  
-   ~ actual heap memory usage (there is some overhead in indexing the
349  
-   ~ columns).
350  
-  -->
351  
-  <MemtableThroughputInMB>128</MemtableThroughputInMB>
352  
-  <!--
353  
-   ~ Throughput setting for Binary Memtables.  Typically these are
354  
-   ~ used for bulk load so you want them to be larger.
355  
-  -->
356  
-  <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
357  
-  <!--
358  
-   ~ The maximum number of columns in millions to store in memory per
359  
-   ~ ColumnFamily before flushing to disk.  This is also a per-memtable
360  
-   ~ setting.  Use with MemtableThroughputInMB to tune memory usage.
361  
-  -->
362  
-  <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
363  
-  <!--
364  
-   ~ The maximum time to leave a dirty memtable unflushed.
365  
-   ~ (While any affected columnfamilies have unflushed data from a
366  
-   ~ commit log segment, that segment cannot be deleted.)
367  
-   ~ This needs to be large enough that it won't cause a flush storm
368  
-   ~ of all your memtables flushing at once because none has hit
369  
-   ~ the size or count thresholds yet.
370  
-  -->
371  
-  <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
372  
-
373  
-  <!--
374  
-   ~ Unlike most systems, in Cassandra writes are faster than reads, so
375  
-   ~ you can afford more of those in parallel.  A good rule of thumb is 2
376  
-   ~ concurrent reads per processor core.  Increase ConcurrentWrites to
377  
-   ~ the number of clients writing at once if you enable CommitLogSync +
378  
-   ~ CommitLogSyncDelay. -->
379  
-  <ConcurrentReads>8</ConcurrentReads>
380  
-  <ConcurrentWrites>32</ConcurrentWrites>
381  
-
382  
-  <!--
383  
-   ~ CommitLogSync may be either "periodic" or "batch."  When in batch
384  
-   ~ mode, Cassandra won't ack writes until the commit log has been
385  
-   ~ fsynced to disk.  It will wait up to CommitLogSyncBatchWindowInMS
386  
-   ~ milliseconds for other writes, before performing the sync.
387  
-
388  
-   ~ This is less necessary in Cassandra than in traditional databases
389  
-   ~ since replication reduces the odds of losing data from a failure
390  
-   ~ after writing the log entry but before it actually reaches the disk.
391  
-   ~ So the other option is "periodic," where writes may be acked immediately
392  
-   ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
393  
-   ~ milliseconds.
394  
-  -->
395  
-  <CommitLogSync>periodic</CommitLogSync>
396  
-  <!--
397  
-   ~ Interval at which to perform syncs of the CommitLog in periodic mode.
398  
-   ~ Usually the default of 10000ms is fine; increase it if your i/o
399  
-   ~ load is such that syncs are taking excessively long times.
400  
-  -->
401  
-  <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
402  
-  <!--
403  
-   ~ Delay (in milliseconds) during which additional commit log entries
404  
-   ~ may be written before fsync in batch mode.  This will increase
405  
-   ~ latency slightly, but can vastly improve throughput where there are
406  
-   ~ many writers.  Set to zero to disable (each entry will be synced
407  
-   ~ individually).  Reasonable values range from a minimal 0.1 to 10 or
408  
-   ~ even more if throughput matters more than latency.
409  
-  -->
410  
-  <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> --> 
411  
-
412  
-  <!--
413  
-   ~ Time to wait before garbage-collecting deletion markers.  Set this to
414  
-   ~ a large enough value that you are confident that the deletion marker
415  
-   ~ will be propagated to all replicas by the time this many seconds has
416  
-   ~ elapsed, even in the face of hardware failures.  The default value is
417  
-   ~ ten days.
418  
-  -->
419  
-  <GCGraceSeconds>864000</GCGraceSeconds>
420  
-</Storage>
52  r2/example.ini
@@ -32,9 +32,11 @@ MODSECRET = abcdefghijklmnopqrstuvwxyz0123456789
32 32
 # secret for /prefs/feeds
33 33
 FEEDSECRET = abcdefghijklmnopqrstuvwxyz0123456789
34 34
 
  35
+INDEXTANK_API_URL =
  36
+
35 37
 # -- important settings -- 
36 38
 # the domain that this app serves itself up as
37  
-domain = localhost
  39
+domain = reddit.local
38 40
 # if you use www for the old-timey feel, put it here
39 41
 domain_prefix = 
40 42
 # the user used for "system" operations and messages
@@ -80,6 +82,7 @@ admin_message_acct = reddit
80 82
 # data cache (used for caching Thing objects)
81 83
 num_mc_clients = 5
82 84
 memcaches = 127.0.0.1:11211
  85
+stalecaches =
83 86
 # render caches (the second is "remote" and the local is optional but in the same format)
84 87
 local_rendercache =
85 88
 rendercaches = 127.0.0.1:11211
@@ -94,7 +97,7 @@ permacache_memcaches = 127.0.0.1:11211
94 97
 cassandra_seeds = 127.0.0.1:9160
95 98
 # read/write consistency levels for Cassandra
96 99
 cassandra_rcl = ONE
97  
-cassandra_wcl = QUORUM
  100
+cassandra_wcl = ONE
98 101
 
99 102
 # -- url cache options --
100 103
 url_caches = 127.0.0.1:11211
@@ -117,6 +120,8 @@ adtracker_url = /static/pixel.png
117 120
 adframetracker_url = http://pixel.reddit.com/pixel/of_defenestration.png
118 121
 # open redirector to bounce clicks off of on sponsored links for tracking
119 122
 clicktracker_url = /static/pixel.png
  123
+# new pixel
  124
+newtracker_url = 
120 125
 
121 126
 # amqp
122 127
 amqp_host = localhost:5672
@@ -128,17 +133,23 @@ amqp_virtual_host = /
128 133
 # list of all databases named in the subsequent table
129 134
 databases = main, comment, vote, email, authorize, award, hc
130 135
 
131  
-#db name         db           host       user, pass
132  
-main_db =        reddit,   127.0.0.1, reddit,   password
133  
-comment_db =     reddit,   127.0.0.1, reddit,   password
134  
-comment2_db =    reddit,   127.0.0.1, reddit,   password
135  
-vote_db =        reddit,   127.0.0.1, reddit,   password
136  
-email_db =       reddit,   127.0.0.1, reddit,   password
137  
-authorize_db =   reddit,   127.0.0.1, reddit,   password
138  
-award_db =       reddit,   127.0.0.1, reddit,   password
139  
-hc_db =          reddit,   127.0.0.1, reddit,   password
  136
+db_user = reddit
  137
+db_pass = password
  138
+db_port = 5432
  139
+db_pool_size = 3
  140
+db_pool_overflow_size = 3
  141
+
  142
+#db name       db         host      user, pass, port, conn, overflow_conn
  143
+main_db =      reddit,   127.0.0.1, *,    *,    *,    *,    *
  144
+comment_db =   reddit,   127.0.0.1, *,    *,    *,    *,    *
  145
+comment2_db =  reddit,   127.0.0.1, *,    *,    *,    *,    *
  146
+vote_db =      reddit,   127.0.0.1, *,    *,    *,    *,    *
  147
+email_db =     reddit,   127.0.0.1, *,    *,    *,    *,    *
  148
+authorize_db = reddit,   127.0.0.1, *,    *,    *,    *,    *
  149
+award_db =     reddit,   127.0.0.1, *,    *,    *,    *,    *
  150
+hc_db =        reddit,   127.0.0.1, *,    *,    *,    *,    *
140 151
 
141  
-hardcache_categories = *:hc
  152
+hardcache_categories = *:hc:hc
142 153
 
143 154
 # this setting will prefix all of the table names
144 155
 db_app_name = reddit
@@ -197,13 +208,17 @@ tracking_secret = abcdefghijklmnopqrstuvwxyz0123456789
197 208
 
198 209
 ## -- Self-service sponsored link stuff --
199 210
 # (secure) payment domain
200  
-payment_domain = http://pay.localhost/
201  
-ad_domain = http://localhost
  211
+payment_domain = http://reddit.local/
  212
+ad_domain = http://reddit.local
  213
+allowed_pay_countries = United States, United Kingdom, Canada
202 214
 sponsors =
203  
-# authorize.net credentials
  215
+
  216
+# authorize.net credentials (blank authorizenetapi to disable)
  217
+authorizenetapi =
  218
+# authorizenetapi = https://api.authorize.net/xml/v1/request.api
204 219
 authorizenetname = 
205 220
 authorizenetkey = 
206  
-authorizenetapi = https://api.authorize.net/xml/v1/request.api
  221
+
207 222
 min_promote_bid = 20
208 223
 max_promote_bid = 9999
209 224
 min_promote_future = 2
@@ -227,7 +242,7 @@ authorized_cnames =
227 242
 num_query_queue_workers = 5
228 243
 query_queue_worker = http://cslowe.local:8000
229 244
 enable_doquery = True
230  
-use_query_cache = False
  245
+use_query_cache = True
231 246
 write_query_queue = True
232 247
 
233 248
 # -- stylesheet editor --
@@ -243,6 +258,9 @@ stylesheet_rtl = reddit-rtl.css
243 258
 # location of the static directory
244 259
 static_path = /static/
245