Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

February 2011 Merge

* Upgrade discount to 1.6.8
* Start reading Votes, Saves, and Hides from Cassandra (while still dual-writing all three)
* Some XSS fixes
* Significant Gold buying and gifting improvements
  - Move /api/ipn to /ipn
* Allow non-US countries to buy sponsored links
* Increase embed.ly scope
* redd.it support
* Allow postgres port number to be specified in ini file (this changes the format of the .ini file)
* Upgrade Cassandra to 0.7
  - Change g.urlcache to LinksByURL
  - Translate storage-conf.xml to cassandra.yaml
  - TTL support (and enable on Hides)
  - Move permacache keyspace to inside reddit keyspace
* The stalecache: a local memcached that contains slightly old information to speed up some lookups
* Switch to patched Paste that is hopefully download.gz-proof
* Don't store votes on things > 30 days old
* Many many bugfixes/small features
  • Loading branch information...
commit 7fff900bbeba362b607821159f6419d7762c9957 1 parent c78f27b
@ketralnis ketralnis authored
Showing with 5,286 additions and 2,263 deletions.
  1. +1 −0  .gitignore
  2. +479 −0 config/cassandra/cassandra.yaml
  3. +0 −420 config/cassandra/storage-conf.xml
  4. +35 −17 r2/example.ini
  5. +3 −3 r2/r2/config/middleware.py
  6. +3 −3 r2/r2/config/rewrites.py
  7. +8 −2 r2/r2/config/routing.py
  8. +1 −1  r2/r2/controllers/__init__.py
  9. +53 −252 r2/r2/controllers/api.py
  10. +21 −14 r2/r2/controllers/embed.py
  11. +1 −1  r2/r2/controllers/error.py
  12. +1 −1  r2/r2/controllers/errors.py
  13. +91 −10 r2/r2/controllers/front.py
  14. +476 −0 r2/r2/controllers/ipn.py
  15. +12 −8 r2/r2/controllers/listingcontroller.py
  16. +2 −0  r2/r2/controllers/post.py
  17. +19 −10 r2/r2/controllers/promotecontroller.py
  18. +23 −14 r2/r2/controllers/reddit_base.py
  19. +15 −7 r2/r2/controllers/toolbar.py
  20. +71 −18 r2/r2/controllers/validator/validator.py
  21. +6 −0 r2/r2/lib/amqp.py
  22. +48 −46 r2/r2/lib/app_globals.py
  23. +2 −1  r2/r2/lib/authorize/api.py
  24. +10 −7 r2/r2/lib/base.py
  25. +162 −52 r2/r2/lib/cache.py
  26. +6 −0 r2/r2/lib/comment_tree.py
  27. +8 −7 r2/r2/lib/contrib/discount/Makefile
  28. +4 −3 r2/r2/lib/contrib/discount/Makefile.in
  29. +1 −1  r2/r2/lib/contrib/discount/Plan9/mkfile
  30. +1 −1  r2/r2/lib/contrib/discount/VERSION
  31. +1 −1  r2/r2/lib/contrib/discount/config.cmd
  32. +3 −1 r2/r2/lib/contrib/discount/config.h
  33. +8 −8 r2/r2/lib/contrib/discount/config.log
  34. +1 −1  r2/r2/lib/contrib/discount/config.md
  35. +3 −3 r2/r2/lib/contrib/discount/config.sub
  36. +3 −1 r2/r2/lib/contrib/discount/cstring.h
  37. +1 −0  r2/r2/lib/contrib/discount/dumptree.c
  38. +62 −48 r2/r2/lib/contrib/discount/generate.c
  39. +24 −0 r2/r2/lib/contrib/discount/html5.c
  40. +11 −2 r2/r2/lib/contrib/discount/main.c
  41. +4 −4 r2/r2/lib/contrib/discount/markdown.1
  42. +73 −94 r2/r2/lib/contrib/discount/markdown.c
  43. +3 −0  r2/r2/lib/contrib/discount/markdown.h
  44. +21 −2 r2/r2/lib/contrib/discount/mkd-extensions.7
  45. +3 −1 r2/r2/lib/contrib/discount/mkd-functions.3
  46. +3 −1 r2/r2/lib/contrib/discount/mkdio.c
  47. +1 −0  r2/r2/lib/contrib/discount/mkdio.h
  48. +1 −1  r2/r2/lib/contrib/discount/resource.c
  49. +110 −0 r2/r2/lib/contrib/discount/tags.c
  50. +18 −0 r2/r2/lib/contrib/discount/tags.h
  51. +4 −0 r2/r2/lib/contrib/discount/tests/code.t
  52. +2 −0  r2/r2/lib/contrib/discount/tests/html.t
  53. +17 −0 r2/r2/lib/contrib/discount/tests/html5.t
  54. +1 −0  r2/r2/lib/contrib/discount/tests/linkylinky.t
  55. +29 −0 r2/r2/lib/contrib/discount/tests/peculiarities.t
  56. +15 −0 r2/r2/lib/contrib/discount/tests/strikethrough.t
  57. +3 −0  r2/r2/lib/contrib/discount/theme.c
  58. +6 −6 r2/r2/lib/count.py
  59. +9 −4 r2/r2/lib/cssfilter.py
  60. +145 −93 r2/r2/lib/db/queries.py
  61. +339 −100 r2/r2/lib/db/tdb_cassandra.py
  62. +38 −31 r2/r2/lib/db/thing.py
  63. +9 −5 r2/r2/lib/emailer.py
  64. +2 −1  r2/r2/lib/filters.py
  65. +27 −12 r2/r2/lib/indextank.py
  66. +10 −2 r2/r2/lib/jsontemplates.py
  67. +12 −8 r2/r2/lib/lock.py
  68. +5 −3 r2/r2/lib/manager/db_manager.py
  69. +10 −0 r2/r2/lib/media.py
  70. +1 −1  r2/r2/lib/menus.py
  71. +89 −130 r2/r2/lib/migrate/migrate.py
  72. +60 −9 r2/r2/lib/migrate/mr_permacache.py
  73. +0 −51 r2/r2/lib/migrate/mr_urls.py
  74. +4 −34 r2/r2/lib/mr_account.py
  75. +0 −233 r2/r2/lib/mr_tools.py
  76. +2 −0  r2/r2/lib/mr_tools/__init__.py
  77. +192 −0 r2/r2/lib/mr_tools/_mr_tools.pyx
  78. +53 −0 r2/r2/lib/mr_tools/mr_tools.py
  79. +3 −3 r2/r2/lib/mr_top.py
  80. +3 −2 r2/r2/lib/pages/graph.py
  81. +202 −49 r2/r2/lib/pages/pages.py
  82. +3 −1 r2/r2/lib/pages/things.py
  83. +6 −1 r2/r2/lib/promote.py
  84. +2 −0  r2/r2/lib/queues.py
  85. +556 −109 r2/r2/lib/scraper.py
  86. +15 −5 r2/r2/lib/sgm.pyx
  87. +2 −2 r2/r2/lib/sr_pops.py
  88. +29 −15 r2/r2/lib/strings.py
  89. +19 −6 r2/r2/lib/tracking.py
  90. +5 −5 r2/r2/lib/translation.py
  91. +1 −0  r2/r2/lib/utils/thing_utils.py
  92. +127 −42 r2/r2/lib/utils/utils.py
  93. +6 −2 r2/r2/lib/wrapped.pyx
  94. +32 −2 r2/r2/models/_builder.pyx
  95. +5 −3 r2/r2/models/account.py
  96. +9 −3 r2/r2/models/admintools.py
  97. +6 −4 r2/r2/models/builder.py
  98. +46 −8 r2/r2/models/gold.py
  99. +146 −46 r2/r2/models/link.py
  100. +37 −16 r2/r2/models/subreddit.py
  101. +157 −28 r2/r2/models/vote.py
  102. BIN  r2/r2/public/static/bestof_award.png
  103. +16 −8 r2/r2/public/static/css/mobile.css
  104. +172 −5 r2/r2/public/static/css/reddit.css
  105. BIN  r2/r2/public/static/giftgold.png
  106. +52 −0 r2/r2/public/static/gpgkey
  107. +2 −13 r2/r2/public/static/iphone/index.html
  108. +77 −1 r2/r2/public/static/js/reddit.js
  109. +3 −2 r2/r2/public/static/js/sponsored.js
  110. BIN  r2/r2/public/static/nsfw2.png
  111. BIN  r2/r2/public/static/reddit404d.png
  112. BIN  r2/r2/public/static/reddit_gold-70.png
  113. +4 −4 r2/r2/templates/adminerrorlog.html
  114. +18 −0 r2/r2/templates/base.html
  115. +10 −9 r2/r2/templates/comment.mobile
  116. +2 −2 r2/r2/templates/createsubreddit.html
  117. +2 −2 r2/r2/templates/dart_ad.html
  118. +0 −4 r2/r2/templates/frametoolbar.html
  119. +21 −0 r2/r2/templates/giftgold.html
  120. +180 −0 r2/r2/templates/gold.html
  121. +99 −0 r2/r2/templates/goldpayment.html
  122. +11 −11 r2/r2/templates/link.mobile
  123. +2 −2 r2/r2/templates/link.xml
  124. 0  r2/r2/templates/linkcommentsep.compact
  125. 0  r2/r2/templates/linkcommentsep.html
  126. 0  r2/r2/templates/linkcommentsep.htmllite
  127. +1 −0  r2/r2/templates/linkcommentsep.mobile
  128. 0  r2/r2/templates/linkcommentsep.xml
  129. +8 −0 r2/r2/templates/linkinfobar.html
  130. +8 −0 r2/r2/templates/login.html
  131. +3 −1 r2/r2/templates/loginformwide.html
  132. +2 −2 r2/r2/templates/messagecompose.html
  133. +1 −1  r2/r2/templates/paymentform.html
  134. +21 −1 r2/r2/templates/prefoptions.html
  135. +0 −1  r2/r2/templates/printable.html
  136. +8 −0 r2/r2/templates/profilebar.html
  137. +1 −1  r2/r2/templates/promote_graph.html
  138. +5 −4 r2/r2/templates/promotelinkform.html
  139. +7 −16 r2/r2/templates/reddit.html
  140. +6 −0 r2/r2/templates/reddit.mobile
  141. +2 −2 r2/r2/templates/redditfooter.html
  142. +1 −1  r2/r2/templates/reddittraffic.html
  143. +1 −1  r2/r2/templates/searchbar.html
  144. +44 −7 r2/r2/templates/searchform.html
  145. +2 −0  r2/r2/templates/sidebox.html
  146. +3 −1 r2/r2/templates/subredditinfobar.html
  147. +12 −5 r2/r2/templates/thanks.html
  148. +8 −0 r2/r2/templates/usertext.compact
  149. +8 −0 r2/r2/templates/usertext.html
  150. +1 −1  r2/r2/templates/usertext.mobile
  151. +35 −0 r2/r2/tests/functional/cassamodels.py
  152. +8 −12 r2/setup.py
  153. +1 −1  r2/updateini.py
View
1  .gitignore
@@ -35,6 +35,7 @@ r2/_builder.egg-info/
r2/_normalized_hot.egg-info/
r2/_sorts.egg-info/
r2/r2/lib/_normalized_hot.c
+r2/r2/lib/mr_tools/_mr_tools.c
r2/r2/lib/db/_sorts.c
r2/r2/lib/sgm.c
r2/r2/lib/utils/_utils.c
View
479 config/cassandra/cassandra.yaml
@@ -0,0 +1,479 @@
+# Cassandra storage config YAML
+
+# NOTE:
+# See http://wiki.apache.org/cassandra/StorageConfiguration for
+# full explanations of configuration directives
+# /NOTE
+
+# The name of the cluster. This is mainly used to prevent machines in
+# one logical cluster from joining another.
+cluster_name: 'reddit'
+
+# You should always specify InitialToken when setting up a production
+# cluster for the first time, and often when adding capacity later.
+# The principle is that each node should be given an equal slice of
+# the token ring; see http://wiki.apache.org/cassandra/Operations
+# for more details.
+#
+# If blank, Cassandra will request a token bisecting the range of
+# the heaviest-loaded existing node. If there is no load information
+# available, such as is the case with a new cluster, it will pick
+# a random token, which will lead to hot spots.
+initial_token:
+
+# Set to true to make new [non-seed] nodes automatically migrate data
+# to themselves from the pre-existing nodes in the cluster. Defaults
+# to false because you can only bootstrap N machines at a time from
+# an existing cluster of N, so if you are bringing up a cluster of
+# 10 machines with 3 seeds you would have to do it in stages. Leaving
+# this off for the initial start simplifies that.
+auto_bootstrap: false
+
+# See http://wiki.apache.org/cassandra/HintedHandoff
+hinted_handoff_enabled: true
+
+# authentication backend, implementing IAuthenticator; used to identify users
+authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
+
+# authorization backend, implementing IAuthority; used to limit access/provide permissions
+authority: org.apache.cassandra.auth.AllowAllAuthority
+
+# The partitioner is responsible for distributing rows (by key) across
+# nodes in the cluster. Any IPartitioner may be used, including your
+# own as long as it is on the classpath. Out of the box, Cassandra
+# provides org.apache.cassandra.dht.RandomPartitioner
+# org.apache.cassandra.dht.ByteOrderedPartitioner,
+# org.apache.cassandra.dht.OrderPreservingPartitioner (deprecated),
+# and org.apache.cassandra.dht.CollatingOrderPreservingPartitioner
+# (deprecated).
+#
+# - RandomPartitioner distributes rows across the cluster evenly by md5.
+# When in doubt, this is the best option.
+# - ByteOrderedPartitioner orders rows lexically by key bytes. BOP allows
+# scanning rows in key order, but the ordering can generate hot spots
+# for sequential insertion workloads.
+# - OrderPreservingPartitioner is an obsolete form of BOP, that stores
+#   keys in a less-efficient format and only works with keys that are
+#   UTF8-encoded Strings.
+# - CollatingOPP collates according to EN,US rules rather than lexical byte
+#   ordering. Use this as an example if you need custom collation.
+#
+# See http://wiki.apache.org/cassandra/Operations for more on
+# partitioners and token selection.
+partitioner: org.apache.cassandra.dht.RandomPartitioner
+
+# directories where Cassandra should store data on disk.
+data_file_directories:
+ - /cassandra/data
+
+# commit log
+commitlog_directory: /cassandra/commitlog
+
+# saved caches
+saved_caches_directory: /cassandra/saved_caches
+
+# Size to allow commitlog to grow to before creating a new segment
+commitlog_rotation_threshold_in_mb: 128
+
+# commitlog_sync may be either "periodic" or "batch."
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been fsynced to disk. It will wait up to
+# CommitLogSyncBatchWindowInMS milliseconds for other writes, before
+# performing the sync.
+commitlog_sync: periodic
+
+# In "periodic" mode, writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds.
+commitlog_sync_period_in_ms: 10000
+
+# Addresses of hosts that are deemed contact points.
+# Cassandra nodes use this list of hosts to find each other and learn
+# the topology of the ring. You must change this if you are running
+# multiple nodes!
+seeds:
+ - pmc01
+ - pmc02
+ - pmc03
+ - pmc04
+ - pmc05
+ - pmc06
+ - pmc07
+ - pmc08
+ - pmc09
+ - pmc10
+ - pmc11
+ - pmc12
+ - pmc13
+ - pmc14
+ - pmc15
+ - pmc16
+ - pmc17
+ - pmc18
+ - pmc19
+ - pmc20
+
+# Access mode. mmapped i/o is substantially faster, but only practical on
+# a 64bit machine (which notably does not include EC2 "small" instances)
+# or relatively small datasets. "auto", the safe choice, will enable
+# mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
+# (which may allow you to get part of the benefits of mmap on a 32bit
+# machine by mmapping only index files) and "standard".
+# (The buffer size settings that follow only apply to standard,
+# non-mmapped i/o.)
+disk_access_mode: mmap_index_only
+
+# Unlike most systems, in Cassandra writes are faster than reads, so
+# you can afford more of those in parallel. A good rule of thumb is 2
+# concurrent reads per processor core. Increase ConcurrentWrites to
+# the number of clients writing at once if you enable CommitLogSync +
+# CommitLogSyncDelay.
+concurrent_reads: 8
+concurrent_writes: 32
+
+# This sets the amount of memtable flush writer threads. These will
+# be blocked by disk io, and each one will hold a memtable in memory
+# while blocked. If you have a large heap and many data directories,
+# you can increase this value for better flush performance.
+# By default this will be set to the amount of data directories defined.
+#memtable_flush_writers: 1
+
+# Buffer size to use when performing contiguous column slices.
+# Increase this to the size of the column slices you typically perform
+sliced_buffer_size_in_kb: 64
+
+# TCP port, for commands and data
+storage_port: 7000
+
+# Address to bind to and tell other Cassandra nodes to connect to. You
+# _must_ change this if you want multiple nodes to be able to
+# communicate!
+#
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing *if* the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be).
+#
+# Setting this to 0.0.0.0 is always wrong.
+listen_address:
+
+# The address to bind the Thrift RPC service to -- clients connect
+# here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if
+# you want Thrift to listen on all interfaces.
+#
+# Leaving this blank has the same effect it does for ListenAddress,
+# (i.e. it will be based on the configured hostname of the node).
+rpc_address: 0.0.0.0
+# port for Thrift to listen for clients on
+rpc_port: 9160
+
+# enable or disable keepalive on rpc connections
+rpc_keepalive: true
+
+# uncomment to set socket buffer sizes on rpc connections
+# rpc_send_buff_size_in_bytes:
+# rpc_recv_buff_size_in_bytes:
+
+# Frame size for thrift (maximum field length).
+# 0 disables TFramedTransport in favor of TSocket. This option
+# is deprecated; we strongly recommend using Framed mode.
+thrift_framed_transport_size_in_mb: 15
+
+# The max length of a thrift message, including all fields and
+# internal thrift overhead.
+thrift_max_message_length_in_mb: 16
+
+# Whether or not to take a snapshot before each compaction. Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you. Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+
+# change this to increase the compaction thread's priority. In java, 1 is the
+# lowest priority and that is our default.
+# compaction_thread_priority: 1
+
+# The threshold size in megabytes the binary memtable must grow to,
+# before it's submitted for flushing to disk.
+binary_memtable_throughput_in_mb: 256
+
+# Add column indexes to a row after its contents reach this size.
+# Increase if your column values are large, or if you have a very large
+# number of columns. The competing causes are, Cassandra has to
+# deserialize this much of the row to read a single column, so you want
+# it to be small - at least if you do many partial-row reads - but all
+# the index data is read for each access, so you don't want to generate
+# that wastefully either.
+column_index_size_in_kb: 64
+
+# Size limit for rows being compacted in memory. Larger rows will spill
+# over to disk and use a slower two-pass compaction process. A message
+# will be logged specifying the row key.
+in_memory_compaction_limit_in_mb: 64
+
+# Time to wait for a reply from other nodes before failing the command
+rpc_timeout_in_ms: 20000
+
+# phi value that must be reached for a host to be marked down.
+# most users should never need to adjust this.
+phi_convict_threshold: 10
+
+# endpoint_snitch -- Set this to a class that implements
+# IEndpointSnitch, which will let Cassandra know enough
+# about your network topology to route requests efficiently.
+# Out of the box, Cassandra provides
+# - org.apache.cassandra.locator.SimpleSnitch:
+# Treats Strategy order as proximity. This improves cache locality
+# when disabling read repair, which can further improve throughput.
+# - org.apache.cassandra.locator.RackInferringSnitch:
+# Proximity is determined by rack and data center, which are
+# assumed to correspond to the 3rd and 2nd octet of each node's
+# IP address, respectively
+# - org.apache.cassandra.locator.PropertyFileSnitch:
+#   Proximity is determined by rack and data center, which are
+#   explicitly configured in cassandra-topology.properties.
+endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch
+
+# dynamic_snitch -- This boolean controls whether the above snitch is
+# wrapped with a dynamic snitch, which will monitor read latencies
+# and avoid reading from hosts that have slowed (due to compaction,
+# for instance)
+dynamic_snitch: true
+# controls how often to perform the more expensive part of host score
+# calculation
+dynamic_snitch_update_interval_in_ms: 100
+# controls how often to reset all host scores, allowing a bad host to
+# possibly recover
+dynamic_snitch_reset_interval_in_ms: 600000
+# if set greater than zero and read_repair_chance is < 1.0, this will allow
+# 'pinning' of replicas to hosts in order to increase cache capacity.
+# The badness threshold will control how much worse the pinned host has to be
+# before the dynamic snitch will prefer other replicas over it. This is
+# expressed as a double which represents a percentage. Thus, a value of
+# 0.2 means Cassandra would continue to prefer the static snitch values
+# until the pinned host was 20% worse than the fastest.
+dynamic_snitch_badness_threshold: 0.1
+
+# request_scheduler -- Set this to a class that implements
+# RequestScheduler, which will schedule incoming client requests
+# according to the specific policy. This is useful for multi-tenancy
+# with a single Cassandra cluster.
+# NOTE: This is specifically for requests from the client and does
+# not affect inter node communication.
+# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
+# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
+# client requests to a node with a separate queue for each
+# request_scheduler_id. The scheduler is further customized by
+# request_scheduler_options as described below.
+request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+
+# Scheduler Options vary based on the type of scheduler
+# NoScheduler - Has no options
+# RoundRobin
+# - throttle_limit -- The throttle_limit is the number of in-flight
+# requests per client. Requests beyond
+# that limit are queued up until
+# running requests can complete.
+# The value of 80 here is twice the number of
+# concurrent_reads + concurrent_writes.
+# - default_weight -- default_weight is optional and allows for
+# overriding the default which is 1.
+# - weights -- Weights are optional and will default to 1 or the
+# overridden default_weight. The weight translates into how
+# many requests are handled during each turn of the
+# RoundRobin, based on the scheduler id.
+#
+# request_scheduler_options:
+# throttle_limit: 80
+# default_weight: 5
+# weights:
+# Keyspace1: 1
+# Keyspace2: 5
+
+# request_scheduler_id -- An identifier based on which to perform
+# the request scheduling. Currently the only valid option is keyspace.
+# request_scheduler_id: keyspace
+
+# The Index Interval determines how large the sampling of row keys
+# is for a given SSTable. The larger the sampling, the more effective
+# the index is at the cost of space.
+index_interval: 128
+
+# Keyspaces have ColumnFamilies. (Usually 1 KS per application.)
+# ColumnFamilies have Rows. (Dozens of CFs per KS.)
+# Rows contain Columns. (Many per CF.)
+# Columns contain name:value:timestamp. (Many per Row.)
+#
+# A KS is most similar to a schema, and a CF is most similar to a relational table.
+#
+# Keyspaces, ColumnFamilies, and Columns may carry additional
+# metadata that change their behavior. These are as follows:
+#
+# Keyspace required parameters:
+# - name: name of the keyspace; "system" is
+# reserved for Cassandra Internals.
+# - replica_placement_strategy: the class that determines how replicas
+# are distributed among nodes. Contains both the class as well as
+# configuration information. Must extend AbstractReplicationStrategy.
+# Out of the box, Cassandra provides
+# * org.apache.cassandra.locator.SimpleStrategy
+# * org.apache.cassandra.locator.NetworkTopologyStrategy
+# * org.apache.cassandra.locator.OldNetworkTopologyStrategy
+#
+# SimpleStrategy merely places the first
+# replica at the node whose token is closest to the key (as determined
+# by the Partitioner), and additional replicas on subsequent nodes
+# along the ring in increasing Token order.
+#
+# With NetworkTopologyStrategy,
+# for each datacenter, you can specify how many replicas you want
+# on a per-keyspace basis. Replicas are placed on different racks
+# within each DC, if possible. This strategy also requires rack aware
+# snitch, such as RackInferringSnitch or PropertyFileSnitch.
+# An example:
+# - name: Keyspace1
+# replica_placement_strategy: org.apache.cassandra.locator.NetworkTopologyStrategy
+# strategy_options:
+# DC1 : 3
+# DC2 : 2
+# DC3 : 1
+#
+# OldNetworkTopologyStrategy [formerly RackAwareStrategy]
+# places one replica in each of two datacenters, and the third on a
+# different rack in the first. Additional datacenters are not
+# guaranteed to get a replica. Additional replicas after three are placed
+# in ring order after the third without regard to rack or datacenter.
+# - replication_factor: Number of replicas of each row
+# Keyspace optional parameters:
+# - strategy_options: Additional information for the replication strategy.
+# - column_families:
+# ColumnFamily required parameters:
+# - name: name of the ColumnFamily. Must not contain the character "-".
+# - compare_with: tells Cassandra how to sort the columns for slicing
+# operations. The default is BytesType, which is a straightforward
+# lexical comparison of the bytes in each column. Other options are
+# AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, LongType,
+# and IntegerType (a generic variable-length integer type).
+# You can also specify the fully-qualified class name to a class of
+# your choice extending org.apache.cassandra.db.marshal.AbstractType.
+#
+# ColumnFamily optional parameters:
+# - keys_cached: specifies the number of keys per sstable whose
+# locations we keep in memory in "mostly LRU" order. (JUST the key
+# locations, NOT any column values.) Specify a fraction (value less
+# than 1) or an absolute number of keys to cache. Defaults to 200000
+# keys.
+# - rows_cached: specifies the number of rows whose entire contents we
+# cache in memory. Do not use this on ColumnFamilies with large rows,
+# or ColumnFamilies with high write:read ratios. Specify a fraction
+# (value less than 1) or an absolute number of rows to cache.
+# Defaults to 0. (i.e. row caching is off by default)
+# - comment: used to attach additional human-readable information about
+# the column family to its definition.
+# - read_repair_chance: specifies the probability with which read
+# repairs should be invoked on non-quorum reads. must be between 0
+# and 1. defaults to 1.0 (always read repair).
+# - gc_grace_seconds: specifies the time to wait before garbage
+# collecting tombstones (deletion markers). defaults to 864000 (10
+# days). See http://wiki.apache.org/cassandra/DistributedDeletes
+# - default_validation_class: specifies a validator class to use for
+# validating all the column values in the CF.
+# NOTE:
+# min_ must be less than max_compaction_threshold!
+# - min_compaction_threshold: the minimum number of SSTables needed
+# to start a minor compaction. increasing this will cause minor
+# compactions to start less frequently and be more intensive. setting
+# this to 0 disables minor compactions. defaults to 4.
+# - max_compaction_threshold: the maximum number of SSTables allowed
+# before a minor compaction is forced. decreasing this will cause
+# minor compactions to start more frequently and be less intensive.
+# setting this to 0 disables minor compactions. defaults to 32.
+# /NOTE
+# - row_cache_save_period_in_seconds: number of seconds between saving
+# row caches. The row caches can be saved periodically and if one
+# exists on startup it will be loaded.
+# - key_cache_save_period_in_seconds: number of seconds between saving
+# key caches. The key caches can be saved periodically and if one
+# exists on startup it will be loaded.
+# - memtable_flush_after_mins: The maximum time to leave a dirty table
+# unflushed. This should be large enough that it won't cause a flush
+# storm of all memtables during periods of inactivity.
+# - memtable_throughput_in_mb: The maximum size of the memtable before
+# it is flushed. If undefined, 1/8 * heapsize will be used.
+# - memtable_operations_in_millions: Number of operations in millions
+# before the memtable is flushed. If undefined, throughput / 64 * 0.3
+# will be used.
+# - column_metadata:
+# Column required parameters:
+# - name: binds a validator (and optionally an indexer) to columns
+# with this name in any row of the enclosing column family.
+# - validator: like cf.compare_with, an AbstractType that checks
+# that the value of the column is well-defined.
+# Column optional parameters:
+# NOTE:
+# index_name cannot be set if index_type is not also set!
+# - index_name: User-friendly name for the index.
+# - index_type: The type of index to be created. Currently only
+# KEYS is supported.
+# /NOTE
+#
+# NOTE:
+# this keyspace definition is for demonstration purposes only.
+# Cassandra will not load these definitions during startup. See
+# http://wiki.apache.org/cassandra/FAQ#no_keyspaces for an explanation.
+# /NOTE
+keyspaces:
+ - name: reddit
+ replica_placement_strategy: org.apache.cassandra.locator.RackUnawareStrategy
+ replication_factor: 3
+ column_families:
+ - column_type: Standard
+ compare_with: BytesType
+ name: permacache
+ row_cache_save_period_in_seconds: 3600
+ rows_cached: 100000
+ - column_type: Standard
+ compare_with: BytesType
+ name: urls
+ row_cache_save_period_in_seconds: 3600
+ rows_cached: 100000
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: LinkVote
+ rows_cached: 100000
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: CommentVote
+ rows_cached: 100000
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: Friend
+ rows_cached: 10000
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: Save
+ rows_cached: 100000
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: Hide
+ rows_cached: 100000
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: Click
+ rows_cached: 100000
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: VotesByLink
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: VotesByDay
+ - column_type: Standard
+ name: FriendsByAccount
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: SavesByAccount
+ - column_type: Standard
+ compare_with: UTF8Type
+ name: CommentSortsCache
+ row_cache_save_period_in_seconds: 3600
+ rows_cached: 200000
View
420 config/cassandra/storage-conf.xml
@@ -1,420 +0,0 @@
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing,
- ~ software distributed under the License is distributed on an
- ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- ~ KIND, either express or implied. See the License for the
- ~ specific language governing permissions and limitations
- ~ under the License.
--->
-<Storage>
- <!--======================================================================-->
- <!-- Basic Configuration -->
- <!--======================================================================-->
-
- <!--
- ~ The name of this cluster. This is mainly used to prevent machines in
- ~ one logical cluster from joining another.
- -->
- <ClusterName>reddit</ClusterName>
-
- <!--
- ~ Turn on to make new [non-seed] nodes automatically migrate the right data
- ~ to themselves. (If no InitialToken is specified, they will pick one
- ~ such that they will get half the range of the most-loaded node.)
- ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
- ~ so that you can't subsequently accidently bootstrap a node with
- ~ data on it. (You can reset this by wiping your data and commitlog
- ~ directories.)
- ~
- ~ Off by default so that new clusters and upgraders from 0.4 don't
- ~ bootstrap immediately. You should turn this on when you start adding
- ~ new nodes to a cluster that already has data on it. (If you are upgrading
- ~ from 0.4, start your cluster with it off once before changing it to true.
- ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
- ~ I/O before your cluster starts up.)
- -->
- <AutoBootstrap>false</AutoBootstrap>
-
- <!--
- ~ See http://wiki.apache.org/cassandra/HintedHandoff
- -->
- <HintedHandoffEnabled>true</HintedHandoffEnabled>
-
- <!--
- ~ Keyspaces and ColumnFamilies:
- ~ A ColumnFamily is the Cassandra concept closest to a relational
- ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
- ~ very unusual circumstances you will have one Keyspace per application.
-
- ~ There is an implicit keyspace named 'system' for Cassandra internals.
- -->
- <Keyspaces>
- <Keyspace Name="Keyspace1">
- <!--
- ~ ColumnFamily definitions have one required attribute (Name)
- ~ and several optional ones.
- ~
- ~ The CompareWith attribute tells Cassandra how to sort the columns
- ~ for slicing operations. The default is BytesType, which is a
- ~ straightforward lexical comparison of the bytes in each column.
- ~ Other options are AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType,
- ~ and LongType. You can also specify the fully-qualified class
- ~ name to a class of your choice extending
- ~ org.apache.cassandra.db.marshal.AbstractType.
- ~
- ~ SuperColumns have a similar CompareSubcolumnsWith attribute.
- ~
- ~ BytesType: Simple sort by byte value. No validation is performed.
- ~ AsciiType: Like BytesType, but validates that the input can be
- ~ parsed as US-ASCII.
- ~ UTF8Type: A string encoded as UTF8
- ~ LongType: A 64bit long
- ~ LexicalUUIDType: A 128bit UUID, compared lexically (by byte value)
- ~ TimeUUIDType: a 128bit version 1 UUID, compared by timestamp
- ~
- ~ (To get the closest approximation to 0.3-style supercolumns, you
- ~ would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)
- ~
- ~ An optional `Comment` attribute may be used to attach additional
- ~ human-readable information about the column family to its definition.
- ~
- ~ The optional KeysCached attribute specifies
- ~ the number of keys per sstable whose locations we keep in
- ~ memory in "mostly LRU" order. (JUST the key locations, NOT any
- ~ column values.) Specify a fraction (value less than 1), a percentage
- ~ (ending in a % sign) or an absolute number of keys to cache.
- ~ KeysCached defaults to 200000 keys.
- ~
- ~ The optional RowsCached attribute specifies the number of rows
- ~ whose entire contents we cache in memory. Do not use this on
- ~ ColumnFamilies with large rows, or ColumnFamilies with high write:read
- ~ ratios. Specify a fraction (value less than 1), a percentage (ending in
- ~ a % sign) or an absolute number of rows to cache.
- ~ RowsCached defaults to 0, i.e., row cache is off by default.
- ~
- ~ Remember, when using caches as a percentage, they WILL grow with
- ~ your data set!
- -->
- <ColumnFamily Name="Standard1" CompareWith="BytesType"/>
- <ColumnFamily Name="Standard2"
- CompareWith="UTF8Type"
- KeysCached="100%"/>
- <ColumnFamily Name="StandardByUUID1" CompareWith="TimeUUIDType" />
- <ColumnFamily Name="Super1"
- ColumnType="Super"
- CompareWith="BytesType"
- CompareSubcolumnsWith="BytesType" />
- <ColumnFamily Name="Super2"
- ColumnType="Super"
- CompareWith="UTF8Type"
- CompareSubcolumnsWith="UTF8Type"
- RowsCached="10000"
- KeysCached="50%"
- Comment="A column family with supercolumns, whose column and subcolumn names are UTF8 strings"/>
-
- <!--
- ~ Strategy: Setting this to the class that implements
- ~ IReplicaPlacementStrategy will change the way the node picker works.
- ~ Out of the box, Cassandra provides
- ~ org.apache.cassandra.locator.RackUnawareStrategy and
- ~ org.apache.cassandra.locator.RackAwareStrategy (place one replica in
- ~ a different datacenter, and the others on different racks in the same
- ~ one.)
- -->
- <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
-
- <!-- Number of replicas of the data -->
- <ReplicationFactor>1</ReplicationFactor>
-
- <!--
- ~ EndPointSnitch: Setting this to the class that implements
- ~ AbstractEndpointSnitch, which lets Cassandra know enough
- ~ about your network topology to route requests efficiently.
- ~ Out of the box, Cassandra provides org.apache.cassandra.locator.EndPointSnitch,
- ~ and PropertyFileEndPointSnitch is available in contrib/.
- -->
- <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
- </Keyspace>
-
- <Keyspace Name="permacache">
- <ColumnFamily CompareWith="BytesType" Name="permacache" />
- <ColumnFamily CompareWith="BytesType" Name="urls" RowsCached="100000" />
-
- <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
- <ReplicationFactor>3</ReplicationFactor>
- <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
- </Keyspace>
-
- <Keyspace Name="reddit">
- <!-- Relations -->
- <ColumnFamily CompareWith="UTF8Type" Name="LinkVote" />
- <ColumnFamily CompareWith="UTF8Type" Name="CommentVote" />
-
- <!-- Views -->
- <ColumnFamily CompareWith="UTF8Type" Name="VotesByLink" />
- <ColumnFamily CompareWith="UTF8Type" Name="CommentSortsCache" RowsCached="100000" />
-
- <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
- <ReplicationFactor>3</ReplicationFactor>
- <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
- </Keyspace>
-
- </Keyspaces>
-
- <!--
- ~ Authenticator: any IAuthenticator may be used, including your own as long
- ~ as it is on the classpath. Out of the box, Cassandra provides
- ~ org.apache.cassandra.auth.AllowAllAuthenticator and,
- ~ org.apache.cassandra.auth.SimpleAuthenticator
- ~ (SimpleAuthenticator uses access.properties and passwd.properties by
- ~ default).
- ~
- ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
- -->
- <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
-
- <!--
- ~ Partitioner: any IPartitioner may be used, including your own as long
- ~ as it is on the classpath. Out of the box, Cassandra provides
- ~ org.apache.cassandra.dht.RandomPartitioner,
- ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
- ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
- ~ (CollatingOPP colates according to EN,US rules, not naive byte
- ~ ordering. Use this as an example if you need locale-aware collation.)
- ~ Range queries require using an order-preserving partitioner.
- ~
- ~ Achtung! Changing this parameter requires wiping your data
- ~ directories, since the partitioner can modify the sstable on-disk
- ~ format.
- -->
- <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
-
- <!--
- ~ If you are using an order-preserving partitioner and you know your key
- ~ distribution, you can specify the token for this node to use. (Keys
- ~ are sent to the node with the "closest" token, so distributing your
- ~ tokens equally along the key distribution space will spread keys
- ~ evenly across your cluster.) This setting is only checked the first
- ~ time a node is started.
-
- ~ This can also be useful with RandomPartitioner to force equal spacing
- ~ of tokens around the hash space, especially for clusters with a small
- ~ number of nodes.
- -->
- <InitialToken></InitialToken>
-
- <!--
- ~ Directories: Specify where Cassandra should store different data on
- ~ disk. Keep the data disks and the CommitLog disks separate for best
- ~ performance
- -->
- <CommitLogDirectory>/cassandra/commitlog</CommitLogDirectory>
- <DataFileDirectories>
- <DataFileDirectory>/cassandra/data</DataFileDirectory>
- </DataFileDirectories>
-
- <!--
- ~ Addresses of hosts that are deemed contact points. Cassandra nodes
- ~ use this list of hosts to find each other and learn the topology of
- ~ the ring. You must change this if you are running multiple nodes!
- -->
- <Seeds>
- <Seed>pmc01</Seed>
- <Seed>pmc02</Seed>
- <Seed>pmc03</Seed>
- <Seed>pmc04</Seed>
- <Seed>pmc05</Seed>
- <Seed>pmc06</Seed>
- <Seed>pmc07</Seed>
- <Seed>pmc08</Seed>
- <Seed>pmc09</Seed>
- <Seed>pmc10</Seed>
- <Seed>pmc11</Seed>
- <Seed>pmc12</Seed>
- </Seeds>
-
- <!-- Miscellaneous -->
-
- <!-- Time to wait for a reply from other nodes before failing the command -->
- <RpcTimeoutInMillis>30000</RpcTimeoutInMillis>
- <!-- phi value that must be reached before a host is marked as down.
- most users should never adjust this -->
- <PhiConvictThreshold>10</PhiConvictThreshold>
- <!-- Size to allow commitlog to grow to before creating a new segment -->
- <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
-
-
- <!-- Local hosts and ports -->
-
- <!--
- ~ Address to bind to and tell other nodes to connect to. You _must_
- ~ change this if you want multiple nodes to be able to communicate!
- ~
- ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
- ~ will always do the Right Thing *if* the node is properly configured
- ~ (hostname, name resolution, etc), and the Right Thing is to use the
- ~ address associated with the hostname (it might not be).
- -->
- <ListenAddress></ListenAddress>
- <!-- internal communications port -->
- <StoragePort>7000</StoragePort>
-
- <!--
- ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
- ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
- ~ all interfaces.
- ~
- ~ Leaving this blank has the same effect it does for ListenAddress,
- ~ (i.e. it will be based on the configured hostname of the node).
- -->
- <ThriftAddress></ThriftAddress>
- <!-- Thrift RPC port (the port clients connect to). -->
- <ThriftPort>9160</ThriftPort>
- <!--
- ~ Whether or not to use a framed transport for Thrift. If this option
- ~ is set to true then you must also use a framed transport on the
- ~ client-side, (framed and non-framed transports are not compatible).
- -->
- <ThriftFramedTransport>false</ThriftFramedTransport>
-
-
- <!--======================================================================-->
- <!-- Memory, Disk, and Performance -->
- <!--======================================================================-->
-
- <!--
- ~ Access mode. mmapped i/o is substantially faster, but only practical on
- ~ a 64bit machine (which notably does not include EC2 "small" instances)
- ~ or relatively small datasets. "auto", the safe choice, will enable
- ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
- ~ (which may allow you to get part of the benefits of mmap on a 32bit
- ~ machine by mmapping only index files) and "standard".
- ~ (The buffer size settings that follow only apply to standard,
- ~ non-mmapped i/o.)
- -->
- <DiskAccessMode>mmap_index_only</DiskAccessMode>
-
- <!--
- ~ Size of compacted row above which to log a warning. (If compacted
- ~ rows do not fit in memory, Cassandra will crash. This is explained
- ~ in http://wiki.apache.org/cassandra/CassandraLimitations and is
- ~ scheduled to be fixed in 0.7.)
- -->
- <RowWarningThresholdInMB>512</RowWarningThresholdInMB>
-
- <!--
- ~ Buffer size to use when performing contiguous column slices. Increase
- ~ this to the size of the column slices you typically perform.
- ~ (Name-based queries are performed with a buffer size of
- ~ ColumnIndexSizeInKB.)
- -->
- <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
-
- <!--
- ~ Buffer size to use when flushing memtables to disk. (Only one
- ~ memtable is ever flushed at a time.) Increase (decrease) the index
- ~ buffer size relative to the data buffer if you have few (many)
- ~ columns per key. Bigger is only better _if_ your memtables get large
- ~ enough to use the space. (Check in your data directory after your
- ~ app has been running long enough.) -->
- <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
- <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
-
- <!--
- ~ Add column indexes to a row after its contents reach this size.
- ~ Increase if your column values are large, or if you have a very large
- ~ number of columns. The competing causes are, Cassandra has to
- ~ deserialize this much of the row to read a single column, so you want
- ~ it to be small - at least if you do many partial-row reads - but all
- ~ the index data is read for each access, so you don't want to generate
- ~ that wastefully either.
- -->
- <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
-
- <!--
- ~ Flush memtable after this much data has been inserted, including
- ~ overwritten data. There is one memtable per column family, and
- ~ this threshold is based solely on the amount of data stored, not
- ~ actual heap memory usage (there is some overhead in indexing the
- ~ columns).
- -->
- <MemtableThroughputInMB>128</MemtableThroughputInMB>
- <!--
- ~ Throughput setting for Binary Memtables. Typically these are
- ~ used for bulk load so you want them to be larger.
- -->
- <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
- <!--
- ~ The maximum number of columns in millions to store in memory per
- ~ ColumnFamily before flushing to disk. This is also a per-memtable
- ~ setting. Use with MemtableThroughputInMB to tune memory usage.
- -->
- <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
- <!--
- ~ The maximum time to leave a dirty memtable unflushed.
- ~ (While any affected columnfamilies have unflushed data from a
- ~ commit log segment, that segment cannot be deleted.)
- ~ This needs to be large enough that it won't cause a flush storm
- ~ of all your memtables flushing at once because none has hit
- ~ the size or count thresholds yet.
- -->
- <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
-
- <!--
- ~ Unlike most systems, in Cassandra writes are faster than reads, so
- ~ you can afford more of those in parallel. A good rule of thumb is 2
- ~ concurrent reads per processor core. Increase ConcurrentWrites to
- ~ the number of clients writing at once if you enable CommitLogSync +
- ~ CommitLogSyncDelay. -->
- <ConcurrentReads>8</ConcurrentReads>
- <ConcurrentWrites>32</ConcurrentWrites>
-
- <!--
- ~ CommitLogSync may be either "periodic" or "batch." When in batch
- ~ mode, Cassandra won't ack writes until the commit log has been
- ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
- ~ milliseconds for other writes, before performing the sync.
-
- ~ This is less necessary in Cassandra than in traditional databases
- ~ since replication reduces the odds of losing data from a failure
- ~ after writing the log entry but before it actually reaches the disk.
- ~ So the other option is "periodic," where writes may be acked immediately
- ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
- ~ milliseconds.
- -->
- <CommitLogSync>periodic</CommitLogSync>
- <!--
- ~ Interval at which to perform syncs of the CommitLog in periodic mode.
- ~ Usually the default of 10000ms is fine; increase it if your i/o
- ~ load is such that syncs are taking excessively long times.
- -->
- <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
- <!--
- ~ Delay (in milliseconds) during which additional commit log entries
- ~ may be written before fsync in batch mode. This will increase
- ~ latency slightly, but can vastly improve throughput where there are
- ~ many writers. Set to zero to disable (each entry will be synced
- ~ individually). Reasonable values range from a minimal 0.1 to 10 or
- ~ even more if throughput matters more than latency.
- -->
- <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
-
- <!--
- ~ Time to wait before garbage-collection deletion markers. Set this to
- ~ a large enough value that you are confident that the deletion marker
- ~ will be propagated to all replicas by the time this many seconds has
- ~ elapsed, even in the face of hardware failures. The default value is
- ~ ten days.
- -->
- <GCGraceSeconds>864000</GCGraceSeconds>
-</Storage>
View
52 r2/example.ini
@@ -32,9 +32,11 @@ MODSECRET = abcdefghijklmnopqrstuvwxyz0123456789
# secret for /prefs/feeds
FEEDSECRET = abcdefghijklmnopqrstuvwxyz0123456789
+INDEXTANK_API_URL =
+
# -- important settings --
# the domain that this app serves itself up as
-domain = localhost
+domain = reddit.local
# if you use www for the old-timey feel, put it here
domain_prefix =
# the user used for "system" operations and messages
@@ -80,6 +82,7 @@ admin_message_acct = reddit
# data cache (used for caching Thing objects)
num_mc_clients = 5
memcaches = 127.0.0.1:11211
+stalecaches =
# render caches (the second is "remote" and the local is optional but in the same format)
local_rendercache =
rendercaches = 127.0.0.1:11211
@@ -94,7 +97,7 @@ permacache_memcaches = 127.0.0.1:11211
cassandra_seeds = 127.0.0.1:9160
# read/write consistency levels for Cassandra
cassandra_rcl = ONE
-cassandra_wcl = QUORUM
+cassandra_wcl = ONE
# -- url cache options --
url_caches = 127.0.0.1:11211
@@ -117,6 +120,8 @@ adtracker_url = /static/pixel.png
adframetracker_url = http://pixel.reddit.com/pixel/of_defenestration.png
# open redirector to bounce clicks off of on sponsored links for tracking
clicktracker_url = /static/pixel.png
+# new pixel
+newtracker_url =
# amqp
amqp_host = localhost:5672
@@ -128,17 +133,23 @@ amqp_virtual_host = /
# list of all databases named in the subsequent table
databases = main, comment, vote, email, authorize, award, hc
-#db name db host user, pass
-main_db = reddit, 127.0.0.1, reddit, password
-comment_db = reddit, 127.0.0.1, reddit, password
-comment2_db = reddit, 127.0.0.1, reddit, password
-vote_db = reddit, 127.0.0.1, reddit, password
-email_db = reddit, 127.0.0.1, reddit, password
-authorize_db = reddit, 127.0.0.1, reddit, password
-award_db = reddit, 127.0.0.1, reddit, password
-hc_db = reddit, 127.0.0.1, reddit, password
+db_user = reddit
+db_pass = password
+db_port = 5432
+db_pool_size = 3
+db_pool_overflow_size = 3
+
+#db name db host user, pass, port, conn, overflow_conn
+main_db = reddit, 127.0.0.1, *, *, *, *, *
+comment_db = reddit, 127.0.0.1, *, *, *, *, *
+comment2_db = reddit, 127.0.0.1, *, *, *, *, *
+vote_db = reddit, 127.0.0.1, *, *, *, *, *
+email_db = reddit, 127.0.0.1, *, *, *, *, *
+authorize_db = reddit, 127.0.0.1, *, *, *, *, *
+award_db = reddit, 127.0.0.1, *, *, *, *, *
+hc_db = reddit, 127.0.0.1, *, *, *, *, *
-hardcache_categories = *:hc
+hardcache_categories = *:hc:hc
# this setting will prefix all of the table names
db_app_name = reddit
@@ -197,13 +208,17 @@ tracking_secret = abcdefghijklmnopqrstuvwxyz0123456789
## -- Self-service sponsored link stuff --
# (secure) payment domain
-payment_domain = http://pay.localhost/
-ad_domain = http://localhost
+payment_domain = http://reddit.local/
+ad_domain = http://reddit.local
+allowed_pay_countries = United States, United Kingdom, Canada
sponsors =
-# authorize.net credentials
+
+# authorize.net credentials (blank authorizenetapi to disable)
+authorizenetapi =
+# authorizenetapi = https://api.authorize.net/xml/v1/request.api
authorizenetname =
authorizenetkey =
-authorizenetapi = https://api.authorize.net/xml/v1/request.api
+
min_promote_bid = 20
max_promote_bid = 9999
min_promote_future = 2
@@ -227,7 +242,7 @@ authorized_cnames =
num_query_queue_workers = 5
query_queue_worker = http://cslowe.local:8000
enable_doquery = True
-use_query_cache = False
+use_query_cache = True
write_query_queue = True
# -- stylesheet editor --
@@ -243,6 +258,9 @@ stylesheet_rtl = reddit-rtl.css
# location of the static directory
static_path = /static/
+# make frontpage 100% dart
+frontpage_dart = false
+
# -- translator UI --
# enable/disable access to the translation UI in /admin/i18n
translator = true
View
6 r2/r2/config/middleware.py
@@ -255,7 +255,7 @@ def filter(self, execution_func, prof_arg = None):
return [res]
class DomainMiddleware(object):
- lang_re = re.compile(r"^\w\w(-\w\w)?$")
+ lang_re = re.compile(r"\A\w\w(-\w\w)?\Z")
def __init__(self, app):
self.app = app
@@ -371,7 +371,7 @@ def __call__(self, environ, start_response):
return self.app(environ, start_response)
class DomainListingMiddleware(object):
- domain_pattern = re.compile(r'^/domain/(([-\w]+\.)+[\w]+)')
+ domain_pattern = re.compile(r'\A/domain/(([-\w]+\.)+[\w]+)')
def __init__(self, app):
self.app = app
@@ -386,7 +386,7 @@ def __call__(self, environ, start_response):
return self.app(environ, start_response)
class ExtensionMiddleware(object):
- ext_pattern = re.compile(r'\.([^/]+)$')
+ ext_pattern = re.compile(r'\.([^/]+)\Z')
extensions = (('rss' , ('xml', 'text/xml; charset=UTF-8')),
('xml' , ('xml', 'text/xml; charset=UTF-8')),
View
6 r2/r2/config/rewrites.py
@@ -23,9 +23,9 @@
rewrites = (#these first two rules prevent the .embed rewrite from
#breaking other js that should work
- ("^/_(.*)", "/_$1"),
- ("^/static/(.*\.js)", "/static/$1"),
+ ("\A/_(.*)", "/_$1"),
+ ("\A/static/(.*\.js)", "/static/$1"),
#This next rewrite makes it so that all the embed stuff works.
- ("^(.*)(?<!button)(?<!buttonlite)(\.js)$", "$1.embed"))
+ ("\A(.*)(?<!button)(?<!buttonlite)(\.js)\Z", "$1.embed"))
rewrites = tuple((re.compile(r[0]), r[1]) for r in rewrites)
View
10 r2/r2/config/routing.py
@@ -173,6 +173,8 @@ def make_map(global_conf={}, app_conf={}):
mc('/thanks', controller='forms', action="thanks", secret = '')
mc('/thanks/:secret', controller='forms', action="thanks")
+ mc('/gold', controller='forms', action="gold")
+
mc('/password', controller='forms', action="password")
mc('/:action', controller='front',
requirements=dict(action="random|framebuster|selfserviceoatmeal"))
@@ -206,7 +208,11 @@ def make_map(global_conf={}, app_conf={}):
requirements=dict(action="options|over18|unlogged_options|optout|optin|login|reg"))
mc('/api/distinguish/:how', controller='api', action="distinguish")
- mc('/api/ipn/:secret', controller='api', action='ipn')
+ # wherever this is, google has to agree.
+ mc('/api/gcheckout', controller='ipn', action='gcheckout')
+ mc('/api/spendcreddits', controller='ipn', action="spendcreddits")
+ mc('/api/ipn/:secret', controller='ipn', action='ipn')
+ mc('/ipn/:secret', controller='ipn', action='ipn')
mc('/api/:action/:url_user', controller='api',
requirements=dict(action="login|register"))
mc('/api/gadget/click/:ids', controller = 'api', action='gadget', type='click')
@@ -214,7 +220,7 @@ def make_map(global_conf={}, app_conf={}):
mc('/api/:action', controller='promote',
requirements=dict(action="promote|unpromote|edit_promo|link_thumb|freebie|promote_note|update_pay|refund|traffic_viewer|rm_traffic_viewer|edit_campaign|delete_campaign|meta_promo|add_roadblock|rm_roadblock"))
mc('/api/:action', controller='apiminimal',
- requirements=dict(action="onload"))
+ requirements=dict(action="onload|new_captcha"))
mc('/api/:action', controller='api')
mc("/button_info", controller="api", action="info", limit = 1)
View
2  r2/r2/controllers/__init__.py
@@ -63,4 +63,4 @@
from api import ApiminimalController
from admin import AdminController
from redirect import RedirectController
-
+from ipn import IpnController
View
305 r2/r2/controllers/api.py
@@ -73,8 +73,7 @@ def reject_vote(thing):
class ApiminimalController(MinimalController):
"""
- Put API calls in here which won't come from logged in users (or
- don't rely on the user being logged int)
+ Put API calls in here which don't rely on the user being logged in
"""
@validatedForm(promoted = VByName('ids', thing_cls = Link,
@@ -104,8 +103,13 @@ def add_tracker(dest, where, what):
if sponsorships:
for s in sponsorships:
- add_tracker(s.sponsorship_url, s._fullname,
- "%s_%s" % (s._fullname, s.sponsorship_name))
+ if getattr(s, 'sponsorship_url', None):
+ add_tracker(s.sponsorship_url, s._fullname,
+ "%s_%s" % (s._fullname, s.sponsorship_name))
+
+ @validatedForm()
+ def POST_new_captcha(self, form, jquery, *a, **kw):
+ jquery("body").captcha(get_iden())
class ApiController(RedditController):
@@ -169,9 +173,9 @@ def POST_feedback(self, form, jquery, name, email, reason, message):
body = VMarkdown(['text', 'message']))
def POST_compose(self, form, jquery, to, subject, body, ip):
"""
- handles message composition under /message/compose.
+ handles message composition under /message/compose.
"""
- if not (form.has_errors("to", errors.USER_DOESNT_EXIST,
+ if not (form.has_errors("to", errors.USER_DOESNT_EXIST,
errors.NO_USER, errors.SUBREDDIT_NOEXIST) or
form.has_errors("subject", errors.NO_SUBJECT) or
form.has_errors("text", errors.NO_TEXT, errors.TOO_LONG) or
@@ -265,7 +269,7 @@ def POST_submit(self, form, jquery, url, selftext, kind, title,
g.log.warning("%s is trying to submit url=None (title: %r)"
% (request.ip, title))
elif check_domain:
- banmsg = is_banned_domain(url)
+ banmsg = is_banned_domain(url, request.ip)
# Uncomment if we want to let spammers know we're on to them
# if banmsg:
@@ -312,7 +316,7 @@ def POST_submit(self, form, jquery, url, selftext, kind, title,
# well, nothing left to do but submit it
l = Link._submit(request.post.title, url if kind == 'link' else 'self',
- c.user, sr, ip)
+ c.user, sr, ip, spam=c.user._spam)
if banmsg:
admintools.spam(l, banner = "domain (%s)" % banmsg)
@@ -382,22 +386,24 @@ def _login(self, form, user, dest='', rem = None):
form.redirect(dest)
- @validatedForm(VRatelimit(rate_ip = True, prefix = 'login_',
- error = errors.WRONG_PASSWORD),
+ @validatedForm(VDelay("login"),
user = VLogin(['user', 'passwd']),
username = VLength('user', max_length = 100),
dest = VDestination(),
rem = VBoolean('rem'),
reason = VReason('reason'))
def POST_login(self, form, jquery, user, username, dest, rem, reason):
+ if form.has_errors('vdelay', errors.RATELIMIT):
+ jquery(".recover-password").addClass("attention")
+ return
if reason and reason[0] == 'redirect':
dest = reason[1]
if login_throttle(username, wrong_password = form.has_errors("passwd",
errors.WRONG_PASSWORD)):
- VRatelimit.ratelimit(rate_ip = True, prefix = 'login_', seconds=1)
-
+ VDelay.record_violation("login", seconds=1, growfast=True)
+ jquery(".recover-password").addClass("attention")
c.errors.add(errors.WRONG_PASSWORD, field = "passwd")
if not form.has_errors("passwd", errors.WRONG_PASSWORD):
@@ -681,9 +687,26 @@ def POST_del(self, thing):
#comments have special delete tasks
elif isinstance(thing, Comment):
+ parent_id = getattr(thing, 'parent_id', None)
+ link_id = thing.link_id
+ recipient = None
+
+ if parent_id:
+ parent_comment = Comment._byID(parent_id, data=True)
+ recipient = Account._byID(parent_comment.author_id)
+ else:
+ parent_link = Link._byID(link_id, data=True)
+ if parent_link.is_self:
+ recipient = Account._byID(parent_link.author_id)
+
thing._delete()
delete_comment(thing)
- queries.new_comment(thing, None)
+
+ if recipient:
+ inbox_class = Inbox.rel(Account, Comment)
+ d = inbox_class._fast_query(recipient, thing, ("inbox", "selfreply"))
+ rels = filter(None, d.values()) or None
+ queries.new_comment(thing, rels)
@noresponse(VUser(), VModhash(),
thing = VByName('id'))
@@ -776,6 +799,8 @@ def POST_comment(self, commentform, jquery, parent, comment, ip):
#check the parent type here cause we need that for the
#ratelimit checks
if isinstance(parent, Message):
+ if not getattr(parent, "repliable", True):
+ abort(403, 'forbidden')
is_message = True
should_ratelimit = False
else:
@@ -950,6 +975,8 @@ def POST_juryvote(self, dir, thing, ip):
dir = VInt('dir', min=-1, max=1),
thing = VByName('id'))
def POST_vote(self, dir, thing, ip, vote_type):
+ from r2.models.admintools import valid_vote
+
ip = request.ip
user = c.user
store = True
@@ -961,9 +988,7 @@ def POST_vote(self, dir, thing, ip, vote_type):
reject_vote(thing)
store = False
- # TODO: temporary hack until we migrate the rest of the vote data
- if thing._date < datetime(2009, 4, 17, 0, 0, 0, 0, g.tz):
- g.log.debug("POST_vote: ignoring old vote on %s" % thing._fullname)
+ if not valid_vote(thing):
store = False
if getattr(c.user, "suspicious", False):
@@ -976,11 +1001,6 @@ def POST_vote(self, dir, thing, ip, vote_type):
organic = vote_type == 'organic'
queries.queue_vote(user, thing, dir, ip, organic, store = store,
cheater = (errors.CHEATER, None) in c.errors)
- if store:
- # update relevant caches
- if isinstance(thing, Link):
- set_last_modified(c.user, 'liked')
- set_last_modified(c.user, 'disliked')
@validatedForm(VUser(),
VModhash(),
@@ -1335,223 +1355,6 @@ def POST_distinguish(self, form, jquery, thing, how):
jquery(".content").replace_things(w, True, True)
jquery(".content .link .rank").hide()
-# TODO: we're well beyond the point where this function should have been
-# broken up and moved to its own file
- @textresponse(paypal_secret = VPrintable('secret', 50),
- payment_status = VPrintable('payment_status', 20),
- txn_id = VPrintable('txn_id', 20),
- paying_id = VPrintable('payer_id', 50),
- payer_email = VPrintable('payer_email', 250),
- item_number = VPrintable('item_number', 20),
- mc_currency = VPrintable('mc_currency', 20),
- mc_gross = VFloat('mc_gross'),
- custom = VPrintable('custom', 50))
- def POST_ipn(self, paypal_secret, payment_status, txn_id, paying_id,
- payer_email, item_number, mc_currency, mc_gross, custom):
-
- if paypal_secret != g.PAYPAL_SECRET:
- log_text("invalid IPN secret",
- "%s guessed the wrong IPN secret" % request.ip,
- "warning")
- raise ValueError
-
- if request.POST:
- parameters = request.POST.copy()
- else:
- parameters = request.GET.copy()
-
- if payment_status is None:
- payment_status = ''
-
- psl = payment_status.lower()
-
- if psl == 'completed':
- pass
- elif psl == 'refunded':
- log_text("refund", "Just got notice of a refund.", "info")
- # TODO: something useful when this happens -- and don't
- # forget to verify first
- return "Ok"
- elif psl == 'pending':
- log_text("pending",
- "Just got notice of a Pending, whatever that is.", "info")
- # TODO: something useful when this happens -- and don't
- # forget to verify first
- return "Ok"
- elif psl == 'reversed':
- log_text("canceled_reversal",
- "Just got notice of a PayPal reversal.", "info")
- # TODO: something useful when this happens -- and don't
- # forget to verify first
- return "Ok"
- elif psl == 'canceled_reversal':
- log_text("canceled_reversal",
- "Just got notice of a PayPal 'canceled reversal'.", "info")
- return "Ok"
- elif psl == '':
- pass
- else:
- for k, v in parameters.iteritems():
- g.log.info("IPN: %r = %r" % (k, v))
-
- raise ValueError("Unknown IPN status: %r" % payment_status)
-
- if parameters['txn_type'] == 'subscr_signup':
- return "Ok"
- elif parameters['txn_type'] == 'subscr_cancel':
- cancel_subscription(parameters['subscr_id'])
- return "Ok"
- elif parameters['txn_type'] == 'subscr_failed':
- log_text("failed_subscription",
- "Just got notice of a failed PayPal resub.", "info")
- return "Ok"
- elif parameters['txn_type'] == 'subscr_modify':
- log_text("modified_subscription",
- "Just got notice of a modified PayPal sub.", "info")
- return "Ok"
- elif parameters['txn_type'] in ('new_case',
- 'recurring_payment_suspended_due_to_max_failed_payment'):
- return "Ok"
- elif parameters['txn_type'] == 'subscr_payment' and psl == 'completed':
- subscr_id = parameters['subscr_id']
- elif parameters['txn_type'] == 'web_accept' and psl == 'completed':
- subscr_id = None
- else:
- raise ValueError("Unknown IPN txn_type / psl %r" %
- ((parameters['txn_type'], psl),))
-
- if mc_currency != 'USD':
- raise ValueError("Somehow got non-USD IPN %r" % mc_currency)
-
- if g.cache.get("ipn-debug"):
- g.cache.delete("ipn-debug")
- for k, v in parameters.iteritems():
- g.log.info("IPN: %r = %r" % (k, v))
-
- parameters['cmd']='_notify-validate'
- try:
- safer = dict([k, v.encode('utf-8')] for k, v in parameters.items())
- params = urllib.urlencode(safer)
- except UnicodeEncodeError:
- g.log.error("problem urlencoding %r" % (parameters,))
- raise
- req = urllib2.Request(g.PAYPAL_URL, params)
- req.add_header("Content-type", "application/x-www-form-urlencoded")
-
- response = urllib2.urlopen(req)
- status = response.read()
-# TODO: stop not doing this
-# if status != "VERIFIED":
-# raise ValueError("Invalid IPN response: %r" % status)
-
- pennies = int(mc_gross * 100)
-
- days = None
- if item_number and item_number in ('rgsub', 'rgonetime'):
- if pennies == 2999:
- secret_prefix = "ys_"
- days = 366
- elif pennies == 399:
- secret_prefix = "m_"
- days = 31
- else:
- raise ValueError("Got %d pennies via PayPal?" % pennies)
- # old formula: days = 60 + int (31 * pennies / 250.0)
- else:
- raise ValueError("Got item number %r via PayPal?" % item_number)
-
- account_id = accountid_from_paypalsubscription(subscr_id)
-
- if account_id:
- try:
- account = Account._byID(account_id)
- except NotFound:
- g.log.info("Just got IPN renewal for deleted account #%d"
- % account_id)
- return "Ok"
-
- create_claimed_gold ("P" + txn_id, payer_email, paying_id,
- pennies, days, None, account_id,
- c.start_time, subscr_id)
- admintools.engolden(account, days)
-
- g.log.info("Just applied IPN renewal for %s, %d days" %
- (account.name, days))
- return "Ok"
-
- if custom:
- gold_dict = g.hardcache.get("gold_dict-" + custom)
- if gold_dict is None:
- raise ValueError("No gold_dict for %r" % custom)
-
- buyer_name = gold_dict['buyer']
- try:
- buyer = Account._by_name(buyer_name)
- except NotFound:
- g.log.info("Just got IPN for unknown buyer %s" % buyer_name)
- return "Ok" # nothing we can do until they complain
-
- if gold_dict['kind'] == 'self':
- create_claimed_gold ("P" + txn_id, payer_email, paying_id,
- pennies, days, None, buyer._id,
- c.start_time, subscr_id)
- admintools.engolden(buyer, days)
-
- g.log.info("Just applied IPN for %s, %d days" %
- (buyer.name, days))
-
-#TODO: send a PM thanking them and showing them /r/lounge
-
- g.hardcache.delete("gold_dict-" + custom)
-
- return "Ok"
- elif gold_dict['kind'] == 'gift':
- recipient_name = gold_dict['recipient']
- try:
- recipient = Account._by_name(recipient_name)
- except NotFound:
- g.log.info("Just got IPN for unknown recipient %s"
- % recipient_name)
- return "Ok" # nothing we can do until they complain
-
- create_claimed_gold ("P" + txn_id, payer_email, paying_id,
- pennies, days, None, recipient._id,
- c.start_time, subscr_id)
- admintools.engolden(recipient, days)
-
- g.log.info("Just applied IPN from %s to %s, %d days" %
- (buyer.name, recipient.name, days))
-
-#TODO: send PMs to buyer and recipient
-
- else:
- raise ValueError("Invalid gold_dict[kind] %r" %
- gold_dict['kind'])
-
- gold_secret = secret_prefix + randstr(10)
-
- create_unclaimed_gold("P" + txn_id, payer_email, paying_id,
- pennies, days, gold_secret, c.start_time,
- subscr_id)
-
- notify_unclaimed_gold(txn_id, gold_secret, payer_email, "Paypal")
-
- g.log.info("Just got IPN for %d days, secret=%s" % (days, gold_secret))
-
- return "Ok"
-
- @textresponse(sn = VLength('serial-number', 100))
- def POST_gcheckout(self, sn):
- if sn:
- g.log.error( "GOOGLE CHECKOUT: %s" % sn)
- new_google_transaction(sn)
- return '<notification-acknowledgment xmlns="http://checkout.google.com/schema/2" serial-number="%s" />' % sn
- else:
- g.log.error("GOOGLE CHCEKOUT: didn't work")
- g.log.error(repr(list(request.POST.iteritems())))
-
-
-
@noresponse(VUser(),
VModhash(),
thing = VByName('id'))
@@ -1770,9 +1573,8 @@ def GET_bookmarklet(self, action, uh, links):
@validatedForm(VUser(),
- code = VPrintable("code", 30),
- postcard_okay = VOneOf("postcard", ("yes", "no")),)
- def POST_claimgold(self, form, jquery, code, postcard_okay):
+ code = VPrintable("code", 30))
+ def POST_claimgold(self, form, jquery, code):
if not code:
c.errors.add(errors.NO_TEXT, field = "code")
form.has_errors("code", errors.NO_TEXT)
@@ -1802,17 +1604,21 @@ def POST_claimgold(self, form, jquery, code, postcard_okay):
if subscr_id:
c.user.gold_subscr_id = subscr_id
- admintools.engolden(c.user, days)
+ if code.startswith("cr_"):
+ c.user.gold_creddits += int(days / 31)
+ c.user._commit()
+ form.set_html(".status", _("claimed! now go to someone's userpage and give them a present!"))
+ else:
+ admintools.engolden(c.user, days)
- g.cache.set("recent-gold-" + c.user.name, True, 600)
- form.set_html(".status", _("claimed!"))
- jquery(".lounge").show()
+ g.cache.set("recent-gold-" + c.user.name, True, 600)
+ form.set_html(".status", _("claimed!"))
+ jquery(".lounge").show()
# Activate any errors we just manually set
form.has_errors("code", errors.INVALID_CODE, errors.CLAIMED_CODE,
errors.NO_TEXT)
-
@validatedForm(user = VUserWithEmail('name'))
def POST_password(self, form, jquery, user):
if form.has_errors('name', errors.USER_DOESNT_EXIST):
@@ -1824,7 +1630,7 @@ def POST_password(self, form, jquery, user):
form.set_html(".status",
_("an email will be sent to that account's address shortly"))
-
+
@validatedForm(cache_evt = VCacheKey('reset', ('key',)),
password = VPassword(['passwd', 'passwd2']))
def POST_resetpassword(self, form, jquery, cache_evt, password):
@@ -1862,11 +1668,6 @@ def POST_frame(self):
c.user._commit()
-
- @validatedForm()
- def POST_new_captcha(self, form, jquery, *a, **kw):
- jquery("body").captcha(get_iden())
-
@noresponse(VAdmin(),
tr = VTranslation("lang"),
user = nop('user'))
View
35 r2/r2/controllers/embed.py
@@ -23,6 +23,7 @@
from r2.lib.template_helpers import get_domain
from r2.lib.pages import Embed, BoringPage, HelpPage
from r2.lib.filters import websafe, SC_OFF, SC_ON
+from r2.lib.memoize import memoize
from pylons.i18n import _
from pylons import c, g, request
@@ -30,6 +31,22 @@
from urllib2 import HTTPError
+@memoize("renderurl_cached", time=60)
+def renderurl_cached(path):
+ # Needed so http://reddit.com/help/ works
+ fp = path.rstrip("/")
+ u = "http://code.reddit.com/wiki" + fp + '?stripped=1'
+
+ g.log.debug("Pulling %s for help" % u)
+
+ try:
+ return fp, proxyurl(u)
+ except HTTPError, e:
+ if e.code != 404:
+ print "error %s" % e.code
+ print e.fp.read()
+ return (None, None)
+
class EmbedController(RedditController):
allow_stylesheets = True
@@ -73,20 +90,10 @@ def renderurl(self, override=None):
else:
path = request.path
- # Needed so http://reddit.com/help/ works
- fp = path.rstrip("/")
- u = "http://code.reddit.com/wiki" + fp + '?stripped=1'
-
- g.log.debug("Pulling %s for help" % u)
-
- try:
- content = proxyurl(u)
- return self.rendercontent(content, fp)
- except HTTPError, e:
- if e.code != 404:
- print "error %s" % e.code
- print e.fp.read()
- return self.abort404()
+ fp, content = renderurl_cached(path)
+ if content is None:
+ self.abort404()
+ return self.rendercontent(content, fp)
GET_help = POST_help = renderurl
View
2  r2/r2/controllers/error.py
@@ -119,7 +119,7 @@ def send404(self):
c.response.status_code = 404
if 'usable_error_content' in request.environ:
return request.environ['usable_error_content']
- if c.site._spam and not c.user_is_admin:
+ if c.site.spammy() and not c.user_is_admin:
subject = ("the subreddit /r/%s has been incorrectly banned" %
c.site.name)
lnk = ("/r/redditrequest/submit?url=%s&title=%s"
View
2  r2/r2/controllers/errors.py
@@ -56,7 +56,7 @@
('SUBREDDIT_NOTALLOWED', _("you aren't allowed to post there.")),
('SUBREDDIT_REQUIRED', _('you must specify a reddit')),
('BAD_SR_NAME', _('that name isn\'t going to work')),
- ('RATELIMIT', _('you are trying to submit too fast. try again in %(time)s.')),
+ ('RATELIMIT', _('you are doing that too much. try again in %(time)s.')),
('EXPIRED', _('your session has expired')),
('DRACONIAN', _('you must accept the terms first')),
('BANNED_IP', "IP banned"),
View
101 r2/r2/controllers/front.py
@@ -30,6 +30,7 @@
from r2.lib.menus import *
from r2.lib.utils import to36, sanitize_url, check_cheating, title_to_url
from r2.lib.utils import query_string, UrlParser, link_from_url, link_duplicates
+from r2.lib.utils import randstr
from r2.lib.template_helpers import get_domain
from r2.lib.filters import unsafe
from r2.lib.emailer import has_opted_out, Email
@@ -37,7 +38,7 @@
from r2.lib.db import queries
from r2.lib.strings import strings
from r2.lib.solrsearch import RelatedSearchQuery, SubredditSearchQuery
-from r2.lib.indextank import IndextankQuery, IndextankException
+from r2.lib.indextank import IndextankQuery, IndextankException, InvalidIndextankQuery
from r2.lib.contrib.pysolr import SolrError
from r2.lib import jsontemplates
from r2.lib import sup
@@ -45,6 +46,7 @@
from listingcontroller import ListingController
from pylons import c, request, request, Response
+import string
import random as rand
import re, socket
import time as time_module
@@ -251,6 +253,8 @@ def GET_comments(self, article, comment, context, sort, limit, depth):
if comment:
displayPane.append(PermalinkMessage(article.make_permalink_slow()))
+ displayPane.append(LinkCommentSep())
+
# insert reply box only for logged in user
if c.user_is_loggedin and can_comment_link(article) and not is_api():
#no comment box for permalinks
@@ -274,7 +278,6 @@ def GET_comments(self, article, comment, context, sort, limit, depth):
# Used in template_helpers
c.previous_visits = previous_visits
-
# finally add the comment listing
displayPane.append(CommentPane(article, CommentSortMenu.operator(sort),
comment, context, num, **kw))
@@ -569,7 +572,7 @@ def GET_search_reddits(self, query, reverse, after, count, num):
simple=True).render()
return res
- verify_langs_regex = re.compile(r"^[a-z][a-z](,[a-z][a-z])*$")
+ verify_langs_regex = re.compile(r"\A[a-z][a-z](,[a-z][a-z])*\Z")
@base_listing
@validate(query = nop('q'),
sort = VMenu('sort', SearchSortMenu, remember=False),
@@ -587,20 +590,37 @@ def GET_search(self, query, num, reverse, after, count, sort, restrict_sr):
site = c.site
try:
- q = IndextankQuery(query, site, sort)
-
- num, t, spane = self._search(q, num = num, after = after, reverse = reverse,
- count = count)
+ cleanup_message = None
+ try:
+ q = IndextankQuery(query, site, sort)
+ num, t, spane = self._search(q, num=num, after=after,
+ reverse = reverse, count = count)
+ except InvalidIndextankQuery:
+ # delete special characters from the query and run again
+ special_characters = '+-&|!(){}[]^"~*?:\\'
+ translation = dict((ord(char), None)
+ for char in list(special_characters))
+ cleaned = query.translate(translation)
+
+ q = IndextankQuery(cleaned, site, sort)
+ num, t, spane = self._search(q, num=num, after=after,
+ reverse = reverse, count = count)
+ cleanup_message = _('I couldn\'t understand your query, ' +
+ 'so I simplified it and searched for ' +
+ '"%(clean_query)s" instead.') % {
+ 'clean_query': cleaned }
+
res = SearchPage(_('search results'), query, t, num, content=spane,
nav_menus = [SearchSortMenu(default=sort)],
- search_params = dict(sort = sort),
- simple=False, site=c.site, restrict_sr=restrict_sr).render()
+ search_params = dict(sort = sort),
+ infotext=cleanup_message,
+ simple=False, site=c.site,
+ restrict_sr=restrict_sr).render()
return res
except (IndextankException, socket.error), e:
return self.search_fail(e)
-
def _search(self, query_obj, num, after, reverse, count=0):
"""Helper function for interfacing with search. Basically a
thin wrapper for SearchBuilder."""
@@ -983,3 +1003,64 @@ def GET_try_compact(self, dest):
def GET_thanks(self, secret):
"""The page to claim reddit gold trophies"""
return BoringPage(_("thanks"), content=Thanks(secret)).render()
+
+ @validate(VUser(),
+ goldtype = VOneOf("goldtype",
+ ("autorenew", "onetime", "creddits", "gift")),
+ period = VOneOf("period", ("monthly", "yearly")),
+ months = VInt("months"),
+ # variables below are just for gifts
+ signed = VBoolean("signed"),
+ recipient_name = VPrintable("recipient", max_length = 50),
+ giftmessage = VLength("giftmessage", 10000))
+ def GET_gold(self, goldtype, period, months,
+ signed, recipient_name, giftmessage):
+ start_over = False
+ recipient = None
+ if goldtype == "autorenew":
+ if period is None:
+ start_over = True
+ elif goldtype in ("onetime", "creddits"):
+ if months is None or months < 1:
+ start_over = True
+ elif goldtype == "gift":
+ if months is None or months < 1:
+ start_over = True
+ try:
+ recipient = Account._by_name(recipient_name or "")
+ except NotFound:
+ start_over = True
+ else:
+ goldtype = ""
+ start_over = True
+
+ if start_over:
+ return BoringPage(_("reddit gold"),
+ show_sidebar = False,
+ content=Gold(goldtype, period, months, signed,
+ recipient, recipient_name)).render()
+ else:
+ payment_blob = dict(goldtype = goldtype,
+ account_id = c.user._id,
+ account_name = c.user.name,
+ status = "initialized")
+
+ if goldtype == "gift":
+ payment_blob["signed"] = signed
+ payment_blob["recipient"] = recipient_name
+ payment_blob["giftmessage"] = giftmessage
+
+ passthrough = randstr(15)
+
+ g.hardcache.set("payment_blob-" + passthrough,
+ payment_blob, 86400 * 30)
+
+ g.log.info("just set payment_blob-%s" % passthrough)
+
+ return BoringPage(_("reddit gold"),
+ show_sidebar = False,
+ content=GoldPayment(goldtype, period, months,
+ signed, recipient,
+ giftmessage, passthrough)
+ ).render()
+
View
476 r2/r2/controllers/ipn.py
@@ -0,0 +1,476 @@
+from xml.dom.minidom import Document
+from httplib import HTTPSConnection
+from urlparse import urlparse
+import base64
+
+from pylons.controllers.util import abort
+from pylons import c, g, response
+from pylons.i18n import _
+
+from validator import *
+from r2.models import *
+
+from reddit_base import RedditController
+
+def get_blob(code):
+ key = "payment_blob-" + code
+ with g.make_lock("payment_blob_lock-" + code):
+ blob = g.hardcache.get(key)
+ if not blob:
+ raise NotFound("No payment_blob-" + code)
+ if blob.get('status', None) != 'initialized':
+ raise ValueError("payment_blob %s has status = %s" %
+ (code, blob.get('status', None)))
+ blob['status'] = "locked"
+ g.hardcache.set(key, blob, 86400 * 30)
+ return key, blob
+
+def dump_parameters(parameters):
+ for k, v in parameters.iteritems():
+ g.log.info("IPN: %r = %r" % (k, v))
+
+def check_payment_status(payment_status):
+ if payment_status is None:
+ payment_status = ''
+
+ psl = payment_status.lower()
+
+ if psl == 'completed':
+ return (None, psl)
+ elif psl == 'refunded':
+ log_text("refund", "Just got notice of a refund.", "info")
+ # TODO: something useful when this happens -- and don't
+ # forget to verify first
+ return ("Ok", psl)
+ elif psl == 'pending':
+ log_text("pending",
+ "Just got notice of a Pending, whatever that is.", "info")
+ # TODO: something useful when this happens -- and don't
+ # forget to verify first
+ return ("Ok", psl)
+ elif psl == 'reversed':
+ log_text("reversal",
+ "Just got notice of a PayPal reversal.", "info")
+ # TODO: something useful when this happens -- and don't
+ # forget to verify first
+ return ("Ok", psl)
+ elif psl == 'canceled_reversal':
+ log_text("canceled_reversal",
+ "Just got notice of a PayPal 'canceled reversal'.", "info")
+ return ("Ok", psl)
+ elif psl == '':
+ return (None, psl)
+ else:
+ raise ValueError("Unknown IPN status: %r" % payment_status)
+
+def check_txn_type(txn_type, psl):
+ if txn_type == 'subscr_signup':
+ return ("Ok", None)
+ elif txn_type == 'subscr_cancel':
+ return ("Ok", "cancel")
+ elif txn_type == 'subscr_eot':
+ return ("Ok", None)
+ elif txn_type == 'subscr_failed':
+ log_text("failed_subscription",
+ "Just got notice of a failed PayPal resub.", "info")
+ return ("Ok", None)
+ elif txn_type == 'subscr_modify':
+ log_text("modified_subscription",
+ "Just got notice of a modified PayPal sub.", "info")
+ return ("Ok", None)
+ elif txn_type == 'send_money':
+ return ("Ok", None)
+ elif txn_type in ('new_case',
+ 'recurring_payment_suspended_due_to_max_failed_payment'):
+ return ("Ok", None)
+ elif txn_type == 'subscr_payment' and psl == 'completed':
+ return (None, "new")
+ elif txn_type == 'web_accept' and psl == 'completed':
+ return (None, None)
+ else:
+ raise ValueError("Unknown IPN txn_type / psl %r" %
+ ((txn_type, psl),))
+
+
+def verify_ipn(parameters):
+ parameters['cmd'] = '_notify-validate'
+ try:
+ safer = dict([k, v.encode('utf-8')] for k, v in parameters.items())
+ params = urllib.urlencode(safer)
+ except UnicodeEncodeError:
+ g.log.error("problem urlencoding %r" % (parameters,))
+ raise
+ req = urllib2.Request(g.PAYPAL_URL, params)
+ req.add_header("Content-type", "application/x-www-form-urlencoded")
+
+ response = urllib2.urlopen(req)
+ status = response.read()
+
+ if status != "VERIFIED":
+ raise ValueError("Invalid IPN response: %r" % status)
+
+
+def existing_subscription(subscr_id):
+ account_id = accountid_from_paypalsubscription(subscr_id)
+
+ if account_id is None:
+ return None
+
+ try:
+ account = Account._byID(account_id)
+ except NotFound:
+ g.log.info("Just got IPN renewal for deleted account #%d"
+ % account_id)
+ return "deleted account"
+
+ return account
+
+def months_and_days_from_pennies(pennies):
+ if pennies >= 2999:
+ months = 12 * (pennies / 2999)
+ days = 366 * (pennies / 2999)
+ else:
+ months = pennies / 399
+ days = 31 * months
+ return (months, days)
+
+def send_gift(buyer, recipient, months, days, signed, giftmessage):
+ admintools.engolden(recipient, days)
+ if signed:
+ sender = buyer.name
+ md_sender = "[%s](/user/%s)" % (sender, sender)
+ else:
+ sender = "someone"
+ md_sender = "An anonymous redditor"
+
+ create_gift_gold (buyer._id, recipient._id, days, c.start_time, signed)
+ if months == 1:
+ amount = "a month"
+ else:
+ amount = "%d months" % months
+
+ subject = sender + " just sent you reddit gold!"
+ message = strings.youve_got_gold % dict(sender=md_sender, amount=amount)
+
+ if giftmessage and giftmessage.strip():
+ message += "\n\n" + strings.giftgold_note + giftmessage
+
+ send_system_message(recipient, subject, message)
+
+ g.log.info("%s gifted %s to %s" % (buyer.name, amount, recipient.name))
+
+def _google_ordernum_request(ordernums):
+ d = Document()
+ n = d.createElement("notification-history-request")
+ n.setAttribute("xmlns", "http://checkout.google.com/schema/2")
+ d.appendChild(n)
+
+ on = d.createElement("order-numbers")
+ n.appendChild(on)
+
+ for num in tup(ordernums):
+ gon = d.createElement('google-order-number')
+ gon.appendChild(d.createTextNode("%s" % num))
+ on.appendChild(gon)
+