Skip to content

Commit

Permalink
21 Jul 2010 merge
Browse files Browse the repository at this point in the history
* Use Flaptor's Indextank product for search, including santip's patch
* for their new API
* Add Cassandra ConsistencyLevels to the ini file, and storage-conf.xml
* to the public repo
* Patch contributed by umbrae in ticket reddit-archive#929: Add jumpToContent support
* for Keyboard Accessibility
* reddit gold
    - paypal/postcard support
    - friends with benefits
    - profile-page sorting for gold members
    - move domain listings into the permacache
  • Loading branch information
ketralnis committed Jul 22, 2010
1 parent 52da322 commit 0ae8f2f
Show file tree
Hide file tree
Showing 70 changed files with 2,802 additions and 2,927 deletions.
216 changes: 216 additions & 0 deletions config/cassandra/storage-conf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
<Storage>
<!--======================================================================-->
<!-- Basic Configuration -->
<!--======================================================================-->

<!-- Name of this cluster. Per Cassandra's stock storage-conf.xml this is
mainly used to keep nodes of one logical cluster from joining another. -->
<ClusterName>reddit</ClusterName>

<!-- Automatic bootstrapping of new nodes is turned off here. Per the stock
config, turning it on makes new non-seed nodes migrate the right data to
themselves when they first join. -->
<AutoBootstrap>false</AutoBootstrap>
<!-- Hinted handoff is enabled.
See http://wiki.apache.org/cassandra/HintedHandoff -->
<HintedHandoffEnabled>true</HintedHandoffEnabled>

<Keyspaces>
  <!--
  ~ Three keyspaces are defined: permacache, urls and reddit. Each one uses
  ~ RackUnawareStrategy placement, a replication factor of 3 and the
  ~ default EndPointSnitch.
  -->

  <!-- permacache: a single column family whose column names are compared
       as raw bytes, with RowsCached set to 3000000 -->
  <Keyspace Name="permacache">
    <ColumnFamily Name="permacache" CompareWith="BytesType" RowsCached="3000000" />

    <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
    <ReplicationFactor>3</ReplicationFactor>
    <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
  </Keyspace>

  <!-- urls: a single column family with UTF-8 column names -->
  <Keyspace Name="urls">
    <ColumnFamily Name="urls" CompareWith="UTF8Type" />

    <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
    <ReplicationFactor>3</ReplicationFactor>
    <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
  </Keyspace>

  <!-- reddit: vote relations plus the views derived from them; all column
       families use UTF-8 column names -->
  <Keyspace Name="reddit">
    <!-- Relations -->
    <ColumnFamily Name="LinkVote" CompareWith="UTF8Type" />
    <ColumnFamily Name="CommentVote" CompareWith="UTF8Type" />

    <!-- Views -->
    <ColumnFamily Name="VotesByLink" CompareWith="UTF8Type" />

    <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
    <ReplicationFactor>3</ReplicationFactor>
    <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
  </Keyspace>

</Keyspaces>

<!-- AllowAllAuthenticator: per the stock Cassandra config this is the
default and performs no authentication checks. -->
<Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>

<!-- Partitioner that decides how rows are distributed around the ring.
NOTE(review): the stock config warns that changing the partitioner on a
cluster that already holds data requires wiping the data directories;
confirm before editing. -->
<Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>

<!-- Left empty: no token is pinned in this shared file. Presumably each
node chooses (or is assigned) its own token; verify against deployment. -->
<InitialToken></InitialToken>

<!-- On-disk locations for the commit log and for data files. -->
<CommitLogDirectory>/cassandra/commitlog</CommitLogDirectory>
<DataFileDirectories>
<DataFileDirectory>/cassandra/data</DataFileDirectory>
</DataFileDirectories>

<!--
~ Seed hosts. Per the stock Cassandra config these are the contact points
~ nodes use to find each other and learn the topology of the ring.
~ NOTE(review): pmc04 and pmc05 are not listed; confirm that is intended.
-->
<Seeds>
  <Seed>pmc01</Seed>
  <Seed>pmc02</Seed>
  <Seed>pmc03</Seed>
  <Seed>pmc06</Seed>
  <Seed>pmc07</Seed>
  <Seed>pmc08</Seed>
</Seeds>

<!-- Miscellaneous -->

<!-- Time, in milliseconds, to wait for a reply from other nodes before
failing the command (30000 ms = 30 seconds here). -->
<RpcTimeoutInMillis>30000</RpcTimeoutInMillis>
<!-- Phi value that must be reached before a host is marked as down.
Most users should never adjust this. -->
<PhiConvictThreshold>10</PhiConvictThreshold>
<!-- Size, in MB, the commit log may grow to before a new segment is
created. -->
<CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>

<!-- Local hosts and ports -->

<!-- Address used for communication with other Cassandra nodes. Left empty
here; per the stock config, a blank value leaves the choice to
InetAddress.getLocalHost(). -->
<ListenAddress></ListenAddress>
<!-- Internal (node-to-node) communications port. -->
<StoragePort>7000</StoragePort>

<!-- Address the Thrift RPC service binds to. Left empty here.
NOTE(review): confirm which interface an empty value binds on your
Cassandra version. -->
<ThriftAddress></ThriftAddress>
<!-- Thrift RPC port (the port clients connect to). -->
<ThriftPort>9160</ThriftPort>

<!-- Framed Thrift transport is disabled; clients must therefore connect
with the unframed (buffered) transport. -->
<ThriftFramedTransport>false</ThriftFramedTransport>


<!--======================================================================-->
<!-- Memory, Disk, and Performance -->
<!--======================================================================-->

<!--
~ Access mode. mmapped i/o is substantially faster, but only practical on
~ a 64bit machine (which notably does not include EC2 "small" instances)
~ or relatively small datasets. "auto", the safe choice, will enable
~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
~ (which may allow you to get part of the benefits of mmap on a 32bit
~ machine by mmapping only index files) and "standard".
~ (The buffer size settings that follow only apply to standard,
~ non-mmapped i/o.)
~
~ This file selects "mmap_index_only", i.e. only index files are mmapped.
-->
<DiskAccessMode>mmap_index_only</DiskAccessMode>

<!--
~ Size of compacted row above which to log a warning. (If compacted
~ rows do not fit in memory, Cassandra will crash. This is explained
~ in http://wiki.apache.org/cassandra/CassandraLimitations and is
~ scheduled to be fixed in 0.7.)
-->
<RowWarningThresholdInMB>512</RowWarningThresholdInMB>

<!--
~ Buffer size to use when performing contiguous column slices. Increase
~ this to the size of the column slices you typically perform.
~ (Name-based queries are performed with a buffer size of
~ ColumnIndexSizeInKB.)
-->
<SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>

<!--
~ Buffer sizes to use when flushing memtables to disk. (Only one
~ memtable is ever flushed at a time.) Increase (decrease) the index
~ buffer size relative to the data buffer if you have few (many)
~ columns per key. Bigger is only better _if_ your memtables get large
~ enough to use the space. (Check in your data directory after your
~ app has been running long enough.)
-->
<FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
<FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>

<!--
~ Add column indexes to a row after its contents reach this size.
~ Increase if your column values are large, or if you have a very large
~ number of columns. The trade-off: Cassandra has to deserialize this
~ much of the row to read a single column, so you want it to be small
~ (at least if you do many partial-row reads), but all the index data
~ is read for each access, so you don't want to generate it wastefully
~ either.
-->
<ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>

<!--
~ Flush memtable after this much data has been inserted, including
~ overwritten data. There is one memtable per column family, and
~ this threshold is based solely on the amount of data stored, not
~ actual heap memory usage (there is some overhead in indexing the
~ columns).
-->
<MemtableThroughputInMB>64</MemtableThroughputInMB>
<!--
~ Throughput setting for Binary Memtables. Typically these are
~ used for bulk load so you want them to be larger.
-->
<BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
<!--
~ The maximum number of columns, in millions, to store in memory per
~ ColumnFamily before flushing to disk. This is also a per-memtable
~ setting. Use with MemtableThroughputInMB to tune memory usage.
~ (0.3 here means 300,000 columns.)
-->
<MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
<!--
~ The maximum time, in minutes, to leave a dirty memtable unflushed.
~ (While any affected columnfamilies have unflushed data from a
~ commit log segment, that segment cannot be deleted.)
~ This needs to be large enough that it won't cause a flush storm
~ of all your memtables flushing at once because none has hit
~ the size or count thresholds yet. For production, a larger
~ value such as 1440 is recommended.
~ NOTE(review): this file sets 60, well below the 1440 suggested
~ above; confirm that this is intentional.
-->
<MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>

<!--
~ Unlike most systems, in Cassandra writes are faster than reads, so
~ you can afford more of those in parallel. A good rule of thumb is 2
~ concurrent reads per processor core. Increase ConcurrentWrites to
~ the number of clients writing at once if you set CommitLogSync to
~ "batch" and use CommitLogSyncBatchWindowInMS. (Earlier wording of
~ this comment called that setting CommitLogSyncDelay, a name that
~ does not appear anywhere in this file.)
-->
<ConcurrentReads>8</ConcurrentReads>
<ConcurrentWrites>32</ConcurrentWrites>

<!--
~ CommitLogSync may be either "periodic" or "batch". When in batch
~ mode, Cassandra won't ack writes until the commit log has been
~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
~ milliseconds for other writes, before performing the sync.
~ This is less necessary in Cassandra than in traditional databases
~ since replication reduces the odds of losing data from a failure
~ after writing the log entry but before it actually reaches the disk.
~ So the other option is "periodic", where writes may be acked immediately
~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
~ milliseconds.
~
~ This file uses "periodic".
-->
<CommitLogSync>periodic</CommitLogSync>
<!--
~ Interval at which to perform syncs of the CommitLog in periodic mode.
~ Usually the default of 10000ms is fine; increase it if your i/o
~ load is such that syncs are taking excessively long times.
-->
<CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
<!--
~ Delay (in milliseconds) during which additional commit log entries
~ may be written before fsync in batch mode. This will increase
~ latency slightly, but can vastly improve throughput where there are
~ many writers. Set to zero to disable (each entry will be synced
~ individually). Reasonable values range from a minimal 0.1 to 10 or
~ even more if throughput matters more than latency.
~
~ The setting below is left commented out; it is described as applying
~ to batch mode only, and CommitLogSync above is "periodic".
-->
<!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->

<!--
~ Time, in seconds, to wait before garbage-collecting deletion markers.
~ Set this to a large enough value that you are confident that the
~ deletion marker will be propagated to all replicas by the time this
~ many seconds have elapsed, even in the face of hardware failures.
~ The default value is ten days (864000 seconds), which is what is
~ configured here.
-->
<GCGraceSeconds>864000</GCGraceSeconds>
</Storage>
5 changes: 5 additions & 0 deletions r2/draw_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ def draw_box(label, color, center = False):
draw_box(" %s load: %s" % (host.host, host.load()),
get_load_level(host))

draw_box(" ==== MEDIA ==== ", "#BBBBBB", center = True)
for host in hosts:
if host.host.startswith('media'):
draw_box(" %s load: %s" % (host.host, host.load()),
get_load_level(host))
draw_box(" ==== SEARCH ==== ", "#BBBBBB", center = True)
for host in hosts:
if host.host.startswith('search'):
Expand Down
9 changes: 8 additions & 1 deletion r2/example.ini
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ servicecaches = 127.0.0.1:11211
permacache_memcaches = 127.0.0.1:11211
# cassandra hosts. one of these will be chosen at random by pycassa
cassandra_seeds = 127.0.0.1:9160
# read/write consistency levels for Cassandra
cassandra_rcl = ONE
cassandra_wcl = QUORUM

# -- url cache options --
url_caches = 127.0.0.1:11211
Expand Down Expand Up @@ -285,6 +288,8 @@ MIN_UP_KARMA = 1
MIN_RATE_LIMIT_KARMA = 10
MIN_RATE_LIMIT_COMMENT_KARMA = 1
QUOTA_THRESHOLD = 5
# Links and comments older than this many days qualify for historic preservation
REPLY_AGE_LIMIT = 180

# min amount of karma to edit
WIKI_KARMA = 100
Expand All @@ -302,6 +307,8 @@ num_comments = 200
max_comments = 500
# list of reddits to auto-subscribe users to
automatic_reddits =
# special reddit that only reddit gold subscribers can use
lounge_reddit =
# cutoff number of reddits to show unsubscribed users
num_default_reddits = 10
# how deep do we go into the top listing when fetching /random
Expand Down Expand Up @@ -338,5 +345,5 @@ beaker.session_secret = somesecret
# WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT*
# Debug mode will enable the interactive debugging tool, allowing ANYONE to
# execute malicious code after an exception is raised.
set debug = true
#set debug = false

4 changes: 4 additions & 0 deletions r2/r2/config/routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ def make_map(global_conf={}, app_conf={}):
mc('/message/moderator/:subwhere', controller='message', action='listing',
where = 'moderator')

mc('/thanks', controller='forms', action="thanks", secret = '')
mc('/thanks/:secret', controller='forms', action="thanks")

mc('/password', controller='forms', action="password")
mc('/:action', controller='front',
requirements=dict(action="random|framebuster|selfserviceoatmeal"))
Expand Down Expand Up @@ -202,6 +205,7 @@ def make_map(global_conf={}, app_conf={}):
requirements=dict(action="options|over18|unlogged_options|optout|optin|login|reg"))

mc('/api/distinguish/:how', controller='api', action="distinguish")
mc('/api/ipn/:secret', controller='api', action='ipn')
mc('/api/:action/:url_user', controller='api',
requirements=dict(action="login|register"))
mc('/api/gadget/click/:ids', controller = 'api', action='gadget', type='click')
Expand Down
Loading

0 comments on commit 0ae8f2f

Please sign in to comment.