switch from rollover to date-scoped-keys #370

Merged
merged 1 commit into from Dec 24, 2011
View
@@ -5,8 +5,5 @@ Gemcutter::Application.load_tasks
desc "Run all tests and features"
task :default => [:test, :cucumber]
-desc "Run daily at 00:00 UTC"
-task :daily_cron => %w[gemcutter:downloads:rollover]
-
desc "Run weekly at 00:00 UTC"
task :weekly_cron => %w[gemcutter:store_legacy_index]
@@ -1,15 +1,16 @@
class Download
COUNT_KEY = "downloads"
- TODAY_KEY = "downloads:today"
- YESTERDAY_KEY = "downloads:yesterday"
ALL_KEY = "downloads:all"
def self.incr(name, full_name)
+ today = Time.zone.today.to_s
$redis.incr(COUNT_KEY)
$redis.incr(rubygem_key(name))
$redis.incr(version_key(full_name))
- $redis.zincrby(TODAY_KEY, 1, full_name)
+ $redis.zincrby(today_key, 1, full_name)
$redis.zincrby(ALL_KEY, 1, full_name)
+ $redis.hincrby(version_history_key(full_name), today, 1)
+ $redis.hincrby(rubygem_history_key(name), today, 1)
end
def self.count
@@ -18,7 +19,7 @@ def self.count
def self.today(*versions)
versions.flatten.inject(0) do |sum, version|
- sum + $redis.zscore(TODAY_KEY, version.full_name).to_i
+ sum + $redis.zscore(today_key, version.full_name).to_i
end
end
@@ -35,7 +36,7 @@ def self.for_version(full_name)
end
def self.most_downloaded_today(n=5)
- items = $redis.zrevrange(TODAY_KEY, 0, (n-1), :with_scores => true)
+ items = $redis.zrevrange(today_key, 0, (n-1), :with_scores => true)
items.in_groups_of(2).collect do |full_name, downloads|
version = Version.find_by_full_name(full_name)
@@ -52,7 +53,7 @@ def self.most_downloaded_all_time(n=5)
end
def self.counts_by_day_for_versions(versions, days)
- dates = (days.days.ago.to_date...Time.zone.today).map &:to_s
+ dates = (days.days.ago.to_date...Time.zone.today).map(&:to_s)
versions.inject({}) do |downloads, version|
$redis.hmget(self.history_key(version), *dates).each_with_index do |count, idx|
@@ -66,7 +67,7 @@ def self.counts_by_day_for_versions(versions, days)
def self.counts_by_day_for_version_in_date_range(version, start, stop)
downloads = ActiveSupport::OrderedHash.new
- dates = (start..stop).map &:to_s
+ dates = (start..stop).map(&:to_s)
$redis.hmget(self.history_key(version), *dates).each_with_index do |count, idx|
downloads["#{dates[idx]}"] = count.to_i
@@ -96,37 +97,18 @@ def self.key(what)
def self.history_key(what)
case what
when Version
- "downloads:version_history:#{what.full_name}"
+ version_history_key(what.full_name)
when Rubygem
- "downloads:rubygem_history:#{what.name}"
- end
- end
-
- def self.rollover
- $redis.rename TODAY_KEY, YESTERDAY_KEY
-
- yesterday = 1.day.ago.to_date.to_s
- versions = {}
-
- Version.find_each(include: :rubygem, batch_size: 10000) do |version|
- versions[version.full_name] = version
- end
-
- $redis.zrange(YESTERDAY_KEY, 0, -1, with_scores: true).in_groups_of(2).each do |(full_name, score)|
- if version = versions[full_name]
- $redis.hincrby history_key(version), yesterday, score.to_i
- $redis.hincrby history_key(version.rubygem), yesterday, score.to_i
- version.rubygem.increment! :downloads, score.to_i
- end
+ rubygem_history_key(what.name)
end
end
def self.cardinality
- $redis.zcard(TODAY_KEY)
+ $redis.zcard(today_key)
end
def self.rank(version)
- if rank = $redis.zrevrank(TODAY_KEY, version.full_name)
+ if rank = $redis.zrevrank(today_key, version.full_name)
rank + 1
else
0
@@ -142,6 +124,28 @@ def self.highest_rank(versions)
end
end
+ def self.cleanup_today_keys
@qrush
qrush Dec 21, 2011 Member

What's the purpose of this? I also don't see why today_keys exists, is it for the test suite only?

@phoet
phoet Dec 21, 2011 Contributor

i added this but forgot to add a rake task to clean up the today-keys.

i think that in the beginning there will be no problem with them, but they will grow day to day, cause there is no rollover anymore.

if you think this won't be a problem i'll remove them. otherwise i can add a rake task for a scheduled cleanup.

@qrush
qrush Dec 22, 2011 Member

Ok, sounds legit. We need to upgrade Redis at some point and that will help with memory usage.

+ $redis.del(*today_keys)
+ end
+
+ def self.today_keys
+ today_keys = $redis.keys("downloads:today:*")
+ today_keys.delete(today_key)
+ today_keys
+ end
+
+ def self.today_key(date_string=Time.zone.today)
+ "downloads:today:#{date_string}"
+ end
+
+ def self.version_history_key(full_name)
+ "downloads:version_history:#{full_name}"
+ end
+
+ def self.rubygem_history_key(name)
+ "downloads:rubygem_history:#{name}"
+ end
+
def self.version_key(full_name)
"downloads:version:#{full_name}"
end
View
@@ -9,7 +9,7 @@ development:
asset_cacher: false
test:
- host: www.example.com
+ host: localhost
local_storage: true
redirector: false
delivery_method: :test
@@ -1,62 +0,0 @@
-namespace "gemcutter:downloads" do
- desc "Daily rollover and aggregation"
- task :rollover => :environment do
- Download.rollover
- end
-
- desc "Migrate from downloads table to redis"
- task :migrate => :environment do
- # create index "index_downloads_on_created_at_and_date" on downloads (version_id, date(created_at));
- # vacuum analyze downloads
- $redis.flushdb
-
- class Dl < ActiveRecord::Base
- set_table_name "downloads"
- include Pacecar
- end
-
- versions = Version.all(:include => :rubygem)
- vmap = {}
- versions.each do |version|
- print "> #{Download.key(version)} "
- puts $redis[Download.key(version)] = version['downloads_count']
- vmap[version.id] = version
- end
-
- puts "*" * 80
-
- rubygems = Rubygem.all
- rubygems.each do |rubygem|
- print ">> #{rubygem} "
- puts $redis[Download.key(rubygem)] = rubygem['downloads']
- end
-
- puts "*" * 80
-
- Dl.created_at_inside(Date.current.to_datetime, Date.current.to_datetime + 1.day).group_by(&:version_id).each do |version_id, downloads|
- name = vmap[version_id].full_name
- count = downloads.size
- puts ">>> #{name}: #{count}"
- $redis.zincrby(Download::TODAY_KEY, count, name)
- end
-
- puts "*" * 80
-
- total = Dl.count
- size = 10_000
- $redis.set Download::COUNT_KEY, total
-
- puts "Converting downloads..."
- Dl.find_in_batches(:select => "id, version_id, date(created_at)", :batch_size => size) do |batch|
- puts ">>>> #{total}"
- batch.group_by(&:version_id).each do |version_id, downloads|
- version = vmap[version_id]
- downloads.group_by { |dl| dl['date'] }.each do |date, dls|
- $redis.hincrby Download.history_key(version), date, dls.size
- $redis.hincrby Download.history_key(version.rubygem), date, dls.size
- end
- end
- total -= size
- end
- end
-end
@@ -92,8 +92,9 @@ class StatsControllerTest < ActionController::TestCase
@other_version = Factory(:version)
@latest_version = Factory(:version, :rubygem => rubygem)
- Download.incr(rubygem.name, @version.full_name)
- Download.rollover
+ Timecop.freeze(1.day.ago) do
+ Download.incr(rubygem.name, @version.full_name)
+ end
5.times { Download.incr(rubygem.name, @latest_version.full_name) }
4.times { Download.incr(rubygem.name, @version.full_name) }
@@ -31,39 +31,6 @@ class DownloadTest < ActiveSupport::TestCase
assert_equal 0, Download.today(other_platform_version)
end
- context "with some gems downloaded" do
- setup do
- @rubygem = Factory(:rubygem)
- @version1 = Factory(:version, :rubygem => @rubygem, :number => "1.0.0")
- @version2 = Factory(:version, :rubygem => @rubygem, :number => "2.0.0")
-
- Download.incr(@rubygem.name, @version1.full_name)
- Download.incr(@rubygem.name, @version2.full_name)
- Download.incr(@rubygem.name, @version2.full_name)
- end
-
- should "roll over downloads into history hashes" do
- Download.rollover
- assert ! $redis.exists(Download::TODAY_KEY)
- assert $redis.exists(Download::YESTERDAY_KEY)
-
- rubygem_history = $redis.hgetall(Download.history_key(@rubygem))
- version1_history = $redis.hgetall(Download.history_key(@version1))
- version2_history = $redis.hgetall(Download.history_key(@version2))
-
- yesterday = 1.day.ago.to_date.to_s
- assert_equal 3, rubygem_history[yesterday].to_i
- assert_equal 1, version1_history[yesterday].to_i
- assert_equal 2, version2_history[yesterday].to_i
- end
-
- should "update the database when we roll over" do
- Download.rollover
-
- assert_equal 3, @rubygem.reload.read_attribute(:downloads)
- end
- end
-
should "find most downloaded today" do
@rubygem_1 = Factory(:rubygem)
@version_1 = Factory(:version, :rubygem => @rubygem_1)
@@ -75,10 +42,12 @@ class DownloadTest < ActiveSupport::TestCase
@rubygem_3 = Factory(:rubygem)
@version_4 = Factory(:version, :rubygem => @rubygem_3)
- Download.incr(@rubygem_1.name, @version_1.full_name)
- Download.incr(@rubygem_1.name, @version_2.full_name)
- Download.incr(@rubygem_2.name, @version_3.full_name)
- Download.rollover
+ Timecop.freeze(Date.today - 1) do
+ Download.incr(@rubygem_1.name, @version_1.full_name)
+ Download.incr(@rubygem_1.name, @version_2.full_name)
+ Download.incr(@rubygem_2.name, @version_3.full_name)
+ end
+
Download.incr(@rubygem_1.name, @version_1.full_name)
3.times { Download.incr(@rubygem_2.name, @version_3.full_name) }
2.times { Download.incr(@rubygem_1.name, @version_2.full_name) }
@@ -142,10 +111,12 @@ class DownloadTest < ActiveSupport::TestCase
@rubygem_3 = Factory(:rubygem)
@version_4 = Factory(:version, :rubygem => @rubygem_3)
- Download.incr(@rubygem_1, @version_1.full_name)
- Download.incr(@rubygem_1, @version_2.full_name)
- Download.incr(@rubygem_2, @version_3.full_name)
- Download.rollover
+ Timecop.freeze(1.day.ago) do
+ Download.incr(@rubygem_1, @version_1.full_name)
+ Download.incr(@rubygem_1, @version_2.full_name)
+ Download.incr(@rubygem_2, @version_3.full_name)
+ end
+
Download.incr(@rubygem_2, @version_3.full_name)
Download.incr(@rubygem_1, @version_1.full_name)
Download.incr(@rubygem_2, @version_3.full_name)
@@ -182,4 +153,16 @@ class DownloadTest < ActiveSupport::TestCase
assert_equal 0, Download.highest_rank([FactoryGirl.build(:version), FactoryGirl.build(:version)])
end
+ should "delete all old today keys except the current" do
+ rubygem = Factory(:rubygem)
+ version = Factory(:version, :rubygem => rubygem)
+ 10.times do |n|
+ Timecop.freeze(n.days.ago) do
+ 3.times { Download.incr(rubygem.name, version.full_name) }
+ end
+ end
+ assert_equal 9, Download.today_keys.size
+ Download.cleanup_today_keys
+ assert_equal 0, Download.today_keys.size
+ end
end
Oops, something went wrong.