Skip to content

Commit

Permalink
updated models for normalized data. #213
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Mar 23, 2015
1 parent 8ccaf24 commit d2e94f6
Show file tree
Hide file tree
Showing 38 changed files with 241 additions and 193 deletions.
9 changes: 5 additions & 4 deletions app/models/api_response.rb
Expand Up @@ -11,18 +11,19 @@ class ApiResponse < ActiveRecord::Base
scope :unresolved, -> { where("unresolved = ?", true) }
scope :filter, ->(id) { where("unresolved = ?", true).where("id <= ?", id) }
scope :total, ->(duration) { where(created_at: (Time.zone.now.beginning_of_hour - duration.hours)..Time.zone.now.beginning_of_hour) }
scope :decreasing, ->(source_ids) { where("event_count < previous_count").where(skipped: false).where(source_id: source_ids) }
scope :increasing, ->(number, source_ids) { where("update_interval IS NOT NULL").where("((event_count - previous_count) / update_interval) >= ?", number).where(source_id: source_ids) }
scope :decreasing, ->(source_ids) { where("total < previous_total").where(skipped: false).where(source_id: source_ids) }
scope :increasing, ->(number, source_ids) { where("update_interval IS NOT NULL").where("((total - previous_total) / update_interval) >= ?", number).where(source_id: source_ids) }
scope :slow, ->(number) { where("duration >= ?", number * 1000).where(skipped: false) }
scope :work_not_updated, ->(number) { where("event_count IS NULL").where("update_interval >= ?", number) }
scope :ratio, ->(number) { where("if(pdf, html / pdf, 0) >= ?", number).where(skipped: false) }
scope :work_not_updated, ->(number) { where("total IS NULL").where("update_interval >= ?", number) }
scope :source_not_updated, ->(number) { where("update_interval >= ?", number) }

# we need integer division, which is handled differently by MySQL and Postgres. Workaround is to use FLOOR.
scope :citation_milestone, ->(number, source_ids) {
if number == 0
limit(0)
else
where("FLOOR(event_count / ?) > FLOOR(previous_count / ?)", number, number).where("source_id IN (?)", source_ids)
where("FLOOR(total / ?) > FLOOR(previous_total / ?)", number, number).where("source_id IN (?)", source_ids)
end
}

Expand Down
3 changes: 0 additions & 3 deletions app/models/concerns/couchable.rb
Expand Up @@ -88,9 +88,6 @@ def get_lagotto_database

def put_lagotto_database
put_lagotto_data(ENV['COUCHDB_URL'])
filter = Faraday::UploadIO.new('design_doc/filter.json', 'application/json')
put_lagotto_data("#{ENV['COUCHDB_URL']}/_design/filter", data: filter)

reports = Faraday::UploadIO.new('design_doc/reports.json', 'application/json')
put_lagotto_data("#{ENV['COUCHDB_URL']}/_design/reports", data: reports)
end
Expand Down
4 changes: 2 additions & 2 deletions app/models/concerns/countable.rb
Expand Up @@ -11,13 +11,13 @@ def event_count
if ActionController::Base.perform_caching
Rails.cache.read("#{name}/event_count/#{update_date}").to_i
else
retrieval_statuses.sum(:event_count)
retrieval_statuses.sum(:total)
end
end

def event_count=(timestamp)
Rails.cache.write("#{name}/event_count/#{timestamp}",
retrieval_statuses.sum(:event_count))
retrieval_statuses.sum(:total))
end

def work_count
Expand Down
2 changes: 1 addition & 1 deletion app/models/concerns/networkable.rb
Expand Up @@ -85,7 +85,7 @@ def faraday_conn(content_type = 'json')
Faraday.new do |c|
c.headers['Accept'] = accept_header
c.headers['User-Agent'] = "Lagotto #{Rails.application.config.version} - http://#{ENV['SERVERNAME']}"
c.use FaradayMiddleware::FollowRedirects, :limit => 10, :cookie => :all
c.use FaradayMiddleware::FollowRedirects, limit: 10, cookie: :all
c.request :multipart
c.request :json if accept_header == 'application/json'
c.use Faraday::Response::RaiseError
Expand Down
22 changes: 22 additions & 0 deletions app/models/day.rb
@@ -0,0 +1,22 @@
# One row of per-day metrics, linked to a source, a work and a retrieval status.
class Day < ActiveRecord::Base
  belongs_to :source
  belongs_to :work
  belongs_to :retrieval_status

  # All rows except the one dated today (according to Time.zone).
  #
  # The date is excluded as a unit: NOT (year AND month AND day).
  # `where.not(year: .., month: .., day: ..)` is deliberately avoided because
  # Active Record versions before 6.1 negate each key separately (NOR), which
  # would also drop every other row sharing today's year, month or day-of-month.
  # The clock is read once so the three date parts cannot straddle midnight.
  scope :past, -> {
    today = Time.zone.now
    where("NOT (#{table_name}.year = ? AND #{table_name}.month = ? AND #{table_name}.day = ?)",
          today.year, today.month, today.day)
  }

  # Summary metrics for this day as a hash; entries whose value is nil are removed.
  def metrics
    { year: year,
      month: month,
      day: day,
      pdf: pdf,
      html: html,
      readers: readers,
      comments: comments,
      likes: likes,
      total: total }.compact
  end
end
2 changes: 1 addition & 1 deletion app/models/doc.rb
Expand Up @@ -8,7 +8,7 @@ def self.all
end

def self.find(param)
name = all.find { |doc| doc.downcase == "#{param.downcase}.md" }
name = all.find { |doc| doc.downcase.match(/#{param.downcase}\.[md|html]/) }
if name.present?
new(name)
else
Expand Down
3 changes: 3 additions & 0 deletions app/models/event.rb
@@ -0,0 +1,3 @@
# Active Record model; no associations, scopes or methods are defined in this file.
class Event < ActiveRecord::Base

end
2 changes: 1 addition & 1 deletion app/models/filters/citation_milestone_alert.rb
Expand Up @@ -9,7 +9,7 @@ def run_filter(state)
{ source_id: response.source_id,
work_id: response.work_id,
level: Alert::INFO,
message: "Work has been cited #{response.event_count} times" }
message: "Work has been cited #{response.total} times" }
end
raise_alerts(responses)
end
Expand Down
2 changes: 1 addition & 1 deletion app/models/filters/event_count_decreasing_error.rb
Expand Up @@ -9,7 +9,7 @@ def run_filter(state)
{ source_id: response.source_id,
work_id: response.work_id,
level: Alert::INFO,
message: "Event count decreased from #{response.previous_count} to #{response.event_count}" }
message: "Event count decreased from #{response.previous_total} to #{response.total}" }
end
raise_alerts(responses)
end
Expand Down
Expand Up @@ -9,7 +9,7 @@ def run_filter(state)
{ source_id: response.source_id,
work_id: response.work_id,
level: Alert::INFO,
message: "Event count increased by #{response.event_count - response.previous_count} in #{response.update_interval} day(s)" }
message: "Event count increased by #{response.total - response.previous_total} in #{response.update_interval} day(s)" }
end
raise_alerts(responses)
end
Expand Down
29 changes: 17 additions & 12 deletions app/models/filters/html_ratio_too_high_error.rb
Expand Up @@ -2,27 +2,32 @@

class HtmlRatioTooHighError < Filter
def run_filter(state)
source = Source.where(name: "counter").first
first_response = ApiResponse.filter(state[:id]).first
responses = first_response.get_html_ratio
responses = ApiResponse.filter(state[:id]).slow(limit)

if responses.count > 0
responses = responses.map do |response|
doi = response['id'] && response['id'][8..-1]
work = Work.where(doi: doi).first
work_id = work && work.id
date = Time.zone.now.to_date.to_formatted_s(:short)

{ source_id: source.id,
work_id: work_id,
responses = responses.to_a.map do |response|
{ source_id: response.source_id,
work_id: response.work_id,
level: Alert::INFO,
message: "HTML/PDF ratio is #{response['value']['ratio']} with #{response['value']['html']} HTML views on #{date}" }
message: "HTML/PDF ratio is #{response.html / response.pdf} with #{response.html} views" }
end
raise_alerts(responses)
end

responses.count
end

def get_config_fields
[{ field_name: "limit", field_type: "text_field", field_hint: "Raise an error if html to pdf ratio is higher than the specified value." }]
end

def limit
config.limit || 50
end

def source_ids
config.source_ids || Source.active.joins(:group).where("groups.name" => 'viewed').pluck(:id)
end
end

module Exceptions
Expand Down
165 changes: 70 additions & 95 deletions app/models/history.rb
Expand Up @@ -2,23 +2,23 @@

class History
# we can get data_from_source in 3 different formats
# - hash with event_count == 0: SUCCESS NO DATA
# - hash with event_count > 0: SUCCESS
# - hash with event_count nil or error: ERROR
# - hash with total == 0: SUCCESS NO DATA
# - hash with total > 0: SUCCESS
# - hash with total nil or error: ERROR
#
# SUCCESS NO DATA
# The source knows about the work identifier, but returns an event_count of 0
# The source knows about the work identifier, but returns a total of 0
#
# SUCCESS
# The source knows about the work identifier, and returns an event_count > 0
# The source knows about the work identifier, and returns a total > 0
#
# ERROR
# An error occurred, typically 408 (Request Timeout), 403 (Too Many Requests) or 401 (Unauthorized)
# It could also be an error in our code. 404 (Not Found) errors are handled as SUCCESS NO DATA
# We don't update retrieval status and set skipped to true,
# so that the request is repeated later. We could get stuck, but we see this in alerts
#
# This class returns a hash in the format event_count: 12, previous_count: 8, skipped: false, update_interval: 31
# This class returns a hash in the format total: 12, previous_total: 8, skipped: false, update_interval: 31
# This hash can be used to track API responses, e.g. when event counts go down

# include HTTP request helpers
Expand All @@ -27,21 +27,27 @@ class History
# include metrics helpers
include Measurable

attr_accessor :retrieval_status, :works, :event_count, :previous_count, :previous_retrieved_at, :event_metrics, :events_by_day, :events_by_month, :events_url, :status, :rs_rev, :rh_rev, :data
attr_accessor :retrieval_status, :works, :total, :pdf, :html, :readers, :comments, :likes, :extra, :previous_total, :previous_retrieved_at, :event_metrics, :events_by_day, :events_by_month, :events_url, :status, :rs_rev, :rh_rev, :data

def initialize(rs_id, data = {})
@retrieval_status = RetrievalStatus.find(rs_id)
@previous_count = retrieval_status.event_count
@previous_total = retrieval_status.total
@previous_retrieved_at = retrieval_status.retrieved_at

@status = case
when data[:error] then :error
when data[:event_count].nil? then :error
when data[:event_count] > 0 then :success
when data[:event_count] == 0 then :success_no_data
when data[:error] || data[:total].nil? then :error
when data[:total] > 0 then :success
when data[:total] == 0 then :success_no_data
end

@event_count = data[:event_count]
@pdf = data.fetch(:pdf, nil)
@html = data.fetch(:html, nil)
@readers = data.fetch(:readers, nil)
@comments = data.fetch(:comments, nil)
@likes = data.fetch(:likes, nil)
@total = data.fetch(:total, nil).to_i

@extra = data.fetch(:extra, nil)

if not_error?
@event_metrics = data[:event_metrics] || get_event_metrics(citations: 0)
Expand Down Expand Up @@ -70,91 +76,70 @@ def save_to_retrieval_statuses
retrieval_status.update_attributes(retrieved_at: retrieved_at,
scheduled_at: retrieval_status.stale_at,
queued_at: nil,
event_count: event_count,
total: total,
pdf: pdf,
html: html,
readers: readers,
comments: comments,
likes: likes,
event_metrics: event_metrics,
events_url: events_url)
events_url: events_url,
extra: extra)
end

def save_to_works
@works.map { |item| Work.find_or_create(item) }
works.map { |item| Work.find_or_create(item) }
end

def save_to_days
@events_by_day.map { |item| Day.where(retrieval_status_id: retrieval_status.id,
day: item["day"],
month: item["month"],
year: item["year"]).first_or_create(
work_id: retrieval_status.work_id,
source_id: retrieval_status.source_id,
total_count: item["total_count"],
html_count: item["html_count"],
pdf_count: item["pdf_count"]) }
Array(events_by_day).map { |item| Day.where(retrieval_status_id: retrieval_status.id,
day: item[:day],
month: item[:month],
year: item[:year]).first_or_create(
work_id: retrieval_status.work_id,
source_id: retrieval_status.source_id,
total: item[:total],
pdf: item[:pdf],
html: item[:html],
readers: item[:readers],
comments: item[:comments],
likes: item[:likes]) }
end

def save_to_months
@events_by_month.map { |item| Month.where(retrieval_status_id: retrieval_status.id,
month: item["month"],
year: item["year"]).first_or_create(
work_id: retrieval_status.work_id,
source_id: retrieval_status.source_id,
total_count: item["total_count"],
html_count: item["html_count"],
pdf_count: item["pdf_count"]) }
Array(events_by_month).map { |item| Month.where(retrieval_status_id: retrieval_status.id,
month: item[:month],
year: item[:year]).first_or_create(
work_id: retrieval_status.work_id,
source_id: retrieval_status.source_id,
total: item[:total],
pdf: item[:pdf],
html: item[:html],
readers: item[:readers],
comments: item[:comments],
likes: item[:likes]) }
end

def get_events_by_day(event_arr = nil)
event_arr = Array(event_arr)

def get_events_by_day
# track daily events only the first 30 days after publication
# return entry for older works
return event_arr if today - retrieval_status.work.published_on > 30

# count entries not including the current day
event_arr.delete_if { |item| item['day'] == today.day && item['month'] == today.month && item['year'] == today.year }

if ['counter', 'pmc', 'copernicus', 'figshare'].include?(retrieval_status.source.name)
html = event_metrics[:html] - event_arr.reduce(0) { |sum, item| sum + item['html'] }
pdf = event_metrics[:pdf] - event_arr.reduce(0) { |sum, item| sum + item['pdf'] }
return nil if today - retrieval_status.work.published_on > 30

item = { 'year' => today.year,
'month' => today.month,
'day' => today.day,
'html_count' => html,
'pdf_count' => pdf }
else
total = event_count - event_arr.reduce(0) { |sum, item| sum + item['total'] }
item = { 'year' => today.year,
'month' => today.month,
'day' => today.day,
'total_count' => total }
end

event_arr << item
hsh = get_new_events(retrieval_status.days.past)
[hsh.merge(year: today.year, month: today.month, day: today.day)]
end

def get_events_by_month(event_arr = nil)
event_arr = Array(event_arr)

# count entries not including the current month
event_arr.delete_if { |item| item['month'] == today.month && item['year'] == today.year }

if ['copernicus', 'figshare'].include?(retrieval_status.source.name)
html = event_metrics[:html] - event_arr.reduce(0) { |sum, item| sum + item['html'] }
pdf = event_metrics[:pdf] - event_arr.reduce(0) { |sum, item| sum + item['pdf'] }

item = { 'year' => today.year,
'month' => today.month,
'html_count' => html,
'pdf_count' => pdf }
else
total = event_count - event_arr.reduce(0) { |sum, item| sum + item['total'] }

item = { 'year' => today.year,
'month' => today.month,
'total_count' => total }
end
# Builds the entry for the current month: the new-event counts computed
# against the retrieval status' past month rows, stamped with today's
# year and month. Returns a one-element array containing that hash.
def get_events_by_month
  past_months = retrieval_status.months.past
  entry = get_new_events(past_months).merge(year: today.year, month: today.month)
  [entry]
end

event_arr << item
# Computes, per metric, the current value minus the sum already stored in
# +rows+ (anything responding to +sum(column)+). Metrics whose current
# value is nil are left out of the result, and +rows+ is not queried for them.
def get_new_events(rows)
  current = { pdf: pdf,
              html: html,
              readers: readers,
              comments: comments,
              likes: likes,
              total: total }

  current.each_with_object({}) do |(metric, value), fresh|
    fresh[metric] = value - rows.sum(metric) unless value.nil?
  end
end

def not_error?
Expand Down Expand Up @@ -186,21 +171,11 @@ def retrieved_at
Time.zone.now
end

def data
{ pid: retrieval_status.work.pid,
retrieved_at: retrieved_at,
source: retrieval_status.source.name,
events: events,
events_url: events_url,
event_metrics: event_metrics,
events_by_day: events_by_day,
events_by_month: events_by_month,
doc_type: "current" }
end

def to_hash
{ event_count: event_count,
previous_count: previous_count,
{ total: total,
html: html,
pdf: pdf,
previous_total: previous_total,
skipped: skipped,
update_interval: update_interval }
end
Expand Down

0 comments on commit d2e94f6

Please sign in to comment.