Skip to content

Commit

Permalink
HELIO-3888 Log and Measure epub searches
Browse files Browse the repository at this point in the history
  • Loading branch information
sethaj committed May 7, 2021
1 parent 9bff38b commit e0a586b
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 32 deletions.
3 changes: 3 additions & 0 deletions app/controllers/e_pubs_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,13 @@ def search
query = params[:q] || ''
# due to performance issues, must have 3 or more characters to search
return render json: { q: query, search_results: [] } if query.length < 3
start = (Time.now.to_f * 1000.0).to_i
results = Rails.cache.fetch(search_cache_key(@noid, query), expires_in: 30.days) do
epub = EPub::Publication.from_directory(UnpackService.root_path_from_noid(@noid, 'epub'))
epub.search(query)
end
finish = (Time.now.to_f * 1000.0).to_i
EpubSearchLog.create(noid: @noid, query: query, time: finish - start, hits: results[:search_results].count, search_results: results)
render json: results
rescue StandardError => e
Rails.logger.error "EPubsController.search raised #{e}"
Expand Down
6 changes: 6 additions & 0 deletions app/models/epub_search_log.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# frozen_string_literal: true

# One persisted row per logged EPUB search.
#
# EPubsController#search creates these with the file's noid, the query text,
# the elapsed time in milliseconds, the hit count and the full result payload.
class EpubSearchLog < ApplicationRecord
  include Filterable

  # Restricts the relation to rows whose +query+ column contains the given
  # text anywhere inside it (SQL LIKE with a wildcard on both sides).
  scope :query_like, lambda { |like|
    pattern = "%#{like}%"
    where("query like ?", pattern)
  }
end
13 changes: 13 additions & 0 deletions db/migrate/20210507185550_create_epub_search_logs.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Creates the epub_search_logs table used to record EPUB searches.
class CreateEpubSearchLogs < ActiveRecord::Migration[5.2]
  def change
    create_table(:epub_search_logs) do |table|
      # Identifier of the searched EPUB; indexed below.
      table.string :noid
      table.index :noid

      # The search string as submitted.
      table.text :query

      # time: elapsed milliseconds as computed by EPubsController#search.
      # hits: number of entries in the returned search_results.
      table.integer :time, :hits

      # Complete result payload; mediumtext because it can be large.
      table.mediumtext :search_results

      table.timestamps
    end
  end
end
21 changes: 12 additions & 9 deletions db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2021_04_08_212028) do
ActiveRecord::Schema.define(version: 2021_05_07_185550) do

create_table "api_requests", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.integer "user_id"
Expand Down Expand Up @@ -161,6 +161,17 @@
t.text "toc", limit: 16777215
end

# Search log table. The name is plural so that it matches the table created by
# the CreateEpubSearchLogs migration (create_table :epub_search_logs) and the
# table Active Record infers for the EpubSearchLog model.
create_table "epub_search_logs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
  t.string "noid"
  t.text "query"
  t.integer "time"
  t.integer "hits"
  # limit 16777215 is how a MySQL mediumtext column is dumped.
  t.text "search_results", limit: 16777215
  t.datetime "created_at", null: false
  t.datetime "updated_at", null: false
  t.index ["noid"], name: "index_epub_search_logs_on_noid"
end

create_table "featured_representatives", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "work_id"
t.string "file_set_id"
Expand Down Expand Up @@ -428,14 +439,6 @@
t.index ["grantor_id"], name: "index_proxy_deposit_rights_on_grantor_id"
end

# robotrons: each row has a required `ip` string (indexed, not unique) and a
# `hits` integer that defaults to 0, plus the standard timestamps.
# NOTE(review): presumably a per-IP hit counter — confirm against the model
# that uses this table. Also confirm this table is still meant to be part of
# the schema; no migration shown in this commit drops it.
create_table "robotrons", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "ip", null: false
t.integer "hits", default: 0
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["ip"], name: "index_robotrons_on_ip"
end

create_table "roles", id: :integer, options: "ENGINE=InnoDB DEFAULT CHARSET=utf8", force: :cascade do |t|
t.string "resource_type"
t.integer "resource_id"
Expand Down
50 changes: 27 additions & 23 deletions lib/e_pub/search.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require "skylight"

module EPub
class Search
def initialize(publication)
Expand Down Expand Up @@ -65,29 +67,31 @@ def results_from_chapters(db_results, query)
results[:search_results] = []

db_results.each do |chapter|
file = File.join(@publication.root_path, File.dirname(@publication.content_file), chapter[:href])
doc = Nokogiri::XML(File.open(file))
doc.remove_namespaces!

matches = []
body = doc.xpath("//body")
body.children.each do |node|
matches << find_targets(node, query)
end

matches = matches.flatten.compact

matches.each_index do |index|
match = matches[index]

# De-duplicate identical snippets with slightly different CFIs that are neighbors.
# Since we need the CFIs in the reader for syntax highlighting, still send those,
# just not snippets
empty_snippet = "" if match.snippet == matches[index - 1].snippet && matches.length > 1

results[:search_results].push(cfi: "#{chapter[:basecfi]}#{match.cfi}",
title: chapter[:title],
snippet: empty_snippet || match.snippet)
Skylight.instrument title: "EpubSearch Text In Each Chapter" do
file = File.join(@publication.root_path, File.dirname(@publication.content_file), chapter[:href])
doc = Nokogiri::XML(File.open(file))
doc.remove_namespaces!

matches = []
body = doc.xpath("//body")
body.children.each do |node|
matches << find_targets(node, query)
end

matches = matches.flatten.compact

matches.each_index do |index|
match = matches[index]

# De-duplicate identical snippets with slightly different CFIs that are neighbors.
# Since we need the CFIs in the reader for syntax highlighting, still send those,
# just not snippets
empty_snippet = "" if match.snippet == matches[index - 1].snippet && matches.length > 1

results[:search_results].push(cfi: "#{chapter[:basecfi]}#{match.cfi}",
title: chapter[:title],
snippet: empty_snippet || match.snippet)
end
end
end
results
Expand Down
6 changes: 6 additions & 0 deletions spec/controllers/e_pubs_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@
# to get search highlighting
@snippets = JSON.parse(response.body)["search_results"].map { |result| result["snippet"].presence }.compact
expect(@snippets.length).to eq 102
expect(EpubSearchLog.first.noid).to eq file_set.id
expect(EpubSearchLog.first.query).to eq "White Whale"
expect(EpubSearchLog.first.hits).to eq 105
end
end

Expand Down Expand Up @@ -312,6 +315,9 @@
expect(response).to have_http_status(:success)
expect(JSON.parse(response.body)["q"]).to eq "glubmerschmup"
expect(JSON.parse(response.body)["search_results"]).to eq []
expect(EpubSearchLog.first.noid).to eq file_set.id
expect(EpubSearchLog.first.query).to eq "glubmerschmup"
expect(EpubSearchLog.first.hits).to eq 0
end
end
end
Expand Down

0 comments on commit e0a586b

Please sign in to comment.