From 13959e7be9a2a45a65bfffb97d4185d5f59e19b6 Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Sat, 10 Aug 2024 00:26:55 -0700 Subject: [PATCH 1/3] Full-text search on Postgres --- app/controllers/messages_controller.rb | 13 +++++++++++-- app/views/messages/index.html.erb | 9 +++------ db/migrate/20240810063115_add_pg_trgm.rb | 5 +++++ db/migrate/20240810064037_add_trigram_index.rb | 15 +++++++++++++++ db/schema.rb | 3 ++- 5 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 db/migrate/20240810063115_add_pg_trgm.rb create mode 100644 db/migrate/20240810064037_add_trigram_index.rb diff --git a/app/controllers/messages_controller.rb b/app/controllers/messages_controller.rb index ec84559..13afd85 100644 --- a/app/controllers/messages_controller.rb +++ b/app/controllers/messages_controller.rb @@ -3,12 +3,21 @@ class MessagesController < ApplicationController # GET /messages or /messages.json def index - @messages = Message.all + query = params[:q] + if query + @messages = Message.where('body like ?', "%#{query}%") + else + @messages = Message.all + end end # GET /messages/1 or /messages/1.json def show - @message = Message.from_s3(params[:list_name], params[:list_seq]) + if params[:id] + @message = Message.find(params[:id]) + else + @message = Message.from_s3(params[:list_name], params[:list_seq]) + end end # GET /messages/new diff --git a/app/views/messages/index.html.erb b/app/views/messages/index.html.erb index 809266a..4b5d127 100644 --- a/app/views/messages/index.html.erb +++ b/app/views/messages/index.html.erb @@ -2,13 +2,10 @@

Messages

-
+
+ <%= link_to "New message", new_message_path %> diff --git a/db/migrate/20240810063115_add_pg_trgm.rb b/db/migrate/20240810063115_add_pg_trgm.rb new file mode 100644 index 0000000..9629962 --- /dev/null +++ b/db/migrate/20240810063115_add_pg_trgm.rb @@ -0,0 +1,5 @@ +class AddPgTrgm < ActiveRecord::Migration[7.1] + def change + enable_extension 'pg_trgm' + end +end diff --git a/db/migrate/20240810064037_add_trigram_index.rb b/db/migrate/20240810064037_add_trigram_index.rb new file mode 100644 index 0000000..291a602 --- /dev/null +++ b/db/migrate/20240810064037_add_trigram_index.rb @@ -0,0 +1,15 @@ +class AddTrigramIndex < ActiveRecord::Migration[7.1] + def up + # According to https://www.postgresql.org/docs/9.1/textsearch-indexes.html + # + # > As a rule of thumb, GIN indexes are best for static data because + # > lookups are faster. For dynamic data, GiST indexes are faster to update. + # + # So we use GIN here instead of GiST. + add_index(:messages, :body, using: :gin, opclass: { body: :gin_trgm_ops }) + end + + def down + remove_index(:messages, :body) + end +end diff --git a/db/schema.rb b/db/schema.rb index 3330b7b..fcde555 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,13 +10,14 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2024_08_10_040807) do +ActiveRecord::Schema[7.1].define(version: 2024_08_10_064037) do create_table "messages", force: :cascade do |t| t.string "subject" t.string "from" t.text "body" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.index ["body"], name: "index_messages_on_body" end end From 41f1822ad7d39e7cae87a2bb24223f536ffd94f7 Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Fri, 9 Aug 2024 23:18:00 -0700 Subject: [PATCH 2/3] Handle invalid UTF-8 chars --- app/models/message.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/message.rb b/app/models/message.rb index 87d0bf4..56c9368 100644 --- a/app/models/message.rb +++ b/app/models/message.rb @@ -9,7 +9,7 @@ def self.from_s3(list_name, list_seq) end def self.from_string(str) - hs, body = str.split(/\n\n/, 2) + hs, body = str.encode('utf-8', invalid: :replace).split(/\n\n/, 2) headers = hs.split(/\n/).map { |line| line.split(/:\s+/, 2) }.to_h From 38fd78006a8c7f182de81e8a99d1d229c69dcee5 Mon Sep 17 00:00:00 2001 From: Kazuyoshi Kato Date: Sat, 10 Aug 2024 00:46:15 -0700 Subject: [PATCH 3/3] Use pg_trgm's operator to sort results correctly --- app/controllers/messages_controller.rb | 7 ++++++- app/views/messages/index.html.erb | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/app/controllers/messages_controller.rb b/app/controllers/messages_controller.rb index 13afd85..2c38c38 100644 --- a/app/controllers/messages_controller.rb +++ b/app/controllers/messages_controller.rb @@ -5,7 +5,12 @@ class MessagesController < ApplicationController def index query = params[:q] if query - @messages = Message.where('body like ?', "%#{query}%") + # %> and <-> are defined by pg_trgm. + # https://www.postgresql.org/docs/17/pgtrgm.html + @messages = Message.find_by_sql([ + 'SELECT * FROM messages WHERE body %> ? ORDER BY body <-> ? LIMIT 20', + query, query, + ]) else @messages = Message.all end diff --git a/app/views/messages/index.html.erb b/app/views/messages/index.html.erb index 4b5d127..0d32916 100644 --- a/app/views/messages/index.html.erb +++ b/app/views/messages/index.html.erb @@ -4,7 +4,10 @@
    <% @messages.each do |message| %> -
  • <%= link_to message.subject, message %>
  • +
  • + <%= link_to message.subject, message %> + <%= message.body %> +
  • <% end %>