Permalink
Browse files

Added PostgreSQL-powered search

Heavily inspired by http://blog.lotech.org/postgres-full-text-search-with-django.html

Each content type now has a search_document field on it. These are currently
populated using the ./manage.py reindex_all command, but will soon be
populated by triggers instead.

The /search/?q=django page returns paginated search results.
  • Loading branch information...
simonw committed Sep 30, 2017
1 parent 1ef7b3a commit 7e3a02178e3ca71c464ae68a3b68d70e5fa66692
Showing with 194 additions and 3 deletions.
  1. +35 −0 blog/management/commands/reindex_all.py
  2. +31 −0 blog/migrations/0005_search_document.py
  3. +32 −0 blog/models.py
  4. +57 −1 blog/views.py
  5. +4 −2 config/urls.py
  6. +35 −0 templates/search.html
@@ -0,0 +1,35 @@
from django.core.management.base import BaseCommand
from django.db.models import Value, F, Func
from django.contrib.postgres.search import SearchVector
from blog.models import Entry, Blogmark, Quotation
class Command(BaseCommand):
    """
    Management command that rebuilds the ``search_document`` column for
    every Entry, Blogmark and Quotation in a single UPDATE per model.
    """
    help = "Re-indexes all entries, blogmarks, quotations"

    def handle(self, *args, **kwargs):
        """
        Recompute ``search_document`` for each content type and report how
        many rows each UPDATE touched, one line per type.
        """
        # The *_vector_fields_only expressions are module-level names defined
        # below this class; they are only looked up when handle() runs.
        targets = (
            ('entries', Entry, entry_vector_fields_only),
            ('blogmarks', Blogmark, blogmark_vector_fields_only),
            ('quotations', Quotation, quotation_vector_fields_only),
        )
        for label, model, vector in targets:
            updated = model.objects.update(search_document=vector)
            # Write through the command's own stdout wrapper rather than
            # the Python 2-only print statement, so the command also runs
            # on Python 3 and its output can be captured by call_command().
            self.stdout.write('%s %d' % (label, updated))
def strip_tags_func(field):
    """
    Build a database expression that calls ``regexp_replace`` on the named
    field, replacing every match of ``<.*?>`` with the empty string.

    ``field`` is the name of the model field to wrap in an ``F()`` reference.
    """
    tag_pattern = Value('<.*?>')
    replacement = Value('')
    # 'g' asks regexp_replace to replace all matches, not just the first
    flags = Value('g')
    return Func(
        F(field), tag_pattern, replacement, flags, function='regexp_replace'
    )
# Weighted search vectors, one per content type. Each is passed as the new
# search_document value in Command.handle() above.

# Entry: title at weight A, body (run through strip_tags_func) at weight C.
entry_vector_fields_only = (
SearchVector('title', weight='A') +
SearchVector(strip_tags_func('body'), weight='C')
)
# Blogmark: link_title at weight A, commentary (run through strip_tags_func)
# at weight C.
blogmark_vector_fields_only = (
SearchVector('link_title', weight='A') +
SearchVector(strip_tags_func('commentary'), weight='C')
)
# Quotation: source at weight A, quotation text at weight B. Neither field is
# passed through strip_tags_func.
quotation_vector_fields_only = (
SearchVector('source', weight='A') +
SearchVector('quotation', weight='B')
)
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.5 on 2017-09-30 20:45
from __future__ import unicode_literals
import django.contrib.postgres.search
from django.db import migrations
class Migration(migrations.Migration):
    """
    Adds a nullable ``search_document`` SearchVectorField to the blogmark,
    entry and quotation models.
    """

    dependencies = [('blog', '0004_metadata_json')]

    # One AddField per model, in the same order as originally generated.
    # A generator expression (rather than a list comprehension) keeps the
    # loop variable out of the class namespace on Python 2.
    operations = list(
        migrations.AddField(
            model_name=model_name,
            name='search_document',
            field=django.contrib.postgres.search.SearchVectorField(null=True),
        )
        for model_name in ('blogmark', 'entry', 'quotation')
    )
View
@@ -4,6 +4,7 @@
from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.postgres.fields import JSONField
from django.contrib.postgres.search import SearchVectorField
from django.utils.html import escape
import re
from xml.etree import ElementTree
@@ -78,6 +79,7 @@ class BaseModel(models.Model):
tags = models.ManyToManyField(Tag, blank=True)
slug = models.SlugField(max_length=64)
metadata = JSONField()
search_document = SearchVectorField(null=True)
def tag_summary(self):
    """Return the ``tag`` of each of this object's tags, joined by single spaces."""
    tag_names = [tagged.tag for tagged in self.tags.all()]
    return u' '.join(tag_names)
@@ -280,3 +282,33 @@ def spam_status_options(self):
class Meta:
    # "Latest" lookups use the created field
    get_latest_by = 'created'
    # Default ordering is by created, descending
    ordering = ('-created',)
def load_mixed_objects(dicts):
    """
    Resolve a list of ``{'type': ..., 'pk': ...}`` dictionaries into the
    matching Blogmark / Entry / Quotation objects.

    Every dictionary must contain at least a 'type' and a 'pk' key. The
    returned list follows the order of ``dicts``; dictionaries with no
    matching object are left out. Each returned object gets an
    ``.original_dict`` attribute holding the dictionary it came from.
    """
    # Collect the primary keys requested for each type
    pks_by_type = {}
    for entry in dicts:
        pks_by_type.setdefault(entry['type'], set()).add(entry['pk'])

    models_by_type = (
        ('blogmark', Blogmark),
        ('entry', Entry),
        ('quotation', Quotation),
    )

    # One pk__in lookup per model, indexed by (type, pk)
    lookup = {}
    for type_name, model in models_by_type:
        wanted = pks_by_type.get(type_name) or []
        for obj in model.objects.filter(pk__in=wanted):
            lookup[(type_name, obj.pk)] = obj

    # Re-assemble in the caller's order, skipping anything not found
    ordered = []
    for entry in dicts:
        match = lookup.get((entry['type'], entry['pk']))
        if not match:
            continue
        match.original_dict = entry
        ordered.append(match)
    return ordered
View
@@ -2,7 +2,9 @@
from django.utils.dates import MONTHS_3_REV
from django.utils.timezone import utc
from django.contrib.admin.views.decorators import staff_member_required
from django.contrib.postgres.search import SearchQuery, SearchRank
from django.views.decorators.cache import never_cache
from django.db import models
from django.conf import settings
from django.core.paginator import (
Paginator,
@@ -19,8 +21,10 @@
Quotation,
Photo,
Photoset,
Tag
Tag,
load_mixed_objects,
)
import time
import datetime
import itertools
import CloudFlare
@@ -381,3 +385,55 @@ def tools(request):
return render(request, 'tools.html', {
'msg': request.GET.get('msg')
})
def search(request):
    """
    Search entry point: with a non-empty ``q`` GET parameter, delegate to
    search_results(); otherwise render the bare search.html form.
    """
    q = request.GET.get('q')
    if not q:
        return render(request, 'search.html')
    return search_results(request, q)
def search_results(request, q):
    """
    Run a full-text search for ``q`` across entries, blogmarks and
    quotations and render one page of results with search.html.

    Results from the three models are combined with UNION, ordered by
    descending rank and paginated 40 per page. The page is taken from the
    ``page`` GET parameter (default 1).

    Raises Http404 if the page number is not an integer or is out of range.
    """
    start = time.time()
    query = SearchQuery(q)
    rank_annotation = SearchRank(models.F('search_document'), query)

    def matches(model, type_name):
        # Rows of `model` matching the query, reduced to the columns the
        # three querysets share so they can be UNIONed together.
        return model.objects.annotate(
            rank=rank_annotation,
            type=models.Value(type_name, output_field=models.CharField()),
        ).filter(search_document=query).values('pk', 'type', 'created', 'rank')

    # Without an explicit ordering the union comes back in arbitrary order:
    # the rank annotation would have no effect and pagination could repeat
    # or skip rows between pages.
    qs = matches(Entry, 'entry').union(
        matches(Blogmark, 'blogmark'),
        matches(Quotation, 'quotation'),
    ).order_by('-rank')

    paginator = Paginator(qs, 40)
    page_number = request.GET.get('page') or '1'
    try:
        page = paginator.page(page_number)
    except (PageNotAnInteger, EmptyPage):
        raise Http404

    # Swap the lightweight value dicts for full ORM objects, keeping the
    # type and rank that were computed by the search query.
    results = [
        {
            'type': obj.original_dict['type'],
            'rank': obj.original_dict['rank'],
            'obj': obj,
        }
        for obj in load_mixed_objects(page.object_list)
    ]
    end = time.time()
    return render(request, 'search.html', {
        'q': q,
        'results': results,
        'total': paginator.count,
        'page': page,
        'duration': end - start,
    })
View
@@ -16,11 +16,13 @@ def static_redirect(request):
url(r'^(\d{4})/(\w{3})/(\d{1,2})/$', blog_views.archive_day),
url(r'^(\d{4})/(\w{3})/(\d{1,2})/([\-\w]+)/$', blog_views.archive_item),
url(r'^search/$', blog_views.search),
url(r'^tags/$', blog_views.tag_index),
url(r'^tags/(.*?)/$', blog_views.archive_tag),
url(r'^tools/$', blog_views.tools),
url(r'^write/$', blog_views.write),
# (r'^about/$', blog_views.about),
url(r'^tags/$', blog_views.tag_index),
url(r'^tags/(.*?)/$', blog_views.archive_tag),
url(r'^admin/', include(admin.site.urls)),
url(r'^static/', static_redirect),
View
@@ -0,0 +1,35 @@
{% extends "item_base.html" %}
{% block title %}Search{% if q %} for “{{ q }}”{% endif %}{% endblock %}
{% block item_content %}
{% load blog_tags %}
<h2>Search{% if q %} for “{{ q }}”{% endif %}</h2>
{# The form submits back to the current path with the query in ?q= #}
<form action="{{ request.path }}" method="GET">
<input type="search" name="q" value="{{ q }}" style="width: 80%">
<input type="submit" value="Search">
</form>
<br>
{% if total %}
<p><strong>{{ total }} result{{ total|pluralize }}</strong></p>
{% blog_mixed_list_with_dates results %}
{# Pagination links are only shown when there is more than one page #}
{% if page.paginator.num_pages > 1 %}
<div class="pagination">
<span class="step-links">
{% if page.has_previous %}
<a href="?q={{ q|urlencode }}&amp;page={{ page.previous_page_number }}">&laquo; previous</a>
{% endif %}
<span class="current">
Page {{ page.number }} / {{ page.paginator.num_pages }}
</span>
{% if page.has_next %}
<a href="?q={{ q|urlencode }}&amp;page={{ page.next_page_number }}">next &raquo;</a>
{% endif %}
</span>
</div>
{% endif %}
{% elif q %}
{# A query was submitted but matched nothing: say so instead of showing only the form #}
<p><strong>No results found</strong></p>
{% endif %}
{% endblock %}

0 comments on commit 7e3a021

Please sign in to comment.