Skip to content

Commit

Permalink
Fixes relative date Whoosh queries and adds testing to ensure it rema…
Browse files Browse the repository at this point in the history
…ins working with multiple timezomes
  • Loading branch information
stumpylog committed Jan 31, 2023
1 parent dace08d commit 3b938e6
Show file tree
Hide file tree
Showing 2 changed files with 268 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/documents/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dateutil.parser import isoparse
from django.conf import settings
from django.utils import timezone
from documents.models import Comment
from documents.models import Document
from whoosh import classify
Expand Down Expand Up @@ -262,7 +263,7 @@ def _get_query(self):
["content", "title", "correspondent", "tag", "type", "comments"],
self.searcher.ixreader.schema,
)
qp.add_plugin(DateParserPlugin())
qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
q = qp.parse(q_str)

corrected = self.searcher.correct_query(q, q_str)
Expand Down
266 changes: 266 additions & 0 deletions src/documents/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import urllib.request
import uuid
import zipfile
from datetime import timedelta
from pathlib import Path
from unittest import mock
from unittest.mock import MagicMock
Expand All @@ -23,6 +24,7 @@
from django.contrib.auth.models import User
from django.test import override_settings
from django.utils import timezone
from dateutil.relativedelta import relativedelta
from documents import bulk_edit
from documents import index
from documents.models import Correspondent
Expand Down Expand Up @@ -505,6 +507,270 @@ def test_search_invalid_page(self):
response = self.client.get("/api/documents/?query=content&page=3&page_size=10")
self.assertEqual(response.status_code, 404)

@override_settings(
TIME_ZONE="UTC",
)
def test_search_added_in_last_week(self):
"""
GIVEN:
- Three documents added right now
- The timezone is UTC time
WHEN:
- Query for documents added in the last 7 days
THEN:
- The two recent documents are returned
"""
d1 = Document.objects.create(
title="invoice",
content="the thing i bought at a shop and paid with bank account",
checksum="A",
pk=1,
)
d2 = Document.objects.create(
title="bank statement 1",
content="things i paid for in august",
pk=2,
checksum="B",
)
d3 = Document.objects.create(
title="bank statement 3",
content="things i paid for in september",
pk=3,
checksum="C",
)
with index.open_index_writer() as writer:
index.update_document(writer, d1)
index.update_document(writer, d2)
index.update_document(writer, d3)

response = self.client.get("/api/documents/?query=added:[-1 week to now]")
results = response.data["results"]
# Expect 3 documents returned
self.assertEqual(len(results), 3)

for idx, subset in enumerate(
[
{"id": 1, "title": "invoice"},
{"id": 2, "title": "bank statement 1"},
{"id": 3, "title": "bank statement 3"},
],
):
result = results[idx]
# Assert subset in results
self.assertDictEqual(result, {**result, **subset})

@override_settings(
TIME_ZONE="America/Chicago",
)
def test_search_added_in_last_week_with_timezone_behind(self):
"""
GIVEN:
- Two documents added right now
- One document added over a week ago
- The timezone is behind UTC time (-6)
WHEN:
- Query for documents added in the last 7 days
THEN:
- The two recent documents are returned
"""
d1 = Document.objects.create(
title="invoice",
content="the thing i bought at a shop and paid with bank account",
checksum="A",
pk=1,
)
d2 = Document.objects.create(
title="bank statement 1",
content="things i paid for in august",
pk=2,
checksum="B",
)
d3 = Document.objects.create(
title="bank statement 3",
content="things i paid for in september",
pk=3,
checksum="C",
# 7 days, 1 hour and 1 minute ago
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
)
with index.open_index_writer() as writer:
index.update_document(writer, d1)
index.update_document(writer, d2)
index.update_document(writer, d3)

response = self.client.get("/api/documents/?query=added:[-1 week to now]")
results = response.data["results"]

# Expect 2 documents returned
self.assertEqual(len(results), 2)

for idx, subset in enumerate(
[{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
):
result = results[idx]
# Assert subset in results
self.assertDictEqual(result, {**result, **subset})

@override_settings(
TIME_ZONE="Europe/Sofia",
)
def test_search_added_in_last_week_with_timezone_ahead(self):
"""
GIVEN:
- Two documents added right now
- One document added over a week ago
- The timezone is behind UTC time (+2)
WHEN:
- Query for documents added in the last 7 days
THEN:
- The two recent documents are returned
"""
d1 = Document.objects.create(
title="invoice",
content="the thing i bought at a shop and paid with bank account",
checksum="A",
pk=1,
)
d2 = Document.objects.create(
title="bank statement 1",
content="things i paid for in august",
pk=2,
checksum="B",
)
d3 = Document.objects.create(
title="bank statement 3",
content="things i paid for in september",
pk=3,
checksum="C",
# 7 days, 1 hour and 1 minute ago
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
)
with index.open_index_writer() as writer:
index.update_document(writer, d1)
index.update_document(writer, d2)
index.update_document(writer, d3)

response = self.client.get("/api/documents/?query=added:[-1 week to now]")
results = response.data["results"]

# Expect 2 documents returned
self.assertEqual(len(results), 2)

for idx, subset in enumerate(
[{"id": 1, "title": "invoice"}, {"id": 2, "title": "bank statement 1"}],
):
result = results[idx]
# Assert subset in results
self.assertDictEqual(result, {**result, **subset})

def test_search_added_in_last_month(self):
"""
GIVEN:
- One document added right now
- One documents added about a week ago
- One document added over 1 month
WHEN:
- Query for documents added in the last month
THEN:
- The two recent documents are returned
"""
d1 = Document.objects.create(
title="invoice",
content="the thing i bought at a shop and paid with bank account",
checksum="A",
pk=1,
)
d2 = Document.objects.create(
title="bank statement 1",
content="things i paid for in august",
pk=2,
checksum="B",
# 1 month, 1 day ago
added=timezone.now() - relativedelta(months=1, days=1),
)
d3 = Document.objects.create(
title="bank statement 3",
content="things i paid for in september",
pk=3,
checksum="C",
# 7 days, 1 hour and 1 minute ago
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
)

with index.open_index_writer() as writer:
index.update_document(writer, d1)
index.update_document(writer, d2)
index.update_document(writer, d3)

response = self.client.get("/api/documents/?query=added:[-1 month to now]")
results = response.data["results"]

# Expect 2 documents returned
self.assertEqual(len(results), 2)

for idx, subset in enumerate(
[{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
):
result = results[idx]
# Assert subset in results
self.assertDictEqual(result, {**result, **subset})

@override_settings(
TIME_ZONE="America/Denver",
)
def test_search_added_in_last_month_timezone_behind(self):
"""
GIVEN:
- One document added right now
- One documents added about a week ago
- One document added over 1 month
- The timezone is behind UTC time (-6 or -7)
WHEN:
- Query for documents added in the last month
THEN:
- The two recent documents are returned
"""
d1 = Document.objects.create(
title="invoice",
content="the thing i bought at a shop and paid with bank account",
checksum="A",
pk=1,
)
d2 = Document.objects.create(
title="bank statement 1",
content="things i paid for in august",
pk=2,
checksum="B",
# 1 month, 1 day ago
added=timezone.now() - relativedelta(months=1, days=1),
)
d3 = Document.objects.create(
title="bank statement 3",
content="things i paid for in september",
pk=3,
checksum="C",
# 7 days, 1 hour and 1 minute ago
added=timezone.now() - timedelta(days=7, hours=1, minutes=1),
)

with index.open_index_writer() as writer:
index.update_document(writer, d1)
index.update_document(writer, d2)
index.update_document(writer, d3)

response = self.client.get("/api/documents/?query=added:[-1 month to now]")
results = response.data["results"]

# Expect 2 documents returned
self.assertEqual(len(results), 2)

for idx, subset in enumerate(
[{"id": 1, "title": "invoice"}, {"id": 3, "title": "bank statement 3"}],
):
result = results[idx]
# Assert subset in results
self.assertDictEqual(result, {**result, **subset})

@mock.patch("documents.index.autocomplete")
def test_search_autocomplete(self, m):
m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]
Expand Down

0 comments on commit 3b938e6

Please sign in to comment.