Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ and this project adheres to
- ♻️(frontend) preserve @ character when esc is pressed after typing it #1512
- ♻️(frontend) make summary button fixed to remain visible during scroll #1581
- ♻️(frontend) pdf embed use full width #1526
- 🚸(backend) use unaccented full name for user search #1637

### Fixed

Expand Down
20 changes: 14 additions & 6 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""API endpoints"""

# pylint: disable=too-many-lines

import base64
Expand All @@ -18,7 +19,7 @@
from django.db import connection, transaction
from django.db import models as db
from django.db.models.expressions import RawSQL
from django.db.models.functions import Left, Length
from django.db.models.functions import Greatest, Left, Length
from django.http import Http404, StreamingHttpResponse
from django.urls import reverse
from django.utils import timezone
Expand All @@ -37,6 +38,7 @@
from rest_framework.permissions import AllowAny

from core import authentication, choices, enums, models
from core.api.filters import remove_accents
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.services.converter_services import (
Expand Down Expand Up @@ -188,13 +190,15 @@ def get_queryset(self):
queryset = queryset.exclude(documentaccess__document_id=document_id)

filter_data = filterset.form.cleaned_data
query = filter_data["q"]
query = remove_accents(filter_data["q"])

# For emails, match by Levenshtein distance so that small typing errors are tolerated
if "@" in query:
return (
queryset.annotate(
distance=RawSQL("levenshtein(email::text, %s::text)", (query,))
distance=RawSQL(
"levenshtein(unaccent(email::text), %s::text)", (query,)
)
)
.filter(distance__lte=3)
.order_by("distance", "email")[: settings.API_USERS_LIST_LIMIT]
Expand All @@ -203,11 +207,15 @@ def get_queryset(self):
# Use trigram similarity for non-email-like queries
# For performance reasons we filter first by similarity, which relies on an
# index, then only calculate precise similarity scores for sorting purposes

return (
queryset.filter(email__trigram_word_similar=query)
.annotate(similarity=TrigramSimilarity("email", query))
queryset.annotate(
sim_email=TrigramSimilarity("email", query),
sim_name=TrigramSimilarity("full_name", query),
)
.annotate(similarity=Greatest("sim_email", "sim_name"))
.filter(similarity__gt=0.2)
.order_by("-similarity", "email")[: settings.API_USERS_LIST_LIMIT]
.order_by("-similarity")[: settings.API_USERS_LIST_LIMIT]
)

@drf.decorators.action(
Expand Down
37 changes: 37 additions & 0 deletions src/backend/core/migrations/0027_auto_20251120_0956.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Generated by Django 5.2.8 on 2025-11-20 09:56

from django.db import migrations


class Migration(migrations.Migration):
    """
    Create accent-insensitive trigram indexes on the user email and full name.

    Forward: defines two helper functions, `public.immutable_unaccent` (a C-level
    binding to the unaccent library) and `public.f_unaccent` (a single-argument
    SQL wrapper using the `public.unaccent` dictionary), both declared IMMUTABLE
    so they can be used in index expressions. It then builds GIN trigram indexes
    on `f_unaccent(email)` and `f_unaccent(full_name)` of `impress_user`.

    Reverse: drops the indexes and then the helper functions, so that rolling
    back leaves no object created by this migration behind.

    NOTE(review): this assumes the `unaccent` and `pg_trgm` extensions are already
    installed (in the `public` schema) by an earlier migration - confirm.
    """

    dependencies = [
        ("core", "0026_comments"),
    ]

    operations = [
        migrations.RunSQL(
            sql="""
                -- Immutable binding to the unaccent dictionary function
                CREATE OR REPLACE FUNCTION public.immutable_unaccent(regdictionary, text)
                    RETURNS text
                    LANGUAGE c IMMUTABLE PARALLEL SAFE STRICT AS
                    '$libdir/unaccent', 'unaccent_dict';

                -- Single-argument wrapper usable in index expressions
                CREATE OR REPLACE FUNCTION public.f_unaccent(text)
                    RETURNS text
                    LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT
                    RETURN public.immutable_unaccent(regdictionary 'public.unaccent', $1);

                CREATE INDEX IF NOT EXISTS user_email_unaccent_trgm_idx
                    ON impress_user
                    USING gin (f_unaccent(email) gin_trgm_ops);

                CREATE INDEX IF NOT EXISTS user_full_name_unaccent_trgm_idx
                    ON impress_user
                    USING gin (f_unaccent(full_name) gin_trgm_ops);
            """,
            reverse_sql="""
                -- Indexes first: they depend on f_unaccent
                DROP INDEX IF EXISTS user_email_unaccent_trgm_idx;
                DROP INDEX IF EXISTS user_full_name_unaccent_trgm_idx;

                -- Then the wrapper, and finally the function it relies on
                DROP FUNCTION IF EXISTS public.f_unaccent(text);
                DROP FUNCTION IF EXISTS public.immutable_unaccent(regdictionary, text);
            """,
        ),
    ]
125 changes: 125 additions & 0 deletions src/backend/core/tests/test_api_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,131 @@ def test_api_users_list_query_email():
assert user_ids == []


def test_api_users_list_query_email_with_internationalized_domain_names():
    """
    Filtering the user list by email should keep working when the address uses
    an internationalized domain name, whether or not the query carries the accents.
    """
    requester = factories.UserFactory()

    api_client = APIClient()
    api_client.force_login(requester)

    jean = factories.UserFactory(email="jean.martin@éducation.fr")
    marie = factories.UserFactory(email="marie.durand@education.fr")
    kurokawa = factories.UserFactory(email="contact@黒川.日本")

    # Each searched email is paired with the only user expected in the results
    expectations = (
        ("jean.martin@education.fr", jean),
        ("jean.martin@éducation.fr", jean),
        ("marie.durand@education.fr", marie),
        ("marie.durand@éducation.fr", marie),
        ("contact@黒川.日本", kurokawa),
    )

    for searched_email, expected_user in expectations:
        response = api_client.get(f"/api/v1.0/users/?q={searched_email}")
        assert response.status_code == 200
        returned_ids = [entry["id"] for entry in response.json()]
        assert returned_ids == [str(expected_user.id)]


def test_api_users_list_query_full_name():
    """
    Filtering the user list by full name should be possible for authenticated
    users, regardless of the case used in the query.
    Only results with a Trigram similarity greater than 0.2 with the query
    should be returned.
    """
    requester = factories.UserFactory()

    api_client = APIClient()
    api_client.force_login(requester)

    dave = factories.UserFactory(email="contact@work.com", full_name="David Bowman")
    dave_only = [str(dave.id)]

    # Each query is paired with the exact list of user ids expected back
    expectations = (
        ("David", dave_only),
        ("Bowman", dave_only),
        ("bowman", dave_only),
        ("BOWMAN", dave_only),
        ("BoWmAn", dave_only),
        ("Bovin", []),
    )

    for searched_name, expected_ids in expectations:
        response = api_client.get(f"/api/v1.0/users/?q={searched_name}")
        assert response.status_code == 200
        returned_ids = [entry["id"] for entry in response.json()]
        assert returned_ids == expected_ids


def test_api_users_list_query_accented_full_name():
    """
    Filtering the user list by full name should work when the stored name has
    accents, whether the query is typed with or without them.
    Only results with a Trigram similarity greater than 0.2 with the query
    should be returned.
    """
    requester = factories.UserFactory()

    api_client = APIClient()
    api_client.force_login(requester)

    fred = factories.UserFactory(
        email="contact@work.com", full_name="Frédérique Lefèvre"
    )

    # Accented and unaccented spellings must all find the same user
    for searched_name in ("Frédérique", "Frederique", "Lefèvre", "Lefevre"):
        response = api_client.get(f"/api/v1.0/users/?q={searched_name}")
        assert response.status_code == 200
        returned_ids = [entry["id"] for entry in response.json()]
        assert returned_ids == [str(fred.id)]

    # A query too far from the stored name must not return anyone
    response = api_client.get("/api/v1.0/users/?q=François Lorfebvre")
    assert response.status_code == 200
    returned_names = [entry["full_name"] for entry in response.json()]
    assert returned_names == []


def test_api_users_list_limit(settings):
"""
Authenticated users should be able to list users and the number of results
Expand Down
Loading