diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9f3b5e6f02..e8dd1932f4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@ and this project adheres to
 - ♻️(frontend) preserve @ character when esc is pressed after typing it #1512
 - ♻️(frontend) make summary button fixed to remain visible during scroll #1581
 - ♻️(frontend) pdf embed use full width #1526
+- 🚸(backend) use unaccented full name for user search #1637
 
 ### Fixed
 
diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py
index 1c1b9ef50a..7594770bdd 100644
--- a/src/backend/core/api/viewsets.py
+++ b/src/backend/core/api/viewsets.py
@@ -1,4 +1,5 @@
 """API endpoints"""
+
 # pylint: disable=too-many-lines
 
 import base64
@@ -18,7 +19,7 @@
 from django.db import connection, transaction
 from django.db import models as db
 from django.db.models.expressions import RawSQL
-from django.db.models.functions import Left, Length
+from django.db.models.functions import Greatest, Left, Length
 from django.http import Http404, StreamingHttpResponse
 from django.urls import reverse
 from django.utils import timezone
@@ -37,6 +38,7 @@
 from rest_framework.permissions import AllowAny
 
 from core import authentication, choices, enums, models
+from core.api.filters import remove_accents
 from core.services.ai_services import AIService
 from core.services.collaboration_services import CollaborationService
 from core.services.converter_services import (
@@ -188,13 +190,15 @@ def get_queryset(self):
             queryset = queryset.exclude(documentaccess__document_id=document_id)
 
         filter_data = filterset.form.cleaned_data
-        query = filter_data["q"]
+        query = remove_accents(filter_data["q"])
 
-        # For emails, match emails by Levenstein distance to prevent typing errors
+        # For emails, match emails by Levenshtein distance to prevent typing errors
         if "@" in query:
             return (
                 queryset.annotate(
-                    distance=RawSQL("levenshtein(email::text, %s::text)", (query,))
+                    distance=RawSQL(
+                        "levenshtein(unaccent(email::text), %s::text)", (query,)
+                    )
                 )
                 .filter(distance__lte=3)
                 .order_by("distance", "email")[: settings.API_USERS_LIST_LIMIT]
@@ -203,9 +207,18 @@ def get_queryset(self):
         # Use trigram similarity for non-email-like queries
-        # For performance reasons we filter first by similarity, which relies on an
-        # index, then only calculate precise similarity scores for sorting purposes
+        # The query had its accents removed above, so the stored values go through
+        # f_unaccent() too; otherwise accented values lose trigrams against it.
+        # The better of the email and full name scores is kept, and ties are broken
+        # by email so that the sliced result stays deterministic.
         return (
-            queryset.filter(email__trigram_word_similar=query)
-            .annotate(similarity=TrigramSimilarity("email", query))
+            queryset.annotate(
+                sim_email=TrigramSimilarity(
+                    db.Func("email", function="f_unaccent"), query
+                ),
+                sim_name=TrigramSimilarity(
+                    db.Func("full_name", function="f_unaccent"), query
+                ),
+            )
+            .annotate(similarity=Greatest("sim_email", "sim_name"))
             .filter(similarity__gt=0.2)
             .order_by("-similarity", "email")[: settings.API_USERS_LIST_LIMIT]
         )
diff --git a/src/backend/core/migrations/0027_auto_20251120_0956.py b/src/backend/core/migrations/0027_auto_20251120_0956.py
new file mode 100644
index 0000000000..fe795ff5f2
--- /dev/null
+++ b/src/backend/core/migrations/0027_auto_20251120_0956.py
@@ -0,0 +1,51 @@
+# Generated by Django 5.2.8 on 2025-11-20 09:56
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+    """Create immutable unaccent wrappers and accent-insensitive trigram indexes.
+
+    PostgreSQL only accepts IMMUTABLE functions in index expressions, so the
+    forward SQL declares ``immutable_unaccent`` and ``f_unaccent`` as IMMUTABLE
+    and then indexes ``f_unaccent(email)`` and ``f_unaccent(full_name)`` on
+    ``impress_user``. The reverse SQL drops every object the forward SQL created.
+    """
+
+    dependencies = [
+        ("core", "0026_comments"),
+    ]
+
+    # NOTE(review): presumably the unaccent and pg_trgm extensions are installed
+    # by an earlier migration, and the SQL-standard "RETURN" function body needs
+    # PostgreSQL 14 or later; confirm both before deploying.
+    operations = [
+        migrations.RunSQL(
+            sql="""
+            CREATE OR REPLACE FUNCTION public.immutable_unaccent(regdictionary, text)
+              RETURNS text
+              LANGUAGE c IMMUTABLE PARALLEL SAFE STRICT AS
+            '$libdir/unaccent', 'unaccent_dict';
+
+            CREATE OR REPLACE FUNCTION public.f_unaccent(text)
+              RETURNS text
+              LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT
+            RETURN public.immutable_unaccent(regdictionary 'public.unaccent', $1);
+
+            CREATE INDEX IF NOT EXISTS user_email_unaccent_trgm_idx
+              ON impress_user
+              USING gin (f_unaccent(email) gin_trgm_ops);
+
+            CREATE INDEX IF NOT EXISTS user_full_name_unaccent_trgm_idx
+              ON impress_user
+              USING gin (f_unaccent(full_name) gin_trgm_ops);
+            """,
+            # Indexes go first, then f_unaccent, which calls immutable_unaccent.
+            reverse_sql="""
+            DROP INDEX IF EXISTS user_email_unaccent_trgm_idx;
+            DROP INDEX IF EXISTS user_full_name_unaccent_trgm_idx;
+            DROP FUNCTION IF EXISTS public.f_unaccent(text);
+            DROP FUNCTION IF EXISTS public.immutable_unaccent(regdictionary, text);
+            """,
+        ),
+    ]
diff --git a/src/backend/core/tests/test_api_users.py b/src/backend/core/tests/test_api_users.py
index a0a4355280..926e731bd4 100644
--- a/src/backend/core/tests/test_api_users.py
+++ b/src/backend/core/tests/test_api_users.py
@@ -76,6 +76,131 @@ def test_api_users_list_query_email():
     assert user_ids == []
 
 
+def test_api_users_list_query_email_with_internationalized_domain_names():
+    """
+    Authenticated users should be able to list users and filter by email.
+    It should work even if the email address contains an internationalized domain name.
+    """
+    user = factories.UserFactory()
+
+    client = APIClient()
+    client.force_login(user)
+
+    jean = factories.UserFactory(email="jean.martin@éducation.fr")
+    marie = factories.UserFactory(email="marie.durand@education.fr")
+    kurokawa = factories.UserFactory(email="contact@黒川.日本")
+
+    response = client.get("/api/v1.0/users/?q=jean.martin@education.fr")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(jean.id)]
+
+    response = client.get("/api/v1.0/users/?q=jean.martin@éducation.fr")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(jean.id)]
+
+    response = client.get("/api/v1.0/users/?q=marie.durand@education.fr")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(marie.id)]
+
+    response = client.get("/api/v1.0/users/?q=marie.durand@éducation.fr")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(marie.id)]
+
+    response = client.get("/api/v1.0/users/?q=contact@黒川.日本")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(kurokawa.id)]
+
+
+def test_api_users_list_query_full_name():
+    """
+    Authenticated users should be able to list users and filter by full name.
+    Only results with a Trigram similarity greater than 0.2 with the query should be returned.
+    """
+    user = factories.UserFactory()
+
+    client = APIClient()
+    client.force_login(user)
+
+    dave = factories.UserFactory(email="contact@work.com", full_name="David Bowman")
+
+    response = client.get(
+        "/api/v1.0/users/?q=David",
+    )
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=Bowman")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=bowman")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=BOWMAN")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=BoWmAn")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(dave.id)]
+
+    response = client.get("/api/v1.0/users/?q=Bovin")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == []
+
+
+def test_api_users_list_query_accented_full_name():
+    """
+    Authenticated users should be able to list users and filter by full name with accents.
+    Only results with a Trigram similarity greater than 0.2 with the query should be returned.
+    """
+    user = factories.UserFactory()
+
+    client = APIClient()
+    client.force_login(user)
+
+    fred = factories.UserFactory(
+        email="contact@work.com", full_name="Frédérique Lefèvre"
+    )
+
+    response = client.get("/api/v1.0/users/?q=Frédérique")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(fred.id)]
+
+    response = client.get("/api/v1.0/users/?q=Frederique")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(fred.id)]
+
+    response = client.get("/api/v1.0/users/?q=Lefèvre")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(fred.id)]
+
+    response = client.get("/api/v1.0/users/?q=Lefevre")
+    assert response.status_code == 200
+    user_ids = [user["id"] for user in response.json()]
+    assert user_ids == [str(fred.id)]
+
+    response = client.get("/api/v1.0/users/?q=François Lorfebvre")
+    assert response.status_code == 200
+    users = [user["full_name"] for user in response.json()]
+    assert users == []
+
+
 def test_api_users_list_limit(settings):
     """
     Authenticated users should be able to list users and the number of results