From 390a08ef397f718feb4d5ae23d9f5a60df905021 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Mon, 10 Nov 2025 16:11:23 -0500 Subject: [PATCH 1/3] adding all non-course indexes to command: --- learning_resources_search/tasks.py | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/learning_resources_search/tasks.py b/learning_resources_search/tasks.py index 0685518561..93c2926b46 100644 --- a/learning_resources_search/tasks.py +++ b/learning_resources_search/tasks.py @@ -37,14 +37,9 @@ from learning_resources_search.constants import ( CONTENT_FILE_TYPE, COURSE_TYPE, - LEARNING_PATH_TYPE, + LEARNING_RESOURCE_TYPES, PERCOLATE_INDEX_TYPE, - PODCAST_EPISODE_TYPE, - PODCAST_TYPE, - PROGRAM_TYPE, SEARCH_CONN_EXCEPTIONS, - VIDEO_PLAYLIST_TYPE, - VIDEO_TYPE, IndexestoUpdate, ) from learning_resources_search.exceptions import ReindexError, RetryError @@ -624,14 +619,7 @@ def start_recreate_index(self, indexes, remove_existing_reindexing_tags): ) ] - for resource_type in [ - PROGRAM_TYPE, - PODCAST_TYPE, - PODCAST_EPISODE_TYPE, - LEARNING_PATH_TYPE, - VIDEO_TYPE, - VIDEO_PLAYLIST_TYPE, - ]: + for resource_type in set(LEARNING_RESOURCE_TYPES) - {COURSE_TYPE}: if resource_type in indexes: index_tasks = index_tasks + [ index_learning_resources.si( @@ -692,14 +680,7 @@ def start_update_index(self, indexes, etl_source): if PERCOLATE_INDEX_TYPE in indexes: index_tasks = index_tasks + get_update_percolator_tasks() - for resource_type in [ - PROGRAM_TYPE, - PODCAST_TYPE, - PODCAST_EPISODE_TYPE, - LEARNING_PATH_TYPE, - VIDEO_TYPE, - VIDEO_PLAYLIST_TYPE, - ]: + for resource_type in set(LEARNING_RESOURCE_TYPES) - {COURSE_TYPE}: if resource_type in indexes: index_tasks = index_tasks + get_update_learning_resource_tasks( resource_type From d73b6f4b6d962b66ff4c508358d8e72f50fac1eb Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Tue, 11 Nov 2025 13:08:29 -0500 Subject: [PATCH 2/3] fixing vector search indexes --- vector_search/tasks.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/vector_search/tasks.py b/vector_search/tasks.py index b9e724e695..b5f93cd74d 100644 --- a/vector_search/tasks.py +++ b/vector_search/tasks.py @@ -15,17 +15,10 @@ ) from learning_resources.utils import load_course_blocklist from learning_resources_search.constants import ( - ARTICLE_TYPE, CONTENT_FILE_TYPE, COURSE_TYPE, - LEARNING_PATH_TYPE, LEARNING_RESOURCE_TYPES, - PODCAST_EPISODE_TYPE, - PODCAST_TYPE, - PROGRAM_TYPE, SEARCH_CONN_EXCEPTIONS, - VIDEO_PLAYLIST_TYPE, - VIDEO_TYPE, ) from learning_resources_search.exceptions import RetryError from learning_resources_search.tasks import wrap_retry_exception @@ -173,15 +166,7 @@ def start_embed_resources(self, indexes, skip_content_files, overwrite): chunk_size=settings.QDRANT_CHUNK_SIZE, ) ] - for resource_type in [ - PROGRAM_TYPE, - PODCAST_TYPE, - PODCAST_EPISODE_TYPE, - LEARNING_PATH_TYPE, - VIDEO_TYPE, - VIDEO_PLAYLIST_TYPE, - ARTICLE_TYPE, - ]: + for resource_type in set(LEARNING_RESOURCE_TYPES) - {COURSE_TYPE}: if resource_type in indexes: for ids in chunks( LearningResource.objects.filter( From edcdbc7b645ca3ad708f91c99912aca2f8d18168 Mon Sep 17 00:00:00 2001 From: shankar ambady Date: Wed, 12 Nov 2025 10:17:56 -0500 Subject: [PATCH 3/3] revised tests to ensure we catch all resource types --- learning_resources_search/tasks_test.py | 8 ++++---- vector_search/tasks_test.py | 14 ++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/learning_resources_search/tasks_test.py b/learning_resources_search/tasks_test.py index bd106dbddd..148fab0af7 100644 --- a/learning_resources_search/tasks_test.py +++ b/learning_resources_search/tasks_test.py @@ -134,7 +134,7 @@ def test_system_exit_retry(mocker): @pytest.mark.parametrize( "indexes", - [["course"], ["program"]], + [["course"], ["program"], list(LEARNING_RESOURCE_TYPES)], ) def test_start_recreate_index(mocker, mocked_celery, user, indexes): """ @@ -252,8 +252,7 @@ def test_start_recreate_index(mocker, mocked_celery, user, indexes): course.learning_resource_id, index_types=IndexestoUpdate.reindexing_index.value, ) - - if PROGRAM_TYPE in indexes: + if indexes == [PROGRAM_TYPE]: assert index_learning_resources_mock.si.call_count == 2 index_learning_resources_mock.si.assert_any_call( [programs[0].learning_resource_id, programs[1].learning_resource_id], @@ -462,6 +461,7 @@ def test_bulk_deindex_learning_resources(mocker, with_error): [ (["program"], None), (["course, content_file"], None), + (list(LEARNING_RESOURCE_TYPES), None), (["course"], ETLSource.xpro.value), (["content_file"], ETLSource.xpro.value), (["content_file"], ETLSource.oll.value), @@ -588,7 +588,7 @@ def test_start_update_index(mocker, mocked_celery, indexes, etl_source, settings COURSE_TYPE, ) - if PROGRAM_TYPE in indexes: + if indexes == [PROGRAM_TYPE]: assert index_learning_resources_mock.si.call_count == 2 index_learning_resources_mock.si.assert_any_call( [programs[0].learning_resource_id, programs[1].learning_resource_id], diff --git a/vector_search/tasks_test.py b/vector_search/tasks_test.py index 1da0c61f65..4017ea6cc6 100644 --- a/vector_search/tasks_test.py +++ b/vector_search/tasks_test.py @@ -20,6 +20,7 @@ from learning_resources.models import ContentFile, LearningResource from learning_resources_search.constants import ( COURSE_TYPE, + LEARNING_RESOURCE_TYPES, ) from main.utils import now_in_utc from vector_search.tasks import ( @@ -34,10 +35,7 @@ pytestmark = pytest.mark.django_db -@pytest.mark.parametrize( - "index", - ["course", "program"], -) +@pytest.mark.parametrize("index", list(LEARNING_RESOURCE_TYPES)) def test_start_embed_resources(mocker, mocked_celery, index): """ start_embed_resources should generate embeddings for each resource type @@ -64,11 +62,11 @@ def test_start_embed_resources(mocker, mocked_celery, index): ) resource_ids = [c.pk for c in courses] else: - programs = sorted( - ProgramFactory.create_batch(4), - key=lambda program: program.learning_resource_id, + resources = sorted( + LearningResourceFactory.create_batch(4, resource_type=index), + key=lambda resource: resource.id, ) - resource_ids = [p.pk for p in programs] + resource_ids = [p.pk for p in resources] generate_embeddings_mock = mocker.patch( "vector_search.tasks.generate_embeddings", autospec=True