From 63e02688886d1c31282b46a699df229d7d33ebba Mon Sep 17 00:00:00 2001 From: alesan99 Date: Mon, 19 Jan 2026 13:33:10 -0600 Subject: [PATCH 1/4] WIP optimize tree creation --- specifyweb/backend/trees/defaults.py | 106 +++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 16 deletions(-) diff --git a/specifyweb/backend/trees/defaults.py b/specifyweb/backend/trees/defaults.py index 62d96ebbb3b..4b36655f170 100644 --- a/specifyweb/backend/trees/defaults.py +++ b/specifyweb/backend/trees/defaults.py @@ -124,20 +124,95 @@ def __init__(self, tree_type: str, tree_name: str): self.tree_def_model, self.tree_rank_model, self.tree_node_model = get_models(tree_type) self.tree_def = self.tree_def_model.objects.get(name=tree_name) - self.tree_def_item_map = self.create_rank_map() + + self.create_rank_map() self.root_parent = self.tree_node_model.objects.filter( definitionitem__rankid=0, definition=self.tree_def ).first() + self.counter = 0 + self.batch_size = 1000 + def create_rank_map(self): """Rank lookup map to reduce queries""" - return { - rank.name: rank - for rank in self.tree_rank_model.objects.filter(treedef=self.tree_def) - } + ranks = list(self.tree_rank_model.objects.filter(treedef=self.tree_def)) + self.tree_def_item_map = {rank.name: rank for rank in ranks} + # Buffers for batches + self.rankid_map = {rank.rankid: rank for rank in ranks} + self.buffers = {rank.rankid: {} for rank in ranks} + + def add_node_to_buffer(self, node, rank_id, row_id): + """Add node to the current batch of nodes to be created""" + if rank_id not in self.buffers: + self.buffers[rank_id] = {} + self.buffers[rank_id][row_id] = node + return node + + def get_node_in_buffer(self, rank_id: int, name: str): + """Get a node if its already in the current batch's buffer. Prevents duplication.""" + buffer = self.buffers.get(rank_id, {}) + for node in buffer.values(): + if node.name == name: + return node + return None + + def flush(self, force=False): + """Flushes this batch's buffer if the batch is complete. Bulk creates the nodes in a complete batch.""" + self.counter += 1 + if not (force or self.counter > self.batch_size): + return + logger.debug(f"Batch creating {self.batch_size} rows.") + + created_map: Dict[int, Dict[str, Any]] = {} + + # Go through ranks in ascending order and bulk create nodes + ordered_rank_ids = sorted(self.buffers.keys()) + for rank_id in ordered_rank_ids: + logger.debug(f"On rank {rank_id}") + buffer = self.buffers.get(rank_id, {}) + + rank = self.rankid_map.get(rank_id) + if rank is None: + # Can't create nodes because this rank doesn't exist + # TODO: Make sure that this works correctly (parenting might get broken) + continue -def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: dict[str, RankMappingConfiguration]): + nodes_to_create = [] + # Update the nodes' parents to a saved version of their parents + for row_id, node in list(buffer.items()): + parent = getattr(node, 'parent', None) + if parent is not None and getattr(parent, 'pk', None) is None: + saved_parent = created_map.get(parent.rankid, {}).get(parent.name) + # Handle root + if not saved_parent and parent.name == getattr(self.root_parent, 'name', None): + saved_parent = self.root_parent + if saved_parent: + node.parent = saved_parent + + # Create node if its parent has been created + if getattr(node.parent, 'pk', None) is not None: + nodes_to_create.append(node) + else: + logger.warning(f"Could not create {node.name} because a valid parent could not be resolved.") + + if nodes_to_create: + self.tree_node_model.objects.bulk_create(nodes_to_create, ignore_conflicts=True) + + # Store which nodes were created in this batch + created_names = [n.name for n in nodes_to_create] + created_nodes = self.tree_node_model.objects.filter( + definition=self.tree_def, + definitionitem=rank, + name__in=created_names + ) + created_map[rank_id] = {n.name: n for n in created_nodes} + + self.buffers[rank_id] = {} + + self.counter = 0 + +def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: dict[str, RankMappingConfiguration], row_id: int): """ Given one CSV row and a column mapping / rank configuration dictionary, walk through the 'ranks' in order, creating or updating each tree record and linking @@ -175,14 +250,10 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di continue # Create the node at this rank if it isn't already there. - obj = tree_node_model.objects.filter( - name=record_name, - fullname=record_name, - definition=tree_def, - definitionitem=tree_def_item, - parent=parent, - ).first() - if obj is None: + buffered = context.get_node_in_buffer(tree_def_item.rankid, record_name) + if buffered is not None: + obj = buffered + else: data = { 'name': record_name, 'fullname': record_name, @@ -193,7 +264,8 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di **defaults } obj = tree_node_model(**data) - obj.save(skip_tree_extras=True) + obj = context.add_node_to_buffer(obj, tree_def_item.rankid, row_id) + # obj.save(skip_tree_extras=True) parent = obj rank_id += 10 @@ -280,8 +352,10 @@ def progress(cur: int, additional_total: int=0) -> None: progress(0, total_rows) for row in stream_csv_from_url(url): - add_default_tree_record(context, row, tree_cfg) + add_default_tree_record(context, row, tree_cfg, current) + context.flush() progress(1, 0) + context.flush(force=True) except Exception as e: if specify_user_id and specify_collection_id: Message.objects.create( From f62b51bbbbf7a07e74df71cfd6a8e1dc911f2a6a Mon Sep 17 00:00:00 2001 From: Caroline D <108160931+CarolineDenis@users.noreply.github.com> Date: Wed, 21 Jan 2026 14:40:09 -0500 Subject: [PATCH 2/4] Fix: Update localization --- .../lib/components/SetupTool/setupResources.ts | 17 ++++++++++------- .../js_src/lib/localization/setupTool.ts | 10 +++------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/specifyweb/frontend/js_src/lib/components/SetupTool/setupResources.ts b/specifyweb/frontend/js_src/lib/components/SetupTool/setupResources.ts index fd6c7163ad9..225b95831e5 100644 --- a/specifyweb/frontend/js_src/lib/components/SetupTool/setupResources.ts +++ b/specifyweb/frontend/js_src/lib/components/SetupTool/setupResources.ts @@ -2,6 +2,7 @@ import type { LocalizedString } from 'typesafe-i18n'; import { formsText } from '../../localization/forms'; import { setupToolText } from '../../localization/setupTool'; +import { statsText } from '../../localization/stats'; import type { RA } from '../../utils/types'; // Default for max field length. @@ -367,17 +368,19 @@ export const resources: RA = [ required: true, default: fullNameDirections[0].value.toString(), }, - // Pre-loading is disabled for now for taxon trees. - // { - // name: 'preload', - // label: setupToolText_preloadTree(), - // type: 'boolean', - // }, + /* + * Pre-loading is disabled for now for taxon trees. + * { + * name: 'preload', + * label: setupToolText_preloadTree(), + * type: 'boolean', + * }, + */ ], }, { resourceName: 'collection', - label: setupToolText.collection(), + label: statsText.collection(), fields: [ { name: 'collectionName', diff --git a/specifyweb/frontend/js_src/lib/localization/setupTool.ts b/specifyweb/frontend/js_src/lib/localization/setupTool.ts index 4f208906262..65db8bb48f8 100644 --- a/specifyweb/frontend/js_src/lib/localization/setupTool.ts +++ b/specifyweb/frontend/js_src/lib/localization/setupTool.ts @@ -127,10 +127,10 @@ export const setupToolText = createDictionary({ 'en-us': 'Full Name Direction', }, preloadTree: { - 'en-us': 'Pre-load Tree' + 'en-us': 'Pre-load Tree', }, preloadTreeDescription: { - 'en-us': 'Download default records for this tree.' + 'en-us': 'Download default records for this tree.', }, // Storage Tree @@ -169,9 +169,6 @@ export const setupToolText = createDictionary({ }, // Collection - collection: { - 'en-us': 'Collection', - }, collectionName: { 'en-us': 'Collection Name', }, @@ -215,8 +212,7 @@ export const setupToolText = createDictionary({ 'en-us': 'Last Name', }, specifyUserLastNameDescription: { - 'en-us': - 'The last name of the agent associated with the account.', + 'en-us': 'The last name of the agent associated with the account.', }, taxonTreeSetUp: { From 36c8cdccf1b1b3ab22de47cb7b50e6c5855aac30 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Wed, 21 Jan 2026 15:06:51 -0600 Subject: [PATCH 3/4] correct parent ranks when creating a new tree --- specifyweb/backend/trees/defaults.py | 147 +++++++++++++++++++-------- specifyweb/specify/models.py | 2 +- 2 files changed, 103 insertions(+), 46 deletions(-) diff --git a/specifyweb/backend/trees/defaults.py b/specifyweb/backend/trees/defaults.py index 4b36655f170..64b5c88f158 100644 --- a/specifyweb/backend/trees/defaults.py +++ b/specifyweb/backend/trees/defaults.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, List, Dict, Iterator, Optional, TypedDict, NotRequired +from typing import Any, Callable, List, Dict, Iterator, Optional, TypedDict, Tuple, NotRequired import json import requests import csv @@ -63,7 +63,7 @@ def initialize_default_tree(tree_type: str, discipline_or_institution, tree_name tree_rank_model( treedef=tree_def, name=rank.get('name'), - title=rank.get('title') or rank.get('name').title(), + title=(rank.get('title') or rank.get('name').title()), rankid=int(rank.get('rank', rank_id)), isenforced=rank.get('enforced', True), isinfullname=rank.get('infullname', False), @@ -71,10 +71,23 @@ def initialize_default_tree(tree_type: str, discipline_or_institution, tree_name ) ) rank_id += 10 + if treedefitems_bulk: tree_rank_model.objects.bulk_create(treedefitems_bulk, ignore_conflicts=False) # Create a root node + created_items = list( + tree_rank_model.objects.filter(treedef=tree_def).order_by('rankid') + ) + + parent_item = None + for item in created_items: + item.parent = parent_item + parent_item = item + + tree_rank_model.objects.bulk_update(created_items, ['parent']) + + # Create a root node for non-taxon trees # New taxon trees are expected to be empty if tree_type != 'taxon': create_default_root(tree_def, tree_type) @@ -127,10 +140,23 @@ def __init__(self, tree_type: str, tree_name: str): self.create_rank_map() self.root_parent = self.tree_node_model.objects.filter( - definitionitem__rankid=0, + definitionitem__rankid=0, definition=self.tree_def ).first() + # Ensure a real root exists for new trees, otherwise everything starts with parent=None + if self.root_parent is None: + root_rank = self.rankid_map.get(0) + if root_rank is not None: + self.root_parent = self.tree_node_model( + name=self.tree_def.name, + fullname=self.tree_def.name, + definition=self.tree_def, + definitionitem=root_rank, + parent=None, + ) + self.root_parent.save(skip_tree_extras=True) + self.counter = 0 self.batch_size = 1000 @@ -138,75 +164,108 @@ def create_rank_map(self): """Rank lookup map to reduce queries""" ranks = list(self.tree_rank_model.objects.filter(treedef=self.tree_def)) self.tree_def_item_map = {rank.name: rank for rank in ranks} - # Buffers for batches self.rankid_map = {rank.rankid: rank for rank in ranks} - self.buffers = {rank.rankid: {} for rank in ranks} - def add_node_to_buffer(self, node, rank_id, row_id): - """Add node to the current batch of nodes to be created""" - if rank_id not in self.buffers: - self.buffers[rank_id] = {} - self.buffers[rank_id][row_id] = node + # buffers[rank_id] = {(rank_id, parent_key, name): node} + self.buffers: Dict[int, Dict[Tuple[int, Tuple[str, Optional[int]], str], Any]] = { + rank.rankid: {} for rank in ranks + } + + def _parent_key(self, parent) -> Tuple[str, Optional[int]]: + if parent is None: + return ("root", None) + pk = getattr(parent, "pk", None) + if pk is not None: + return ("pk", int(pk)) + return ("tmp", id(parent)) + + def add_node_to_buffer(self, node, rank_id: int): + """Add node to current batch buffer, de-duping on (rank_id, parent, name).""" + parent = getattr(node, "parent", None) + key = (rank_id, self._parent_key(parent), node.name) + self.buffers.setdefault(rank_id, {}) + self.buffers[rank_id][key] = node return node - def get_node_in_buffer(self, rank_id: int, name: str): + def get_node_in_buffer(self, rank_id: int, name: str, parent): """Get a node if its already in the current batch's buffer. Prevents duplication.""" - buffer = self.buffers.get(rank_id, {}) - for node in buffer.values(): - if node.name == name: - return node - return None + key = (rank_id, self._parent_key(parent), name) + return self.buffers.get(rank_id, {}).get(key) - def flush(self, force=False): + def flush(self, force: bool = False): """Flushes this batch's buffer if the batch is complete. Bulk creates the nodes in a complete batch.""" self.counter += 1 - if not (force or self.counter > self.batch_size): + if not (force or self.counter >= self.batch_size): return - logger.debug(f"Batch creating {self.batch_size} rows.") - created_map: Dict[int, Dict[str, Any]] = {} - - # Go through ranks in ascending order and bulk create nodes - ordered_rank_ids = sorted(self.buffers.keys()) - for rank_id in ordered_rank_ids: - logger.debug(f"On rank {rank_id}") + created_map: Dict[int, Dict[Tuple[str, Tuple[str, Optional[int]]], Any]] = {} + + # create ranks in ascending order so parents exist before children + for rank_id in sorted(self.buffers.keys()): buffer = self.buffers.get(rank_id, {}) + if not buffer: + continue rank = self.rankid_map.get(rank_id) if rank is None: - # Can't create nodes because this rank doesn't exist - # TODO: Make sure that this works correctly (parenting might get broken) + self.buffers[rank_id] = {} continue nodes_to_create = [] - # Update the nodes' parents to a saved version of their parents - for row_id, node in list(buffer.items()): - parent = getattr(node, 'parent', None) - if parent is not None and getattr(parent, 'pk', None) is None: - saved_parent = created_map.get(parent.rankid, {}).get(parent.name) - # Handle root - if not saved_parent and parent.name == getattr(self.root_parent, 'name', None): - saved_parent = self.root_parent - if saved_parent: + for (node_rankid, node_parent_key, node_name), node in list(buffer.items()): + parent = getattr(node, "parent", None) + + # Resolve unsaved parent from nodes created earlier in this batch + if parent is not None and getattr(parent, "pk", None) is None: + parent_rankid = getattr(getattr(parent, "definitionitem", None), "rankid", None) + + saved_parent = None + if parent_rankid is not None: + parent_lookup_key = ( + parent.name, + self._parent_key(getattr(parent, "parent", None)) + ) + saved_parent = created_map.get(parent_rankid, {}).get(parent_lookup_key) + + # Fallback to real root + if saved_parent is None and self.root_parent is not None: + if parent.name == self.root_parent.name: + saved_parent = self.root_parent + + if saved_parent is not None: node.parent = saved_parent - # Create node if its parent has been created - if getattr(node.parent, 'pk', None) is not None: + # Only root nodes may have parent=None + if node_rankid == 0: nodes_to_create.append(node) else: - logger.warning(f"Could not create {node.name} because a valid parent could not be resolved.") + if getattr(getattr(node, "parent", None), "pk", None) is not None: + nodes_to_create.append(node) + else: + logger.warning( + f"Skipping {node.name} (rank {node_rankid}) – parent could not be resolved" + ) if nodes_to_create: self.tree_node_model.objects.bulk_create(nodes_to_create, ignore_conflicts=True) - # Store which nodes were created in this batch + # Load the saved copies so children can point to them created_names = [n.name for n in nodes_to_create] created_nodes = self.tree_node_model.objects.filter( definition=self.tree_def, definitionitem=rank, name__in=created_names ) - created_map[rank_id] = {n.name: n for n in created_nodes} + + # Map by parent_key and name + rank_created: Dict[Tuple[str, Tuple[str, Optional[int]]], Any] = {} + for n in created_nodes: + key = ( + n.name, + self._parent_key(getattr(n, "parent", None)) + ) + rank_created[key] = n + created_map[rank_id] = rank_created self.buffers[rank_id] = {} @@ -250,7 +309,7 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di continue # Create the node at this rank if it isn't already there. - buffered = context.get_node_in_buffer(tree_def_item.rankid, record_name) + buffered = context.get_node_in_buffer(tree_def_item.rankid, record_name, parent) if buffered is not None: obj = buffered else: @@ -260,12 +319,10 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di 'definition': tree_def, 'definitionitem': tree_def_item, 'parent': parent, - 'rankid': tree_def_item.rankid, **defaults } obj = tree_node_model(**data) - obj = context.add_node_to_buffer(obj, tree_def_item.rankid, row_id) - # obj.save(skip_tree_extras=True) + obj = context.add_node_to_buffer(obj, tree_def_item.rankid) parent = obj rank_id += 10 diff --git a/specifyweb/specify/models.py b/specifyweb/specify/models.py index 03b9b7b327d..05518bee121 100644 --- a/specifyweb/specify/models.py +++ b/specifyweb/specify/models.py @@ -7164,7 +7164,7 @@ class Taxontreedefitem(model_extras.Taxontreedefitem): # Relationships: Many-to-One createdbyagent = models.ForeignKey('Agent', db_column='CreatedByAgentID', related_name='+', null=True, on_delete=protect_with_blockers) modifiedbyagent = models.ForeignKey('Agent', db_column='ModifiedByAgentID', related_name='+', null=True, on_delete=protect_with_blockers) - parent = models.ForeignKey('TaxonTreeDefItem', db_column='ParentItemID', related_name='children', null=True, on_delete=models.DO_NOTHING) + parent = models.ForeignKey('TaxonTreeDefItem', db_column='ParentItemID', related_name='children', null=True, on_delete=models.CASCADE) treedef = models.ForeignKey('TaxonTreeDef', db_column='TaxonTreeDefID', related_name='treedefitems', null=False, on_delete=models.CASCADE) class Meta: From 57b3adc461c91dcabea6e3362f0121ddac89cb82 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 22 Jan 2026 15:42:31 -0600 Subject: [PATCH 4/4] fix tree rank issue --- specifyweb/backend/trees/defaults.py | 118 +++++++++++++-------------- 1 file changed, 57 insertions(+), 61 deletions(-) diff --git a/specifyweb/backend/trees/defaults.py b/specifyweb/backend/trees/defaults.py index 64b5c88f158..7b9f158da8b 100644 --- a/specifyweb/backend/trees/defaults.py +++ b/specifyweb/backend/trees/defaults.py @@ -143,6 +143,9 @@ def __init__(self, tree_type: str, tree_name: str): definitionitem__rankid=0, definition=self.tree_def ).first() + + if self.root_parent is not None and not hasattr(self.root_parent, "_tmp_path"): + self.root_parent._tmp_path = f"/{self.root_parent.name}" # Ensure a real root exists for new trees, otherwise everything starts with parent=None if self.root_parent is None: @@ -154,8 +157,10 @@ def __init__(self, tree_type: str, tree_name: str): definition=self.tree_def, definitionitem=root_rank, parent=None, + rankid=root_rank.rankid, ) self.root_parent.save(skip_tree_extras=True) + self.root_parent._tmp_path = f"/{self.root_parent.name}" self.counter = 0 self.batch_size = 1000 @@ -165,43 +170,35 @@ def create_rank_map(self): ranks = list(self.tree_rank_model.objects.filter(treedef=self.tree_def)) self.tree_def_item_map = {rank.name: rank for rank in ranks} self.rankid_map = {rank.rankid: rank for rank in ranks} + self.buffers: Dict[int, Dict[str, Any]] = {rank.rankid: {} for rank in ranks} - # buffers[rank_id] = {(rank_id, parent_key, name): node} - self.buffers: Dict[int, Dict[Tuple[int, Tuple[str, Optional[int]], str], Any]] = { - rank.rankid: {} for rank in ranks - } - - def _parent_key(self, parent) -> Tuple[str, Optional[int]]: - if parent is None: - return ("root", None) - pk = getattr(parent, "pk", None) - if pk is not None: - return ("pk", int(pk)) - return ("tmp", id(parent)) - - def add_node_to_buffer(self, node, rank_id: int): - """Add node to current batch buffer, de-duping on (rank_id, parent, name).""" - parent = getattr(node, "parent", None) - key = (rank_id, self._parent_key(parent), node.name) + def get_node_in_buffer(self, rank_id: int, path: str): + """Get a node if it's already in the current batch's buffer.""" + return self.buffers.get(rank_id, {}).get(path) + + def add_node_to_buffer(self, node, rank_id: int, path: str): + """Add node to current batch buffer, de-duping on a stable path.""" self.buffers.setdefault(rank_id, {}) - self.buffers[rank_id][key] = node + node._tmp_path = path # stable across saved/unsaved + self.buffers[rank_id][path] = node return node - def get_node_in_buffer(self, rank_id: int, name: str, parent): - """Get a node if its already in the current batch's buffer. Prevents duplication.""" - key = (rank_id, self._parent_key(parent), name) - return self.buffers.get(rank_id, {}).get(key) - def flush(self, force: bool = False): """Flushes this batch's buffer if the batch is complete. Bulk creates the nodes in a complete batch.""" self.counter += 1 if not (force or self.counter >= self.batch_size): return - created_map: Dict[int, Dict[Tuple[str, Tuple[str, Optional[int]]], Any]] = {} + created_map: Dict[str, Any] = {} + + # Ensure root has a stable path key for this run + if self.root_parent is not None and not hasattr(self.root_parent, "_tmp_path"): + self.root_parent._tmp_path = f"/{self.root_parent.name}" - # create ranks in ascending order so parents exist before children for rank_id in sorted(self.buffers.keys()): + if rank_id == 0: + self.buffers[rank_id] = {} + continue buffer = self.buffers.get(rank_id, {}) if not buffer: continue @@ -212,20 +209,16 @@ def flush(self, force: bool = False): continue nodes_to_create = [] - for (node_rankid, node_parent_key, node_name), node in list(buffer.items()): + for path, node in list(buffer.items()): parent = getattr(node, "parent", None) - # Resolve unsaved parent from nodes created earlier in this batch + # Resolve parent if it's unsaved if parent is not None and getattr(parent, "pk", None) is None: - parent_rankid = getattr(getattr(parent, "definitionitem", None), "rankid", None) - - saved_parent = None - if parent_rankid is not None: - parent_lookup_key = ( - parent.name, - self._parent_key(getattr(parent, "parent", None)) - ) - saved_parent = created_map.get(parent_rankid, {}).get(parent_lookup_key) + parent_path = getattr(parent, "_tmp_path", None) + if parent_path: + saved_parent = created_map.get(parent_path) + else: + saved_parent = None # Fallback to real root if saved_parent is None and self.root_parent is not None: @@ -235,37 +228,34 @@ def flush(self, force: bool = False): if saved_parent is not None: node.parent = saved_parent - # Only root nodes may have parent=None - if node_rankid == 0: + # Non-root nodes must have a saved parent + if getattr(getattr(node, "parent", None), "pk", None) is not None: nodes_to_create.append(node) else: - if getattr(getattr(node, "parent", None), "pk", None) is not None: - nodes_to_create.append(node) - else: - logger.warning( - f"Skipping {node.name} (rank {node_rankid}) – parent could not be resolved" - ) + logger.warning( + f"Skipping {node.name} (rank {rank_id}) – parent could not be resolved" + ) if nodes_to_create: self.tree_node_model.objects.bulk_create(nodes_to_create, ignore_conflicts=True) - # Load the saved copies so children can point to them - created_names = [n.name for n in nodes_to_create] - created_nodes = self.tree_node_model.objects.filter( + # Re-fetch nodes created for this rank in this flush. + pairs = {(n.name, n.parent_id) for n in nodes_to_create} + + q = self.tree_node_model.objects.filter( definition=self.tree_def, definitionitem=rank, - name__in=created_names + name__in=[p[0] for p in pairs], ) + fetched = [n for n in q if (n.name, n.parent_id) in pairs] + fetched_map = {(n.name, n.parent_id): n for n in fetched} - # Map by parent_key and name - rank_created: Dict[Tuple[str, Tuple[str, Optional[int]]], Any] = {} - for n in created_nodes: - key = ( - n.name, - self._parent_key(getattr(n, "parent", None)) - ) - rank_created[key] = n - created_map[rank_id] = rank_created + # Populate created_map using each node's path + for n in nodes_to_create: + key = (n.name, n.parent_id) + saved = fetched_map.get(key) + if saved is not None: + created_map[getattr(n, "_tmp_path")] = saved self.buffers[rank_id] = {} @@ -280,7 +270,6 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di tree_node_model = context.tree_node_model tree_def = context.tree_def parent = context.root_parent - rank_id = 10 for rank_mapping in tree_cfg['ranks']: rank_name = rank_mapping['name'] @@ -309,7 +298,14 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di continue # Create the node at this rank if it isn't already there. - buffered = context.get_node_in_buffer(tree_def_item.rankid, record_name, parent) + parent_path = getattr(parent, "_tmp_path", None) + if parent_path is None: + parent_path = f"/{getattr(parent, 'name', 'ROOT')}" + setattr(parent, "_tmp_path", parent_path) + + path = f"{parent_path}/{record_name}" + + buffered = context.get_node_in_buffer(tree_def_item.rankid, path) if buffered is not None: obj = buffered else: @@ -319,13 +315,13 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di 'definition': tree_def, 'definitionitem': tree_def_item, 'parent': parent, + 'rankid': tree_def_item.rankid, **defaults } obj = tree_node_model(**data) - obj = context.add_node_to_buffer(obj, tree_def_item.rankid) + obj = context.add_node_to_buffer(obj, tree_def_item.rankid, path) parent = obj - rank_id += 10 @app.task(base=LogErrorsTask, bind=True) def create_default_tree_task(self, url: str, discipline_id: int, tree_discipline_name: str, specify_collection_id: Optional[int],