Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 148 additions & 21 deletions specifyweb/backend/trees/defaults.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Callable, List, Dict, Iterator, Optional, TypedDict, NotRequired
from typing import Any, Callable, List, Dict, Iterator, Optional, TypedDict, Tuple, NotRequired
import json
import requests
import csv
Expand Down Expand Up @@ -63,18 +63,31 @@ def initialize_default_tree(tree_type: str, discipline_or_institution, tree_name
tree_rank_model(
treedef=tree_def,
name=rank.get('name'),
title=rank.get('title') or rank.get('name').title(),
title=(rank.get('title') or rank.get('name').title()),
rankid=int(rank.get('rank', rank_id)),
isenforced=rank.get('enforced', True),
isinfullname=rank.get('infullname', False),
fullnameseparator=rank.get('fullnameseparator', ' ')
)
)
rank_id += 10

if treedefitems_bulk:
tree_rank_model.objects.bulk_create(treedefitems_bulk, ignore_conflicts=False)

# Create a root node
created_items = list(
tree_rank_model.objects.filter(treedef=tree_def).order_by('rankid')
)

parent_item = None
for item in created_items:
item.parent = parent_item
parent_item = item

tree_rank_model.objects.bulk_update(created_items, ['parent'])

# Create a root node for non-taxon trees
# New taxon trees are expected to be empty
if tree_type != 'taxon':
create_default_root(tree_def, tree_type)
Expand Down Expand Up @@ -124,20 +137,131 @@ def __init__(self, tree_type: str, tree_name: str):
self.tree_def_model, self.tree_rank_model, self.tree_node_model = get_models(tree_type)

self.tree_def = self.tree_def_model.objects.get(name=tree_name)
self.tree_def_item_map = self.create_rank_map()

self.create_rank_map()
self.root_parent = self.tree_node_model.objects.filter(
definitionitem__rankid=0,
definitionitem__rankid=0,
definition=self.tree_def
).first()

if self.root_parent is not None and not hasattr(self.root_parent, "_tmp_path"):
self.root_parent._tmp_path = f"/{self.root_parent.name}"

# Ensure a real root exists for new trees, otherwise everything starts with parent=None
if self.root_parent is None:
root_rank = self.rankid_map.get(0)
if root_rank is not None:
self.root_parent = self.tree_node_model(
name=self.tree_def.name,
fullname=self.tree_def.name,
definition=self.tree_def,
definitionitem=root_rank,
parent=None,
rankid=root_rank.rankid,
)
self.root_parent.save(skip_tree_extras=True)
self.root_parent._tmp_path = f"/{self.root_parent.name}"

self.counter = 0
self.batch_size = 1000

def create_rank_map(self):
"""Rank lookup map to reduce queries"""
return {
rank.name: rank
for rank in self.tree_rank_model.objects.filter(treedef=self.tree_def)
}
ranks = list(self.tree_rank_model.objects.filter(treedef=self.tree_def))
self.tree_def_item_map = {rank.name: rank for rank in ranks}
self.rankid_map = {rank.rankid: rank for rank in ranks}
self.buffers: Dict[int, Dict[str, Any]] = {rank.rankid: {} for rank in ranks}

def get_node_in_buffer(self, rank_id: int, path: str):
    """Look up a pending node in the current batch's buffers.

    Args:
        rank_id: Rank id whose buffer should be searched.
        path: Path key the node was buffered under.

    Returns:
        The buffered node, or None when the rank id has no buffer or the
        buffer has no entry for ``path``.
    """
    # An unknown rank id is treated as an empty buffer; nothing is inserted.
    rank_buffer = self.buffers.get(rank_id, {})
    return rank_buffer.get(path)

def add_node_to_buffer(self, node, rank_id: int, path: str):
    """Add a node to the current batch buffer, de-duplicating on its path.

    The path is also stored on the node as ``_tmp_path`` so the node can be
    identified by the same key whether or not it has been saved yet.

    Args:
        node: The node object to buffer.
        rank_id: Rank id of the buffer the node belongs to; the buffer is
            created if it does not exist yet.
        path: Key for the node within that buffer. An existing entry under
            the same path is replaced.

    Returns:
        The same ``node`` object that was passed in.
    """
    # setdefault returns the (possibly new) per-rank dict, so one lookup suffices.
    rank_buffer = self.buffers.setdefault(rank_id, {})
    node._tmp_path = path  # stable across saved/unsaved
    rank_buffer[path] = node
    return node

def flush(self, force: bool = False):
    """Count one processed row and bulk-create the buffered nodes when a batch is due.

    Every call increments ``self.counter``. Nothing else happens unless
    ``force`` is true or the counter has reached ``self.batch_size``. When a
    flush does run, the buffers are walked in ascending rank id order: nodes
    whose parent is still unsaved are re-pointed at the saved copy of that
    parent (found via the parent's ``_tmp_path``), the resolvable nodes are
    bulk-created, and the rows are read back so later rank ids can use them as
    parents. Processed buffers are emptied and the counter is reset to 0.

    Args:
        force: Flush even when the batch is not full yet.
    """
    self.counter += 1
    if not force and self.counter < self.batch_size:
        return

    # Maps a node's _tmp_path to the saved row read back after bulk_create.
    # It is local to this call, so it only knows nodes created by this flush.
    created_map: Dict[str, Any] = {}

    # Ensure root has a stable path key for this run
    root = self.root_parent
    if root is not None and not hasattr(root, "_tmp_path"):
        root._tmp_path = f"/{root.name}"

    for rank_id in sorted(self.buffers):
        # Rank id 0 entries are never bulk-created here; just discard them.
        if rank_id == 0:
            self.buffers[rank_id] = {}
            continue

        buffer = self.buffers.get(rank_id, {})
        if not buffer:
            continue

        rank = self.rankid_map.get(rank_id)
        if rank is None:
            # No rank object is known for this rank id; drop the buffer.
            self.buffers[rank_id] = {}
            continue

        nodes_to_create = []
        for node in buffer.values():
            parent = getattr(node, "parent", None)

            # Resolve parent if it's unsaved
            if parent is not None and getattr(parent, "pk", None) is None:
                parent_path = getattr(parent, "_tmp_path", None)
                saved_parent = created_map.get(parent_path) if parent_path else None

                # Fallback to real root (matched by name only)
                if (
                    saved_parent is None
                    and root is not None
                    and parent.name == root.name
                ):
                    saved_parent = root

                if saved_parent is not None:
                    node.parent = saved_parent

            # Non-root nodes must have a saved parent
            if getattr(getattr(node, "parent", None), "pk", None) is not None:
                nodes_to_create.append(node)
            else:
                logger.warning(
                    "Skipping %s (rank %s) – parent could not be resolved",
                    node.name,
                    rank_id,
                )

        if nodes_to_create:
            self.tree_node_model.objects.bulk_create(nodes_to_create, ignore_conflicts=True)

            # Re-fetch nodes created for this rank in this flush, matching
            # them on (name, parent_id).
            pairs = {(n.name, n.parent_id) for n in nodes_to_create}
            candidates = self.tree_node_model.objects.filter(
                definition=self.tree_def,
                definitionitem=rank,
                name__in=[name for name, _ in pairs],
            )
            fetched_map = {
                (n.name, n.parent_id): n
                for n in candidates
                if (n.name, n.parent_id) in pairs
            }

            # Populate created_map using each node's path
            for n in nodes_to_create:
                saved = fetched_map.get((n.name, n.parent_id))
                if saved is not None:
                    created_map[n._tmp_path] = saved

        self.buffers[rank_id] = {}

    self.counter = 0

def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: dict[str, RankMappingConfiguration], row_id: int):
"""
Given one CSV row and a column mapping / rank configuration dictionary,
walk through the 'ranks' in order, creating or updating each tree record and linking
Expand All @@ -146,7 +270,6 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di
tree_node_model = context.tree_node_model
tree_def = context.tree_def
parent = context.root_parent
rank_id = 10

for rank_mapping in tree_cfg['ranks']:
rank_name = rank_mapping['name']
Expand Down Expand Up @@ -175,14 +298,17 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di
continue

# Create the node at this rank if it isn't already there.
obj = tree_node_model.objects.filter(
name=record_name,
fullname=record_name,
definition=tree_def,
definitionitem=tree_def_item,
parent=parent,
).first()
if obj is None:
parent_path = getattr(parent, "_tmp_path", None)
if parent_path is None:
parent_path = f"/{getattr(parent, 'name', 'ROOT')}"
setattr(parent, "_tmp_path", parent_path)

path = f"{parent_path}/{record_name}"

buffered = context.get_node_in_buffer(tree_def_item.rankid, path)
if buffered is not None:
obj = buffered
else:
data = {
'name': record_name,
'fullname': record_name,
Expand All @@ -193,10 +319,9 @@ def add_default_tree_record(context: DefaultTreeContext, row: dict, tree_cfg: di
**defaults
}
obj = tree_node_model(**data)
obj.save(skip_tree_extras=True)
obj = context.add_node_to_buffer(obj, tree_def_item.rankid, path)

parent = obj
rank_id += 10

@app.task(base=LogErrorsTask, bind=True)
def create_default_tree_task(self, url: str, discipline_id: int, tree_discipline_name: str, specify_collection_id: Optional[int],
Expand Down Expand Up @@ -280,8 +405,10 @@ def progress(cur: int, additional_total: int=0) -> None:
progress(0, total_rows)

for row in stream_csv_from_url(url):
add_default_tree_record(context, row, tree_cfg)
add_default_tree_record(context, row, tree_cfg, current)
context.flush()
progress(1, 0)
context.flush(force=True)
except Exception as e:
if specify_user_id and specify_collection_id:
Message.objects.create(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { LocalizedString } from 'typesafe-i18n';

import { formsText } from '../../localization/forms';
import { setupToolText } from '../../localization/setupTool';
import { statsText } from '../../localization/stats';
import type { RA } from '../../utils/types';

// Default for max field length.
Expand Down Expand Up @@ -367,17 +368,19 @@ export const resources: RA<ResourceConfig> = [
required: true,
default: fullNameDirections[0].value.toString(),
},
// Pre-loading is disabled for now for taxon trees.
// {
// name: 'preload',
// label: setupToolText_preloadTree(),
// type: 'boolean',
// },
/*
* Pre-loading is disabled for now for taxon trees.
* {
* name: 'preload',
* label: setupToolText_preloadTree(),
* type: 'boolean',
* },
*/
],
},
{
resourceName: 'collection',
label: setupToolText.collection(),
label: statsText.collection(),
fields: [
{
name: 'collectionName',
Expand Down
10 changes: 3 additions & 7 deletions specifyweb/frontend/js_src/lib/localization/setupTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,10 @@ export const setupToolText = createDictionary({
'en-us': 'Full Name Direction',
},
preloadTree: {
'en-us': 'Pre-load Tree'
'en-us': 'Pre-load Tree',
},
preloadTreeDescription: {
'en-us': 'Download default records for this tree.'
'en-us': 'Download default records for this tree.',
},

// Storage Tree
Expand Down Expand Up @@ -169,9 +169,6 @@ export const setupToolText = createDictionary({
},

// Collection
collection: {
'en-us': 'Collection',
},
collectionName: {
'en-us': 'Collection Name',
},
Expand Down Expand Up @@ -215,8 +212,7 @@ export const setupToolText = createDictionary({
'en-us': 'Last Name',
},
specifyUserLastNameDescription: {
'en-us':
'The last name of the agent associated with the account.',
'en-us': 'The last name of the agent associated with the account.',
},

taxonTreeSetUp: {
Expand Down
2 changes: 1 addition & 1 deletion specifyweb/specify/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7164,7 +7164,7 @@ class Taxontreedefitem(model_extras.Taxontreedefitem):
# Relationships: Many-to-One
createdbyagent = models.ForeignKey('Agent', db_column='CreatedByAgentID', related_name='+', null=True, on_delete=protect_with_blockers)
modifiedbyagent = models.ForeignKey('Agent', db_column='ModifiedByAgentID', related_name='+', null=True, on_delete=protect_with_blockers)
parent = models.ForeignKey('TaxonTreeDefItem', db_column='ParentItemID', related_name='children', null=True, on_delete=models.DO_NOTHING)
parent = models.ForeignKey('TaxonTreeDefItem', db_column='ParentItemID', related_name='children', null=True, on_delete=models.CASCADE)
treedef = models.ForeignKey('TaxonTreeDef', db_column='TaxonTreeDefID', related_name='treedefitems', null=False, on_delete=models.CASCADE)

class Meta:
Expand Down