New renumber_tree function to fix node numbering ordering issue in generated query#7528
New renumber_tree function to fix node numbering ordering issue in generated query #7528 — acwhite211 merged 9 commits into main
Conversation
|
Here is the sql code that gets generated in this branch, running on the NHMD_2025_10_29 database: USE NHMD_2025_10_29;
-- Generated running NHMD_2025_10_29 on issue-7423-2 branch
UPDATE taxon t JOIN taxontreedefitem d ON t.taxontreedefitemid = d.taxontreedefitemid SET t.rankid = d.rankid;
UPDATE taxon t
JOIN (
SELECT id, rn FROM (
SELECT
t0.taxonid AS id,
ROW_NUMBER() OVER (ORDER BY CONCAT_WS(',', LPAD(t25.taxonid, 12, '0'), LPAD(t24.taxonid, 12, '0'), LPAD(t23.taxonid, 12, '0'), LPAD(t22.taxonid, 12, '0'), LPAD(t21.taxonid, 12, '0'), LPAD(t20.taxonid, 12, '0'), LPAD(t19.taxonid, 12, '0'), LPAD(t18.taxonid, 12, '0'), LPAD(t17.taxonid, 12, '0'), LPAD(t16.taxonid, 12, '0'), LPAD(t15.taxonid, 12, '0'), LPAD(t14.taxonid, 12, '0'), LPAD(t13.taxonid, 12, '0'), LPAD(t12.taxonid, 12, '0'), LPAD(t11.taxonid, 12, '0'), LPAD(t10.taxonid, 12, '0'), LPAD(t9.taxonid, 12, '0'), LPAD(t8.taxonid, 12, '0'), LPAD(t7.taxonid, 12, '0'), LPAD(t6.taxonid, 12, '0'), LPAD(t5.taxonid, 12, '0'), LPAD(t4.taxonid, 12, '0'), LPAD(t3.taxonid, 12, '0'), LPAD(t2.taxonid, 12, '0'), LPAD(t1.taxonid, 12, '0'), LPAD(t0.taxonid, 12, '0')), t0.taxonid) AS rn
FROM taxon t0
LEFT JOIN taxon t1 ON t0.parentid = t1.taxonid
LEFT JOIN taxon t2 ON t1.parentid = t2.taxonid
LEFT JOIN taxon t3 ON t2.parentid = t3.taxonid
LEFT JOIN taxon t4 ON t3.parentid = t4.taxonid
LEFT JOIN taxon t5 ON t4.parentid = t5.taxonid
LEFT JOIN taxon t6 ON t5.parentid = t6.taxonid
LEFT JOIN taxon t7 ON t6.parentid = t7.taxonid
LEFT JOIN taxon t8 ON t7.parentid = t8.taxonid
LEFT JOIN taxon t9 ON t8.parentid = t9.taxonid
LEFT JOIN taxon t10 ON t9.parentid = t10.taxonid
LEFT JOIN taxon t11 ON t10.parentid = t11.taxonid
LEFT JOIN taxon t12 ON t11.parentid = t12.taxonid
LEFT JOIN taxon t13 ON t12.parentid = t13.taxonid
LEFT JOIN taxon t14 ON t13.parentid = t14.taxonid
LEFT JOIN taxon t15 ON t14.parentid = t15.taxonid
LEFT JOIN taxon t16 ON t15.parentid = t16.taxonid
LEFT JOIN taxon t17 ON t16.parentid = t17.taxonid
LEFT JOIN taxon t18 ON t17.parentid = t18.taxonid
LEFT JOIN taxon t19 ON t18.parentid = t19.taxonid
LEFT JOIN taxon t20 ON t19.parentid = t20.taxonid
LEFT JOIN taxon t21 ON t20.parentid = t21.taxonid
LEFT JOIN taxon t22 ON t21.parentid = t22.taxonid
LEFT JOIN taxon t23 ON t22.parentid = t23.taxonid
LEFT JOIN taxon t24 ON t23.parentid = t24.taxonid
LEFT JOIN taxon t25 ON t24.parentid = t25.taxonid
) ordered
) r ON r.id = t.taxonid
SET t.nodenumber = r.rn,
t.highestchildnodenumber = r.rn;
SELECT DISTINCT rankid FROM taxon ORDER BY rankid DESC;
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 260};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 250};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 240};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 230};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 220};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 200};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 190};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 180};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 170};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 160};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 150};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 140};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 130};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 120};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 110};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 100};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 90};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 80};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 70};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 60};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 50};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 40};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 30};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 10};
UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > %(rank)s
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = %(rank)s {'rank': 0}; |
grantfitzsimmons
left a comment
There was a problem hiding this comment.
Using a database copy provided by NHMD.
-
Try merging two other random taxon nodes in the tree to make sure no errors occur.
select count(*) from taxon t join taxon p on t.parentid = p.taxonid where t.nodenumber not between p.nodenumber and p.highestchildnodenumber;
Returns a count of 20833 before any changes.
- Run the
renumber_tree('taxon') function directly via the Django Python shell.
specify@ca4649c1501b:/opt/specify7$ ve/bin/python manage.py shell
Python 3.12.3 (main, Aug 14 2025, 17:47:21) [GCC 13.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
(InteractiveConsole)
>>> from specifyweb.backend.trees import extras
>>> from django.db import connection
>>> import logging
>>> logger = logging.getLogger(__name__)
>>> extras.renumber_tree('taxon')
[03/Nov/2025 11:17:27] [DEBUG] [specifyweb.backend.trees.extras:748] [renumber_tree] running for taxon
[03/Nov/2025 11:17:27] [DEBUG] [specifyweb.backend.trees.extras:758] UPDATE taxon t JOIN taxontreedefitem d ON t.taxontreedefitemid = d.taxontreedefitemid SET t.rankid = d.rankid
[03/Nov/2025 11:17:29] [DEBUG] [specifyweb.backend.trees.extras:793] UPDATE taxon t
JOIN (
SELECT id, rn FROM (
SELECT
t0.taxonid AS id,
ROW_NUMBER() OVER (ORDER BY CONCAT_WS(',', LPAD(t25.taxonid, 12, '0'), LPAD(t24.taxonid, 12, '0'), LPAD(t23.taxonid, 12, '0'), LPAD(t22.taxonid, 12, '0'), LPAD(t21.taxonid, 12, '0'), LPAD(t20.taxonid, 12, '0'), LPAD(t19.taxonid, 12, '0'), LPAD(t18.taxonid, 12, '0'), LPAD(t17.taxonid, 12, '0'), LPAD(t16.taxonid, 12, '0'), LPAD(t15.taxonid, 12, '0'), LPAD(t14.taxonid, 12, '0'), LPAD(t13.taxonid, 12, '0'), LPAD(t12.taxonid, 12, '0'), LPAD(t11.taxonid, 12, '0'), LPAD(t10.taxonid, 12, '0'), LPAD(t9.taxonid, 12, '0'), LPAD(t8.taxonid, 12, '0'), LPAD(t7.taxonid, 12, '0'), LPAD(t6.taxonid, 12, '0'), LPAD(t5.taxonid, 12, '0'), LPAD(t4.taxonid, 12, '0'), LPAD(t3.taxonid, 12, '0'), LPAD(t2.taxonid, 12, '0'), LPAD(t1.taxonid, 12, '0'), LPAD(t0.taxonid, 12, '0')), t0.taxonid) AS rn
FROM taxon t0
LEFT JOIN taxon t1 ON t0.parentid = t1.taxonid
LEFT JOIN taxon t2 ON t1.parentid = t2.taxonid
LEFT JOIN taxon t3 ON t2.parentid = t3.taxonid
LEFT JOIN taxon t4 ON t3.parentid = t4.taxonid
LEFT JOIN taxon t5 ON t4.parentid = t5.taxonid
LEFT JOIN taxon t6 ON t5.parentid = t6.taxonid
LEFT JOIN taxon t7 ON t6.parentid = t7.taxonid
LEFT JOIN taxon t8 ON t7.parentid = t8.taxonid
LEFT JOIN taxon t9 ON t8.parentid = t9.taxonid
LEFT JOIN taxon t10 ON t9.parentid = t10.taxonid
LEFT JOIN taxon t11 ON t10.parentid = t11.taxonid
LEFT JOIN taxon t12 ON t11.parentid = t12.taxonid
LEFT JOIN taxon t13 ON t12.parentid = t13.taxonid
LEFT JOIN taxon t14 ON t13.parentid = t14.taxonid
LEFT JOIN taxon t15 ON t14.parentid = t15.taxonid
LEFT JOIN taxon t16 ON t15.parentid = t16.taxonid
LEFT JOIN taxon t17 ON t16.parentid = t17.taxonid
LEFT JOIN taxon t18 ON t17.parentid = t18.taxonid
LEFT JOIN taxon t19 ON t18.parentid = t19.taxonid
LEFT JOIN taxon t20 ON t19.parentid = t20.taxonid
LEFT JOIN taxon t21 ON t20.parentid = t21.taxonid
LEFT JOIN taxon t22 ON t21.parentid = t22.taxonid
LEFT JOIN taxon t23 ON t22.parentid = t23.taxonid
LEFT JOIN taxon t24 ON t23.parentid = t24.taxonid
LEFT JOIN taxon t25 ON t24.parentid = t25.taxonid
) ordered
) r ON r.id = t.taxonid
SET t.nodenumber = r.rn,
t.highestchildnodenumber = r.rn
[03/Nov/2025 11:17:39] [DEBUG] [specifyweb.backend.trees.extras:798] SELECT DISTINCT rankid FROM taxon ORDER BY rankid DESC
[03/Nov/2025 11:17:39] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 260
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 260
[03/Nov/2025 11:17:39] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 250
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 250
[03/Nov/2025 11:17:40] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 240
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 240
[03/Nov/2025 11:17:40] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 230
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 230
[03/Nov/2025 11:17:40] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 220
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 220
[03/Nov/2025 11:17:40] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 200
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 200
[03/Nov/2025 11:17:40] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 190
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 190
[03/Nov/2025 11:17:40] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 180
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 180
[03/Nov/2025 11:17:41] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 170
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 170
[03/Nov/2025 11:17:41] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 160
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 160
[03/Nov/2025 11:17:41] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 150
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 150
[03/Nov/2025 11:17:41] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 140
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 140
[03/Nov/2025 11:17:42] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 130
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 130
[03/Nov/2025 11:17:42] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 120
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 120
[03/Nov/2025 11:17:42] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 110
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 110
[03/Nov/2025 11:17:42] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 100
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 100
[03/Nov/2025 11:17:42] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 90
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 90
[03/Nov/2025 11:17:43] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 80
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 80
[03/Nov/2025 11:17:43] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 70
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 70
[03/Nov/2025 11:17:43] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 60
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 60
[03/Nov/2025 11:17:43] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 50
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 50
[03/Nov/2025 11:17:43] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 40
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 40
[03/Nov/2025 11:17:44] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 30
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 30
[03/Nov/2025 11:17:44] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 10
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 10
[03/Nov/2025 11:17:44] [DEBUG] [specifyweb.backend.trees.extras:814] UPDATE taxon t
JOIN (
SELECT parentid, MAX(highestchildnodenumber) AS hcnn
FROM taxon
WHERE rankid > 0
GROUP BY parentid
) sub ON sub.parentid = t.taxonid
SET t.highestchildnodenumber = sub.hcnn
WHERE t.rankid = 0
>>> extras.validate_tree_numbering('taxon')
[03/Nov/2025 11:17:46] [INFO] [specifyweb.backend.trees.extras:597] validating tree
>>> -
The command
renumber_tree('taxon') ran without errors. -
Query the database with:
select count(*) from taxon t join taxon p on t.parentid = p.taxonid where t.nodenumber not between p.nodenumber and p.highestchildnodenumber;
-
The query should return 0.
-
Through the Specify Taxon Tree UI, merge two taxon nodes (e.g., 'Palaeoniscidae (in Palaeonisciformes)' into 'Palaeoniscidae (in Palaeoniscoidea)'). See that the taxon merge occurred without errors.
-
See that the
validate_tree_numbering('taxon') command ran without errors
There was a problem hiding this comment.
Generally pretty solid! 👌 🚀
I tested two different database backups (a copy of NHMD and KUFish) as well as many custom-built trees (including trees in the same discipline) using MariaDB 11.7.2 and MariaDB 10.11.
I don't think there's anything inherently wrong or pressing with these changes[^1], although there do seem to be superfluous (unnecessary/extra) changes: using LPAD to enforce a numerical-style ordering, redefining the parent_joins and path_expr helper functions, disabling sql_safe_updates.
I don't think they have a functional impact on node numbering, although (especially with the LPAD changes) there may be other side-effects such as performance impacts or newly introduced breaking edge cases.
I'd love to see some automated tests for this as well!
Here's some testing artifacts:
Below is a graph of a simplified Taxon tree which had node numbering performed via main, and has 8 taxon records where the node number is not between the parent's node number and highest child node number.
The notation of the graph for each node is of the form nodeNumber <> highestChildNodeNumber.
Here is the same tree with node numbering performed on this branch (0e34b53):
And in Specify 6.8.03:
Footnotes
-
besides perhaps conceptually not setting the
sql_safe_updates back to the prior value, though I doubt Specify or Django has a use case where it expects it to be enabled, so practically it's likely not going to result in an error 🤞 ↩
emenslin
left a comment
There was a problem hiding this comment.
- Through the Specify Taxon Tree UI, merge two taxon nodes (e.g., 'Palaeoniscidae (in Palaeonisciformes)' into 'Palaeoniscidae (in Palaeoniscoidea)'). See that the taxon merge occurred without errors.
- Try merging two other random taxon nodes in the tree to make sure no errors occur.
Looks good, I didn't run into any errors
There was a problem hiding this comment.
- Through the Specify Taxon Tree UI, merge two taxon nodes (e.g., 'Palaeoniscidae (in Palaeonisciformes)' into 'Palaeoniscidae (in Palaeoniscoidea)'). See that the taxon merge occurred without errors.
- Try merging two other random taxon nodes in the tree to make sure no errors occur.
UX tests look great!
Development Testing below------------------------------
Query the database with:
select count(*)
from taxon t
join taxon p on t.parentid = p.taxonid
where t.nodenumber not between p.nodenumber and p.highestchildnodenumber;
- See that it returns a count of 0
Run the renumber_tree('taxon') function directly via the Django Python shell.
- The command renumber_tree('taxon') ran without errors.
Query the database with:
select count(*)
from taxon t
join taxon p on t.parentid = p.taxonid
where t.nodenumber not between p.nodenumber and p.highestchildnodenumber;
-
The query should return 0.
-
Through the Specify Taxon Tree UI, merge two taxon nodes (e.g., 'Palaeoniscidae (in Palaeonisciformes)' into 'Palaeoniscidae (in Palaeoniscoidea)'). See that the taxon merge occurred without errors.
Go back to the python shell and run the validate_tree_numbering('taxon') command
- See that the validate_tree_numbering('taxon') command ran without errors
No issues found while following the Dev testing instructions either. Nice work!
Database: vpl_specify_2025_10_17
MariaDB: v11.8
Fixes #7423
User-variable numbering doesn't seem reliable in MariaDB 11.8. MariaDB 11.8 is more aggressive about optimizing/merging derived tables. With
(@rn := @rn + 1) inside a subquery, the optimizer may reorder evaluation. ORDER BY inside a derived table is not guaranteed.
The other issue with the existing renumber_tree function, used for taxon node renumbering, was the use of lexical ordering instead of numerical ordering when sorting path expressions. This issue is more likely to appear when there is a node with lots of children.
Numerically sorted:
2 < 10 < 100
Lexicographically sorted:
"10" < "100" < "2"
Other improvements, such as better logging and readability, are included in this new solution as well.
Attempts were made at a CTE-based query for node renumbering, but none ran within a reasonable time.
Checklist
self-explanatory (or properly documented)
Testing instructions
Same as testing instructions in #7423 and #7457
UX Testing instructions
Running the merge:

Taxon Tree after merge:

Development Testing instructions
NOTE: When running locally, make sure to be using MariaDB version 11.8 to test this PR.
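Query the database with:
select count(*) from taxon t join taxon p on t.parentid = p.taxonid where t.nodenumber not between p.nodenumber and p.highestchildnodenumber;
See that it returns a count of 0.
Run the renumber_tree('taxon') function directly via the Django Python shell.
The command renumber_tree('taxon') ran without errors.
Query the database with:
select count(*) from taxon t join taxon p on t.parentid = p.taxonid where t.nodenumber not between p.nodenumber and p.highestchildnodenumber;
The query should return 0.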
Through the Specify Taxon Tree UI, merge two taxon nodes (e.g., 'Palaeoniscidae (in Palaeonisciformes)' into 'Palaeoniscidae (in Palaeoniscoidea)'). See that the taxon merge occurred without errors.
Go back to the python shell and run the
validate_tree_numbering('taxon') command.
See that the validate_tree_numbering('taxon') command ran without errors.