From 26220f8437c7f64151ff1a3d726f547d2c1c211b Mon Sep 17 00:00:00 2001 From: sonika-shah <58761340+sonika-shah@users.noreply.github.com> Date: Mon, 27 Apr 2026 18:55:49 +0530 Subject: [PATCH 1/3] fix: strip stale relatedTerms from glossary term version snapshots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends PR #26586. That fix cleaned glossary_term_entity but not the version snapshots in entity_extension, so GET /versions/{v} still 500s on any pre-1.13 term whose relatedTerms had legacy shape: UnrecognizedPropertyException: Unrecognized field "id" (class TermRelation, has only "term" and "relationType") Predicate matches only legacy snapshots — first item has bare `id` (EntityReference) instead of `term` (TermRelation). Skips correctly- shaped snapshots written on 1.13+. Stripping is safe: relatedTerms is loaded from entity_relationship at read time post-#25886. --- .../native/1.13.0/mysql/postDataMigrationSQLScript.sql | 7 +++++++ .../native/1.13.0/postgres/postDataMigrationSQLScript.sql | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql index c8eb5f06d6bc..2bd92bd6509f 100644 --- a/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql +++ b/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql @@ -80,6 +80,13 @@ UPDATE glossary_term_entity SET json = JSON_REMOVE(json, '$.relatedTerms') WHERE JSON_EXTRACT(json, '$.relatedTerms') IS NOT NULL; +-- Same fix as above, for version snapshots used by GET /versions/{v}. +-- Only legacy snapshots: first item has bare `id` (EntityReference) instead of `term` (TermRelation). 
+UPDATE entity_extension +SET json = JSON_REMOVE(json, '$.relatedTerms') +WHERE extension LIKE 'glossaryTerm.version.%' + AND JSON_CONTAINS_PATH(json, 'one', '$.relatedTerms[0].id'); + -- Backfill conceptMappings for existing glossary terms UPDATE glossary_term_entity SET json = JSON_SET(COALESCE(json, '{}'), '$.conceptMappings', JSON_ARRAY()) diff --git a/bootstrap/sql/migrations/native/1.13.0/postgres/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.13.0/postgres/postDataMigrationSQLScript.sql index 80e0af41970a..55e88aa005ee 100644 --- a/bootstrap/sql/migrations/native/1.13.0/postgres/postDataMigrationSQLScript.sql +++ b/bootstrap/sql/migrations/native/1.13.0/postgres/postDataMigrationSQLScript.sql @@ -82,6 +82,13 @@ UPDATE glossary_term_entity SET json = (json::jsonb - 'relatedTerms')::json WHERE jsonb_exists(json::jsonb, 'relatedTerms'); +-- Same fix as above, for version snapshots used by GET /versions/{v}. +-- Only legacy snapshots: first item has bare `id` (EntityReference) instead of `term` (TermRelation). +UPDATE entity_extension +SET json = (json::jsonb - 'relatedTerms')::json +WHERE extension LIKE 'glossaryTerm.version.%' + AND ((json::jsonb)->'relatedTerms'->0) ? 'id'; + -- Backfill conceptMappings for existing glossary terms UPDATE glossary_term_entity SET json = jsonb_set(COALESCE(json::jsonb, '{}'::jsonb), '{conceptMappings}', '[]'::jsonb) From 85a27894a955758195d731c3eae529277e49fe1b Mon Sep 17 00:00:00 2001 From: sonika-shah <58761340+sonika-shah@users.noreply.github.com> Date: Wed, 29 Apr 2026 03:11:20 +0530 Subject: [PATCH 2/3] v1130: transform legacy relatedTerms in version snapshots instead of stripping Replace the SQL UPDATE that stripped relatedTerms from entity_extension version snapshots with a Java migration that wraps each legacy EntityReference[] item as TermRelation[] (term + relationType="relatedTo"). 
Version reads deserialize entity_extension JSON directly without rehydrating from entity_relationship, so a strip would lose history per version. The transform preserves it. Designed for tables with millions of rows: keyset paginated by PK (id, extension), batched updates, idempotent on re-run. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../mysql/postDataMigrationSQLScript.sql | 2 + .../postgres/postDataMigrationSQLScript.sql | 8 +- .../migration/mysql/v1130/Migration.java | 5 + .../migration/postgres/v1130/Migration.java | 5 + .../migration/utils/v1130/MigrationUtil.java | 182 ++++++++++++++++++ 5 files changed, 196 insertions(+), 6 deletions(-) diff --git a/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql index 2bd92bd6509f..e2b04808951b 100644 --- a/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql +++ b/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql @@ -79,6 +79,8 @@ WHERE NOT EXISTS ( UPDATE glossary_term_entity SET json = JSON_REMOVE(json, '$.relatedTerms') WHERE JSON_EXTRACT(json, '$.relatedTerms') IS NOT NULL; +-- entity_extension version snapshots: handled by Java migration +-- migrateGlossaryTermVersionRelatedTermsToTermRelation (transforms in place to preserve history). -- Same fix as above, for version snapshots used by GET /versions/{v}. -- Only legacy snapshots: first item has bare `id` (EntityReference) instead of `term` (TermRelation). 
diff --git a/bootstrap/sql/migrations/native/1.13.0/postgres/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.13.0/postgres/postDataMigrationSQLScript.sql index 55e88aa005ee..95941e470e7a 100644 --- a/bootstrap/sql/migrations/native/1.13.0/postgres/postDataMigrationSQLScript.sql +++ b/bootstrap/sql/migrations/native/1.13.0/postgres/postDataMigrationSQLScript.sql @@ -82,12 +82,8 @@ UPDATE glossary_term_entity SET json = (json::jsonb - 'relatedTerms')::json WHERE jsonb_exists(json::jsonb, 'relatedTerms'); --- Same fix as above, for version snapshots used by GET /versions/{v}. --- Only legacy snapshots: first item has bare `id` (EntityReference) instead of `term` (TermRelation). -UPDATE entity_extension -SET json = (json::jsonb - 'relatedTerms')::json -WHERE extension LIKE 'glossaryTerm.version.%' - AND ((json::jsonb)->'relatedTerms'->0) ? 'id'; +-- entity_extension version snapshots: handled by Java migration +-- migrateGlossaryTermVersionRelatedTermsToTermRelation (transforms in place to preserve history). 
-- Backfill conceptMappings for existing glossary terms UPDATE glossary_term_entity diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v1130/Migration.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v1130/Migration.java index 119af6d27c1d..334d3caa8b81 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v1130/Migration.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v1130/Migration.java @@ -25,5 +25,10 @@ public void runDataMigration() { + "Webhook authentication may not work correctly until re-saved.", e); } + try { + MigrationUtil.migrateGlossaryTermVersionRelatedTermsToTermRelation(handle); + } catch (Exception e) { + LOG.error("v1130 glossaryTerm version relatedTerms transform failed; re-run to retry.", e); + } } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v1130/Migration.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v1130/Migration.java index fb9e71782baf..f9e160b45e5b 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v1130/Migration.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v1130/Migration.java @@ -25,5 +25,10 @@ public void runDataMigration() { + "Webhook authentication may not work correctly until re-saved.", e); } + try { + MigrationUtil.migrateGlossaryTermVersionRelatedTermsToTermRelation(handle); + } catch (Exception e) { + LOG.error("v1130 glossaryTerm version relatedTerms transform failed; re-run to retry.", e); + } } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/utils/v1130/MigrationUtil.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/utils/v1130/MigrationUtil.java index 341a861ad081..9ee0fa91b9c2 100644 --- 
a/openmetadata-service/src/main/java/org/openmetadata/service/migration/utils/v1130/MigrationUtil.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/utils/v1130/MigrationUtil.java @@ -1,11 +1,13 @@ package org.openmetadata.service.migration.utils.v1130; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; import java.util.List; import java.util.Map; import lombok.extern.slf4j.Slf4j; import org.jdbi.v3.core.Handle; +import org.jdbi.v3.core.statement.PreparedBatch; import org.openmetadata.schema.dataInsight.custom.DataInsightCustomChart; import org.openmetadata.schema.entity.events.SubscriptionDestination; import org.openmetadata.schema.utils.JsonUtils; @@ -129,4 +131,184 @@ public static void migrateWebhookSecretKeyToAuthType(Handle handle) { LOG.info("Migrated {} event subscriptions with secretKey to authType", migratedCount); } + + private static final String SELECT_GLOSSARY_VERSIONS_MYSQL = + "SELECT id, extension, json FROM entity_extension " + + "WHERE extension LIKE 'glossaryTerm.version.%' " + + "AND JSON_CONTAINS_PATH(json, 'one', '$.relatedTerms[0].id') " + + "AND (id > :id OR (id = :id AND extension > :extension)) " + + "ORDER BY id, extension LIMIT :pageSize"; + + private static final String SELECT_GLOSSARY_VERSIONS_POSTGRES = + "SELECT id, extension, json::text AS json FROM entity_extension " + + "WHERE extension LIKE 'glossaryTerm.version.%' " + + "AND jsonb_exists((json::jsonb)->'relatedTerms'->0, 'id') " + + "AND (id > :id OR (id = :id AND extension > :extension)) " + + "ORDER BY id, extension LIMIT :pageSize"; + + private static final String UPDATE_VERSION_JSON_MYSQL = + "UPDATE entity_extension SET json = :json WHERE id = :id AND extension = :extension"; + + private static final String UPDATE_VERSION_JSON_POSTGRES = + "UPDATE entity_extension SET json = :json::jsonb WHERE id = :id AND extension = :extension"; + + 
private static final int VERSION_RELATED_TERMS_PAGE_SIZE = 500; + private static final String RELATED_TERMS = "relatedTerms"; + private static final String CHANGE_DESCRIPTION = "changeDescription"; + + /** + * Wraps legacy {@code EntityReference[]} relatedTerms as {@code TermRelation[]} in + * glossaryTerm version snapshots — both top-level and inside changeDescription diff strings. + * Version reads bypass entity_relationship rehydration, so a strip would lose history. Idempotent. + */ + public static void migrateGlossaryTermVersionRelatedTermsToTermRelation(Handle handle) { + LOG.info("v1130: transforming legacy relatedTerms in glossaryTerm version snapshots"); + boolean isMySQL = Boolean.TRUE.equals(DatasourceConfig.getInstance().isMySQL()); + String selectSql = isMySQL ? SELECT_GLOSSARY_VERSIONS_MYSQL : SELECT_GLOSSARY_VERSIONS_POSTGRES; + String updateSql = isMySQL ? UPDATE_VERSION_JSON_MYSQL : UPDATE_VERSION_JSON_POSTGRES; + + String cursorId = ""; + String cursorExtension = ""; + long totalTransformed = 0; + long totalSkipped = 0; + int pageNumber = 0; + boolean morePages = true; + + while (morePages) { + List<Map<String, Object>> rows = + handle + .createQuery(selectSql) + .bind("id", cursorId) + .bind("extension", cursorExtension) + .bind("pageSize", VERSION_RELATED_TERMS_PAGE_SIZE) + .mapToMap() + .list(); + + if (rows.isEmpty()) { + break; + } + pageNumber++; + morePages = rows.size() == VERSION_RELATED_TERMS_PAGE_SIZE; + + PreparedBatch batch = handle.prepareBatch(updateSql); + int batchedUpdates = 0; + for (Map<String, Object> row : rows) { + String id = String.valueOf(row.get("id")); + String extension = String.valueOf(row.get("extension")); + String jsonStr = String.valueOf(row.get("json")); + + cursorId = id; + cursorExtension = extension; + + try { + ObjectNode root = (ObjectNode) JsonUtils.readTree(jsonStr); + if (transformSnapshot(root)) { + batch.bind("id", id).bind("extension", extension).bind("json", root.toString()).add(); + batchedUpdates++; + } + } catch (Exception e) { + 
totalSkipped++; + LOG.warn( + "Skipping malformed glossaryTerm version snapshot id={} extension={}: {}", + id, + extension, + e.getMessage()); + } + } + + if (batchedUpdates > 0) { + batch.execute(); + totalTransformed += batchedUpdates; + } + + LOG.info( + "v1130 relatedTerms transform: page={} transformed={} skipped={} cursor=({},{})", + pageNumber, + totalTransformed, + totalSkipped, + cursorId, + cursorExtension); + } + + LOG.info( + "v1130 relatedTerms transform done: pages={} transformed={} skipped={}", + pageNumber, + totalTransformed, + totalSkipped); + } + + private static boolean transformSnapshot(ObjectNode root) { + boolean changed = false; + ArrayNode wrappedTopLevel = wrapLegacyRelatedTerms(root.get(RELATED_TERMS)); + if (wrappedTopLevel != null) { + root.set(RELATED_TERMS, wrappedTopLevel); + changed = true; + } + JsonNode changeDescription = root.get(CHANGE_DESCRIPTION); + if (changeDescription instanceof ObjectNode cd) { + changed |= rewriteChangeDescriptionEntries(cd, "fieldsAdded", "newValue"); + changed |= rewriteChangeDescriptionEntries(cd, "fieldsDeleted", "oldValue"); + changed |= rewriteChangeDescriptionEntries(cd, "fieldsUpdated", "newValue"); + changed |= rewriteChangeDescriptionEntries(cd, "fieldsUpdated", "oldValue"); + } + return changed; + } + + /** Wraps legacy items as TermRelation; returns null when nothing needs wrapping. */ + private static ArrayNode wrapLegacyRelatedTerms(JsonNode array) { + if (array == null || !array.isArray() || array.isEmpty()) { + return null; + } + ArrayNode wrapped = JsonUtils.getObjectMapper().createArrayNode(); + boolean changed = false; + for (JsonNode item : array) { + if (isWrappedTermRelation(item)) { + wrapped.add(item); + } else { + ObjectNode tr = JsonUtils.getObjectMapper().createObjectNode(); + tr.set("term", item); + tr.put("relationType", "relatedTo"); + wrapped.add(tr); + changed = true; + } + } + return changed ? 
wrapped : null; + } + + private static boolean isWrappedTermRelation(JsonNode item) { + return item != null && item.isObject() && item.has("term"); + } + + /** Rewrites legacy relatedTerms items inside changeDescription diff JSON strings. */ + private static boolean rewriteChangeDescriptionEntries( + ObjectNode changeDescription, String bucket, String valueField) { + JsonNode entries = changeDescription.get(bucket); + if (entries == null || !entries.isArray()) { + return false; + } + boolean anyChanged = false; + for (JsonNode entry : entries) { + if (!(entry instanceof ObjectNode entryObj)) { + continue; + } + JsonNode nameNode = entryObj.get("name"); + if (nameNode == null || !RELATED_TERMS.equals(nameNode.asText())) { + continue; + } + JsonNode valueNode = entryObj.get(valueField); + if (valueNode == null || !valueNode.isTextual() || valueNode.asText().isEmpty()) { + continue; + } + try { + JsonNode parsed = JsonUtils.readTree(valueNode.asText()); + ArrayNode wrapped = wrapLegacyRelatedTerms(parsed); + if (wrapped != null) { + entryObj.put(valueField, wrapped.toString()); + anyChanged = true; + } + } catch (Exception ignored) { + } + } + return anyChanged; + } } From 93250de44445bbabebbf4dbce8664d1c2110241e Mon Sep 17 00:00:00 2001 From: sonika-shah <58761340+sonika-shah@users.noreply.github.com> Date: Wed, 29 Apr 2026 03:18:32 +0530 Subject: [PATCH 3/3] fix(mysql): remove leftover entity_extension strip in v1130 post-migration The previous edit added the comment pointer above the legacy UPDATE entity_extension SET json = JSON_REMOVE(... '$.relatedTerms') block without removing it. On MySQL that SQL would have stripped relatedTerms from version snapshots BEFORE the Java transform runs, defeating the migration and losing related-term history. Postgres was already correct. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../native/1.13.0/mysql/postDataMigrationSQLScript.sql | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql index e2b04808951b..6566238de001 100644 --- a/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql +++ b/bootstrap/sql/migrations/native/1.13.0/mysql/postDataMigrationSQLScript.sql @@ -79,16 +79,10 @@ WHERE NOT EXISTS ( UPDATE glossary_term_entity SET json = JSON_REMOVE(json, '$.relatedTerms') WHERE JSON_EXTRACT(json, '$.relatedTerms') IS NOT NULL; + -- entity_extension version snapshots: handled by Java migration -- migrateGlossaryTermVersionRelatedTermsToTermRelation (transforms in place to preserve history). --- Same fix as above, for version snapshots used by GET /versions/{v}. --- Only legacy snapshots: first item has bare `id` (EntityReference) instead of `term` (TermRelation). -UPDATE entity_extension -SET json = JSON_REMOVE(json, '$.relatedTerms') -WHERE extension LIKE 'glossaryTerm.version.%' - AND JSON_CONTAINS_PATH(json, 'one', '$.relatedTerms[0].id'); - -- Backfill conceptMappings for existing glossary terms UPDATE glossary_term_entity SET json = JSON_SET(COALESCE(json, '{}'), '$.conceptMappings', JSON_ARRAY())