Skip to content

Commit

Permalink
diff/push: fix MySQL 8 collation edge case verify failure
Browse files Browse the repository at this point in the history
MySQL 8 exhibits some unusual behavior in SHOW CREATE TABLE regarding
superfluous column-level CHARACTER SET and COLLATE clauses (where
"superfluous" means "equal to the table-level defaults"). This logic was
consistent in MySQL 5.7 and older, and in all versions of MariaDB; however,
MySQL 8 seems to use different and sometimes-inconsistent logic. This caused
false-positives for diff/push --verify failures beginning with Skeema v1.7.1
since it refactored some related logic in a03718c.

Essentially, the --verify option assumed the output of SHOW CREATE TABLE is
a stable "canonical" representation of the table: if you take the output and
execute it as a CREATE TABLE, and then run SHOW CREATE TABLE on it, you should
get back the same output. Unfortunately this is not always the case in MySQL 8
for tables that use a default collation which differs from the default for the
table's chosen default charset.

To fix --verify, this commit changes the logic away from being solely based on
SHOW CREATE TABLE comparisons, and instead it now does a full table diff
(which inherently first compares SHOW CREATE TABLE, and then compares all
table metadata if those do not match). This comparison is able to detect, but
intentionally ignore, differences in superfluous CHARACTER SET and COLLATE
clauses in MySQL 8.

This commit also adds a new integration test method, which failed on the old
--verify logic in MySQL 8 but now passes.

Fixes #184. Thank you to Etsy's database team for the report!
  • Loading branch information
evanelias committed Jun 1, 2022
1 parent 85c6236 commit 8bde55a
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 37 deletions.
90 changes: 53 additions & 37 deletions internal/applier/verifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,28 @@ func VerifyDiff(diff *tengo.SchemaDiff, t *Target) error {
return nil
}

// Build a set of statement modifiers that will yield matching CREATE TABLE
// statements in all edge cases.
// The goal of VerifyDiff is to confirm that the diff contains the correct and
// complete set of differences between all modified tables. We use a strict set
// of statement modifiers that will transform the initial state into an exact
// match of the desired state. This is all run in a workspace, so we can be
// more aggressive about which statement modifiers are used in generating the
// ALTER here. When the diff is actually used against the real live table
// later, a different looser set of modifiers is used which filters out some
// of the undesired cosmetic clauses by default.
mods := tengo.StatementModifiers{
NextAutoInc: tengo.NextAutoIncIgnore,
StrictIndexOrder: true, // needed since we must get the SHOW CREATE TABLEs to match
StrictCheckOrder: true, // ditto (only affects MariaDB)
StrictForeignKeyNaming: true, // ditto
StrictColumnDefinition: true, // ditto (only affects MySQL 8 edge cases)
AllowUnsafe: true, // needed since we're just running against the temp schema
SkipPreDropAlters: true, // needed to ignore DROP PARTITION generated just to speed up DROP TABLE
NextAutoInc: tengo.NextAutoIncAlways, // use whichever auto_increment is in the fs
Partitioning: tengo.PartitioningPermissive, // ditto with partitioning status
AllowUnsafe: true, // needed since we're just running against the temp schema
AlgorithmClause: "copy", // needed so the DB doesn't ignore attempts to re-order indexes
StrictIndexOrder: true, // needed since we want the SHOW CREATE TABLEs to match
StrictCheckOrder: true, // ditto (only affects MariaDB)
StrictForeignKeyNaming: true, // ditto
StrictColumnDefinition: true, // ditto (only affects MySQL 8 edge cases)
SkipPreDropAlters: true, // ignore DROP PARTITIONs that were only generated to speed up a DROP TABLE
Flavor: t.Instance.Flavor(),
}
if !mods.Flavor.Matches(tengo.FlavorMySQL55) {
// avoid having MySQL ignore index changes that are simply reordered, but only
// legal syntax in 5.6+
mods.AlgorithmClause = "copy"
if mods.Flavor.Matches(tengo.FlavorMySQL55) {
mods.AlgorithmClause = "" // MySQL 5.5 doesn't support ALGORITHM clause
}

// Gather CREATE and ALTER for modified tables, and put into a LogicalSchema,
Expand Down Expand Up @@ -81,12 +87,20 @@ func VerifyDiff(diff *tengo.SchemaDiff, t *Target) error {
return fmt.Errorf("Diff verification failure: %s", err.Error())
}

// Compare the "expected" version of tables ("to" side of diff from the
// filesystem) with the "actual" version (from the workspace after the
// generated ALTERs were run there)
// Compare the "expected" version of each table ("to" side of original diff,
// from the filesystem) with the "actual" version (from the workspace after the
// generated ALTERs were run there) by running a second diff. Verification
// is successful if this second diff has no clauses (tables completely and
// exactly match) or only a blank statement (suppressed by StatementModifiers).
// We use very strict StatementModifiers here, except StrictColumnDefinition
// must be omitted because MySQL 8 behaves inconsistently with respect to
// superfluous column-level charset/collation clauses in some specific edge-
// cases. (These MySQL 8 discrepancies are purely cosmetic, safe to ignore.)
mods.StrictColumnDefinition = false
mods.AlgorithmClause = ""
actualTables := wsSchema.TablesByName()
for name, toTable := range expected {
if err := verifyTable(toTable, actualTables[name], mods.Flavor); err != nil {
if err := verifyTable(toTable, actualTables[name], mods); err != nil {
return err
}
}
Expand All @@ -97,28 +111,30 @@ func wantVerify(diff *tengo.SchemaDiff, t *Target) bool {
return t.Dir.Config.GetBool("verify") && len(diff.TableDiffs) > 0 && !t.briefOutput()
}

// verifyTable confirms that a table has the expected CREATE TABLE statement
// and expected partitioning status. In comparing the CREATE TABLE statement, we
// must ignore differences in next-auto-inc value (which intentionally is often
// not updated in the filesystem) as well as the entirety of the partitioning
// clause (ditto).
func verifyTable(expected, actual *tengo.Table, flavor tengo.Flavor) error {
// Simply compare partitioning *status*
expectPartitioned := (expected.Partitioning != nil)
actualPartitioned := (actual.Partitioning != nil)
if expectPartitioned != actualPartitioned {
return fmt.Errorf("Diff verification failure on table %s\nEXPECTED PARTITIONING STATUS POST-ALTER: %t\nACTUAL PARTITIONING STATUS POST-ALTER: %t\nRun command again with --skip-verify if this discrepancy is safe to ignore", expected.Name, expectPartitioned, actualPartitioned)
// verifyTable confirms that a table has the expected structure by doing an
// additional diff. Typically this diff will return quickly based on SHOW CREATE
// TABLE matching, but if they don't match (as happens with some MySQL 8 edge-
// cases) it will do a full structural comparison of the tables' fields.
func verifyTable(expected, actual *tengo.Table, mods tengo.StatementModifiers) error {
makeVerifyError := func() error {
return fmt.Errorf("Diff verification failure on table %s\n\nEXPECTED POST-ALTER:\n%s\n\nACTUAL POST-ALTER:\n%s\n\nRun command again with --skip-verify if this discrepancy is safe to ignore", expected.Name, expected.CreateStatement, actual.CreateStatement)
}
expectCreate := expected.CreateStatement
actualCreate := actual.CreateStatement
if expectPartitioned {
expectCreate = expected.UnpartitionedCreateStatement(flavor)
actualCreate = actual.UnpartitionedCreateStatement(flavor)
alterClauses, supported := expected.Diff(actual)

// supported will be false if either table cannot be introspected properly, or
// if the SHOW CREATE TABLEs don't match but the diff logic can't figure out
// how/why. These situations would indicate bugs, so verification fails.
if !supported {
return makeVerifyError()
}
expectCreate, _ = tengo.ParseCreateAutoInc(expectCreate)
actualCreate, _ = tengo.ParseCreateAutoInc(actualCreate)
if expectCreate != actualCreate {
return fmt.Errorf("Diff verification failure on table %s\n\nEXPECTED POST-ALTER:\n%s\n\nACTUAL POST-ALTER:\n%s\n\nRun command again with --skip-verify if this discrepancy is safe to ignore", expected.Name, expectCreate, actualCreate)

// If any clauses were emitted, fail verification if any are non-blank. Blank
// clauses are fine tho, as they are expected in a few cases, such as partition
// list differences or MySQL 8 superfluous charset/collate clause differences.
for _, clause := range alterClauses {
if clause.Clause(mods) != "" {
return makeVerifyError()
}
}
return nil
}
45 changes: 45 additions & 0 deletions skeema_cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1619,3 +1619,48 @@ func (s SkeemaIntegrationSuite) TestStripPartitioning(t *testing.T) {
cfg = s.handleCommand(t, CodeSuccess, ".", "skeema pull --strip-partitioning")
assertUnpartitioned(cfg)
}

// TestCharsetCollate confirms that no diff verification failures occur with
// various permutations of charset and collation on columns. This is a common
// bug/regression source, particularly with MySQL 8 which has different SHOW
// CREATE TABLE logic than prior versions of MySQL. Similar test logic in
// internal/tengo covers introspection accuracy of these situations, but not
// the full end-to-end logic used in internal/applier's diff verification.
func (s SkeemaIntegrationSuite) TestCharsetCollate(t *testing.T) {
s.sourceSQL(t, "charset-collate.sql")
s.handleCommand(t, CodeSuccess, ".", "skeema init --dir product -h %s -P %d --schema product ", s.d.Instance.Host, s.d.Instance.Port)

// Slightly adjust the four many_permutations tables in the sql file
for i := 1; i <= 4; i++ {
filename := fmt.Sprintf("product/many_permutations%d.sql", i)
origContents := fs.ReadTestFile(t, filename)
lines := strings.Split(origContents, "\n")
var updates int
for j, line := range lines {
// In charset-collate.sql, first col of each table is defined without any
// explicit charset or collation, using table-level defaults. init uses
// SHOW CREATE TABLE which will potentially add charset/collate back, but
// we want to re-strip them to test handling of this situation, especially
// in MySQL 8 which handles it oddly.
if strings.HasPrefix(strings.TrimSpace(line), "`a`") {
lines[j] = " `a` char(10),"
updates++
} else if strings.HasPrefix(line, ") ") {
lines[j-1] += ","
lines[j] = " `x` int"
lines[j+1] = line
lines = append(lines, "")
updates++
break
}
}
newContents := strings.Join(lines, "\n")
if origContents == newContents || updates != 2 {
t.Fatalf("Failed to adjust test file contents as expected: update count %d, new contents:\n%s", updates, newContents)
}
fs.WriteTestFile(t, filename, newContents)
}

// Diff should report differences found but not fatal error
s.handleCommand(t, CodeDifferencesFound, ".", "skeema diff --skip-lint")
}
41 changes: 41 additions & 0 deletions testdata/charset-collate.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
USE product;

CREATE TABLE many_permutations1 (
a char(10),
b char(10) CHARACTER SET latin1,
c char(10) COLLATE latin1_swedish_ci,
d char(10) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
e char(10) COLLATE latin1_general_ci,
f char(10) CHARACTER SET utf8mb4,
g char(10) COLLATE utf8mb4_general_ci
) DEFAULT CHARSET=latin1;

CREATE TABLE many_permutations2 (
a char(10),
b char(10) CHARACTER SET latin1,
c char(10) COLLATE latin1_swedish_ci,
d char(10) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
e char(10) COLLATE latin1_general_ci,
f char(10) CHARACTER SET utf8mb4,
g char(10) COLLATE utf8mb4_general_ci
) DEFAULT CHARSET=latin1 COLLATE latin1_general_ci;

CREATE TABLE many_permutations3 (
a char(10),
b char(10) CHARACTER SET latin1,
c char(10) COLLATE latin1_swedish_ci,
d char(10) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
e char(10) COLLATE utf8_general_ci,
f char(10) CHARACTER SET utf8mb3,
g char(10) COLLATE utf8_unicode_ci
) DEFAULT CHARSET=utf8;

CREATE TABLE many_permutations4 (
a char(10),
b char(10) CHARACTER SET latin1,
c char(10) COLLATE latin1_swedish_ci,
d char(10) CHARACTER SET latin1 COLLATE latin1_swedish_ci,
e char(10) COLLATE utf8_general_ci,
f char(10) CHARACTER SET utf8mb3,
g char(10) COLLATE utf8_unicode_ci
) DEFAULT CHARSET=utf8mb3 COLLATE utf8_unicode_ci;

0 comments on commit 8bde55a

Please sign in to comment.