Support DB servers with lower_case_table_names=1 or 2 (Windows / MacOS)

This commit adds initial support for database servers with non-zero values for lower_case_table_names ("LCTN" below), typically meaning a database instance running natively on Windows or MacOS. These OSes use case-insensitive filesystems by default, which previously caused problems in Skeema if any table names included uppercase characters in their .sql CREATE statement, or any schema names included uppercase characters in .skeema files.

The following changes are included in this commit:

* In all commands, Skeema can now properly map *.sql CREATE statements to introspected database objects even if the server uses LCTN=1 or LCTN=2 and the objects have mixed-case names.
* When using workspace=docker, if the real database server uses LCTN=1, any newly-created Dockerized database instance will be configured to use LCTN=1 as well. (No equivalent behavior is needed for LCTN=2, since that mode retains table name casing.)
* With LCTN=1 or LCTN=2, all operations will confirm that there are no case-insensitive duplicate table names among the tables in a single subdirectory's *.sql files. For example, if a directory contains both CREATE TABLE Foo and CREATE TABLE foo, this is illegal with LCTN=1 or LCTN=2 and an error is now thrown before the operation is even attempted. With LCTN=0, this situation is still permitted (table names are case-sensitive there, e.g. on a Linux database server, so these are two distinct tables) and no error is thrown.
* When using `skeema diff` or `skeema push` in a sharded environment (the .skeema file configures the subdir to map to multiple hosts in the same environment name), all shards must have the same LCTN value. If not, Skeema detects this situation and throws a fatal error.
* Integration test coverage for LCTN=1 is included. By default it is run in CI but not locally; to enable it locally, set env var SKEEMA_TEST_LCTN=true. This adds to test execution time since a separate Dockerized database instance must be created for this test.
* Integration test coverage for LCTN=2 is included. By default it is not run, since it can only be tested on a MacOS host. To enable it locally, set env var SKEEMA_TEST_LCTN=true on a MacOS host. This adds to test execution time since a separate Dockerized database instance must be created for this test, and it must bind-mount a host path for the DB's data directory in order to use a case-insensitive data volume.

Fixes #65.

Several additional changes specific to views are omitted here, since Skeema Community Edition does not support views. In the upcoming v1.8 release, the Premium Edition CLI will include all changes from this commit, plus additional views-specific fixes and integration testing. On MariaDB in particular, when using LCTN=2 and a mixed-case schema name, SHOW CREATE VIEW has some odd bugs; the Premium Edition CLI properly detects this and uses a workaround to ensure correct behavior even in that situation.
skeema · Apr 18, 2022 · bdc279e · bdc279e
1 parent 2d3d7c4
commit bdc279e
Show file tree

Hide file tree

Showing 12 changed files with 572 additions and 69 deletions.
diff --git a/cmd_pull.go b/cmd_pull.go
@@ -153,6 +153,14 @@ func pullSchemaDir(dir *fs.Dir, instance *tengo.Instance) (schemaNames []string,
 // more schemas. A slice of handled schema names is returned, along with any
 // error encountered.
 func pullLogicalSchema(dir *fs.Dir, instance *tengo.Instance, logicalSchema *fs.LogicalSchema) (schemaNames []string, err error) {
+	// With non-zero lower_case_table_names, force names to lowercase as needed in
+	// logicalSchema, so that statements can be correctly linked to objects
+	if lctn := instance.NameCaseMode(); lctn > tengo.NameCaseAsIs {
+		if err := logicalSchema.LowerCaseNames(lctn); err != nil {
+			return nil, err
+		}
+	}
+
 	if logicalSchema.Name != "" {
 		schemaNames = []string{logicalSchema.Name}
 	} else if schemaNames, err = dir.SchemaNames(instance); err != nil {

diff --git a/internal/applier/target.go b/internal/applier/target.go
@@ -203,6 +203,19 @@ func targetsForLogicalSchema(logicalSchema *fs.LogicalSchema, dir *fs.Dir, insta
 		return nil, len(instances)
 	}
 
+	// Confirm all instances have the same lower_case_table_names; mixing isn't
+	// supported within a single operation
+	if len(instances) > 1 && instances[0].NameCaseMode() != tengo.NameCaseUnknown {
+		lctn := instances[0].NameCaseMode()
+		for _, other := range instances[1:] {
+			if compare := other.NameCaseMode(); compare != tengo.NameCaseUnknown && compare != lctn {
+				log.Errorf("Skipping %s: all database servers mapped by the same subdirectory and environment must have the same value for lower_case_table_names.", dir)
+				log.Errorf("Instance %s has lower_case_table_names=%d, but instance %s has lower_case_table_names=%d.", instances[0], lctn, other, compare)
+				return nil, len(instances)
+			}
+		}
+	}
+
 	// Obtain a *tengo.Schema representation of the dir's *.sql files from a
 	// workspace
 	opts, err := workspace.OptionsForDir(dir, instances[0])

diff --git a/internal/dumper/dumper.go b/internal/dumper/dumper.go
@@ -7,6 +7,7 @@ package dumper
 import (
 	"errors"
 	"fmt"
+	"runtime"
 	"strings"
 
 	log "github.com/sirupsen/logrus"
@@ -65,26 +66,17 @@ func rewriteSQLFile(file *fs.TokenizedSQLFile) error {
 // a result of the dump operation. The directory's parsed values are modified
 // in-place by this function, but nothing is written to the filesystem yet.
 func modifiedFiles(schema *tengo.Schema, dir *fs.Dir, opts Options) []*fs.TokenizedSQLFile {
-	fileMap := make(map[string]*fs.TokenizedSQLFile) // filePath string -> tokenized file
-	fileDirty := make(map[string]bool)               // filePath string -> bool
-
 	// TODO: handle dirs that contain multiple logical schemas by name
 	var logicalSchema *fs.LogicalSchema
 	if len(dir.LogicalSchemas) > 0 {
 		logicalSchema = dir.LogicalSchemas[0]
 	} else {
 		logicalSchema = &fs.LogicalSchema{}
 	}
-	keySeen := make(map[tengo.ObjectKey]bool, len(logicalSchema.Creates))
-	for key, stmt := range logicalSchema.Creates {
-		keySeen[key] = true
-		if filePath := stmt.FromFile.String(); fileMap[filePath] == nil {
-			fileMap[filePath] = stmt.FromFile
-		}
-	}
+	fm := newFileMap(logicalSchema)
 
-	for key, object := range schema.Objects() {
-		delete(keySeen, key) // filter keySeen to just be things that *aren't* in the DB
+	dbObjects := schema.Objects()
+	for key, object := range dbObjects {
 		if opts.shouldIgnore(object) {
 			continue
 		}
@@ -126,59 +118,42 @@ func modifiedFiles(schema *tengo.Schema, dir *fs.Dir, opts Options) []*fs.Tokeni
 			continue
 		}
 
-		var filePath string
-		if stmt == nil {
-			filePath = fs.PathForObject(dir.Path, key.Name)
-		} else {
-			filePath = stmt.FromFile.Path()
-		}
-		fileDirty[filePath] = true
-		if fileMap[filePath] == nil {
-			fileMap[filePath] = &fs.TokenizedSQLFile{
-				SQLFile: fs.SQLFile{
-					Dir:      dir.Path,
-					FileName: fs.FileNameForObject(key.Name),
-				},
-			}
-		}
-
-		if opts.CountOnly {
-			continue // Don't mutate stmt if CountOnly
-		}
-
-		// Append the statement if it doesn't exist in fs yet; otherwise update it.
+		// If we reach this point, we need to mark the statement's file as dirty, and
+		// update/append its in-memory representation unless CountOnly was requested.
 		// We "cheat" by potentially omitting some fs fields and potentially including
 		// DELIMITER wrappers in a single Statement.Text, but this still works fine
 		// for rewriting the file later.
-		if stmt == nil {
-			f := fileMap[filePath]
-			stmt = &fs.Statement{
-				Type:       fs.StatementTypeCreate,
-				ObjectType: key.Type,
-				ObjectName: key.Name,
-				FromFile:   f,
+		if stmt != nil { // statement already in fs
+			fm.markDirty(stmt.FromFile)
+			if !opts.CountOnly {
+				stmt.Text = canonicalCreate
+			}
+		} else { // statement not in fs, needs to be appended to file
+			f := fm.file(dir, object)
+			fm.markDirty(f)
+			if !opts.CountOnly {
+				f.Statements = append(f.Statements, &fs.Statement{
+					Type:       fs.StatementTypeCreate,
+					ObjectType: key.Type,
+					ObjectName: key.Name,
+					FromFile:   f,
+					Text:       canonicalCreate,
+				})
 			}
-			f.Statements = append(f.Statements, stmt)
 		}
-		stmt.Text = canonicalCreate
 	}
 
-	// Remaining keys in keySeen do not exist in db, so remove them
-	for key := range keySeen {
-		if !opts.shouldIgnore(key) {
-			stmt := logicalSchema.Creates[key]
+	// Handle create statements that are in FS but do not exist in DB
+	for key, stmt := range logicalSchema.Creates {
+		if _, inDB := dbObjects[key]; !inDB && !opts.shouldIgnore(key) {
+			fm.markDirty(stmt.FromFile)
 			if !opts.CountOnly {
 				stmt.Remove()
 			}
-			fileDirty[stmt.FromFile.String()] = true
 		}
 	}
 
-	var result []*fs.TokenizedSQLFile
-	for filePath := range fileDirty {
-		result = append(result, fileMap[filePath])
-	}
-	return result
+	return fm.dirtyFiles()
 }
 
 // AddDelimiter takes the supplied string and appends a delimiter to the end.
@@ -205,3 +180,64 @@ func verifyCanParse(key tengo.ObjectKey, statementBody string) bool {
 	}
 	return ok
 }
+
+// uniquePath converts its arg to lower-case on MacOS or Windows, or returns it
+// unchanged on any other OS. This is useful for normalizing map keys to ensure
+// correct dumper behavior on systems that typically have case-insensitive
+// filesystems.
+func uniquePath(p string) string {
+	if runtime.GOOS == "darwin" || runtime.GOOS == "windows" {
+		return strings.ToLower(p)
+	}
+	return p
+}
+
+type fileMap struct {
+	all   map[string]*fs.TokenizedSQLFile // all files, normalized filePath string -> tokenized sql file
+	dirty map[string]bool                 // dirty files, normalized filePath string -> bool
+}
+
+func newFileMap(logicalSchema *fs.LogicalSchema) *fileMap {
+	fm := &fileMap{
+		all:   make(map[string]*fs.TokenizedSQLFile, len(logicalSchema.Creates)),
+		dirty: make(map[string]bool),
+	}
+	// track all unique files in the logical schema's CREATE statements
+	for _, stmt := range logicalSchema.Creates {
+		fm.all[uniquePath(stmt.FromFile.Path())] = stmt.FromFile
+	}
+	return fm
+}
+
+func (fm *fileMap) file(dir *fs.Dir, keyer tengo.ObjectKeyer) *fs.TokenizedSQLFile {
+	objName := keyer.ObjectKey().Name
+	filePath := fs.PathForObject(dir.Path, objName)
+
+	// If the file at this path is already tracked, return it
+	if f := fm.all[uniquePath(filePath)]; f != nil {
+		return f
+	}
+
+	// Otherwise, instantiate a new file, track it, and return it
+	f := &fs.TokenizedSQLFile{
+		SQLFile: fs.SQLFile{
+			Dir:      dir.Path,
+			FileName: fs.FileNameForObject(objName),
+		},
+	}
+	fm.all[uniquePath(filePath)] = f
+	return f
+}
+
+func (fm *fileMap) markDirty(f *fs.TokenizedSQLFile) {
+	fm.dirty[uniquePath(f.Path())] = true
+}
+
+func (fm *fileMap) dirtyFiles() (result []*fs.TokenizedSQLFile) {
+	// contents of fm.dirty have already been run through uniquePath(); ditto for
+	// keys of fm.all; so no need to re-run uniquePath() here
+	for filePath := range fm.dirty {
+		result = append(result, fm.all[filePath])
+	}
+	return result
+}
diff --git a/internal/fs/dir.go b/internal/fs/dir.go
@@ -70,6 +70,60 @@ func (logicalSchema *LogicalSchema) AddStatement(stmt *Statement) error {
 	}
 }
 
+// LowerCaseNames adjusts logicalSchema in-place such that its object names are
+// forced to lower-case as appropriate for the supplied NameCaseMode.
+// An error will be returned if case-insensitivity would result in duplicate
+// objects with the same name and type.
+func (logicalSchema *LogicalSchema) LowerCaseNames(mode tengo.NameCaseMode) error {
+	switch mode {
+	case tengo.NameCaseLower: // lower_case_table_names=1
+		// Schema names and table names are forced lowercase in this mode
+		logicalSchema.Name = strings.ToLower(logicalSchema.Name)
+		newCreates := make(map[tengo.ObjectKey]*Statement, len(logicalSchema.Creates))
+		for k, stmt := range logicalSchema.Creates {
+			if k.Type == tengo.ObjectTypeTable {
+				k.Name = strings.ToLower(k.Name)
+				stmt.ObjectName = strings.ToLower(stmt.ObjectName)
+				if origStmt, already := newCreates[k]; already {
+					return DuplicateDefinitionError{
+						ObjectKey: stmt.ObjectKey(),
+						FirstFile: origStmt.File,
+						FirstLine: origStmt.LineNo,
+						DupeFile:  stmt.File,
+						DupeLine:  stmt.LineNo,
+					}
+				}
+			}
+			newCreates[k] = stmt
+		}
+		logicalSchema.Creates = newCreates
+
+	case tengo.NameCaseInsensitive: // lower_case_table_names=2
+		// Only view names are forced to lowercase in this mode, but Community Edition
+		// codebase does not support views, so nothing to lowercase here.
+		// However, with this mode we still need to ensure there aren't any duplicate
+		// table names in CREATEs after accounting for case-insensitive table naming.
+		lowerTables := make(map[string]*Statement)
+
+		for k, stmt := range logicalSchema.Creates {
+			if k.Type == tengo.ObjectTypeTable {
+				lowerName := strings.ToLower(k.Name)
+				if origStmt, already := lowerTables[lowerName]; already {
+					return DuplicateDefinitionError{
+						ObjectKey: stmt.ObjectKey(),
+						FirstFile: origStmt.File,
+						FirstLine: origStmt.LineNo,
+						DupeFile:  stmt.File,
+						DupeLine:  stmt.LineNo,
+					}
+				}
+				lowerTables[lowerName] = stmt
+			}
+		}
+	}
+	return nil
+}
+
 // ParseDir parses the specified directory, including all *.sql files in it,
 // its .skeema config file, and all .skeema config files of its parent
 // directory hierarchy. Evaluation of parent dirs stops once we hit either a
@@ -440,7 +494,17 @@ func (dir *Dir) SchemaNames(instance *tengo.Instance) (names []string, err error
 	if err != nil {
 		return nil, err
 	}
-	return filterSchemaNames(names, ignoreSchema), nil
+	names = filterSchemaNames(names, ignoreSchema)
+
+	// If the instance has lower_case_table_names=1, force result to lowercase,
+	// to handle cases where a user has manually configured a mixed-case name
+	if instance.NameCaseMode() == tengo.NameCaseLower {
+		for n, name := range names {
+			names[n] = strings.ToLower(name)
+		}
+	}
+
+	return names, nil
 }
 
 func looksLikeRegex(input string) bool {

diff --git a/internal/tengo/docker.go b/internal/tengo/docker.go
@@ -67,6 +67,8 @@ type DockerizedInstanceOptions struct {
 	Image             string
 	RootPassword      string
 	DefaultConnParams string
+	DataBindMount     string // Host path to bind-mount as /var/lib/mysql in container
+	CommandArgs       []string
 }
 
 // CreateInstance attempts to create a Docker container with the supplied name
@@ -113,6 +115,7 @@ func (dc *DockerClient) CreateInstance(opts DockerizedInstanceOptions) (*Dockeri
 		Config: &docker.Config{
 			Image: opts.Image,
 			Env:   env,
+			Cmd:   opts.CommandArgs,
 		},
 		HostConfig: &docker.HostConfig{
 			PortBindings: map[docker.Port][]docker.PortBinding{
@@ -122,6 +125,9 @@ func (dc *DockerClient) CreateInstance(opts DockerizedInstanceOptions) (*Dockeri
 			},
 		},
 	}
+	if opts.DataBindMount != "" {
+		ccopts.HostConfig.Binds = []string{opts.DataBindMount + ":/var/lib/mysql"}
+	}
 	di := &DockerizedInstance{
 		DockerizedInstanceOptions: opts,
 		Manager:                   dc,
@@ -268,7 +274,11 @@ func (di *DockerizedInstance) TryConnect() (err error) {
 	if err != nil {
 		return err
 	}
-	for attempts := 0; attempts < 120; attempts++ {
+	maxAttempts := 120
+	if di.DataBindMount != "" { // bind mounted dir causes slower startup
+		maxAttempts *= 2
+	}
+	for attempts := 0; attempts < maxAttempts; attempts++ {
 		if ok, err = di.Instance.CanConnect(); ok {
 			return err
 		}