Skip to content

Commit

Permalink
Support DB servers with lower_case_table_names=1 or 2 (Windows / MacOS)
Browse files Browse the repository at this point in the history
This commit adds initial support for database servers with non-zero values for
lower_case_table_names ("LCTN" below), typically meaning a database instance
running natively on Windows or MacOS. These OSes use case-insensitive file-
systems by default, which previously caused problems in Skeema if any table
names included uppercase characters in their .sql CREATE statement, or any
schema names included uppercase characters in .skeema files.

The following changes are included in this commit:

* In all commands, Skeema can now properly map *.sql CREATE statements to
  introspected database objects even if the server uses LCTN=1 or LCTN=2 and
  the objects have mixed-case names.

* When using workspace=docker, if the real database server uses LCTN=1, any
  newly-created Dockerized database instance will be configured to use LCTN=1
  as well. (No equivalent behavior is needed for LCTN=2, since that mode
  retains table name casing.)

* With LCTN=1 or LCTN=2, all operations will confirm that there are no case-
  insensitive duplicate table names among the tables in a single
  subdirectory's *.sql files. For example, if a directory contains both CREATE
  TABLE Foo and CREATE TABLE foo, this is illegal with LCTN=1 or LCTN=2 and
  an error is now thrown before the operation is even attempted. With LCTN=0,
  this situation is still permitted (table names are case-sensitive, e.g. on a
  Linux database server, so these are distinct tables) and no error is thrown.

* When using `skeema diff` or `skeema push` in a sharded environment (the
  .skeema file configures the subdir to map to multiple hosts in the same
  environment name), all shards must have the same LCTN value. If not, Skeema
  detects this situation and throws a fatal error.

* Integration test coverage for LCTN=1 is included. By default it is run in CI
  but not locally; to enable locally, set env var SKEEMA_TEST_LCTN=true. This
  adds to test execution time since a separate Dockerized database instance
  must be created for this test.

* Integration test coverage for LCTN=2 is included. By default it is not run,
  as it can only be tested on a MacOS host. To enable locally, set
  env var SKEEMA_TEST_LCTN=true on a MacOS host. This adds to test execution
  time since a separate Dockerized database instance must be created for this
  test, and it must bind-mount a host path for the DB's data directory to use
  a case-insensitive data volume.

Fixes #65.

Several additional changes specific to views are omitted here, since Skeema
Community Edition does not support views. In the upcoming v1.8 release, the
Premium Edition CLI will include all changes from this commit, plus additional
views-specific fixes and integration testing. On MariaDB in particular, when
using LCTN=2 and a mixed-case schema name, SHOW CREATE VIEW has some odd bugs;
the Premium Edition CLI properly detects this and uses a workaround to ensure
correct behavior even in that situation.
  • Loading branch information
evanelias committed Apr 18, 2022
1 parent 2d3d7c4 commit bdc279e
Show file tree
Hide file tree
Showing 12 changed files with 572 additions and 69 deletions.
8 changes: 8 additions & 0 deletions cmd_pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,14 @@ func pullSchemaDir(dir *fs.Dir, instance *tengo.Instance) (schemaNames []string,
// more schemas. A slice of handled schema names is returned, along with any
// error encountered.
func pullLogicalSchema(dir *fs.Dir, instance *tengo.Instance, logicalSchema *fs.LogicalSchema) (schemaNames []string, err error) {
// With non-zero lower_case_table_names, force names to lowercase as needed in
// logicalSchema, so that statements can be correctly linked to objects
if lctn := instance.NameCaseMode(); lctn > tengo.NameCaseAsIs {
if err := logicalSchema.LowerCaseNames(lctn); err != nil {
return nil, err
}
}

if logicalSchema.Name != "" {
schemaNames = []string{logicalSchema.Name}
} else if schemaNames, err = dir.SchemaNames(instance); err != nil {
Expand Down
13 changes: 13 additions & 0 deletions internal/applier/target.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,19 @@ func targetsForLogicalSchema(logicalSchema *fs.LogicalSchema, dir *fs.Dir, insta
return nil, len(instances)
}

// Confirm all instances have the same lower_case_table_names; mixing isn't
// supported within a single operation
if len(instances) > 1 && instances[0].NameCaseMode() != tengo.NameCaseUnknown {
lctn := instances[0].NameCaseMode()
for _, other := range instances[1:] {
if compare := other.NameCaseMode(); compare != tengo.NameCaseUnknown && compare != lctn {
log.Errorf("Skipping %s: all database servers mapped by the same subdirectory and environment must have the same value for lower_case_table_names.", dir)
log.Errorf("Instance %s has lower_case_table_names=%d, but instance %s has lower_case_table_names=%d.", instances[0], lctn, other, compare)
return nil, len(instances)
}
}
}

// Obtain a *tengo.Schema representation of the dir's *.sql files from a
// workspace
opts, err := workspace.OptionsForDir(dir, instances[0])
Expand Down
140 changes: 88 additions & 52 deletions internal/dumper/dumper.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package dumper
import (
"errors"
"fmt"
"runtime"
"strings"

log "github.com/sirupsen/logrus"
Expand Down Expand Up @@ -65,26 +66,17 @@ func rewriteSQLFile(file *fs.TokenizedSQLFile) error {
// a result of the dump operation. The directory's parsed values are modified
// in-place by this function, but nothing is written to the filesystem yet.
func modifiedFiles(schema *tengo.Schema, dir *fs.Dir, opts Options) []*fs.TokenizedSQLFile {
fileMap := make(map[string]*fs.TokenizedSQLFile) // filePath string -> tokenized file
fileDirty := make(map[string]bool) // filePath string -> bool

// TODO: handle dirs that contain multiple logical schemas by name
var logicalSchema *fs.LogicalSchema
if len(dir.LogicalSchemas) > 0 {
logicalSchema = dir.LogicalSchemas[0]
} else {
logicalSchema = &fs.LogicalSchema{}
}
keySeen := make(map[tengo.ObjectKey]bool, len(logicalSchema.Creates))
for key, stmt := range logicalSchema.Creates {
keySeen[key] = true
if filePath := stmt.FromFile.String(); fileMap[filePath] == nil {
fileMap[filePath] = stmt.FromFile
}
}
fm := newFileMap(logicalSchema)

for key, object := range schema.Objects() {
delete(keySeen, key) // filter keySeen to just be things that *aren't* in the DB
dbObjects := schema.Objects()
for key, object := range dbObjects {
if opts.shouldIgnore(object) {
continue
}
Expand Down Expand Up @@ -126,59 +118,42 @@ func modifiedFiles(schema *tengo.Schema, dir *fs.Dir, opts Options) []*fs.Tokeni
continue
}

var filePath string
if stmt == nil {
filePath = fs.PathForObject(dir.Path, key.Name)
} else {
filePath = stmt.FromFile.Path()
}
fileDirty[filePath] = true
if fileMap[filePath] == nil {
fileMap[filePath] = &fs.TokenizedSQLFile{
SQLFile: fs.SQLFile{
Dir: dir.Path,
FileName: fs.FileNameForObject(key.Name),
},
}
}

if opts.CountOnly {
continue // Don't mutate stmt if CountOnly
}

// Append the statement if it doesn't exist in fs yet; otherwise update it.
// If we reach this point, we need to mark the statement's file as dirty, and
// update/append its in-memory representation unless CountOnly was requested.
// We "cheat" by potentially omitting some fs fields and potentially including
// DELIMITER wrappers in a single Statement.Text, but this still works fine
// for rewriting the file later.
if stmt == nil {
f := fileMap[filePath]
stmt = &fs.Statement{
Type: fs.StatementTypeCreate,
ObjectType: key.Type,
ObjectName: key.Name,
FromFile: f,
if stmt != nil { // statement already in fs
fm.markDirty(stmt.FromFile)
if !opts.CountOnly {
stmt.Text = canonicalCreate
}
} else { // statement not in fs, needs to be appended to file
f := fm.file(dir, object)
fm.markDirty(f)
if !opts.CountOnly {
f.Statements = append(f.Statements, &fs.Statement{
Type: fs.StatementTypeCreate,
ObjectType: key.Type,
ObjectName: key.Name,
FromFile: f,
Text: canonicalCreate,
})
}
f.Statements = append(f.Statements, stmt)
}
stmt.Text = canonicalCreate
}

// Remaining keys in keySeen do not exist in db, so remove them
for key := range keySeen {
if !opts.shouldIgnore(key) {
stmt := logicalSchema.Creates[key]
// Handle create statements that are in FS but do not exist in DB
for key, stmt := range logicalSchema.Creates {
if _, inDB := dbObjects[key]; !inDB && !opts.shouldIgnore(key) {
fm.markDirty(stmt.FromFile)
if !opts.CountOnly {
stmt.Remove()
}
fileDirty[stmt.FromFile.String()] = true
}
}

var result []*fs.TokenizedSQLFile
for filePath := range fileDirty {
result = append(result, fileMap[filePath])
}
return result
return fm.dirtyFiles()
}

// AddDelimiter takes the supplied string and appends a delimiter to the end.
Expand All @@ -205,3 +180,64 @@ func verifyCanParse(key tengo.ObjectKey, statementBody string) bool {
}
return ok
}

// uniquePath returns a normalized form of path p, suitable for use as a map
// key. On MacOS ("darwin") and Windows, which typically have case-insensitive
// filesystems, the path is lower-cased so that paths differing only by case
// collapse to a single key. On any other OS, p is returned unchanged.
func uniquePath(p string) string {
	switch runtime.GOOS {
	case "darwin", "windows":
		return strings.ToLower(p)
	default:
		return p
	}
}

// fileMap tracks tokenized SQL files by path, along with which of those paths
// have been marked dirty. The keys of both maps are file paths that have been
// normalized through uniquePath, so that lookups behave consistently on OSes
// that typically have case-insensitive filesystems.
type fileMap struct {
all map[string]*fs.TokenizedSQLFile // all files, normalized filePath string -> tokenized sql file
dirty map[string]bool // dirty files, normalized filePath string -> bool
}

// newFileMap returns a fileMap which already tracks each unique file
// referenced by the CREATE statements of logicalSchema. No files are marked
// as dirty in the returned value.
func newFileMap(logicalSchema *fs.LogicalSchema) *fileMap {
	tracked := make(map[string]*fs.TokenizedSQLFile, len(logicalSchema.Creates))
	for _, create := range logicalSchema.Creates {
		// Several statements may live in one file; keying by normalized path
		// means each file is only tracked once.
		tracked[uniquePath(create.FromFile.Path())] = create.FromFile
	}
	return &fileMap{
		all:   tracked,
		dirty: make(map[string]bool),
	}
}

// file returns the tokenized SQL file corresponding to the path that
// fs.PathForObject yields for the supplied object's name inside of dir. If a
// file at that (normalized) path is already tracked, it is returned as-is.
// Otherwise, a new TokenizedSQLFile value is instantiated for that dir and
// object name, added to the set of tracked files, and returned.
func (fm *fileMap) file(dir *fs.Dir, keyer tengo.ObjectKeyer) *fs.TokenizedSQLFile {
	name := keyer.ObjectKey().Name
	mapKey := uniquePath(fs.PathForObject(dir.Path, name))

	sqlFile := fm.all[mapKey]
	if sqlFile == nil {
		// Not tracked yet: create the in-memory representation and remember it
		// under the normalized path, so later lookups find the same value.
		sqlFile = &fs.TokenizedSQLFile{
			SQLFile: fs.SQLFile{
				Dir:      dir.Path,
				FileName: fs.FileNameForObject(name),
			},
		}
		fm.all[mapKey] = sqlFile
	}
	return sqlFile
}

// markDirty records the supplied file as dirty, keyed by its normalized path.
func (fm *fileMap) markDirty(f *fs.TokenizedSQLFile) {
	key := uniquePath(f.Path())
	fm.dirty[key] = true
}

// dirtyFiles returns the tracked file for each path that has been marked as
// dirty. The order of the result is unspecified, since it comes from map
// iteration. A nil slice is returned if nothing has been marked dirty.
func (fm *fileMap) dirtyFiles() []*fs.TokenizedSQLFile {
	var files []*fs.TokenizedSQLFile
	// Keys of fm.dirty and fm.all were both normalized by uniquePath() at
	// insertion time, so they can be used for direct lookups here.
	for key := range fm.dirty {
		files = append(files, fm.all[key])
	}
	return files
}
66 changes: 65 additions & 1 deletion internal/fs/dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,60 @@ func (logicalSchema *LogicalSchema) AddStatement(stmt *Statement) error {
}
}

// LowerCaseNames modifies logicalSchema in-place so that its names follow the
// casing rules of the supplied NameCaseMode:
//
//   - tengo.NameCaseLower (lower_case_table_names=1): the schema name and all
//     table names are forced to lower-case, and the Creates map is re-keyed
//     accordingly.
//   - tengo.NameCaseInsensitive (lower_case_table_names=2): nothing is
//     renamed, but table names are still checked for case-insensitive
//     duplicates.
//   - any other mode: no changes and no checks are performed.
//
// A DuplicateDefinitionError is returned if two CREATE TABLE statements have
// names that differ only by case. Note that in NameCaseLower mode, some names
// may already have been lower-cased by the time such an error is returned.
func (logicalSchema *LogicalSchema) LowerCaseNames(mode tengo.NameCaseMode) error {
	// dupeErr builds the error describing a collision between a previously
	// seen statement and a later one with a case-insensitively equal name.
	dupeErr := func(first, dupe *Statement) error {
		return DuplicateDefinitionError{
			ObjectKey: dupe.ObjectKey(),
			FirstFile: first.File,
			FirstLine: first.LineNo,
			DupeFile:  dupe.File,
			DupeLine:  dupe.LineNo,
		}
	}

	if mode == tengo.NameCaseLower { // lower_case_table_names=1
		// Both schema names and table names are forced lowercase in this mode
		logicalSchema.Name = strings.ToLower(logicalSchema.Name)
		lowered := make(map[tengo.ObjectKey]*Statement, len(logicalSchema.Creates))
		for key, create := range logicalSchema.Creates {
			if key.Type == tengo.ObjectTypeTable {
				// key is a per-iteration copy, so adjusting it only affects
				// the key used in the replacement map
				key.Name = strings.ToLower(key.Name)
				create.ObjectName = strings.ToLower(create.ObjectName)
				if first, exists := lowered[key]; exists {
					return dupeErr(first, create)
				}
			}
			lowered[key] = create
		}
		logicalSchema.Creates = lowered
	} else if mode == tengo.NameCaseInsensitive { // lower_case_table_names=2
		// Only view names are forced to lowercase in this mode, and the Community
		// Edition codebase does not support views, so there is nothing to rename.
		// Table names must still be unique case-insensitively though.
		seen := make(map[string]*Statement)
		for key, create := range logicalSchema.Creates {
			if key.Type != tengo.ObjectTypeTable {
				continue
			}
			lowerName := strings.ToLower(key.Name)
			if first, exists := seen[lowerName]; exists {
				return dupeErr(first, create)
			}
			seen[lowerName] = create
		}
	}
	return nil
}

// ParseDir parses the specified directory, including all *.sql files in it,
// its .skeema config file, and all .skeema config files of its parent
// directory hierarchy. Evaluation of parent dirs stops once we hit either a
Expand Down Expand Up @@ -440,7 +494,17 @@ func (dir *Dir) SchemaNames(instance *tengo.Instance) (names []string, err error
if err != nil {
return nil, err
}
return filterSchemaNames(names, ignoreSchema), nil
names = filterSchemaNames(names, ignoreSchema)

// If the instance has lower_case_table_names=1, force result to lowercase,
// to handle cases where a user has manually configured a mixed-case name
if instance.NameCaseMode() == tengo.NameCaseLower {
for n, name := range names {
names[n] = strings.ToLower(name)
}
}

return names, nil
}

func looksLikeRegex(input string) bool {
Expand Down
12 changes: 11 additions & 1 deletion internal/tengo/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ type DockerizedInstanceOptions struct {
Image string
RootPassword string
DefaultConnParams string
DataBindMount string // Host path to bind-mount as /var/lib/mysql in container
CommandArgs []string
}

// CreateInstance attempts to create a Docker container with the supplied name
Expand Down Expand Up @@ -113,6 +115,7 @@ func (dc *DockerClient) CreateInstance(opts DockerizedInstanceOptions) (*Dockeri
Config: &docker.Config{
Image: opts.Image,
Env: env,
Cmd: opts.CommandArgs,
},
HostConfig: &docker.HostConfig{
PortBindings: map[docker.Port][]docker.PortBinding{
Expand All @@ -122,6 +125,9 @@ func (dc *DockerClient) CreateInstance(opts DockerizedInstanceOptions) (*Dockeri
},
},
}
if opts.DataBindMount != "" {
ccopts.HostConfig.Binds = []string{opts.DataBindMount + ":/var/lib/mysql"}
}
di := &DockerizedInstance{
DockerizedInstanceOptions: opts,
Manager: dc,
Expand Down Expand Up @@ -268,7 +274,11 @@ func (di *DockerizedInstance) TryConnect() (err error) {
if err != nil {
return err
}
for attempts := 0; attempts < 120; attempts++ {
maxAttempts := 120
if di.DataBindMount != "" { // bind mounted dir causes slower startup
maxAttempts *= 2
}
for attempts := 0; attempts < maxAttempts; attempts++ {
if ok, err = di.Instance.CanConnect(); ok {
return err
}
Expand Down

0 comments on commit bdc279e

Please sign in to comment.