Skip to content

Commit

Permalink
Prevent use of CREATE TABLE...LIKE and CREATE TABLE...SELECT
Browse files Browse the repository at this point in the history
These forms of CREATE TABLE do not currently work properly in Skeema, so this
commit now treats them as unparsable statements:

* Both forms introduce potential ordering constraints, since these statements
  refer to other tables, which may or may not be in the same schema. Skeema
  does not currently track dependencies between tables (even in the case of
  foreign keys, since FOREIGN_KEY_CHECKS=0 avoids the need).

* CREATE TABLE...SELECT inherently mixes DDL with DML, and Skeema does not
  generally support DML yet. Presence of rows in a workspace table prevent
  the workspace from being cleaned up, causing a breakage that must be
  cleaned manually.
  • Loading branch information
evanelias committed May 13, 2019
1 parent 1bbc114 commit 007fd7b
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 9 deletions.
35 changes: 31 additions & 4 deletions fs/statement.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,17 @@ func (stmt *Statement) Remove() {
func CanParse(input string) bool {
sqlStmt := &sqlStatement{}
err := nameParser.ParseString(input, sqlStmt)
return err == nil
return err == nil && !sqlStmt.forbidden()
}

//////////// lexing/parsing internals from here to end of this file ////////////

// TODO: The current state of lexing and parsing in this file is a mess. First
// there's a manually-coded lexer to split files into statements, and then
// there's a separate regexp-based lexer for splitting statements into tokens,
// followed by a parser for identifying the statement type and any identifier
// names. These should all be unified, which would improve performance and
// reduce the amount of code.
type statementTokenizer struct {
filePath string
delimiter string // statement delimiter, typically ";" or sometimes "//" for routines
Expand Down Expand Up @@ -344,7 +352,7 @@ func (ls *lineState) parseStatement() {
ls.stmt.Type = StatementTypeNoop
} else {
sqlStmt := &sqlStatement{}
if err := nameParser.ParseString(txt, sqlStmt); err != nil {
if err := nameParser.ParseString(txt, sqlStmt); err != nil || sqlStmt.forbidden() {
return
} else if sqlStmt.UseCommand != nil {
ls.stmt.Type = StatementTypeCommand
Expand Down Expand Up @@ -423,6 +431,23 @@ type sqlStatement struct {
DelimiterCommand *delimiterCommand `parser:"| @@"`
}

// forbidden returns true if the statement can be parsed, but is of a disallowed
// form by this package.
func (sqlStmt *sqlStatement) forbidden() bool {
// Forbid CREATE TABLE...LIKE and CREATE TABLE...SELECT. Both are potentially
// ordering-dependent; and the latter mixes DML, which violates "workspace
// tables should be empty" validations.
if sqlStmt.CreateTable != nil {
for _, token := range sqlStmt.CreateTable.Body.Contents {
token = strings.ToUpper(token)
if token == "LIKE" || token == "SELECT" {
return true
}
}
}
return false
}

// objectName represents the name of an object, which may or may not be
// backtick-wrapped, and may or may not have multiple qualifier parts (each
// also potentially backtick-wrapped).
Expand All @@ -441,9 +466,11 @@ func (n *objectName) schemaAndTable() (string, string) {
return "", stripBackticks(n.Name)
}

// body slurps all body contents of a statement.
// body slurps all body contents of a statement. Note that "body" and
// "statement" here are used with respect to the parser internals, and do NOT
// refer to Statement or Statement.Body().
type body struct {
Contents string `parser:"(Word | String | Number | Operator)*"`
Contents []string `parser:"(@Word | @String | @Number | @Operator)*"`
}

// definer represents a user who is the definer of a routine or view.
Expand Down
16 changes: 11 additions & 5 deletions fs/statement_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,17 @@ func TestStripAnyQuote(t *testing.T) {

func TestCanParse(t *testing.T) {
cases := map[string]bool{
"CREATE TABLE foo (\n\tid int\n) ;\n": true,
"USE some_db\n\n": true,
"INSERT INTO foo VALUES (';')": false,
"bork bork bork": false,
"# hello": false,
"CREATE TABLE foo (\n\t`id` int unsigned DEFAULT '0'\n) ;\n": true,
"CREATE TABLE IF not EXISTS foo (\n\tid int\n) ;\n": true,
"USE some_db\n\n": true,
"INSERT INTO foo VALUES (';')": false,
"bork bork bork": false,
"# hello": false,
"CREATE TEMPORARY TABLE foo (\n\tid int\n) ;\n": false,
"CREATE TABLE foo LIKE bar": false,
"CREATE TABLE foo (like bar)": false,
"CREATE TABLE foo2 select * from foo": false,
"CREATE TABLE foo2 (id int) AS select * from foo": false,
}
for input, expected := range cases {
if actual := CanParse(input); actual != expected {
Expand Down

0 comments on commit 007fd7b

Please sign in to comment.