Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pull in set of reserved words from Redshift docs #117

Merged
merged 5 commits into from
May 4, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Expand Up @@ -13,6 +13,8 @@
(`Issue #110 <https://github.com/sqlalchemy-redshift/sqlalchemy-redshift/issues/110>`_)
- Allow tests to tolerate new default column encodings in Redshift
(`Issue #114 <https://github.com/sqlalchemy-redshift/sqlalchemy-redshift/pull/114>`_)
- Pull in set of reserved words from Redshift docs
(`Issue #94 <https://github.com/sqlalchemy-redshift/sqlalchemy-redshift/issues/94>`_)


0.5.0 (2016-04-21)
Expand Down
39 changes: 38 additions & 1 deletion sqlalchemy_redshift/dialect.py
Expand Up @@ -4,7 +4,9 @@
import pkg_resources
import sqlalchemy as sa
from sqlalchemy import Column, exc, inspect
from sqlalchemy.dialects.postgresql.base import PGCompiler, PGDDLCompiler
from sqlalchemy.dialects.postgresql.base import (
PGCompiler, PGDDLCompiler, PGIdentifierPreparer
)
from sqlalchemy.dialects.postgresql.psycopg2 import PGDialect_psycopg2
from sqlalchemy.engine import reflection
from sqlalchemy.ext.compiler import compiles
Expand Down Expand Up @@ -96,6 +98,36 @@ class RedshiftImpl(postgresql.PostgresqlImpl):
\s* \) \s* # Arbitrary whitespace and literal ')'
""", re.VERBOSE)

# Reserved words as extracted from Redshift docs.
# Command used to extract:
# curl -q "https://docs.aws.amazon.com/redshift/latest/dg/r_pg_keywords.html" | tr '\n' '\r' | sed 's/.*\(AES128.*WITHOUT\).*/\1/' | tr '\r' '\n' | sed 's/^\([A-Z0-9_]*\).*/"\1",/' | paste -s -d' ' - | fold -s -w 70 | awk '{print " "$0}' | tr 'A-Z' 'a-z' # noqa
Copy link
Member

@graingert graingert May 4, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd probably just create a reserved words file and read it with

RESERVED_WORDS = set(pkg_resources.resource_string(__name__, 'reserved-words.txt').split('\n'))

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you also port your shell script to python and move it somewhere outside of sqlalchemy_redshift?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The postgres base dialect has the list hardcoded, so I like keeping with what that's doing. This seems like the simplest approach to make it obvious what's going on, rather than adding indirection by separating it to a different file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I guess it depends how often it changes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

put the script in './script.sh' and refer to it in the comment, rather than the whole thing with #noqa

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, and of course the URL is https://docs.aws.amazon.com/redshift/latest/dg/r_pg_keywords.html

# Lower-case words that the Redshift documentation lists as reserved.
# Entries are grouped by initial letter; keep the set mutable (plain ``set``)
# so its type matches what existing consumers already receive.
RESERVED_WORDS = {
    # a
    "aes128", "aes256", "all", "allowoverwrite", "analyse", "analyze",
    "and", "any", "array", "as", "asc", "authorization",
    # b
    "backup", "between", "binary", "blanksasnull", "both", "bytedict",
    "bzip2",
    # c
    "case", "cast", "check", "collate", "column", "constraint", "create",
    "credentials", "cross", "current_date", "current_time",
    "current_timestamp", "current_user", "current_user_id",
    # d
    "default", "deferrable", "deflate", "defrag", "delta", "delta32k",
    "desc", "disable", "distinct", "do",
    # e
    "else", "emptyasnull", "enable", "encode", "encrypt", "encryption",
    "end", "except", "explicit",
    # f
    "false", "for", "foreign", "freeze", "from", "full",
    # g
    "globaldict256", "globaldict64k", "grant", "group", "gzip",
    # h
    "having",
    # i
    "identity", "ignore", "ilike", "in", "initially", "inner",
    "intersect", "into", "is", "isnull",
    # j
    "join",
    # l
    "leading", "left", "like", "limit", "localtime", "localtimestamp",
    "lun", "luns", "lzo", "lzop",
    # m
    "minus", "mostly13", "mostly32", "mostly8",
    # n
    "natural", "new", "not", "notnull", "null", "nulls",
    # o
    "off", "offline", "offset", "oid", "old", "on", "only", "open", "or",
    "order", "outer", "overlaps",
    # p
    "parallel", "partition", "percent", "permissions", "placing",
    "primary",
    # r
    "raw", "readratio", "recover", "references", "respect", "rejectlog",
    "resort", "restore", "right",
    # s
    "select", "session_user", "similar", "snapshot", "some", "sysdate",
    "system",
    # t
    "table", "tag", "tdes", "text255", "text32k", "then", "timestamp",
    "to", "top", "trailing", "true", "truncatecolumns",
    # u
    "union", "unique", "user", "using",
    # v
    "verbose",
    # w
    "wallet", "when", "where", "with", "without",
}


class RelationKey(namedtuple('RelationKey', ('name', 'schema'))):
"""
Expand Down Expand Up @@ -312,6 +344,10 @@ def _fetch_redshift_column_attributes(self, column):
return text


class RedshiftIdentifierPreparer(PGIdentifierPreparer):
    """Identifier preparer that uses the Redshift reserved-word set."""

    # Point the preparer at the module-level Redshift word list rather than
    # the list inherited from the PostgreSQL preparer.
    reserved_words = RESERVED_WORDS


class RedshiftDialect(PGDialect_psycopg2):
"""
Define Redshift-specific behavior.
Expand All @@ -326,6 +362,7 @@ class RedshiftDialect(PGDialect_psycopg2):

statement_compiler = RedshiftCompiler
ddl_compiler = RedshiftDDLCompiler
preparer = RedshiftIdentifierPreparer
construct_arguments = [
(sa.schema.Index, {
"using": False,
Expand Down
10 changes: 10 additions & 0 deletions tests/rs_sqla_test_utils/models.py
Expand Up @@ -159,6 +159,16 @@ class ReflectionDelimitedIdentifiers2(Base):
)


class ReflectionCustomReservedWords(Base):
    """Reflection fixture whose table and column names need quoting.

    The table name ``aes256`` and the columns ``open`` and ``tag`` are
    expected to be emitted quoted in the generated DDL, while ``pkey`` is
    emitted bare.
    """

    __tablename__ = 'aes256'
    # Declaration order is kept as-is: it determines column order in the DDL.
    col1 = sa.Column('open', sa.Integer())
    col2 = sa.Column('tag', sa.Integer())
    pkey = sa.Column('pkey', sa.Integer(), primary_key=True)
    __table_args__ = {'redshift_diststyle': 'EVEN'}


class ReflectionDelimitedTableName(Base):
__tablename__ = 'this.table'
col1 = sa.Column('id', sa.Integer(), primary_key=True)
Expand Down
8 changes: 8 additions & 0 deletions tests/test_reflection.py
Expand Up @@ -111,6 +111,14 @@ def table_to_ddl(table):
PRIMARY KEY ("excellent! & column"),
) DISTSTYLE EVEN
''')),
(models.ReflectionCustomReservedWords, '''
CREATE TABLE "aes256" (
"open" INTEGER,
"tag" INTEGER,
pkey INTEGER NOT NULL,
PRIMARY KEY (pkey)
) DISTSTYLE EVEN
'''),
(models.Referencing, '''
CREATE TABLE other_schema.referencing (
referenced_table_id INTEGER NOT NULL,
Expand Down