Skip to content

Commit

Permalink
Merge pull request #51 from shinichi-takii/feature/add-mysql-datatype
Browse files Browse the repository at this point in the history
Add supports MySQL data-type
  • Loading branch information
shinichi-takii committed Jul 18, 2020
2 parents a581a27 + e6662bc commit 4e7e09a
Show file tree
Hide file tree
Showing 7 changed files with 567 additions and 252 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG.md
Expand Up @@ -5,6 +5,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [1.6.0] - 2020-07-18
### Added
- Add property.
- `DdlParseColumn.is_unsigned`
- `DdlParseColumn.is_zerofill`
- Add support for numeric type attributes of MySQL.
- `UNSIGNED`
- `ZEROFILL`
- Add support for MySQL data types.
- `TINYINT`
- `SMALLINT`
- `MEDIUMINT`
- `DEC`
- `FIXED`

### Fixed
- Minor fix.


## [1.5.0] - 2020-07-06
### Added
- Add support for Python 3.8
Expand Down Expand Up @@ -142,6 +161,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Initial released.


[1.6.0]: https://github.com/shinichi-takii/ddlparse/compare/v1.5.0...v1.6.0
[1.5.0]: https://github.com/shinichi-takii/ddlparse/compare/v1.4.0...v1.5.0
[1.4.0]: https://github.com/shinichi-takii/ddlparse/compare/v1.3.1...v1.4.0
[1.3.1]: https://github.com/shinichi-takii/ddlparse/compare/v1.3.0...v1.3.1
Expand Down
40 changes: 23 additions & 17 deletions README.md
Expand Up @@ -50,14 +50,18 @@ $ pip install ddlparse --upgrade
### Example

```python
from ddlparse.ddlparse import DdlParse
import json

from ddlparse import DdlParse

sample_ddl = """
CREATE TABLE My_Schema.Sample_Table (
Id integer PRIMARY KEY COMMENT 'User ID',
Name varchar(100) NOT NULL COMMENT 'User name',
Total bigint NOT NULL,
Avg decimal(5,1) NOT NULL,
Point int(10) unsigned,
Zerofill_Id integer unsigned zerofill NOT NULL,
Created_At date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
UNIQUE (NAME)
);
Expand Down Expand Up @@ -111,22 +115,24 @@ print(table.to_bigquery_fields(DdlParse.NAME_CASE.upper))

print("* COLUMN *")
for col in table.columns.values():
col_info = []
col_info.append("name = {}".format(col.name))
col_info.append("data_type = {}".format(col.data_type))
col_info.append("length = {}".format(col.length))
col_info.append("precision(=length) = {}".format(col.precision))
col_info.append("scale = {}".format(col.scale))
col_info.append("constraint = {}".format(col.constraint))
col_info.append("not_null = {}".format(col.not_null))
col_info.append("PK = {}".format(col.primary_key))
col_info.append("unique = {}".format(col.unique))
col_info.append("bq_legacy_data_type = {}".format(col.bigquery_legacy_data_type))
col_info.append("bq_standard_data_type = {}".format(col.bigquery_standard_data_type))
col_info.append("comment = '{}'".format(col.comment))
col_info.append("description(=comment) = '{}'".format(col.description))
col_info.append("BQ {}".format(col.to_bigquery_field()))
print(" : ".join(col_info))
col_info = {}
col_info["name"] = col.name
col_info["data_type"] = col.data_type
col_info["length"] = col.length
col_info["precision(=length)"] = col.precision
col_info["scale"] = col.scale
col_info["is_unsigned"] = col.is_unsigned
col_info["is_zerofill"] = col.is_zerofill
col_info["constraint"] = col.constraint
col_info["not_null"] = col.not_null
col_info["PK"] = col.primary_key
col_info["unique"] = col.unique
col_info["bq_legacy_data_type"] = col.bigquery_legacy_data_type
col_info["bq_standard_data_type"] = col.bigquery_standard_data_type
col_info["comment"] = col.comment
col_info["description(=comment)"] = col.description
col_info["bigquery_field"] = json.loads(col.to_bigquery_field())
print(json.dumps(col_info, indent=2, ensure_ascii=False))

print("* DDL (CREATE TABLE) statements *")
print(table.to_bigquery_ddl())
Expand Down
2 changes: 1 addition & 1 deletion ddlparse/__init__.py
Expand Up @@ -8,7 +8,7 @@
from .ddlparse import *

__copyright__ = 'Copyright (C) 2018-2020 Shinichi Takii'
__version__ = '1.5.0'
__version__ = '1.6.0'
__license__ = 'BSD-3-Clause'
__author__ = 'Shinichi Takii'
__author_email__ = 'shinichi.takii@gmail.com'
Expand Down
61 changes: 36 additions & 25 deletions ddlparse/ddlparse.py
Expand Up @@ -92,6 +92,14 @@ def __init__(self, name, data_type_array, array_brackets=None, constraint=None,
def data_type(self):
return self._data_type

@property
def is_unsigned(self):
"""True if the column's numeric type carries MySQL's UNSIGNED attribute (set from the parsed type tokens in _set_data_type)."""
return self._numeric_is_unsigned

@property
def is_zerofill(self):
"""True if the column's numeric type carries MySQL's ZEROFILL attribute (set from the parsed type tokens in _set_data_type)."""
return self._numeric_is_zerofill

@property
def length(self):
"""Declared length/precision of the data type: an int, the literal "*" wildcard, or None when unspecified."""
return self._length
Expand All @@ -105,20 +113,17 @@ def scale(self):
return self._scale

# NOTE(review): this span is a rendered diff hunk — it interleaves the
# pre-change ("removed") and post-change ("added") lines of _set_data_type
# without +/- markers. The annotations below reconstruct which side each line
# belongs to; confirm against the upstream commit before treating this text
# as runnable code.
def _set_data_type(self, data_type_array):
# removed: data type was the first token of a plain list
self._data_type = data_type_array[0].upper()
# added: data type now joins the named "type_name" parse results
self._data_type = ' '.join(data_type_array["type_name"]).upper()
# added: capture MySQL numeric attributes (UNSIGNED / ZEROFILL) as booleans
self._numeric_is_unsigned = True if "unsigned" in data_type_array else False
self._numeric_is_zerofill = True if "zerofill" in data_type_array else False
self._length = None
self._scale = None

# removed: old early-exit plus positional parsing of length/scale and of a
# trailing type keyword
if len(data_type_array) < 2:
return

matches = re.findall(r"([\d\*]+)\s*,*\s*(\d*)", data_type_array[-1])
if len(matches) > 0:
self._length = matches[0][0] if matches[0][0] == "*" else int(matches[0][0])
self._scale = None if len(matches[0]) < 2 or matches[0][1] == "" or int(matches[0][1]) == 0 else int(matches[0][1])

if re.search(r"^[^\d\*]+", data_type_array[1]):
self._data_type += " {}".format(data_type_array[1])
# added: length/scale now read from the named "length" result; "*" is kept
# as-is, and an empty or zero scale collapses to None
if "length" in data_type_array:
matches = re.findall(r"([\d\*]+)\s*,*\s*(\d*)", data_type_array["length"])
if len(matches) > 0:
self._length = matches[0][0] if matches[0][0] == "*" else int(matches[0][0])
self._scale = None if len(matches[0]) < 2 or matches[0][1] == "" or int(matches[0][1]) == 0 else int(matches[0][1])


@property
Expand Down Expand Up @@ -224,7 +229,7 @@ def bigquery_data_type(self):
or (self._source_database is not None and source_db is None)):
return bq_type

if self._data_type in ["NUMERIC", "NUMBER", "DECIMAL"]:
if self._data_type in ["NUMERIC", "NUMBER", "DECIMAL", "DEC", "FIXED"]:
if self._length is None:
if self._source_database in [self.DATABASE.oracle, self.DATABASE.postgresql]:
return "NUMERIC"
Expand Down Expand Up @@ -489,6 +494,8 @@ class DdlParse(DdlParseBase):
_LPAR, _RPAR, _COMMA, _SEMICOLON, _DOT, _DOUBLEQUOTE, _BACKQUOTE, _SPACE = map(Suppress, "(),;.\"` ")
_CREATE, _TABLE, _TEMP, _CONSTRAINT, _NOT_NULL, _PRIMARY_KEY, _UNIQUE, _UNIQUE_KEY, _FOREIGN_KEY, _REFERENCES, _KEY, _CHAR_SEMANTICS, _BYTE_SEMANTICS = \
map(CaselessKeyword, "CREATE, TABLE, TEMP, CONSTRAINT, NOT NULL, PRIMARY KEY, UNIQUE, UNIQUE KEY, FOREIGN KEY, REFERENCES, KEY, CHAR, BYTE".replace(", ", ",").split(","))
_TYPE_UNSIGNED, _TYPE_ZEROFILL = \
map(CaselessKeyword, "UNSIGNED, ZEROFILL".replace(", ", ",").split(","))
_SUPPRESS_QUOTE = _BACKQUOTE | _DOUBLEQUOTE

_COMMENT = Suppress("--" + Regex(r".+"))
Expand Down Expand Up @@ -531,42 +538,46 @@ class DdlParse(DdlParseBase):


_CREATE_TABLE_STATEMENT = Suppress(_CREATE) + Optional(_TEMP)("temp") + Suppress(_TABLE) + Optional(Suppress(CaselessKeyword("IF NOT EXISTS"))) \
+ Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums+"_")("schema") + Optional(_SUPPRESS_QUOTE) + _DOT + Optional(_SUPPRESS_QUOTE)) + Word(alphanums+"_<>")("table") + Optional(_SUPPRESS_QUOTE) \
+ Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_")("schema") + Optional(_SUPPRESS_QUOTE) + _DOT + Optional(_SUPPRESS_QUOTE)) + Word(alphanums + "_<>")("table") + Optional(_SUPPRESS_QUOTE) \
+ _LPAR \
+ delimitedList(
OneOrMore(
_COMMENT
|
# Ignore Index
Suppress(_KEY + Word(alphanums+"_'`() "))
Suppress(_KEY + Word(alphanums + "_'`() "))
|
Group(
Optional(Suppress(_CONSTRAINT) + Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("name") + Optional(_SUPPRESS_QUOTE))
Optional(Suppress(_CONSTRAINT) + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE))
+ (
(
(_PRIMARY_KEY ^ _UNIQUE ^ _UNIQUE_KEY ^ _NOT_NULL)("type")
+ Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums+"_"))("name") + Optional(_SUPPRESS_QUOTE)
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
+ Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_"))("name") + Optional(_SUPPRESS_QUOTE)
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
)
|
(
(_FOREIGN_KEY)("type")
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
+ Optional(Suppress(_REFERENCES)
+ Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("references_table") + Optional(_SUPPRESS_QUOTE)
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_") + Optional(_SUPPRESS_QUOTE)))("references_columns") + _RPAR
+ Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("references_table") + Optional(_SUPPRESS_QUOTE)
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("references_columns") + _RPAR
)
)
)
)("constraint")
|
Group(
Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("name") + Optional(_SUPPRESS_QUOTE)
Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE)
+ Group(
Word(alphanums+"_")
+ Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
+ Optional(_LPAR + Regex(r"[\d\*]+\s*,*\s*\d*") + Optional(Suppress(_CHAR_SEMANTICS | _BYTE_SEMANTICS)) + _RPAR)
)("type")
Group(
Word(alphanums + "_")
+ Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
)("type_name")
+ Optional(_LPAR + Regex(r"[\d\*]+\s*,*\s*\d*")("length") + Optional(_CHAR_SEMANTICS | _BYTE_SEMANTICS)("semantics") + _RPAR)
+ Optional(_TYPE_UNSIGNED)("unsigned")
+ Optional(_TYPE_ZEROFILL)("zerofill")
)("type")
+ Optional(Word(r"\[\]"))("array_brackets")
+ Optional(Regex(_COLUMN_CONSTRAINT, re.IGNORECASE))("constraint")
)("column")
Expand Down
38 changes: 22 additions & 16 deletions example/example.py
Expand Up @@ -5,6 +5,8 @@
# This module is part of python-ddlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause

import json

from ddlparse import DdlParse

sample_ddl = """
Expand All @@ -13,6 +15,8 @@
Name varchar(100) NOT NULL COMMENT 'User name',
Total bigint NOT NULL,
Avg decimal(5,1) NOT NULL,
Point int(10) unsigned,
Zerofill_Id integer unsigned zerofill NOT NULL,
Created_At date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
UNIQUE (NAME)
);
Expand Down Expand Up @@ -66,22 +70,24 @@

print("* COLUMN *")
# NOTE(review): this span is a rendered diff hunk — it interleaves the removed
# (list + str.format) and the added (dict + json.dumps) bodies of the
# example.py column loop without +/- markers; only one body exists in the
# actual file. Confirm against the upstream commit before running.
for col in table.columns.values():
# removed: old output style — "name = value" fragments joined with " : "
col_info = []
col_info.append("name = {}".format(col.name))
col_info.append("data_type = {}".format(col.data_type))
col_info.append("length = {}".format(col.length))
col_info.append("precision(=length) = {}".format(col.precision))
col_info.append("scale = {}".format(col.scale))
col_info.append("constraint = {}".format(col.constraint))
col_info.append("not_null = {}".format(col.not_null))
col_info.append("PK = {}".format(col.primary_key))
col_info.append("unique = {}".format(col.unique))
col_info.append("bq_legacy_data_type = {}".format(col.bigquery_legacy_data_type))
col_info.append("bq_standard_data_type = {}".format(col.bigquery_standard_data_type))
col_info.append("comment = '{}'".format(col.comment))
col_info.append("description(=comment) = '{}'".format(col.description))
col_info.append("BQ {}".format(col.to_bigquery_field()))
print(" : ".join(col_info))
# added: new output style — collect the column attributes (including the new
# is_unsigned / is_zerofill properties) into a dict and pretty-print as JSON
col_info = {}
col_info["name"] = col.name
col_info["data_type"] = col.data_type
col_info["length"] = col.length
col_info["precision(=length)"] = col.precision
col_info["scale"] = col.scale
col_info["is_unsigned"] = col.is_unsigned
col_info["is_zerofill"] = col.is_zerofill
col_info["constraint"] = col.constraint
col_info["not_null"] = col.not_null
col_info["PK"] = col.primary_key
col_info["unique"] = col.unique
col_info["bq_legacy_data_type"] = col.bigquery_legacy_data_type
col_info["bq_standard_data_type"] = col.bigquery_standard_data_type
col_info["comment"] = col.comment
col_info["description(=comment)"] = col.description
col_info["bigquery_field"] = json.loads(col.to_bigquery_field())
print(json.dumps(col_info, indent=2, ensure_ascii=False))

print("* DDL (CREATE TABLE) statements *")
print(table.to_bigquery_ddl())
Expand Down
6 changes: 3 additions & 3 deletions test-requirements.txt
@@ -1,6 +1,6 @@
pytest>=5.4.3
pytest-cov>=2.10.0
tox>=3.16.1
coveralls>=2.0.0
codecov>=2.1.7
tox>=3.17.1
coveralls>=2.1.1
codecov>=2.1.8
codeclimate-test-reporter>=0.2.3

0 comments on commit 4e7e09a

Please sign in to comment.