Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add supports MySQL data-type #51

Merged
merged 1 commit into from
Jul 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [1.6.0] - 2020-07-18
### Added
- Add property.
- `DdlParseColumn.is_unsigned`
- `DdlParseColumn.is_zerofill`
- Add supports for numeric type attributes of MySQL.
- `UNSIGNED`
- `ZEROFILL`
- Add supports MySQL data-type.
- `TINYINT`
- `SMALLINT`
- `MEDIUMINT`
- `DEC`
- `FIXED`

### Fixed
- Miner fix.


## [1.5.0] - 2020-07-06
### Added
- Add supports for Python 3.8
Expand Down Expand Up @@ -142,6 +161,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Initial released.


[1.6.0]: https://github.com/shinichi-takii/ddlparse/compare/v1.5.0...v1.6.0
[1.5.0]: https://github.com/shinichi-takii/ddlparse/compare/v1.4.0...v1.5.0
[1.4.0]: https://github.com/shinichi-takii/ddlparse/compare/v1.3.1...v1.4.0
[1.3.1]: https://github.com/shinichi-takii/ddlparse/compare/v1.3.0...v1.3.1
Expand Down
40 changes: 23 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,18 @@ $ pip install ddlparse --upgrade
### Example

```python
from ddlparse.ddlparse import DdlParse
import json

from ddlparse import DdlParse

sample_ddl = """
CREATE TABLE My_Schema.Sample_Table (
Id integer PRIMARY KEY COMMENT 'User ID',
Name varchar(100) NOT NULL COMMENT 'User name',
Total bigint NOT NULL,
Avg decimal(5,1) NOT NULL,
Point int(10) unsigned,
Zerofill_Id integer unsigned zerofill NOT NULL,
Created_At date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
UNIQUE (NAME)
);
Expand Down Expand Up @@ -111,22 +115,24 @@ print(table.to_bigquery_fields(DdlParse.NAME_CASE.upper))

print("* COLUMN *")
for col in table.columns.values():
col_info = []
col_info.append("name = {}".format(col.name))
col_info.append("data_type = {}".format(col.data_type))
col_info.append("length = {}".format(col.length))
col_info.append("precision(=length) = {}".format(col.precision))
col_info.append("scale = {}".format(col.scale))
col_info.append("constraint = {}".format(col.constraint))
col_info.append("not_null = {}".format(col.not_null))
col_info.append("PK = {}".format(col.primary_key))
col_info.append("unique = {}".format(col.unique))
col_info.append("bq_legacy_data_type = {}".format(col.bigquery_legacy_data_type))
col_info.append("bq_standard_data_type = {}".format(col.bigquery_standard_data_type))
col_info.append("comment = '{}'".format(col.comment))
col_info.append("description(=comment) = '{}'".format(col.description))
col_info.append("BQ {}".format(col.to_bigquery_field()))
print(" : ".join(col_info))
col_info = {}
col_info["name"] = col.name
col_info["data_type"] = col.data_type
col_info["length"] = col.length
col_info["precision(=length)"] = col.precision
col_info["scale"] = col.scale
col_info["is_unsigned"] = col.is_unsigned
col_info["is_zerofill"] = col.is_zerofill
col_info["constraint"] = col.constraint
col_info["not_null"] = col.not_null
col_info["PK"] = col.primary_key
col_info["unique"] = col.unique
col_info["bq_legacy_data_type"] = col.bigquery_legacy_data_type
col_info["bq_standard_data_type"] = col.bigquery_standard_data_type
col_info["comment"] = col.comment
col_info["description(=comment)"] = col.description
col_info["bigquery_field"] = json.loads(col.to_bigquery_field())
print(json.dumps(col_info, indent=2, ensure_ascii=False))

print("* DDL (CREATE TABLE) statements *")
print(table.to_bigquery_ddl())
Expand Down
2 changes: 1 addition & 1 deletion ddlparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .ddlparse import *

__copyright__ = 'Copyright (C) 2018-2020 Shinichi Takii'
__version__ = '1.5.0'
__version__ = '1.6.0'
__license__ = 'BSD-3-Clause'
__author__ = 'Shinichi Takii'
__author_email__ = 'shinichi.takii@gmail.com'
Expand Down
61 changes: 36 additions & 25 deletions ddlparse/ddlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ def __init__(self, name, data_type_array, array_brackets=None, constraint=None,
def data_type(self):
return self._data_type

@property
def is_unsigned(self):
return self._numeric_is_unsigned

@property
def is_zerofill(self):
return self._numeric_is_zerofill

@property
def length(self):
return self._length
Expand All @@ -105,20 +113,17 @@ def scale(self):
return self._scale

def _set_data_type(self, data_type_array):
self._data_type = data_type_array[0].upper()
self._data_type = ' '.join(data_type_array["type_name"]).upper()
self._numeric_is_unsigned = True if "unsigned" in data_type_array else False
self._numeric_is_zerofill = True if "zerofill" in data_type_array else False
self._length = None
self._scale = None

if len(data_type_array) < 2:
return

matches = re.findall(r"([\d\*]+)\s*,*\s*(\d*)", data_type_array[-1])
if len(matches) > 0:
self._length = matches[0][0] if matches[0][0] == "*" else int(matches[0][0])
self._scale = None if len(matches[0]) < 2 or matches[0][1] == "" or int(matches[0][1]) == 0 else int(matches[0][1])

if re.search(r"^[^\d\*]+", data_type_array[1]):
self._data_type += " {}".format(data_type_array[1])
if "length" in data_type_array:
matches = re.findall(r"([\d\*]+)\s*,*\s*(\d*)", data_type_array["length"])
if len(matches) > 0:
self._length = matches[0][0] if matches[0][0] == "*" else int(matches[0][0])
self._scale = None if len(matches[0]) < 2 or matches[0][1] == "" or int(matches[0][1]) == 0 else int(matches[0][1])


@property
Expand Down Expand Up @@ -224,7 +229,7 @@ def bigquery_data_type(self):
or (self._source_database is not None and source_db is None)):
return bq_type

if self._data_type in ["NUMERIC", "NUMBER", "DECIMAL"]:
if self._data_type in ["NUMERIC", "NUMBER", "DECIMAL", "DEC", "FIXED"]:
if self._length is None:
if self._source_database in [self.DATABASE.oracle, self.DATABASE.postgresql]:
return "NUMERIC"
Expand Down Expand Up @@ -489,6 +494,8 @@ class DdlParse(DdlParseBase):
_LPAR, _RPAR, _COMMA, _SEMICOLON, _DOT, _DOUBLEQUOTE, _BACKQUOTE, _SPACE = map(Suppress, "(),;.\"` ")
_CREATE, _TABLE, _TEMP, _CONSTRAINT, _NOT_NULL, _PRIMARY_KEY, _UNIQUE, _UNIQUE_KEY, _FOREIGN_KEY, _REFERENCES, _KEY, _CHAR_SEMANTICS, _BYTE_SEMANTICS = \
map(CaselessKeyword, "CREATE, TABLE, TEMP, CONSTRAINT, NOT NULL, PRIMARY KEY, UNIQUE, UNIQUE KEY, FOREIGN KEY, REFERENCES, KEY, CHAR, BYTE".replace(", ", ",").split(","))
_TYPE_UNSIGNED, _TYPE_ZEROFILL = \
map(CaselessKeyword, "UNSIGNED, ZEROFILL".replace(", ", ",").split(","))
_SUPPRESS_QUOTE = _BACKQUOTE | _DOUBLEQUOTE

_COMMENT = Suppress("--" + Regex(r".+"))
Expand Down Expand Up @@ -531,42 +538,46 @@ class DdlParse(DdlParseBase):


_CREATE_TABLE_STATEMENT = Suppress(_CREATE) + Optional(_TEMP)("temp") + Suppress(_TABLE) + Optional(Suppress(CaselessKeyword("IF NOT EXISTS"))) \
+ Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums+"_")("schema") + Optional(_SUPPRESS_QUOTE) + _DOT + Optional(_SUPPRESS_QUOTE)) + Word(alphanums+"_<>")("table") + Optional(_SUPPRESS_QUOTE) \
+ Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_")("schema") + Optional(_SUPPRESS_QUOTE) + _DOT + Optional(_SUPPRESS_QUOTE)) + Word(alphanums + "_<>")("table") + Optional(_SUPPRESS_QUOTE) \
+ _LPAR \
+ delimitedList(
OneOrMore(
_COMMENT
|
# Ignore Index
Suppress(_KEY + Word(alphanums+"_'`() "))
Suppress(_KEY + Word(alphanums + "_'`() "))
|
Group(
Optional(Suppress(_CONSTRAINT) + Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("name") + Optional(_SUPPRESS_QUOTE))
Optional(Suppress(_CONSTRAINT) + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE))
+ (
(
(_PRIMARY_KEY ^ _UNIQUE ^ _UNIQUE_KEY ^ _NOT_NULL)("type")
+ Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums+"_"))("name") + Optional(_SUPPRESS_QUOTE)
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
+ Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_"))("name") + Optional(_SUPPRESS_QUOTE)
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
)
|
(
(_FOREIGN_KEY)("type")
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns") + _RPAR
+ Optional(Suppress(_REFERENCES)
+ Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("references_table") + Optional(_SUPPRESS_QUOTE)
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_") + Optional(_SUPPRESS_QUOTE)))("references_columns") + _RPAR
+ Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("references_table") + Optional(_SUPPRESS_QUOTE)
+ _LPAR + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("references_columns") + _RPAR
)
)
)
)("constraint")
|
Group(
Optional(_SUPPRESS_QUOTE) + Word(alphanums+"_")("name") + Optional(_SUPPRESS_QUOTE)
Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE)
+ Group(
Word(alphanums+"_")
+ Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
+ Optional(_LPAR + Regex(r"[\d\*]+\s*,*\s*\d*") + Optional(Suppress(_CHAR_SEMANTICS | _BYTE_SEMANTICS)) + _RPAR)
)("type")
Group(
Word(alphanums + "_")
+ Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
)("type_name")
+ Optional(_LPAR + Regex(r"[\d\*]+\s*,*\s*\d*")("length") + Optional(_CHAR_SEMANTICS | _BYTE_SEMANTICS)("semantics") + _RPAR)
+ Optional(_TYPE_UNSIGNED)("unsigned")
+ Optional(_TYPE_ZEROFILL)("zerofill")
)("type")
+ Optional(Word(r"\[\]"))("array_brackets")
+ Optional(Regex(_COLUMN_CONSTRAINT, re.IGNORECASE))("constraint")
)("column")
Expand Down
38 changes: 22 additions & 16 deletions example/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
# This module is part of python-ddlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause

import json

from ddlparse import DdlParse

sample_ddl = """
Expand All @@ -13,6 +15,8 @@
Name varchar(100) NOT NULL COMMENT 'User name',
Total bigint NOT NULL,
Avg decimal(5,1) NOT NULL,
Point int(10) unsigned,
Zerofill_Id integer unsigned zerofill NOT NULL,
Created_At date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
UNIQUE (NAME)
);
Expand Down Expand Up @@ -66,22 +70,24 @@

print("* COLUMN *")
for col in table.columns.values():
col_info = []
col_info.append("name = {}".format(col.name))
col_info.append("data_type = {}".format(col.data_type))
col_info.append("length = {}".format(col.length))
col_info.append("precision(=length) = {}".format(col.precision))
col_info.append("scale = {}".format(col.scale))
col_info.append("constraint = {}".format(col.constraint))
col_info.append("not_null = {}".format(col.not_null))
col_info.append("PK = {}".format(col.primary_key))
col_info.append("unique = {}".format(col.unique))
col_info.append("bq_legacy_data_type = {}".format(col.bigquery_legacy_data_type))
col_info.append("bq_standard_data_type = {}".format(col.bigquery_standard_data_type))
col_info.append("comment = '{}'".format(col.comment))
col_info.append("description(=comment) = '{}'".format(col.description))
col_info.append("BQ {}".format(col.to_bigquery_field()))
print(" : ".join(col_info))
col_info = {}
col_info["name"] = col.name
col_info["data_type"] = col.data_type
col_info["length"] = col.length
col_info["precision(=length)"] = col.precision
col_info["scale"] = col.scale
col_info["is_unsigned"] = col.is_unsigned
col_info["is_zerofill"] = col.is_zerofill
col_info["constraint"] = col.constraint
col_info["not_null"] = col.not_null
col_info["PK"] = col.primary_key
col_info["unique"] = col.unique
col_info["bq_legacy_data_type"] = col.bigquery_legacy_data_type
col_info["bq_standard_data_type"] = col.bigquery_standard_data_type
col_info["comment"] = col.comment
col_info["description(=comment)"] = col.description
col_info["bigquery_field"] = json.loads(col.to_bigquery_field())
print(json.dumps(col_info, indent=2, ensure_ascii=False))

print("* DDL (CREATE TABLE) statements *")
print(table.to_bigquery_ddl())
Expand Down
6 changes: 3 additions & 3 deletions test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pytest>=5.4.3
pytest-cov>=2.10.0
tox>=3.16.1
coveralls>=2.0.0
codecov>=2.1.7
tox>=3.17.1
coveralls>=2.1.1
codecov>=2.1.8
codeclimate-test-reporter>=0.2.3
Loading