Merge pull request #8 from shinichi-takii/feature/fix-issues
add option / fix BigQuery convert
Shinichi Takii committed Feb 12, 2018
2 parents b64e29d + b6b24da commit 4346ca5
Showing 9 changed files with 267 additions and 78 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -56,3 +56,7 @@ ENV/

# mypy
.mypy_cache/

# Rope config file
# https://github.com/python-rope/rope
.ropeproject/
13 changes: 10 additions & 3 deletions CHANGELOG.md
@@ -1,13 +1,20 @@
# Changelog

## 1.1.0
- Add `source_database` option.
- Add `to_bigquery_fields` method to Columns dictionary (`DdlParseColumnDict` class).
- Fix BigQuery conversion of Oracle data types.
  - Oracle 'DATE' -> BigQuery 'DATETIME'
  - Oracle 'NUMBER' -> BigQuery 'INTEGER' or 'FLOAT'

## 1.0.2
- Minor enhancement
- Minor enhancement.
  - `ddlparse.py` : Exclude unused module.
  - `example.py` : Modified comment.
  - `README.md` : Minor fix.

## 1.0.1
- Minor enhancement
- Minor enhancement.

## 1.0.0
- Initial release
- Initial release.
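
The 1.1.0 entries above, as a hedged end-to-end sketch: the two-column DDL is made up for illustration, and the JSON in the trailing comment shows the expected shape, not captured output.

from ddlparse import DdlParse

sample_ddl = """
CREATE TABLE Sample_Table (
  Col_01 varchar(100) NOT NULL,
  Col_02 date  -- Oracle 'DATE'
);
"""

# New option: declare the source dialect so dialect-specific rules apply.
table = DdlParse().parse(ddl=sample_ddl, source_database=DdlParse.DATABASE.oracle)

# New method: the columns dictionary can emit the BigQuery schema itself.
print(table.columns.to_bigquery_fields())
# Expected shape:
# [{"name": "Col_01", "type": "STRING", "mode": "REQUIRED"},
#  {"name": "Col_02", "type": "DATETIME", "mode": "NULLABLE"}]
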
29 changes: 25 additions & 4 deletions README.md
@@ -58,19 +58,40 @@ CREATE TABLE My_Schema.Sample_Table (
NAME varchar(100) NOT NULL,
TOTAL bigint NOT NULL,
AVG decimal(5,1) NOT NULL,
CREATED_AT timestamp,
CREATED_AT date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
UNIQUE (NAME)
);
"""

# parse pattern (1)

# parse pattern (1-1)
table = DdlParse().parse(sample_ddl)

# parse pattern (2)
# parse pattern (1-2) : Specify source database
table = DdlParse().parse(ddl=sample_ddl, source_database=DdlParse.DATABASE.oracle)


# parse pattern (2-1)
parser = DdlParse(sample_ddl)
table = parser.parse()

# parse pattern (2-2) : Specify source database
parser = DdlParse(ddl=sample_ddl, source_database=DdlParse.DATABASE.oracle)
table = parser.parse()


# parse pattern (3-1)
parser = DdlParse()
parser.ddl = sample_ddl
table = parser.parse()

# parse pattern (3-2) : Specify source database
parser = DdlParse()
parser.source_database = DdlParse.DATABASE.oracle
parser.ddl = sample_ddl
table = parser.parse()


print("* TABLE *")
print("schema = {} : name = {} : is_temp = {}".format(table.schema, table.name, table.is_temp))

@@ -102,7 +123,7 @@ print(table.columns["total"])

## License

[BSD 3-Clause License](LICENSE.md)
[BSD 3-Clause License](https://github.com/shinichi-takii/ddlparse/LICENSE.md)

## Author

30 changes: 26 additions & 4 deletions README.rst
@@ -68,19 +68,40 @@ Example
NAME varchar(100) NOT NULL,
TOTAL bigint NOT NULL,
AVG decimal(5,1) NOT NULL,
CREATED_AT timestamp,
CREATED_AT date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
UNIQUE (NAME)
);
"""
# parse pattern (1)
# parse pattern (1-1)
table = DdlParse().parse(sample_ddl)

# parse pattern (2)
# parse pattern (1-2) : Specify source database
table = DdlParse().parse(ddl=sample_ddl, source_database=DdlParse.DATABASE.oracle)


# parse pattern (2-1)
parser = DdlParse(sample_ddl)
table = parser.parse()

# parse pattern (2-2) : Specify source database
parser = DdlParse(ddl=sample_ddl, source_database=DdlParse.DATABASE.oracle)
table = parser.parse()


# parse pattern (3-1)
parser = DdlParse()
parser.ddl = sample_ddl
table = parser.parse()

# parse pattern (3-2) : Specify source database
parser = DdlParse()
parser.source_database = DdlParse.DATABASE.oracle
parser.ddl = sample_ddl
table = parser.parse()


print("* TABLE *")
print("schema = {} : name = {} : is_temp = {}".format(table.schema, table.name, table.is_temp))
@@ -112,7 +133,8 @@ Example
License
-------

`BSD 3-Clause License <LICENSE.md>`__
`BSD 3-Clause License <https://github.com/shinichi-takii/ddlparse/LICENSE.md>`__

Author
------
2 changes: 1 addition & 1 deletion ddlparse/__init__.py
@@ -8,7 +8,7 @@
from .ddlparse import *

__copyright__ = 'Copyright (C) 2018 Shinichi Takii'
__version__ = '1.0.2'
__version__ = '1.1.0'
__license__ = 'BSD-3-Clause'
__author__ = 'Shinichi Takii'
__author_email__ = 'shinichi.takii@gmail.com'
149 changes: 105 additions & 44 deletions ddlparse/ddlparse.py
@@ -18,15 +18,28 @@
class DdlParseBase():

NAME_CASE = IntEnum("NAME_CASE", "original lower upper")
DATABASE = IntEnum("DATABASE", "mysql, postgresql, oracle, redshift")

def __init__(self):
pass
def __init__(self, source_database=None):
self._source_database = source_database

@property
def source_database(self):
"""
Source database option
:param source_database: enum DdlParse.DATABASE
"""
return self._source_database

@source_database.setter
def source_database(self, source_database):
self._source_database = source_database


class DdlParseTableColumnBase(DdlParseBase):

def __init__(self):
super().__init__()
def __init__(self, source_database=None):
super().__init__(source_database)
self._name = ""

@property
@@ -60,12 +73,13 @@ def _get_name(self, name_case=DdlParseBase.NAME_CASE.original):
class DdlParseColumn(DdlParseTableColumnBase):
"""Column define info"""

def __init__(self, name, data_type_array, constraint=None):
def __init__(self, name, data_type_array, constraint=None, source_database=None):
"""
:param data_type_array[]: Column data type [data type name, length, precision]
:param data_type_array[]: Column data type ['data type name'] or ['data type name', '(length)'] or ['data type name', '(precision, scale)']
:param constraint: Column constraint string
:param source_database: enum DdlParse.DATABASE
"""
super().__init__()
super().__init__(source_database)
self._name = name
self._set_data_type(data_type_array)
self.constraint = constraint
@@ -149,29 +163,36 @@ def unique(self, flag):
def bigquery_data_type(self):
"""Get BigQuery data type"""

BQ_DATA_TYPE_DIC = {
# BigQuery data type : [condition, ...]
"STRING" : [re.compile(r"(CHAR|TEXT)")],
"INTEGER" : [re.compile(r"INT|SERIAL|YEAR")],
"FLOAT" : [re.compile(r"(FLOAT|DOUBLE)"), "REAL", "MONEY"],
"DATE" : ["DATE"],
"TIME" : ["TIME"],
"DATETIME" : ["DATETIME", "TIMESTAMP", "TIMESTAMP WITHOUT TIME ZONE"],
"TIMESTAMP" : ["TIMESTAMPTZ", "TIMESTAMP WITH TIME ZONE"],
"BOOLEAN" : [re.compile(r"BOOL")],
}

this_bq_type = None
# BigQuery data type = {source_database: [data type, ...], ...}
BQ_DATA_TYPE_DIC = OrderedDict()
BQ_DATA_TYPE_DIC["STRING"] = {None: [re.compile(r"(CHAR|TEXT)")]}
BQ_DATA_TYPE_DIC["INTEGER"] = {None: [re.compile(r"INT|SERIAL|YEAR")]}
BQ_DATA_TYPE_DIC["FLOAT"] = {None: [re.compile(r"(FLOAT|DOUBLE)"), "REAL", "MONEY"]}
BQ_DATA_TYPE_DIC["DATETIME"] = {
None: ["DATETIME", "TIMESTAMP", "TIMESTAMP WITHOUT TIME ZONE"],
self.DATABASE.oracle: ["DATE"]
}
BQ_DATA_TYPE_DIC["TIMESTAMP"] = {None: ["TIMESTAMPTZ", "TIMESTAMP WITH TIME ZONE"]}
BQ_DATA_TYPE_DIC["DATE"] = {None: ["DATE"]}
BQ_DATA_TYPE_DIC["TIME"] = {None: ["TIME"]}
BQ_DATA_TYPE_DIC["BOOLEAN"] = {None: [re.compile(r"BOOL")]}

for bq_type, conditions in BQ_DATA_TYPE_DIC.items():
for condition in conditions:
if isinstance(condition, str):
if self._data_type == condition:
for source_db, source_datatypes in conditions.items():
for source_datatype in source_datatypes:

if isinstance(source_datatype, str):
if self._data_type == source_datatype \
and ( self._source_database == source_db
or (self._source_database is not None and source_db is None)):
return bq_type

elif re.search(source_datatype, self._data_type) \
and ( self._source_database == source_db
or (self._source_database is not None and source_db is None)):
return bq_type
elif re.search(condition, self._data_type):
return bq_type

if this_bq_type is None and self._data_type in ["NUMERIC", "DECIMAL"]:
if self._data_type in ["NUMERIC", "NUMBER", "DECIMAL"]:
return "INTEGER" if self._scale is None else "FLOAT"

raise ValueError("Unknown data type : '{}'".format(self._data_type))
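
What the reworked lookup yields in practice, as a hedged sketch: the one-line Oracle DDL is invented, and it assumes the grammar accepts 'number' with and without a precision, as 'decimal(5,1)' in the README sample suggests.

oracle_ddl = "CREATE TABLE t (created_at date, total number, avg_val number(5,1))"
table = DdlParse().parse(ddl=oracle_ddl, source_database=DdlParse.DATABASE.oracle)

print(table.columns["created_at"].bigquery_data_type)  # DATETIME : Oracle DATE (fixed above)
print(table.columns["total"].bigquery_data_type)       # INTEGER  : NUMBER without a scale
print(table.columns["avg_val"].bigquery_data_type)     # FLOAT    : NUMBER(5,1) has a scale
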
@@ -188,7 +209,7 @@ def to_bigquery_field(self, name_case=DdlParseBase.NAME_CASE.original):
return '{{"name": "{}", "type": "{}", "mode": "{}"}}'.format(self._get_name(name_case), self.bigquery_data_type, self.bigquery_mode)


class DdlParseColumnDict(OrderedDict):
class DdlParseColumnDict(OrderedDict, DdlParseBase):
"""
Columns dictionary collection
@@ -197,28 +218,55 @@ class DdlParseColumnDict(OrderedDict):
(SQL is case insensitive)
"""

def __init__(self):
def __init__(self, source_database=None):
super().__init__()
self.source_database = source_database

def __getitem__(self, key):
return super().__getitem__(key.lower())

def __setitem__(self, key, value):
super().__setitem__(key.lower(), value)

def append(self, column_name, data_type_array=None, constraint=None):
column = DdlParseColumn(column_name, data_type_array, constraint)
def append(self, column_name, data_type_array=None, constraint=None, source_database=None):
if source_database is None:
source_database = self.source_database

column = DdlParseColumn(column_name, data_type_array, constraint, source_database)
self.__setitem__(column_name, column)
return column

def to_bigquery_fields(self, name_case=DdlParseBase.NAME_CASE.original):
"""Generate BigQuery JSON fields define"""

bq_fields = []

for col in self.values():
bq_fields.append(col.to_bigquery_field(name_case))

return "[{}]".format(",".join(bq_fields))


class DdlParseTable(DdlParseTableColumnBase):
"""Table define info"""

def __init__(self):
super().__init__()
def __init__(self, source_database=None):
super().__init__(source_database)
self._schema = None
self._columns = DdlParseColumnDict()
self._columns = DdlParseColumnDict(source_database)

@property
def source_database(self):
"""
Source database option
:param source_database: enum DdlParse.DATABASE
"""
return super().source_database

@source_database.setter
def source_database(self, source_database):
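# A plain super() assignment cannot reach a property setter (properties are
# class-level descriptors), so the base setter is invoked through __set__,
# and the option is then pushed down into the column collection.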
super(self.__class__, self.__class__).source_database.__set__(self, source_database)
self._columns.source_database = source_database

@property
def is_temp(self):
@@ -246,12 +294,7 @@ def columns(self):
def to_bigquery_fields(self, name_case=DdlParseBase.NAME_CASE.original):
"""Generate BigQuery JSON fields define"""

bq_fields = []

for col in self._columns.values():
bq_fields.append(col.to_bigquery_field(name_case))

return "[{}]".format(",".join(bq_fields))
return self._columns.to_bigquery_fields(name_case)
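
The table-level method is now a thin delegate to the columns dictionary, so both spellings below should return the same JSON string for any parsed `table`:

assert table.to_bigquery_fields() == table.columns.to_bigquery_fields()
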


class DdlParse(DdlParseBase):
@@ -295,10 +338,23 @@ class DdlParse(DdlParseBase):
_DDL_PARSE_EXPR << OneOrMore(_COMMENT | _CREATE_TABLE_STATEMENT)


def __init__(self, ddl=None):
super().__init__()
def __init__(self, ddl=None, source_database=None):
super().__init__(source_database)
self._ddl = ddl
self._table = DdlParseTable()
self._table = DdlParseTable(source_database)

@property
def source_database(self):
"""
Source database option
:param source_database: enum DdlParse.DATABASE
"""
return super().source_database

@source_database.setter
def source_database(self, source_database):
super(self.__class__, self.__class__).source_database.__set__(self, source_database)
self._table.source_database = source_database

@property
def ddl(self):
@@ -309,7 +365,7 @@ def ddl(self):
def ddl(self, ddl):
self._ddl = ddl

def parse(self, ddl=None):
def parse(self, ddl=None, source_database=None):
"""
Parse DDL script.
@@ -320,6 +376,9 @@ def parse(self, ddl=None, source_database=None):
if ddl is not None:
self._ddl = ddl

if source_database is not None:
self.source_database = source_database

if self._ddl is None:
raise ValueError("DDL is not specified")

@@ -336,7 +395,9 @@

if ret_col.getName() == "column":
# add column
col = self._table.columns.append(ret_col["name"], ret_col["type"])
col = self._table.columns.append(
column_name=ret_col["name"],
data_type_array=ret_col["type"])

if "constraint" in ret_col:
col.constraint = ret_col["constraint"]
