Commit
Merge pull request #23 from shinichi-takii/feature/add-generate-bq-ddl
add bq-ddl generate func
Shinichi Takii committed Jan 2, 2019
2 parents c98a271 + e58acae commit c88cc8f
Showing 7 changed files with 422 additions and 80 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog

## 1.2.0
- Add `DdlParseTable.to_bigquery_ddl` function (see the usage sketch below).
  - Generates a BigQuery DDL (`CREATE TABLE`) statement.
- Add `DdlParseColumn.bigquery_legacy_data_type` property.
  - Returns the BigQuery Legacy SQL data type.
  - Alias of the `DdlParseColumn.bigquery_data_type` property.
- Add `DdlParseColumn.bigquery_standard_data_type` property.
  - Returns the BigQuery Standard SQL data type.
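
A minimal usage sketch of the 1.2.0 additions; the `CREATE TABLE` text and names here are illustrative, not part of this commit:

```python
from ddlparse.ddlparse import DdlParse

# Parse an illustrative DDL statement.
table = DdlParse().parse("CREATE TABLE Sample (Id integer PRIMARY KEY, Name varchar(100) NOT NULL)")

# New in 1.2.0: generate a BigQuery CREATE TABLE statement.
print(table.to_bigquery_ddl())

# New in 1.2.0: per-column BigQuery data types in both SQL dialects.
print(table.columns["Id"].bigquery_legacy_data_type)    # INTEGER (alias of bigquery_data_type)
print(table.columns["Id"].bigquery_standard_data_type)  # INT64
```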

## 1.1.3
- Add support for inline comments.
- Add support for quoted constraint names.
50 changes: 30 additions & 20 deletions README.md
@@ -8,16 +8,16 @@
[![Requirements Status](https://requires.io/github/shinichi-takii/ddlparse/requirements.svg?branch=master)](https://requires.io/github/shinichi-takii/ddlparse/requirements/?branch=master)
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://github.com/shinichi-takii/ddlparse/blob/master/LICENSE.md)

*DDL parse and convert to BigQuery JSON schema module, available in Python.*
*DDL parse and convert to BigQuery JSON schema and DDL statements module, available in Python.*

----

## Features

- DDL parse and get table schema information.
- Currently, only the `CREATE TABLE` statement is supported.
- Convert to [BigQuery JSON schema](https://cloud.google.com/bigquery/docs/schemas#creating_a_json_schema_file) and [BigQuery DDL statements](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language).
- Supported databases are MySQL, PostgreSQL, Oracle, Redshift.
- Convert to [BigQuery JSON schema](https://cloud.google.com/bigquery/docs/schemas#creating_a_json_schema_file).

## Requirement

@@ -50,15 +50,15 @@ $ pip install ddlparse --upgrade
### Example

```python
from ddlparse import DdlParse
from ddlparse.ddlparse import DdlParse

sample_ddl = """
CREATE TABLE My_Schema.Sample_Table (
ID integer PRIMARY KEY,
NAME varchar(100) NOT NULL,
TOTAL bigint NOT NULL,
AVG decimal(5,1) NOT NULL,
CREATED_AT date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
Id integer PRIMARY KEY,
Name varchar(100) NOT NULL,
Total bigint NOT NULL,
Avg decimal(5,1) NOT NULL,
Created_At date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
UNIQUE (NAME)
);
"""
@@ -111,18 +111,28 @@ print(table.to_bigquery_fields(DdlParse.NAME_CASE.upper))

print("* COLUMN *")
for col in table.columns.values():
print("name = {} : data_type = {} : length = {} : precision(=length) = {} : scale = {} : constraint = {} : not_null = {} : PK = {} : unique = {} : BQ {}".format(
col.name,
col.data_type,
col.length,
col.precision,
col.scale,
col.constraint,
col.not_null,
col.primary_key,
col.unique,
col.to_bigquery_field()
))
col_info = []
col_info.append("name = {}".format(col.name))
col_info.append("data_type = {}".format(col.data_type))
col_info.append("length = {}".format(col.length))
col_info.append("precision(=length) = {}".format(col.precision))
col_info.append("scale = {}".format(col.scale))
col_info.append("constraint = {}".format(col.constraint))
col_info.append("not_null = {}".format(col.not_null))
col_info.append("PK = {}".format(col.primary_key))
col_info.append("unique = {}".format(col.unique))
col_info.append("bq_data_type = {}".format(col.bigquery_data_type))
col_info.append("bq_legacy_data_type = {}".format(col.bigquery_legacy_data_type))
col_info.append("bq_standard_data_type = {}".format(col.bigquery_standard_data_type))
col_info.append("BQ {}".format(col.to_bigquery_field()))
print(" : ".join(col_info))

print("* DDL (CREATE TABLE) statements *")
print(table.to_bigquery_ddl())

print("* DDL (CREATE TABLE) statements - dataset name, table name and column name to lower case / upper case *")
print(table.to_bigquery_ddl(DdlParse.NAME_CASE.lower))
print(table.to_bigquery_ddl(DdlParse.NAME_CASE.upper))
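
# A sketch of what table.to_bigquery_ddl() above is expected to print for the
# sample DDL, assuming it was parsed with the Oracle source-database option
# (hence the DATE -> DATETIME comment):
#
#   #standardSQL
#   CREATE TABLE `project.My_Schema.Sample_Table`
#   (
#     Id INT64 NOT NULL,
#     Name STRING NOT NULL,
#     Total INT64 NOT NULL,
#     Avg FLOAT64 NOT NULL,
#     Created_At DATETIME
#   )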

print("* Get Column object (case insensitive) *")
print(table.columns["total"])
94 changes: 62 additions & 32 deletions README.rst
@@ -1,18 +1,36 @@
DDL Parse
=========

`PyPI version <https://pypi.python.org/pypi/ddlparse>`__ `Python
version <https://pypi.python.org/pypi/ddlparse>`__ `Travis CI Build
Status <https://travis-ci.org/shinichi-takii/ddlparse>`__ `Coveralls
Coverage
Status <https://coveralls.io/github/shinichi-takii/ddlparse?branch=master>`__
`codecov Coverage
Status <https://codecov.io/gh/shinichi-takii/ddlparse>`__ `Requirements
Status <https://requires.io/github/shinichi-takii/ddlparse/requirements/?branch=master>`__
`License <https://github.com/shinichi-takii/ddlparse/blob/master/LICENSE.md>`__

*DDL parse and convert to BigQuery JSON schema module, available in
Python.*
.. image:: https://img.shields.io/pypi/v/ddlparse.svg
:target: https://pypi.python.org/pypi/ddlparse
:alt: PyPI version

.. image:: https://img.shields.io/pypi/pyversions/ddlparse.svg
:target: https://pypi.python.org/pypi/ddlparse
:alt: Python version

.. image:: https://travis-ci.org/shinichi-takii/ddlparse.svg?branch=master
:target: https://travis-ci.org/shinichi-takii/ddlparse
:alt: Travis CI Build Status

.. image:: https://coveralls.io/repos/github/shinichi-takii/ddlparse/badge.svg?branch=master
:target: https://coveralls.io/github/shinichi-takii/ddlparse?branch=master
:alt: Coveralls Coverage Status

.. image:: https://codecov.io/gh/shinichi-takii/ddlparse/branch/master/graph/badge.svg
:target: https://codecov.io/gh/shinichi-takii/ddlparse
:alt: codecov Coverage Status

.. image:: https://requires.io/github/shinichi-takii/ddlparse/requirements.svg?branch=master
:target: https://requires.io/github/shinichi-takii/ddlparse/requirements/?branch=master
:alt: Requirements Status

.. image:: https://img.shields.io/badge/License-BSD%203--Clause-blue.svg
:target: https://github.com/shinichi-takii/ddlparse/blob/master/LICENSE.md
:alt: License

*DDL parse and convert to BigQuery JSON schema and DDL statements
module, available in Python.*

--------------

@@ -21,9 +39,11 @@ Features

- DDL parse and get table schema information.
- Currently, only the ``CREATE TABLE`` statement is supported.
- Supported databases are MySQL, PostgreSQL, Oracle, Redshift.
- Convert to `BigQuery JSON
schema <https://cloud.google.com/bigquery/docs/schemas#creating_a_json_schema_file>`__.
schema <https://cloud.google.com/bigquery/docs/schemas#creating_a_json_schema_file>`__
and `BigQuery DDL
statements <https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language>`__.
- Supported databases are MySQL, PostgreSQL, Oracle, Redshift.

Requirement
-----------
@@ -66,15 +86,15 @@ Example

.. code:: python
from ddlparse import DdlParse
from ddlparse.ddlparse import DdlParse
sample_ddl = """
CREATE TABLE My_Schema.Sample_Table (
ID integer PRIMARY KEY,
NAME varchar(100) NOT NULL,
TOTAL bigint NOT NULL,
AVG decimal(5,1) NOT NULL,
CREATED_AT date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
Id integer PRIMARY KEY,
Name varchar(100) NOT NULL,
Total bigint NOT NULL,
Avg decimal(5,1) NOT NULL,
Created_At date, -- Oracle 'DATE' -> BigQuery 'DATETIME'
UNIQUE (NAME)
);
"""
@@ -127,18 +147,28 @@ Example
print("* COLUMN *")
for col in table.columns.values():
print("name = {} : data_type = {} : length = {} : precision(=length) = {} : scale = {} : constraint = {} : not_null = {} : PK = {} : unique = {} : BQ {}".format(
col.name,
col.data_type,
col.length,
col.precision,
col.scale,
col.constraint,
col.not_null,
col.primary_key,
col.unique,
col.to_bigquery_field()
))
col_info = []
col_info.append("name = {}".format(col.name))
col_info.append("data_type = {}".format(col.data_type))
col_info.append("length = {}".format(col.length))
col_info.append("precision(=length) = {}".format(col.precision))
col_info.append("scale = {}".format(col.scale))
col_info.append("constraint = {}".format(col.constraint))
col_info.append("not_null = {}".format(col.not_null))
col_info.append("PK = {}".format(col.primary_key))
col_info.append("unique = {}".format(col.unique))
col_info.append("bq_data_type = {}".format(col.bigquery_data_type))
col_info.append("bq_legacy_data_type = {}".format(col.bigquery_legacy_data_type))
col_info.append("bq_standard_data_type = {}".format(col.bigquery_standard_data_type))
col_info.append("BQ {}".format(col.to_bigquery_field()))
print(" : ".join(col_info))
print("* DDL (CREATE TABLE) statements *")
print(table.to_bigquery_ddl())
print("* DDL (CREATE TABLE) statements - dataset name, table name and column name to lower case / upper case *")
print(table.to_bigquery_ddl(DdlParse.NAME_CASE.lower))
print(table.to_bigquery_ddl(DdlParse.NAME_CASE.upper))
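# As in README.md: with the Oracle source-database option, to_bigquery_ddl()
# above is expected to print a #standardSQL CREATE TABLE statement for
# `project.My_Schema.Sample_Table` with INT64/STRING/FLOAT64/DATETIME columns.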
print("* Get Column object (case insensitive) *")
print(table.columns["total"])
4 changes: 2 additions & 2 deletions ddlparse/__init__.py
@@ -7,8 +7,8 @@

from .ddlparse import *

__copyright__ = 'Copyright (C) 2018 Shinichi Takii'
__version__ = '1.1.3'
__copyright__ = 'Copyright (C) 2018-2019 Shinichi Takii'
__version__ = '1.2.0'
__license__ = 'BSD-3-Clause'
__author__ = 'Shinichi Takii'
__author_email__ = 'shinichi.takii@gmail.com'
93 changes: 85 additions & 8 deletions ddlparse/ddlparse.py
@@ -27,6 +27,7 @@ def __init__(self, source_database=None):
def source_database(self):
"""
Source database option
:param source_database: enum DdlParse.DATABASE
"""
return self._source_database
@@ -51,14 +52,14 @@ def name(self):
def name(self, name):
self._name = name

def _get_name(self, name_case=DdlParseBase.NAME_CASE.original):
def get_name(self, name_case=DdlParseBase.NAME_CASE.original):
"""
Get the name with case conversion applied
:param name_case: name case type
* NAME_CASE.original : no conversion
* NAME_CASE.lower : convert to lowercase
* NAME_CASE.upper : convert to uppercase
* DdlParse.NAME_CASE.original : no conversion
* DdlParse.NAME_CASE.lower : convert to lowercase
* DdlParse.NAME_CASE.upper : convert to uppercase
:return: name
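e.g. "Created_At" -> "created_at" with NAME_CASE.lower, "CREATED_AT" with NAME_CASE.upper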
"""
@@ -161,7 +162,7 @@ def unique(self, flag):

@property
def bigquery_data_type(self):
"""Get BigQuery data type"""
"""Get BigQuery Legacy SQL data type"""

# BigQuery data type = {source_database: [data type, ...], ...}
BQ_DATA_TYPE_DIC = OrderedDict()
@@ -205,6 +206,27 @@ def bigquery_data_type(self):

raise ValueError("Unknown data type : '{}'".format(self._data_type))

@property
def bigquery_legacy_data_type(self):
"""Get BigQuery Legacy SQL data type"""

return self.bigquery_data_type

@property
def bigquery_standard_data_type(self):
"""Get BigQuery Standard SQL data type"""

legacy_data_type = self.bigquery_data_type

if legacy_data_type == "INTEGER":
return "INT64"
elif legacy_data_type == "FLOAT":
return "FLOAT64"
elif legacy_data_type == "BOOLEAN":
return "BOOL"

return legacy_data_type

@property
def bigquery_mode(self):
"""Get BigQuery constraint"""
@@ -214,7 +236,7 @@ def bigquery_mode(self):
def to_bigquery_field(self, name_case=DdlParseBase.NAME_CASE.original):
"""Generate BigQuery JSON field define"""

return '{{"name": "{}", "type": "{}", "mode": "{}"}}'.format(self._get_name(name_case), self.bigquery_data_type, self.bigquery_mode)
return '{{"name": "{}", "type": "{}", "mode": "{}"}}'.format(self.get_name(name_case), self.bigquery_data_type, self.bigquery_mode)


class DdlParseColumnDict(OrderedDict, DdlParseBase):
@@ -245,7 +267,16 @@ def append(self, column_name, data_type_array=None, constraint=None, source_data
return column

def to_bigquery_fields(self, name_case=DdlParseBase.NAME_CASE.original):
"""Generate BigQuery JSON fields define"""
"""
Generate BigQuery JSON fields define
:param name_case: name case type
* DdlParse.NAME_CASE.original : Return to no convert
* DdlParse.NAME_CASE.lower : Return to lower
* DdlParse.NAME_CASE.upper : Return to upper
:return: BigQuery JSON fields define
"""

bq_fields = []

@@ -267,6 +298,7 @@ def __init__(self, source_database=None):
def source_database(self):
"""
Source database option
:param source_database: enum DdlParse.DATABASE
"""
return super().source_database
@@ -300,10 +332,54 @@ def columns(self):
return self._columns

def to_bigquery_fields(self, name_case=DdlParseBase.NAME_CASE.original):
"""Generate BigQuery JSON fields define"""
"""
Generate BigQuery JSON fields define
:param name_case: name case type
* DdlParse.NAME_CASE.original : Return to no convert
* DdlParse.NAME_CASE.lower : Return to lower
* DdlParse.NAME_CASE.upper : Return to upper
:return: BigQuery JSON fields define
"""

return self._columns.to_bigquery_fields(name_case)

def to_bigquery_ddl(self, name_case=DdlParseBase.NAME_CASE.original):
"""
Generate a BigQuery CREATE TABLE statement
:param name_case: name case type
* DdlParse.NAME_CASE.original : no conversion
* DdlParse.NAME_CASE.lower : convert to lowercase
* DdlParse.NAME_CASE.upper : convert to uppercase
:return: BigQuery CREATE TABLE statement
"""

if self.schema is None:
dataset = "dataset"
elif name_case == self.NAME_CASE.lower:
dataset = self.schema.lower()
elif name_case == self.NAME_CASE.upper:
dataset = self.schema.upper()
else:
dataset = self.schema

cols_def = []
for col in self.columns.values():
cols_def.append("{name} {type}{not_null}".format(
name=col.get_name(name_case),
type=col.bigquery_standard_data_type,
not_null=" NOT NULL" if col.not_null else "",
))

return "#standardSQL\nCREATE TABLE `project.{dataset}.{table}`\n(\n {colmns_define}\n)".format(
dataset=dataset,
table=self.get_name(name_case),
colmns_define=",\n ".join(cols_def),
)


class DdlParse(DdlParseBase):
"""DDL parser"""
@@ -356,6 +432,7 @@ def __init__(self, ddl=None, source_database=None):
def source_database(self):
"""
Source database option
:param source_database: enum DdlParse.DATABASE
"""
return super().source_database