In [1]:
from json_schema_to_glue_columns import *

In [2]:
# Inputs for this walkthrough: the sample JSON Schema and its companion YAML
# config, both addressed relative to the notebook's working directory.
schema_file_location, config_file_location = (
    "./sample1.schema.json",
    "./sample1.config.yml",
)

In [3]:
# Read the sample JSON Schema from disk and display the parsed result.
# Note that the property keys shown below ('ID', 'last name', 'first-name') use
# mixed case, spaces and hyphens, so they do not match the entries listed
# under 'required' verbatim.
schema = load_json_schema(schema_file_location)
schema

{'title': 'Root Schema',
 'type': 'object',
 'default': {},
 'required': ['id', 'isbn', 'author', 'editor', 'title', 'category', 'tags'],
 'additionalProperties': True,
 'properties': {'ID': {'title': 'The id Schema',
   'type': 'number',
   'default': 0,
   'examples': [1]},
  'isbn': {'title': 'The isbn Schema',
   'type': 'string',
   'default': '',
   'examples': ['123-456-222']},
  'author': {'title': 'The author Schema',
   'type': 'object',
   'default': {},
   'required': ['lastname', 'firstname'],
   'additionalProperties': True,
   'properties': {'last name': {'title': 'The lastname Schema',
     'type': 'string',
     'default': '',
     'examples': ['Doe']},
    'first-name': {'title': 'The firstname Schema',
     'type': 'string',
     'default': '',
     'examples': ['Jane']}},
   'examples': [{'lastname': 'Doe', 'firstname': 'Jane'}]},
  'editor': {'title': 'The editor Schema',
   'type': 'object',
   'default': {},
   'required': ['lastname', 'firstname'],
   'additiona

In [4]:
# Bracketed tokens and the values handed to load_yaml_config alongside the
# config path. NOTE(review): presumably these tokens are substituted into the
# YAML text before parsing -- the displayed config contains 'dev' and 'abc' in
# its database names and S3 locations -- confirm against the config file.
placeholders = {"[aws_env]": "dev", "[logical_env]": "abc"}
config = load_yaml_config(config_file_location, placeholders)
config

{'data_format': 'json',
 'pipeline_type': 'SCD1',
 'storage_zones': {'raw': {'database': 'abc_source_raw',
   'table': 'sample',
   's3_location': 's3://org-dev-abc-raw/prefix'},
  'staging': {'database': 'abc_source_staging',
   'table': 'sample',
   's3_location': 's3://org-dev-abc-staging/prefix'}}}

In [5]:
# Partition columns are declared as "name:type" strings, coarsest level first.
# The displayed result shows each entry becoming a {'Name': ..., 'Type': ...}
# dict, in the same order.
partition_key_list = [
    "year:int",
    "month:int",
    "day:int",
    "hour:int",
    "minute:int",
]
partition_keys = create_partition_keys(partition_key_list)
partition_keys

[{'Name': 'year', 'Type': 'int'},
 {'Name': 'month', 'Type': 'int'},
 {'Name': 'day', 'Type': 'int'},
 {'Name': 'hour', 'Type': 'int'},
 {'Name': 'minute', 'Type': 'int'}]

In [6]:
# Raw-zone columns: convert the schema with the converter's default options.
# In the displayed result, nested objects are kept as STRUCT<...> types, arrays
# as ARRAY<...>, and property names appear in lowercase with underscores
# (e.g. 'ID' -> 'id', 'last name' -> 'last_name', 'first-name' -> 'first_name').
glue_columns_raw = convert_json_schema_to_glue_columns(schema)
glue_columns_raw

[{'Name': 'id', 'Type': 'DOUBLE'},
 {'Name': 'isbn', 'Type': 'STRING'},
 {'Name': 'author', 'Type': 'STRUCT<last_name:STRING,first_name:STRING>'},
 {'Name': 'editor', 'Type': 'STRUCT<last_name:STRING,first_name:STRING>'},
 {'Name': 'title', 'Type': 'STRING'},
 {'Name': 'category', 'Type': 'ARRAY<STRING>'},
 {'Name': 'tags', 'Type': 'ARRAY<STRUCT<key:STRING,value:STRING>>'}]

In [7]:
# Staging-zone columns: same schema, converted with flatten=True and "__" as
# the delimiter. In the displayed result each object field becomes its own
# top-level column named <parent>__<child>, while array-typed columns
# (including the array of structs) are left as they are.
glue_columns_staging = convert_json_schema_to_glue_columns(
    schema,
    delimiter="__",
    flatten=True,
)
glue_columns_staging

[{'Name': 'id', 'Type': 'DOUBLE'},
 {'Name': 'isbn', 'Type': 'STRING'},
 {'Name': 'author__last_name', 'Type': 'STRING'},
 {'Name': 'author__first_name', 'Type': 'STRING'},
 {'Name': 'editor__last_name', 'Type': 'STRING'},
 {'Name': 'editor__first_name', 'Type': 'STRING'},
 {'Name': 'title', 'Type': 'STRING'},
 {'Name': 'category', 'Type': 'ARRAY<STRING>'},
 {'Name': 'tags', 'Type': 'ARRAY<STRUCT<key:STRING,value:STRING>>'}]