In [1]:
import dynamic_singer as dsinger
from dynamic_singer.extra import postgres
from google.cloud import secretmanager
import json
import psycopg2

In [2]:
# create your own connection
def get_connection():
    """Open and return a new psycopg2 connection to Postgres on localhost:5432.

    The user, password and database come from the notebook-level names
    ``postgres_user``, ``postgres_password`` and ``postgres_database``,
    which must already be defined when this function is called.
    """
    settings = dict(
        user=postgres_user,
        password=postgres_password,
        host='localhost',
        port=5432,
        database=postgres_database,
    )
    return psycopg2.connect(**settings)

# Open the connection once; the cells below pass it to `postgres.Tap` and `postgres.bigquery_schema`.
connection = get_connection()



## Before using postgres.Tap

We need to create a persistent class to save and pull the last primary key state.

It must be an object with `pull` and `push` methods.

In this example, we simply write to a local file, but in deployment it is better to store the state in GCS / S3 / other persistent storage.

In [3]:
class Persistent:
    """File-backed state store exposing ``pull`` and ``push``.

    The state is kept in a file named ``<schema>-<table>``, resolved
    relative to the current working directory.
    """

    def __init__(self, schema, table):
        """Derive the state file name from the schema and table names."""
        self.filename = f'{schema}-{table}'

    def pull(self) -> str:
        """Return the full contents of the state file.

        NOTE(review): raises ``FileNotFoundError`` if nothing has been pushed
        yet; confirm that the consumer of this object handles that case.
        """
        with open(self.filename) as state_file:
            contents = state_file.read()
        return contents

    def push(self, data: str) -> None:
        """Overwrite the state file with ``data``."""
        with open(self.filename, mode='w') as state_file:
            state_file.write(data)

In [4]:
# Fill in the schema and table to read from your own Postgres database.
# For the primary key, a datetime column such as `created_at` or `updated_at` works best.

schema = ''
table = ''
primary_key = ''

In [5]:
# Build the state store for this schema/table pair, then hand it to the tap
# together with the primary key and the open connection.
persistent = Persistent(schema, table)
tap = postgres.Tap(schema, table, primary_key, connection, persistent)

## Generate BigQuery schema

If you are not confident in the schema that dynamic-singer auto-generates, you can use `bigquery_schema` to generate a schema based on the table definition.

In [6]:
# Generate a schema for this table; it is passed to the Source below as `tap_schema`.
table_schema = postgres.bigquery_schema(schema, table, connection)

In [7]:
# Settings for target-bigquery; set `project_id` to your own project before running.
config = dict(
    project_id='',
    dataset_id='dynamicsinger',
    validate_records=False,
    # 'stream_data': False  (optional key, left disabled here)
)

# Serialise the settings to the file that is passed to target-bigquery via `--config`.
with open('bigquery-config.json', mode='w') as config_file:
    config_file.write(json.dumps(config))

In [8]:
# Wrap the tap in a dynamic-singer Source, using the primary key as `tap_key`
# and the generated table schema as `tap_schema`.
source = dsinger.Source(tap, tap_name = 'example', tap_key = primary_key, tap_schema = table_schema)

In [9]:
# Attach target-bigquery, pointing it at the `bigquery-config.json` file written above.
source.add('target-bigquery --config bigquery-config.json')

In [None]:
# Start the source.
# NOTE(review): presumably this blocks while it keeps polling the table — confirm before running in a shared kernel.
source.start()

INFO select * from "schema".table where updated_at > '2017-07-25 07:32:40.043588' order by updated_at limit 100
