diff --git a/README.md b/README.md index 99fa5fe..b9609f8 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,11 @@ installation instructions for [Mac](python-mac) or from a public data set - as an example. These commands will install `tap-fixerio` and `target-csv` with pip, -and then run `tap-fixerio`, piping its output to `target-csv`: +and then run them together, piping the output of `tap-fixerio` to +`target-csv`: ```bash -› pip install target-csv -› pip install tap-fixerio +› pip install target-csv tap-fixerio › tap-fixerio | target-csv INFO Replicating the latest exchange rate data from fixer.io INFO Tap exiting normally diff --git a/target_csv.py b/target_csv.py index f29130f..d579824 100755 --- a/target_csv.py +++ b/target_csv.py @@ -25,6 +25,7 @@ def persist_lines(delimiter, quotechar, lines): state = None schemas = {} key_properties = {} + headers = {} for line in lines: try: o = json.loads(line) @@ -45,16 +46,28 @@ def persist_lines(delimiter, quotechar, lines): schema = schemas[o['stream']] validate(o['record'], schema) - top_level_fields = schema['properties'].keys() filename = o['stream'] + '.csv' - with open(filename, 'a', newline='') as csvfile: + file_is_empty = (not os.path.isfile(filename)) or os.stat(filename).st_size == 0 + + if o['stream'] not in headers and not file_is_empty: + with open(filename, 'r') as csvfile: + reader = csv.reader(csvfile, + delimiter=delimiter, + quotechar=quotechar) + first_line = next(reader) + headers[o['stream']] = first_line if first_line else o['record'].keys() + else: + headers[o['stream']] = o['record'].keys() + + with open(filename, 'a') as csvfile: writer = csv.DictWriter(csvfile, - o['record'].keys(), + headers[o['stream']], delimiter=delimiter, quotechar=quotechar) - if os.stat(filename).st_size == 0: + if file_is_empty: writer.writeheader() + writer.writerow(o['record']) state = None