Skip to content

Commit

Permalink
Merge pull request #1386: Determine JSON size without loading data as…
Browse files Browse the repository at this point in the history
… a string into memory
  • Loading branch information
victorlin committed Jan 12, 2024
2 parents 63de6c1 + b60919b commit 89c6b51
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 17 deletions.
4 changes: 2 additions & 2 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1207,10 +1207,10 @@ def run(args):
# "auspice/zika_root-sequence.json".
output_path = Path(args.output)
root_sequence_path = output_path.parent / Path(output_path.stem + "_root-sequence" + output_path.suffix)
write_json(data=node_data['reference'], file_name=root_sequence_path, include_version=False, **indent)
write_json(data=node_data['reference'], file=root_sequence_path, include_version=False, **indent)
else:
fatal("Root sequence output was requested, but the node data provided is missing a 'reference' key.")
write_json(data=orderKeys(data_json), file_name=args.output, include_version=False, **indent)
write_json(data=orderKeys(data_json), file=args.output, include_version=False, **indent)

# validate outputs
validate_data_json(args.output, args.validation_mode)
Expand Down
46 changes: 31 additions & 15 deletions augur/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os, json, sys
import pandas as pd
from collections import defaultdict, OrderedDict
from io import RawIOBase
from textwrap import dedent
from .__version__ import __version__

Expand Down Expand Up @@ -91,17 +92,17 @@ def read_node_data(fnames, tree=None, validation_mode=ValidationMode.ERROR):
return NodeDataReader(fnames, tree, validation_mode).read()


def write_json(data, file_name, indent=(None if os.environ.get("AUGUR_MINIFY_JSON") else 2), include_version=True):
def write_json(data, file, indent=(None if os.environ.get("AUGUR_MINIFY_JSON") else 2), include_version=True):
"""
Write ``data`` as JSON to the given ``file_name``, creating parent directories
Write ``data`` as JSON to the given ``file``, creating parent directories
if necessary. The augur version is included as a top-level key "augur_version".
Parameters
----------
data : dict
data to write out to JSON
file_name : str
file name to write to
file
file path or handle to write to
indent : int or None, optional
JSON indentation level. Default is `None` if the environment variable `AUGUR_MINIFY_JSON`
is set to a non-empty value, else 2
Expand All @@ -112,25 +113,40 @@ def write_json(data, file_name, indent=(None if os.environ.get("AUGUR_MINIFY_JSO
------
OSError
"""
#in case parent folder does not exist yet
parent_directory = os.path.dirname(file_name)
if parent_directory and not os.path.exists(parent_directory):
try:
os.makedirs(parent_directory)
except OSError: #Guard against race condition
if not os.path.isdir(parent_directory):
raise
if isinstance(file, (str, os.PathLike)):
#in case parent folder does not exist yet
parent_directory = os.path.dirname(file)
if parent_directory and not os.path.exists(parent_directory):
try:
os.makedirs(parent_directory)
except OSError: #Guard against race condition
if not os.path.isdir(parent_directory):
raise

if include_version:
data["generated_by"] = {"program": "augur", "version": get_augur_version()}
with open(file_name, 'w', encoding='utf-8') as handle:
with open_file(file, 'w', encoding='utf-8') as handle:
sort_keys = False if isinstance(data, OrderedDict) else True
json.dump(data, handle, indent=indent, sort_keys=sort_keys, cls=AugurJSONEncoder)


def json_size(data, indent=2):
class BytesWrittenCounterIO(RawIOBase):
    """Binary stream to count the number of bytes sent via write().

    Nothing passed to :meth:`write` is stored; only its length is added to
    :attr:`written`. This lets callers measure the size of serialized output
    without holding the output itself in memory.
    """

    def __init__(self):
        super().__init__()
        self.written = 0
        """Number of bytes written."""

    def writable(self):
        """Report the stream as writable.

        The ``IOBase`` default is ``False``, which makes wrappers such as
        ``io.BufferedWriter`` reject the stream even though ``write()`` works.
        """
        return True

    def write(self, b):
        """Discard ``b`` and record its length.

        Parameters
        ----------
        b
            data handed over by the caller or by a wrapping stream

        Returns
        -------
        int
            ``len(b)``, i.e. everything is reported as consumed
        """
        n = len(b)
        self.written += n
        return n


def json_size(data):
"""Return size in bytes of a Python object in JSON string form."""
return len(json.dumps(data, indent=indent, cls=AugurJSONEncoder).encode("utf-8"))
with BytesWrittenCounterIO() as counter:
write_json(data, counter, include_version=False)
return counter.written


class AugurJSONEncoder(json.JSONEncoder):
Expand Down

0 comments on commit 89c6b51

Please sign in to comment.