Skip to content

Commit

Permalink
Merge pull request #624 from rkingsbury/jsonstore
Browse files Browse the repository at this point in the history
JSONStore: write file on init, add descriptive KeyError, add tests
  • Loading branch information
munrojm committed Apr 12, 2022
2 parents 02480e2 + e1271e0 commit 7d545d0
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 6 deletions.
30 changes: 24 additions & 6 deletions src/maggma/stores/mongolike.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
various utilities
"""

import json
from pathlib import Path

from datetime import datetime
import yaml
from itertools import chain, groupby
from socket import socket
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

import mongomock
import orjson
from monty.dev import requires
from monty.io import zopen
from monty.json import MSONable, jsanitize
Expand Down Expand Up @@ -697,7 +697,14 @@ def __init__(self, paths: Union[str, List[str]], file_writable=False, **kwargs):
)
self.file_writable = file_writable
self.kwargs = kwargs
super().__init__(collection_name="collection", **kwargs)

# create the .json file if it does not exist
if self.file_writable and not Path(self.paths[0]).exists():
with zopen(self.paths[0], "w") as f:
data: List[dict] = []
bytesdata = orjson.dumps(data)
f.write(bytesdata.decode("utf-8"))
super().__init__(**kwargs)

def connect(self, force_reset=False):
"""
Expand All @@ -708,9 +715,19 @@ def connect(self, force_reset=False):
with zopen(path) as f:
data = f.read()
data = data.decode() if isinstance(data, bytes) else data
objects = json.loads(data)
objects = orjson.loads(data)
objects = [objects] if not isinstance(objects, list) else objects
self.update(objects)
try:
self.update(objects)
except KeyError:
raise KeyError(
f"""
Key field '{self.key}' not found in {f.name}. This
could mean that this JSONStore was initially created with a different key field.
The keys found in the .json file are {list(objects[0].keys())}. Try
re-initializing your JSONStore using one of these as the key arguments.
"""
)

def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None):
"""
Expand Down Expand Up @@ -750,7 +767,8 @@ def update_json_file(self):
data = [d for d in self.query()]
for d in data:
d.pop("_id")
json.dump(data, f)
bytesdata = orjson.dumps(data)
f.write(bytesdata.decode("utf-8"))

def __hash__(self):
return hash((*self.paths, self.last_updated_field))
Expand Down
13 changes: 13 additions & 0 deletions tests/stores/test_mongolike.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import shutil
from datetime import datetime
from unittest import mock
from pathlib import Path

import mongomock.collection
from monty.tempfile import ScratchDir
Expand Down Expand Up @@ -414,9 +415,21 @@ def test_json_store_load(jsonstore, test_dir):
jsonstore.connect()
assert len(list(jsonstore.query())) == 20

# confirm that a descriptive KeyError is raised when the key field is missing from the file
with pytest.raises(KeyError, match="Key field 'random_key' not found"):
jsonstore = JSONStore(test_dir / "test_set" / "c.json.gz", key="random_key")
jsonstore.connect()


def test_json_store_writeable(test_dir):
with ScratchDir("."):
# if the .json does not exist, it should be created
jsonstore = JSONStore("a.json", file_writable=True)
assert Path("a.json").exists()
jsonstore.connect()
# confirm RuntimeError with multiple paths
with pytest.raises(RuntimeError, match="multiple JSON"):
jsonstore = JSONStore(["a.json", "d.json"], file_writable=True)
shutil.copy(test_dir / "test_set" / "d.json", ".")
jsonstore = JSONStore("d.json", file_writable=True)
jsonstore.connect()
Expand Down

0 comments on commit 7d545d0

Please sign in to comment.