Skip to content

Commit

Permalink
JSONL Support Added
Browse files Browse the repository at this point in the history
  • Loading branch information
fahadsiddiqui committed Sep 21, 2020
1 parent 0a74c0e commit 546776a
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 1 deletion.
34 changes: 33 additions & 1 deletion petl/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,34 @@ def fromjson(source, *args, **kwargs):
| 'c' | 2 |
+-----+-----+
Setting the argument `lines` to `True` causes the source to be parsed as a
JSON Lines document, i.e., one JSON value per line. For more details about
JSON Lines, please visit https://jsonlines.org/.
>>> import petl as etl
>>> data_with_jlines = '''{"name": "Gilbert", "wins": [["straight", "7S"], ["one pair", "10H"]]}
... {"name": "Alexa", "wins": [["two pair", "4S"], ["two pair", "9S"]]}
... {"name": "May", "wins": []}
... {"name": "Deloise", "wins": [["three of a kind", "5S"]]}'''
...
>>> with open('example2.json', 'w') as f:
... f.write(data_with_jlines)
...
223
>>> table2 = etl.fromjson('example2.json', lines=True)
>>> table2
+-----------+-------------------------------------------+
| name | wins |
+===========+===========================================+
| 'Gilbert' | [['straight', '7S'], ['one pair', '10H']] |
+-----------+-------------------------------------------+
| 'Alexa' | [['two pair', '4S'], ['two pair', '9S']] |
+-----------+-------------------------------------------+
| 'May' | [] |
+-----------+-------------------------------------------+
| 'Deloise' | [['three of a kind', '5S']] |
+-----------+-------------------------------------------+
If your JSON file does not fit this structure, you will need to parse it
via :func:`json.load` and select the array to treat as the data, see also
:func:`petl.io.json.fromdicts`.
Expand Down Expand Up @@ -69,6 +97,7 @@ def __init__(self, source, *args, **kwargs):
self.missing = kwargs.pop('missing', None)
self.header = kwargs.pop('header', None)
self.sample = kwargs.pop('sample', 1000)
self.lines = kwargs.pop('lines', False)
self.args = args
self.kwargs = kwargs

Expand All @@ -79,7 +108,10 @@ def __iter__(self):
f = io.TextIOWrapper(f, encoding='utf-8', newline='',
write_through=True)
try:
dicts = json.load(f, *self.args, **self.kwargs)
if self.lines:
dicts = [json.loads(jline) for jline in f.read().splitlines()]
else:
dicts = json.load(f, *self.args, **self.kwargs)
for row in iterdicts(dicts, self.header, self.sample,
self.missing):
yield row
Expand Down
51 changes: 51 additions & 0 deletions petl/test/io/test_jsonl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division

from tempfile import NamedTemporaryFile

from petl import fromjson
from petl.test.helpers import ieq


def test_fromjson_1():
    """Read a JSON Lines file whose last record has no trailing newline.

    Each record holds a string field and a (possibly empty) nested list, so
    this also checks that nested values are passed through as parsed.
    """
    data = (
        '{"name": "Gilbert", "wins": [["straight", "7S"], ["one pair", "10H"]]}\n'
        '{"name": "Alexa", "wins": [["two pair", "4S"], ["two pair", "9S"]]}\n'
        '{"name": "May", "wins": []}\n'
        '{"name": "Deloise", "wins": [["three of a kind", "5S"]]}'
    )

    # delete=False keeps the file on disk once the handle is closed, so that
    # fromjson can reopen it by name. The context manager guarantees the
    # handle is closed (and the data flushed) even if the write fails.
    with NamedTemporaryFile(delete=False, mode='w') as f:
        f.write(data)

    actual = fromjson(f.name, header=['name', 'wins'], lines=True)

    expect = (('name', 'wins'),
              ('Gilbert', [["straight", "7S"], ["one pair", "10H"]]),
              ('Alexa', [["two pair", "4S"], ["two pair", "9S"]]),
              ('May', []),
              ('Deloise', [["three of a kind", "5S"]]))

    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice


def test_fromjson_2():
    """Read a JSON Lines file whose last record ends with a newline.

    The trailing newline must not produce an extra (empty) row.
    """
    data = (
        '{"foo": "bar1", "baz": 1}\n'
        '{"foo": "bar2", "baz": 2}\n'
        '{"foo": "bar3", "baz": 3}\n'
        '{"foo": "bar4", "baz": 4}\n'
    )

    # delete=False keeps the file on disk once the handle is closed, so that
    # fromjson can reopen it by name. The context manager guarantees the
    # handle is closed (and the data flushed) even if the write fails.
    with NamedTemporaryFile(delete=False, mode='w') as f:
        f.write(data)

    actual = fromjson(f.name, header=['foo', 'baz'], lines=True)

    expect = (('foo', 'baz'),
              ('bar1', 1),
              ('bar2', 2),
              ('bar3', 3),
              ('bar4', 4))

    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice

0 comments on commit 546776a

Please sign in to comment.