Skip to content

Commit

Permalink
Merge branch '355_streaming_json_documents' of https://github.com/jen…
Browse files Browse the repository at this point in the history
…nifersmith/csvkit into jennifersmith-355_streaming_json_documents
  • Loading branch information
onyxfish committed Nov 16, 2014
2 parents 08cbd7d + 2b89983 commit 65de9dc
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 17 deletions.
48 changes: 31 additions & 17 deletions csvkit/utilities/csvjson.py
Expand Up @@ -31,7 +31,27 @@ def add_arguments(self):
self.argparser.add_argument('--crs', dest='crs', type=str, default=None,
help='A coordinate reference system string to be included with GeoJSON output. Only valid if --lat and --lon are also specified.')

# Streaming mode: emit one JSON object per input row, each followed by a
# newline, instead of collecting all rows into a single array.
self.argparser.add_argument('--stream', dest='streamOutput', action='store_true',
    help='Output JSON as a stream of newline-separated objects, rather than as an array.')

def main(self):
if six.PY2:
stream = codecs.getwriter('utf-8')(self.output_file)
else:
stream = self.output_file

json_kwargs = {
'ensure_ascii': False,
'indent': self.args.indent,
}

if six.PY2:
json_kwargs['encoding'] = 'utf-8'

def dump_json(data, newline=False):
    """
    Serialize ``data`` as JSON onto ``stream`` using ``json_kwargs``.

    Both ``stream`` and ``json_kwargs`` come from the enclosing scope. When
    ``newline`` is true, a single line feed is written after the document.
    """
    json.dump(data, stream, **json_kwargs)

    if newline:
        stream.write("\n")

"""
Convert CSV to JSON.
"""
Expand All @@ -43,15 +63,13 @@ def main(self):

if self.args.crs and not self.args.lat:
self.argparser.error('--crs is only allowed when --lat and --lon are also specified.')

if self.args.streamOutput and (self.args.lat or self.args.lon or self.args.key):
self.argparser.error('--stream is only allowed if --lat, --lon and --key are not specified.')

rows = CSVKitReader(self.input_file, **self.reader_kwargs)
column_names = next(rows)

if six.PY2:
stream = codecs.getwriter('utf-8')(self.output_file)
else:
stream = self.output_file

# GeoJSON
if self.args.lat and self.args.lon:
features = []
Expand Down Expand Up @@ -129,6 +147,7 @@ def main(self):
'name': self.args.crs
})
])
dump_json(output)
# Keyed JSON
elif self.args.key:
output = OrderedDict()
Expand All @@ -145,10 +164,10 @@ def main(self):
raise NonUniqueKeyColumnException('Value %s is not unique in the key column.' % six.text_type(k))

output[k] = data
dump_json(output)
# Boring JSON
else:
output = []

for row in rows:
data = OrderedDict()

Expand All @@ -157,18 +176,13 @@ def main(self):
data[column] = row[i]
except IndexError:
data[column] = None
if(self.args.streamOutput):
dump_json(data,newline=True)
else:
output.append(data)
if not self.args.streamOutput:
dump_json(output)

output.append(data)

kwargs = {
'ensure_ascii': False,
'indent': self.args.indent,
}

if six.PY2:
kwargs['encoding'] = 'utf-8'

json.dump(output, stream, **kwargs)


def launch_new_instance():
Expand Down
12 changes: 12 additions & 0 deletions tests/test_utilities/test_csvjson.py
Expand Up @@ -123,3 +123,15 @@ def test_geojson_with_crs(self):
self.assertEqual(crs['type'], 'name')
self.assertEqual(crs['properties']['name'], 'EPSG:4269')

def test_json_streaming(self):
    """
    With ``--stream``, each output line parses as its own JSON object.
    """
    output_file = six.StringIO()
    utility = CSVJSON(['--stream', 'examples/dummy3.csv'], output_file)
    utility.main()

    # Every line must parse on its own, so decode them one at a time.
    emitted_lines = output_file.getvalue().splitlines()
    result = [json.loads(line) for line in emitted_lines]

    self.assertEqual(len(result), 2)
    self.assertDictEqual(result[0], {"a": "1", "c": "3", "b": "2"})
    self.assertDictEqual(result[1], {"a": "1", "c": "5", "b": "4"})

0 comments on commit 65de9dc

Please sign in to comment.