Skip to content

Commit

Permalink
Merge pull request #115 from solvebio/f-json-exporter
Browse files Browse the repository at this point in the history
add JSON exporter for queries
  • Loading branch information
jsh2134 committed Nov 22, 2016
2 parents 52389e6 + 4352434 commit 8015174
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
sudo: false
language: python
python:
- "2.6"
Expand All @@ -14,6 +13,7 @@ install:
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install ordereddict unittest2; fi
- pip install PyVCF==0.6.8
script:
- stty cols 80
- python -W always setup.py test
- flake8 solvebio
notifications:
Expand Down
50 changes: 50 additions & 0 deletions solvebio/exporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from six.moves.urllib.parse import urlparse
from six.moves.urllib.parse import unquote

import json
import os
import sys
import datetime
Expand Down Expand Up @@ -66,6 +67,54 @@ def export(self, exporter, query, *args, **kwargs):
.export(*args, **kwargs)


class JSONExporter(object):
    """
    Exports a dataset query to JSON (specifically JSONL),
    one JSON record per line.

    Every record yielded by the query is serialized with ``json.dumps``
    and written to the output file followed by a newline.
    """
    name = 'json'

    def __init__(self, query, *args, **kwargs):
        """
        :param query: The query to export. It must support ``len()`` and
            iteration, and each record must provide ``pop(key, default)``.
        :param show_progress: (keyword) When true, a pyprind progress
            indicator is updated once per exported record.
            Defaults to False.
        :param exclude_fields: (keyword) Names of the fields removed from
            every record before it is written.
            Defaults to ``['_id', '_commit']``.
        """
        self.query = query
        self.show_progress = kwargs.get('show_progress', False)
        self.exclude_fields = kwargs.get('exclude_fields', ['_id', '_commit'])

    def export(self, filename=None, **kwargs):
        """
        Write every record of the query to ``filename``, one JSON
        document per line.

        Note that the excluded fields are popped from the records
        themselves, i.e. the records yielded by the query are modified.

        :param filename: Destination path; a leading ``~`` is expanded.
        :raises Exception: if ``filename`` is missing or empty.
        :raises AttributeError: if the query has no results.
        """
        if not filename:
            raise Exception(
                'The "filename" parameter is required to export.')

        result_count = len(self.query)
        if result_count <= 0:
            raise AttributeError('No results found in query!')

        filename = os.path.expanduser(filename)
        # Python 3 opens in text mode with newline translation disabled;
        # Python 2 opens in binary mode, where str is already bytes.
        if sys.version_info >= (3, 0, 0):
            f = open(filename, 'w', newline='')
        else:
            f = open(filename, 'wb')

        # The context manager closes the file on every exit path,
        # including a failure while the progress bar is being set up.
        with f:
            progress_bar = None
            if self.show_progress:
                progress_bar = pyprind.ProgPercent(
                    result_count,
                    title='JSON Export',
                    track_time=False)

            for record in self.query:
                for field in self.exclude_fields:
                    record.pop(field, None)
                f.write(json.dumps(record) + '\n')
                if progress_bar is not None:
                    progress_bar.update()

        # Only reached when the export finished without raising.
        print('Export complete!')


class CSVExporter(object):
"""
This class includes helper functions to export
Expand Down Expand Up @@ -263,6 +312,7 @@ def write(self, filename):


# Module-level exporter registry: each exporter class defined in this
# module is registered on it. Presumably exporters are later selected by
# their ``name`` attribute (e.g. 'json') -- confirm against
# QueryExporters.register / QueryExporters.export.
exporters = QueryExporters()
exporters.register(JSONExporter)
exporters.register(CSVExporter)
exporters.register(XLSXExporter)

Expand Down
14 changes: 13 additions & 1 deletion solvebio/test/test_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
from solvebio.resource import Dataset

from .helper import SolveBioTestCase
# import unittest

from os import path, remove
import json


class ExportsTests(SolveBioTestCase):
Expand All @@ -18,3 +19,14 @@ def test_csv_exporter(self):
query.export('csv', filename='/tmp/test.csv')
self.assertTrue(path.isfile('/tmp/test.csv'))
remove('/tmp/test.csv')

def test_json_exporter(self):
    """
    Export the first ten records of the test dataset with the 'json'
    exporter and check that every line of the output parses as JSON.
    """
    dataset = Dataset.retrieve(self.TEST_DATASET_NAME)
    query = dataset.query()[:10]
    filename = '/tmp/test.json'

    try:
        query.export('json', filename=filename)
        self.assertTrue(path.isfile(filename))
        with open(filename, 'r') as f:
            for row in f:
                self.assertTrue(json.loads(row))
    finally:
        # Clean up even when the export or an assertion fails, so a
        # stale file cannot leak into later test runs. The existence
        # check keeps a failed export from being masked by an OSError.
        if path.isfile(filename):
            remove(filename)

0 comments on commit 8015174

Please sign in to comment.