Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

ENH: add ujson support in pandas.io.json #3804

Merged
merged 10 commits into from Jun 11, 2013
View
@@ -0,0 +1,34 @@
+Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the ESN Social Software AB nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc)
+http://code.google.com/p/stringencoders/
+Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved.
+
+Numeric decoder derived from from TCL library
+http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms
+ * Copyright (c) 1988-1993 The Regents of the University of California.
+ * Copyright (c) 1994 Sun Microsystems, Inc.
View
@@ -45,6 +45,16 @@ Excel
read_excel
ExcelFile.parse
+JSON
+~~~~
+
+.. currentmodule:: pandas.io.json
+
+.. autosummary::
+ :toctree: generated/
+
+ read_json
+
HTML
~~~~
@@ -597,6 +607,7 @@ Serialization / IO / Conversion
DataFrame.to_hdf
DataFrame.to_dict
DataFrame.to_excel
+ DataFrame.to_json
DataFrame.to_html
DataFrame.to_stata
DataFrame.to_records
View
@@ -35,6 +35,7 @@ object.
* ``read_excel``
* ``read_hdf``
* ``read_sql``
+ * ``read_json``
* ``read_html``
* ``read_stata``
* ``read_clipboard``
@@ -45,6 +46,7 @@ The corresponding ``writer`` functions are object methods that are accessed like
* ``to_excel``
* ``to_hdf``
* ``to_sql``
+ * ``to_json``
* ``to_html``
* ``to_stata``
* ``to_clipboard``
@@ -937,6 +939,104 @@ The Series object also has a ``to_string`` method, but with only the ``buf``,
which, if set to ``True``, will additionally output the length of the Series.
+JSON
+----
+
+Read and write ``JSON`` format files.
+
+.. _io.json:
+
+Writing JSON
+~~~~~~~~~~~~
+
+A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json``
+with optional parameters:
+
+- path_or_buf : the pathname or buffer to write the output
+ This can be ``None`` in which case a JSON string is returned
+- orient : The format of the JSON string, default is ``index`` for ``Series``, ``columns`` for ``DataFrame``
+
+ * split : dict like {index -> [index], columns -> [columns], data -> [values]}
+ * records : list like [{column -> value}, ... , {column -> value}]
+ * index : dict like {index -> {column -> value}}
+ * columns : dict like {column -> {index -> value}}
+ * values : just the values array
+
+- date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601), default is epoch
+- double_precision : The number of decimal places to use when encoding floating point values, default 10.
+- force_ascii : force encoded string to be ASCII, default True.
+
+Note NaN's and None will be converted to null and datetime objects will be converted based on the date_format parameter
+
+.. ipython:: python
+
+ dfj = DataFrame(randn(5, 2), columns=list('AB'))
+ json = dfj.to_json()
+ json
+
+Writing in iso date format
+
+.. ipython:: python
+
+ dfd = DataFrame(randn(5, 2), columns=list('AB'))
+ dfd['date'] = Timestamp('20130101')
+ json = dfd.to_json(date_format='iso')
+ json
+
+Writing to a file, with a date index and a date column
+
+.. ipython:: python
+
+ dfj2 = dfj.copy()
+ dfj2['date'] = Timestamp('20130101')
+ dfj2.index = date_range('20130101',periods=5)
+ dfj2.to_json('test.json')
+ open('test.json').read()
+
+Reading JSON
+~~~~~~~~~~~~
+
+Reading a JSON string to pandas object can take a number of parameters.
+The parser will try to parse a ``DataFrame`` if ``typ`` is not supplied or
+is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series``
+
+- filepath_or_buffer : a **VALID** JSON string or file handle / StringIO. The string could be
+ a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host
+ is expected. For instance, a local file could be
+ file ://localhost/path/to/table.json
+- typ : type of object to recover (series or frame), default 'frame'
+- orient : The format of the JSON string, one of the following
+
+ * split : dict like {index -> [index], name -> name, data -> [values]}
+ * records : list like [value, ... , value]
+ * index : dict like {index -> value}
+
+- dtype : dtype of the resulting object
+- numpy : direct decoding to numpy arrays. default True but falls back to standard decoding if a problem occurs.
+- parse_dates : a list of columns to parse for dates; If True, then try to parse datelike columns, default is False
+- keep_default_dates : boolean, default True. If parsing dates, then parse the default datelike columns
+
+The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is
+not parsable.
+
+Reading from a JSON string
+
+.. ipython:: python
+
+ pd.read_json(json)
+
+Reading from a file, parsing dates
+
+.. ipython:: python
+
+ pd.read_json('test.json',parse_dates=True)
+
+.. ipython:: python
+ :suppress:
+
+ import os
+ os.remove('test.json')
+
HTML
----
@@ -2193,7 +2293,6 @@ into a .dta file. The format version of this file is always the latest one, 115.
.. ipython:: python
- from pandas.io.stata import StataWriter
df = DataFrame(randn(10, 2), columns=list('AB'))
df.to_stata('stata.dta')
View
@@ -16,6 +16,7 @@ API changes
* ``read_excel``
* ``read_hdf``
* ``read_sql``
+ * ``read_json``
* ``read_html``
* ``read_stata``
* ``read_clipboard``
@@ -26,6 +27,7 @@ API changes
* ``to_excel``
* ``to_hdf``
* ``to_sql``
+ * ``to_json``
* ``to_html``
* ``to_stata``
* ``to_clipboard``
@@ -175,6 +177,10 @@ Enhancements
accessible via ``read_stata`` top-level function for reading,
and ``to_stata`` DataFrame method for writing, :ref:`See the docs<io.stata>`
+ - Added module for reading and writing json format files: ``pandas.io.json``
+ accessible via ``read_json`` top-level function for reading,
+ and ``to_json`` DataFrame method for writing, :ref:`See the docs<io.json>`
+
- ``DataFrame.replace()`` now allows regular expressions on contained
``Series`` with object dtype. See the examples section in the regular docs
:ref:`Replacing via String Expression <missing_data.replace_expression>`
View
@@ -495,6 +495,45 @@ def to_clipboard(self):
from pandas.io import clipboard
clipboard.to_clipboard(self)
+ def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
+ double_precision=10, force_ascii=True):
+ """
+ Convert the object to a JSON string.
+
+ Note NaN's and None will be converted to null and datetime objects
+ will be converted to UNIX timestamps.
+
+ Parameters
+ ----------
+ path_or_buf : the path or buffer to write the result string
+ if this is None, return a StringIO of the converted string
+ orient : {'split', 'records', 'index', 'columns', 'values'},
+ default is 'index' for Series, 'columns' for DataFrame
+
+ The format of the JSON string
+ split : dict like
+ {index -> [index], columns -> [columns], data -> [values]}
+ records : list like [{column -> value}, ... , {column -> value}]
+ index : dict like {index -> {column -> value}}
+ columns : dict like {column -> {index -> value}}
+ values : just the values array
+ date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601),
+ default is epoch
+ double_precision : The number of decimal places to use when encoding
+ floating point values, default 10.
+ force_ascii : force encoded string to be ASCII, default True.
+
+ Returns
+ -------
+ result : a JSON compatible string written to the path_or_buf;
+ if the path_or_buf is none, return a StringIO of the result
+
+ """
+
+ from pandas.io import json
+ return json.to_json(path_or_buf=path_or_buf, obj=self, orient=orient, date_format=date_format,
+ double_precision=double_precision, force_ascii=force_ascii)
+
# install the indexers
for _name, _indexer in indexing.get_indexers_list():
PandasObject._create_indexer(_name,_indexer)
View
@@ -6,6 +6,7 @@
from pandas.io.clipboard import read_clipboard
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel
from pandas.io.pytables import HDFStore, Term, get_store, read_hdf
+from pandas.io.json import read_json
from pandas.io.html import read_html
from pandas.io.sql import read_sql
from pandas.io.stata import read_stata
View
@@ -2,6 +2,7 @@
import urlparse
from pandas.util import py3compat
+from StringIO import StringIO
_VALID_URLS = set(urlparse.uses_relative + urlparse.uses_netloc +
urlparse.uses_params)
View
@@ -11,7 +11,7 @@
from pandas.io.parsers import TextParser
from pandas.tseries.period import Period
-import json
+from pandas import json
def read_excel(path_or_buf, sheetname, kind=None, **kwds):
"""Read an Excel table into a pandas DataFrame
Oops, something went wrong.