Skip to content

Commit

Permalink
Omx doc
Browse files Browse the repository at this point in the history
  • Loading branch information
jpn-- committed Mar 11, 2017
1 parent cbb0cb2 commit 77db68c
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 10 deletions.
1 change: 1 addition & 0 deletions doc/data-in-models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ been using the SQLite database interface.

Using SQLite <databases>
Using HDF5 <datatables>
Using OMX <openmatrix>
Abstract Data Interface <datafountain>


32 changes: 32 additions & 0 deletions doc/openmatrix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
.. currentmodule:: larch

==================================
Using Open Matrix
==================================

Larch embeds a Python interface for interacting with
`open matrix (OMX) <https://github.com/osPlanning/omx/wiki>`_ files.
This data format rests on HDF5, the exact same underlying technology used in
:class:`DT` files. This makes merging and linking to open matrix data easy and
fun. :sup:`[citation needed]`

.. autoclass:: OMX(filename)

.. autoattribute:: OMX.shape

Importing Data
--------------

.. automethod:: OMX.import_datatable

.. automethod:: OMX.import_datatable_3d

.. automethod:: OMX.import_datatable_as_lookups


.. |idca| replace:: :ref:`idca <idca>`
.. |idco| replace:: :ref:`idco <idco>`




45 changes: 35 additions & 10 deletions py/omx.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ class OMXNonUniqueLookup(LarchError):
pass

class OMX(_tb.file.File):
"""A subclass of the :class:`tables.File` class, adding an interface for openmatrix files.
As suggested in the openmatrix documentation, the default when creating an OMX file
is to use zlib compression level 1, although this can be overridden.
"""

def __repr__(self):
from .util.text_manip import max_len
Expand Down Expand Up @@ -74,6 +79,13 @@ def change_mode(self, mode, **kwarg):

@property
def shape(self):
"""The shape of the OMX file.
As required by the standard, all OMX files must have a two-dimensional shape. This
attribute accesses or alters that shape. Note that attempting to change the
shape of an existing file that already has data tables that would be incompatible
with the new shape will raise an OMXIncompatibleShape exception.
"""
sh = self.root._v_attrs.SHAPE[:]
proposal = (sh[0],sh[1])
if proposal==(0,0) and self.data._v_nchildren>0:
Expand Down Expand Up @@ -281,11 +293,11 @@ def lookup_to_index(self, lookupname, arr):


def import_datatable_as_lookups(self, filepath, chunksize=10000, column_map=None, log=None, n_rows=None, zone_ix=None, zone_ix1=1, drop_zone=None):
"""Import a table in r,c,x,x,x... format into the matrix.
"""Import a table in r_or_c,x,x,x... format into the matrix.
The r and c columns need to be either 0-based or 1-based index values
The r_or_c column needs to be either 0-based or 1-based index values
(this may be relaxed in the future). The matrix must already be set up
with the correct size before importing the datatable.
with the correct shape before importing the datatable.
Parameters
----------
Expand Down Expand Up @@ -379,6 +391,10 @@ def import_datatable(self, filepath, one_based=True, chunksize=10000, column_map
----------
filepath : str or buffer
This argument will be fed directly to the :func:`pandas.read_csv` function.
one_based : bool
If True (the default), it is assumed that zones are indexed sequentially starting with 1
(as is typical for travel demand forecasting applications).
Otherwise, it is assumed that zones are indexed sequentially starting with 0 (typical for other C and Python applications).
chunksize : int
The number of rows of the source file to read as a chunk. Reading a giant file in moderate sized
chunks can be much faster and less memory intensive than reading the entire file.
Expand Down Expand Up @@ -438,12 +454,17 @@ def import_datatable_3d(self, filepath, one_based=True, chunksize=10000, default
(this may be relaxed in the future). The matrix must already be set up
with the correct size before importing the datatable.
This method is more memory intensive but much faster than the non-3d version.
This method is functionally the same as :meth:`import_datatable` but uses a different implementation.
It is much more memory intensive but also much faster than the non-3d version.
Parameters
----------
filepath : str or buffer
This argument will be fed directly to the :func:`pandas.read_csv` function.
one_based : bool
If True (the default), it is assumed that zones are indexed sequentially starting with 1
(as is typical for travel demand forecasting applications).
Otherwise, it is assumed that zones are indexed sequentially starting with 0 (typical for other C and Python applications).
chunksize : int
The number of rows of the source file to read as a chunk. Reading a giant file in moderate sized
chunks can be much faster and less memory intensive than reading the entire file.
Expand Down Expand Up @@ -527,19 +548,23 @@ def import_dbf(cls, dbffile, omxfile, shape, o, d, cols, smallest_zone_number=1)
def __getitem__(self, key):
if isinstance(key,str):
if key in self.data._v_children:
return self.data._v_children[key]
if key in self.lookup._v_children:
raise KeyError('key {} found in both data and lookup'.format(key))
else:
return self.data._v_children[key]
if key in self.lookup._v_children:
return self.lookup._v_children[key]
raise KeyError("matrix named {} not found".format(key))
raise TypeError("OMX matrix access must be by name (str)")

def __getattr__(self, key):
if key in self.data._v_children and key not in self.lookup._v_children:
return self.data._v_children[key]
if key not in self.data._v_children and key in self.lookup._v_children:
if key in self.data._v_children:
if key not in self.lookup._v_children:
return self.data._v_children[key]
else:
raise AttributeError('key {} found in both data and lookup'.format(key))
if key in self.lookup._v_children:
return self.lookup._v_children[key]
if key in self.data._v_children and key in self.lookup._v_children:
raise AttributeError('key {} found in both data and lookup'.format(key))
raise AttributeError('key {} not found'.format(key))

def import_omx(self, otherfile, tablenames, rowslicer=None, colslicer=None):
Expand Down

0 comments on commit 77db68c

Please sign in to comment.