Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

From tables to h5py #1351

Merged
merged 15 commits into from
Oct 13, 2017
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ cache:
addons:
apt:
packages:
# For runs with pytables
- libhdf5-serial-dev

env:
Expand All @@ -36,14 +35,14 @@ matrix:
- python: 2.7
env:
# Check these values against requirements.txt and dipy/info.py
- DEPENDS="cython==0.25.1 numpy==1.7.1 scipy==0.9.0 nibabel==2.1.0"
- DEPENDS="cython==0.25.1 numpy==1.7.1 scipy==0.9.0 nibabel==2.1.0 h5py==2.4.0"
- python: 2.7
env:
- DEPENDS="$DEPENDS scikit_learn tables"
- DEPENDS="$DEPENDS scikit_learn"
- python: 3.5
env:
- COVERAGE=1
- DEPENDS="$DEPENDS scikit_learn tables"
- DEPENDS="$DEPENDS scikit_learn"
# To test vtk functionality
- python: 2.7
sudo: true # This is set to true for apt-get
Expand All @@ -54,7 +53,7 @@ matrix:
- LIBGL_ALWAYS_INDIRECT=y
- VENV_ARGS="--system-site-packages --python=/usr/bin/python2.7"
- TEST_WITH_XVFB=true
- DEPENDS="$DEPENDS scikit_learn tables"
- DEPENDS="$DEPENDS scikit_learn"

- python: 2.7
env:
Expand Down
4 changes: 3 additions & 1 deletion dipy/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
NUMPY_MIN_VERSION='1.7.1'
SCIPY_MIN_VERSION='0.9'
NIBABEL_MIN_VERSION='2.1.0'
H5PY_MIN_VERSION='2.4.0'

# Main setup parameters
NAME = 'dipy'
Expand All @@ -105,4 +106,5 @@
PROVIDES = ["dipy"]
REQUIRES = ["numpy (>=%s)" % NUMPY_MIN_VERSION,
"scipy (>=%s)" % SCIPY_MIN_VERSION,
"nibabel (>=%s)" % NIBABEL_MIN_VERSION]
"nibabel (>=%s)" % NIBABEL_MIN_VERSION,
"h5py (>=%s)" % H5PY_MIN_VERSION]
115 changes: 49 additions & 66 deletions dipy/io/dpy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
""" A class for handling large tractography datasets.

It is built using the pytables tools which in turn implement
It is built using h5py, which in turn implements
key features of the HDF5 (hierarchical data format) API [1]_.

References
Expand All @@ -9,27 +9,15 @@
"""

import numpy as np
import h5py

from distutils.version import LooseVersion

# Conditional testing machinery for pytables
from dipy.testing import doctest_skip_parser

# Conditional import machinery for pytables
from dipy.utils.optpkg import optional_package

# Allow import, but disable doctests, if we don't have pytables
tables, have_tables, _ = optional_package('tables')

# Useful variable for backward compatibility.
TABLES_LESS_3_0 = LooseVersion(tables.__version__) < "3.0" if have_tables else False
from dipy.tracking.streamline import Streamlines

# Make sure not to carry across setup module from * import
__all__ = ['Dpy']


class Dpy(object):
@doctest_skip_parser
def __init__(self, fname, mode='r', compression=0):
""" Advanced storage system for tractography based on HDF5

Expand All @@ -39,7 +27,6 @@ def __init__(self, fname, mode='r', compression=0):
mode : 'r' read
'w' write
'r+' read and write only if file already exists
'a' read and write even if file doesn't exist (not used yet)
compression : 0 no compression to 9 maximum compression

Examples
Expand All @@ -64,70 +51,67 @@ def __init__(self, fname, mode='r', compression=0):
... T=dpr.read_tracksi([0,1,2,0,0,2])
... dpr.close()
... os.remove(fname) #delete file from disk
>>> dpy_example() # skip if not have_tables

>>> dpy_example()
"""

self.mode = mode
self.f = tables.openFile(fname, mode=self.mode) if TABLES_LESS_3_0 else tables.open_file(fname, mode=self.mode)
self.N = 5 * 10**9
self.f = h5py.File(fname, mode=self.mode)
self.compression = compression

if self.mode == 'w':
if TABLES_LESS_3_0:
func_create_group = self.f.createGroup
func_create_array = self.f.createArray
func_create_earray = self.f.createEArray
else:
func_create_group = self.f.create_group
func_create_array = self.f.create_array
func_create_earray = self.f.create_earray

self.streamlines = func_create_group(self.f.root, 'streamlines')
# create a version number
self.version = func_create_array(self.f.root, 'version',
[b"0.0.1"], 'Dpy Version Number')

self.tracks = func_create_earray(self.f.root.streamlines,
'tracks',
tables.Float32Atom(),
(0, 3),
"scalar Float32 earray",
tables.Filters(self.compression),
expectedrows=self.N)
self.offsets = func_create_earray(self.f.root.streamlines,
'offsets',
tables.Int64Atom(), (0,),
"scalar Int64 earray",
tables.Filters(self.compression),
expectedrows=self.N + 1)

self.f.attrs['version'] = u'0.0.1'

self.streamlines = self.f.create_group('streamlines')

self.tracks = self.streamlines.create_dataset(
'tracks',
shape=(0, 3),
dtype='f4',
maxshape=(None, 3), chunks=True)

self.offsets = self.streamlines.create_dataset(
'offsets',
shape=(1,),
dtype='i8',
maxshape=(None,), chunks=True)

self.curr_pos = 0
self.offsets.append(np.array([self.curr_pos]).astype(np.int64))
self.offsets[:] = np.array([self.curr_pos]).astype(np.int64)

if self.mode == 'r':
self.tracks = self.f.root.streamlines.tracks
self.offsets = self.f.root.streamlines.offsets
self.tracks = self.f['streamlines']['tracks']
self.offsets = self.f['streamlines']['offsets']
self.track_no = len(self.offsets) - 1
self.offs_pos = 0

def version(self):
ver = self.f.root.version[:]
return ver[0].decode()

return self.f.attrs['version']

def write_track(self, track):
""" write on track each time
"""
self.tracks.append(track.astype(np.float32))
self.tracks.resize(self.tracks.shape[0] + track.shape[0], axis=0)
self.tracks[-track.shape[0]:] = track.astype(np.float32)
self.curr_pos += track.shape[0]
self.offsets.append(np.array([self.curr_pos]).astype(np.int64))

def write_tracks(self, T):
self.offsets.resize(self.offsets.shape[0] + 1, axis=0)
self.offsets[-1] = self.curr_pos

def write_tracks(self, tracks):
""" write many tracks together
"""
for track in T:
self.tracks.append(track.astype(np.float32))
self.curr_pos += track.shape[0]
self.offsets.append(np.array([self.curr_pos]).astype(np.int64))

self.tracks.resize(self.tracks.shape[0] + tracks._data.shape[0],
axis=0)
self.tracks[-tracks._data.shape[0]:] = tracks._data

self.offsets.resize(self.offsets.shape[0] + tracks._offsets.shape[0],
axis=0)
self.offsets[-tracks._offsets.shape[0]:] = \
self.offsets[-tracks._offsets.shape[0] - 1] + \
tracks._offsets + tracks._lengths

def read_track(self):
""" read one track each time
Expand All @@ -139,23 +123,22 @@ def read_track(self):
def read_tracksi(self, indices):
""" read tracks with specific indices
"""
T = []
tracks = Streamlines()
for i in indices:
# print(self.offsets[i:i+2])
off0, off1 = self.offsets[i:i + 2]
T.append(self.tracks[off0:off1])
return T
tracks.append(self.tracks[off0:off1])
return tracks

def read_tracks(self):
""" read the entire tractography
"""
I = self.offsets[:]
TR = self.tracks[:]
T = []
tracks = Streamlines()
for i in range(len(I) - 1):
off0, off1 = I[i:i + 2]
T.append(TR[off0:off1])
return T
tracks.append(TR[off0:off1])
return tracks

def close(self):
self.f.close()
Expand Down
Loading