Skip to content

Commit

Permalink
WIP don't keep h5py.File open between method calls
Browse files Browse the repository at this point in the history
  • Loading branch information
kwgoodman committed Dec 20, 2013
1 parent 857a7af commit f080d27
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 92 deletions.
60 changes: 26 additions & 34 deletions la/io.py
Expand Up @@ -18,7 +18,7 @@
class IO(object):
"Save and load larrys in HDF5 format using a dictionary-like interface."

def __init__(self, filename, max_freespace=np.inf):
def __init__(self, filename):
"""
Save and load larrys in HDF5 format using a dictionary-like interface.
Expand Down Expand Up @@ -114,12 +114,11 @@ def __init__(self, filename, max_freespace=np.inf):
False
"""
self.f = h5py.File(filename)
self.max_freespace = max_freespace
self.filename = filename

def keys(self):
"Return a list of larry names (keys) in archive."
return archive_directory(self.f)
return archive_directory(self.filename)

def values(self):
"Return a list of larry objects (values) in archive."
Expand Down Expand Up @@ -172,30 +171,20 @@ def merge(self, key, lar, update=False):
del self.f[key]
self[key] = lar2

def close(self):
self.f.close()

def __enter__(self):
return self

def __exit__(self, exc_type, exc_value, traceback):
self.close()

def __iter__(self):
    "Iterate over the larry names (keys) in the archive."
    return iter(self.keys())

def __len__(self):
    "Return the number of larry names (keys) in the archive."
    return len(self.keys())

def __getitem__(self, key):
    """
    Return the archived larry stored under `key`, wrapped in a lara.

    The archive file is opened read-only for the lookup.

    Raises
    ------
    KeyError
        If `key` is not in the archive, or if it is in the archive but
        is not a larry.
    """
    f = h5py.File(self.filename, 'r')
    try:
        if key not in f:
            raise KeyError("A larry named %s is not in the archive." % key)
        if not _is_archived_larry(f[key]):
            msg = "%s is in the archive but it is not a larry."
            raise KeyError(msg % key)
        # NOTE(review): the handle is deliberately left open on success.
        # The lara is built from a live h5py object and presumably still
        # reads from it after this method returns -- confirm against lara
        # before adding a close here.
        return lara(f[key])
    except Exception:
        # The lookup failed, so nothing refers to the handle; release it
        # instead of leaking it (a close placed after the return/raise
        # statements would never be reached).
        f.close()
        raise

def __setitem__(self, key, value):

Expand All @@ -205,17 +194,21 @@ def __setitem__(self, key, value):
if not isinstance(value, larry):
raise TypeError('value must be a larry.')

f = h5py.File(self.filename, 'w')

# Does an item (larry or otherwise) with given key already exist? If
# so delete. Note that self.f.keys() [all keys] is used instead of
# so delete. Note that f.keys() [all keys] is used instead of
# self.keys() [keys that are larrys].
if key in self.f.keys():
if key in f.keys():
self.__delitem__(key)

# If you've made it this far the data looks OK so save it
save(self.f, value, key)
save(f, value, key)

f.close()

def __delitem__(self, key):
delete(self.f, key)
delete(self.filename, key)

def __repr__(self):
table = [['larry', 'dtype', 'shape']]
Expand All @@ -232,22 +225,26 @@ def __repr__(self):
@property
def space(self):
    "The number of bytes used by the archive."
    # Open the archive only for the duration of the query; the finally
    # clause guarantees the handle is released even if the size lookup
    # raises.
    f = h5py.File(self.filename, 'r')
    try:
        return f.fid.get_filesize()
    finally:
        f.close()

@property
def freespace(self):
    """
    The number of bytes of freespace in the archive.

    Computed as the file size (`self.space`) minus the summed storage
    size of every h5py.Dataset found while visiting the archive.
    """
    # One-element list so the nested callback can update the running
    # total without resorting to a module-level global.
    used = [0]

    def sizefinder(key, value):
        "Add size of object to running total"
        if isinstance(value, h5py.Dataset):
            used[0] += value.id.get_storage_size()

    f = h5py.File(self.filename, 'r')
    try:
        f.visititems(sizefinder)
    finally:
        f.close()
    # `space` is a property of this class, not of the h5py file object.
    return self.space - used[0]

def repack(self):
"""
Expand All @@ -258,12 +255,7 @@ def repack(self):
freespace across opening and closing of the archive.
"""
self.f = repack(self.f)

@property
def filename(self):
"filename of archive."
return self.f.filename
repack(self.filename)

class lara(object):
"""
Expand Down
119 changes: 61 additions & 58 deletions la/tests/io_test.py
Expand Up @@ -7,6 +7,7 @@

import numpy as np
nan = np.nan
import h5py

import la
from la import larry
Expand All @@ -28,82 +29,84 @@ def tearDown(self):

def test_io_1(self):
"io_general"
with IO(self.filename) as io:
x = larry([1,2,3])
io['x'] = x
self.assertTrue('x' in io, 'key missing')
self.assertTrue((x == io['x'][:]).all(), 'save and load difference')
self.assertTrue(['x'] == list(io.keys()), 'keys are different')
self.assertTrue(x.dtype == io['x'].dtype, 'dtype changed')
del io['x']
self.assertTrue(list(io.keys()) == [], 'key still present')
io = IO(self.filename)
x = larry([1,2,3])
io['x'] = x
self.assertTrue('x' in io, 'key missing')
self.assertTrue((x == io['x'][:]).all(), 'save and load difference')
self.assertTrue(['x'] == list(io.keys()), 'keys are different')
self.assertTrue(x.dtype == io['x'].dtype, 'dtype changed')
del io['x']
self.assertTrue(list(io.keys()) == [], 'key still present')

def test_io_2(self):
"io_repack"
with IO(self.filename) as io:
io['larry'] = la.rand(100, 100)
fs1 = io.freespace
sp1 = io.space
del io['larry']
io.repack()
fs2 = io.freespace
sp2 = io.space
self.assertTrue(fs2 < fs1, 'repack did not reduce freespace')
self.assertTrue(sp2 < sp1, 'repack did not reduce space')
io = IO(self.filename)
io['larry'] = la.rand(100, 100)
fs1 = io.freespace
sp1 = io.space
del io['larry']
io.repack()
fs2 = io.freespace
sp2 = io.space
self.assertTrue(fs2 < fs1, 'repack did not reduce freespace')
self.assertTrue(sp2 < sp1, 'repack did not reduce space')

def test_io_3(self):
"io_keys"
with IO(self.filename) as io:
io['1'] = larry([1,2,3])
io['2'] = larry([1,2,3])
io.f['3'] = [1,2,3]
io['1/2/3/4'] = larry([1,2,3])
keys = list(io.keys())
keys.sort()
theory = ['1', '1/2/3/4', '2']
self.assertTrue(keys == theory, 'keys do not match')
io = IO(self.filename)
io['1'] = larry([1,2,3])
io['2'] = larry([1,2,3])
f = h5py.File(self.filename)
f['3'] = [1,2,3]
f.close()
io['1/2/3/4'] = larry([1,2,3])
keys = list(io.keys())
keys.sort()
theory = ['1', '1/2/3/4', '2']
self.assertTrue(keys == theory, 'keys do not match')

def test_io_4(self):
"io_dates"
with IO(self.filename) as io:
x = [1, 2]
label = [[datetime.date(2010,3,1), datetime.date(2010,3,2)]]
desired = larry(x, label)
io['desired'] = desired
actual = io['desired'][:]
assert_larry_equal(actual, desired)
io = IO(self.filename)
x = [1, 2]
label = [[datetime.date(2010,3,1), datetime.date(2010,3,2)]]
desired = larry(x, label)
io['desired'] = desired
actual = io['desired'][:]
assert_larry_equal(actual, desired)

def test_io_5(self):
"io_datetimes"
with IO(self.filename) as io:
x = [1, 2]
label = [[datetime.datetime(2010,3,1,13,15,59,9998),
datetime.datetime(2010,3,2,11,23)]]
desired = larry(x, label)
io['desired'] = desired
actual = io['desired'][:]
assert_larry_equal(actual, desired)
io = IO(self.filename)
x = [1, 2]
label = [[datetime.datetime(2010,3,1,13,15,59,9998),
datetime.datetime(2010,3,2,11,23)]]
desired = larry(x, label)
io['desired'] = desired
actual = io['desired'][:]
assert_larry_equal(actual, desired)

def test_io_6(self):
    "io_times"
    # Round-trip a larry whose labels are datetime.time objects through
    # the archive and check that it comes back unchanged.
    io = IO(self.filename)
    x = [1, 2]
    label = [[datetime.time(13,15,59,9998),
              datetime.time(11,23)]]
    desired = larry(x, label)
    io['desired'] = desired
    actual = io['desired'][:]
    assert_larry_equal(actual, desired)

def test_io_7(self):
"io_empty (gh #68)"
with IO(self.filename) as io:
desired = larry([])
io['desired'] = desired
actual = io['desired']
if actual.size == 0:
actual = la.larry([])
assert_larry_equal(actual, desired)
io = IO(self.filename)
desired = larry([])
io['desired'] = desired
actual = io['desired']
if actual.size == 0:
actual = la.larry([])
assert_larry_equal(actual, desired)

# nose tests ----------------------------------------------------------------

Expand Down

0 comments on commit f080d27

Please sign in to comment.