Skip to content

Commit

Permalink
BUG: io: close file handles immediately after use, where possible (#1517
Browse files Browse the repository at this point in the history
)
  • Loading branch information
pv committed Oct 10, 2010
1 parent a16ee31 commit 8630830
Showing 1 changed file with 125 additions and 72 deletions.
197 changes: 125 additions & 72 deletions numpy/lib/npyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ class NpzFile(object):
fid : file or str
The zipped archive to open. This is either a file-like object
or a string containing the path to the archive.
own_fid : bool, optional
Whether NpzFile should close the file handle.
Requires that `fid` is a file-like object.
Examples
--------
Expand All @@ -163,7 +166,7 @@ class NpzFile(object):
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
"""
def __init__(self, fid):
def __init__(self, fid, own_fid=False):
# Import is postponed to here since zipfile depends on gzip, an optional
# component of the so-called standard library.
import zipfile
Expand All @@ -177,6 +180,25 @@ def __init__(self, fid):
self.files.append(x)
self.zip = _zip
self.f = BagObj(self)
if own_fid:
self.fid = fid
else:
self.fid = None

def close(self):
"""
Close the file.
"""
if self.zip is not None:
self.zip.close()
self.zip = None
if self.fid is not None:
self.fid.close()
self.fid = None

def __del__(self):
self.close()

def __getitem__(self, key):
# FIXME: This seems like it will copy strings around
Expand Down Expand Up @@ -293,31 +315,39 @@ def load(file, mmap_mode=None):
"""
import gzip

own_fid = False
if isinstance(file, basestring):
fid = open(file, "rb")
own_fid = True
elif isinstance(file, gzip.GzipFile):
fid = seek_gzip_factory(file)
own_fid = True
else:
fid = file

# Code to distinguish from NumPy binary files and pickles.
_ZIP_PREFIX = asbytes('PK\x03\x04')
N = len(format.MAGIC_PREFIX)
magic = fid.read(N)
fid.seek(-N, 1) # back-up
if magic.startswith(_ZIP_PREFIX): # zip-file (assume .npz)
return NpzFile(fid)
elif magic == format.MAGIC_PREFIX: # .npy file
if mmap_mode:
return format.open_memmap(file, mode=mmap_mode)
else:
return format.read_array(fid)
else: # Try a pickle
try:
return _cload(fid)
except:
raise IOError, \
"Failed to interpret file %s as a pickle" % repr(file)
try:
# Code to distinguish from NumPy binary files and pickles.
_ZIP_PREFIX = asbytes('PK\x03\x04')
N = len(format.MAGIC_PREFIX)
magic = fid.read(N)
fid.seek(-N, 1) # back-up
if magic.startswith(_ZIP_PREFIX): # zip-file (assume .npz)
own_fid = False
return NpzFile(fid, own_fid=True)
elif magic == format.MAGIC_PREFIX: # .npy file
if mmap_mode:
return format.open_memmap(file, mode=mmap_mode)
else:
return format.read_array(fid)
else: # Try a pickle
try:
return _cload(fid)
except:
raise IOError, \
"Failed to interpret file %s as a pickle" % repr(file)
finally:
if own_fid:
fid.close()

def save(file, arr):
"""
Expand Down Expand Up @@ -355,15 +385,21 @@ def save(file, arr):
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
"""
own_fid = False
if isinstance(file, basestring):
if not file.endswith('.npy'):
file = file + '.npy'
fid = open(file, "wb")
own_fid = True
else:
fid = file

arr = np.asanyarray(arr)
format.write_array(fid, arr)
try:
arr = np.asanyarray(arr)
format.write_array(fid, arr)
finally:
if own_fid:
fid.close()

def savez(file, *args, **kwds):
"""
Expand All @@ -380,12 +416,12 @@ def savez(file, *args, **kwds):
Either the file name (string) or an open file (file-like object)
where the data will be saved. If file is a string, the ``.npz``
extension will be appended to the file name if it is not already there.
\\*args : Arguments, optional
*args : Arguments, optional
Arrays to save to the file. Since it is not possible for Python to
know the names of the arrays outside `savez`, the arrays will be saved
with names "arr_0", "arr_1", and so on. These arguments can be any
expression.
\\*\\*kwds : Keyword arguments, optional
**kwds : Keyword arguments, optional
Arrays to save to the file. Arrays will be saved in the file with the
keyword names.
Expand Down Expand Up @@ -417,7 +453,7 @@ def savez(file, *args, **kwds):
>>> x = np.arange(10)
>>> y = np.sin(x)
Using `savez` with \\*args, the arrays are saved with default names.
Using `savez` with *args, the arrays are saved with default names.
>>> np.savez(outfile, x, y)
>>> outfile.seek(0) # Only needed here to simulate closing & reopening file
Expand All @@ -427,7 +463,7 @@ def savez(file, *args, **kwds):
>>> npzfile['arr_0']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Using `savez` with \\*\\*kwds, the arrays are saved with the keyword names.
Using `savez` with **kwds, the arrays are saved with the keyword names.
>>> outfile = TemporaryFile()
>>> np.savez(outfile, x=x, y=y)
Expand Down Expand Up @@ -586,9 +622,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
if usecols is not None:
usecols = list(usecols)

isstring = False
own_fh = False
if _is_string_like(fname):
isstring = True
own_fh = True
if fname.endswith('.gz'):
import gzip
fh = seek_gzip_factory(fname)
Expand Down Expand Up @@ -676,7 +712,7 @@ def split_line(line):
# Convert each value according to its column and store
X.append(tuple([conv(val) for (conv, val) in zip(converters, vals)]))
finally:
if isstring:
if own_fh:
fh.close()

if len(dtype_types) > 1:
Expand Down Expand Up @@ -798,7 +834,9 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n'):
fmt = asstr(fmt)
delimiter = asstr(delimiter)

own_fh = False
if _is_string_like(fname):
own_fh = True
if fname.endswith('.gz'):
import gzip
fh = gzip.open(fname, 'wb')
Expand All @@ -812,39 +850,43 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n'):
else:
raise ValueError('fname must be a string or file handle')

X = np.asarray(X)
try:
X = np.asarray(X)

# Handle 1-dimensional arrays
if X.ndim == 1:
# Common case -- 1d array of numbers
if X.dtype.names is None:
X = np.atleast_2d(X).T
ncol = 1
# Handle 1-dimensional arrays
if X.ndim == 1:
# Common case -- 1d array of numbers
if X.dtype.names is None:
X = np.atleast_2d(X).T
ncol = 1

# Complex dtype -- each field indicates a separate column
else:
ncol = len(X.dtype.descr)
else:
ncol = X.shape[1]

# `fmt` can be a string with multiple insertion points or a list of formats.
# E.g. '%10.5f\t%10d' or ('%10.5f', '$10d')
if type(fmt) in (list, tuple):
if len(fmt) != ncol:
raise AttributeError('fmt has wrong shape. %s' % str(fmt))
format = asstr(delimiter).join(map(asstr, fmt))
elif type(fmt) is str:
if fmt.count('%') == 1:
fmt = [fmt, ]*ncol
format = delimiter.join(fmt)
elif fmt.count('%') != ncol:
raise AttributeError('fmt has wrong number of %% formats. %s'
% fmt)
# Complex dtype -- each field indicates a separate column
else:
ncol = len(X.dtype.descr)
else:
format = fmt
ncol = X.shape[1]

# `fmt` can be a string with multiple insertion points or a
# list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d')
if type(fmt) in (list, tuple):
if len(fmt) != ncol:
raise AttributeError('fmt has wrong shape. %s' % str(fmt))
format = asstr(delimiter).join(map(asstr, fmt))
elif type(fmt) is str:
if fmt.count('%') == 1:
fmt = [fmt, ]*ncol
format = delimiter.join(fmt)
elif fmt.count('%') != ncol:
raise AttributeError('fmt has wrong number of %% formats. %s'
% fmt)
else:
format = fmt

for row in X:
fh.write(asbytes(format % tuple(row) + newline))
for row in X:
fh.write(asbytes(format % tuple(row) + newline))
finally:
if own_fh:
fh.close()

import re
def fromregex(file, regexp, dtype):
Expand Down Expand Up @@ -902,25 +944,32 @@ def fromregex(file, regexp, dtype):
array([1312, 1534, 444], dtype=int64)
"""
own_fh = False
if not hasattr(file, "read"):
file = open(file, 'rb')
if not hasattr(regexp, 'match'):
regexp = re.compile(asbytes(regexp))
if not isinstance(dtype, np.dtype):
dtype = np.dtype(dtype)
own_fh = True

seq = regexp.findall(file.read())
if seq and not isinstance(seq[0], tuple):
# Only one group is in the regexp.
# Create the new array as a single data-type and then
# re-interpret as a single-field structured array.
newdtype = np.dtype(dtype[dtype.names[0]])
output = np.array(seq, dtype=newdtype)
output.dtype = dtype
else:
output = np.array(seq, dtype=dtype)
try:
if not hasattr(regexp, 'match'):
regexp = re.compile(asbytes(regexp))
if not isinstance(dtype, np.dtype):
dtype = np.dtype(dtype)

seq = regexp.findall(file.read())
if seq and not isinstance(seq[0], tuple):
# Only one group is in the regexp.
# Create the new array as a single data-type and then
# re-interpret as a single-field structured array.
newdtype = np.dtype(dtype[dtype.names[0]])
output = np.array(seq, dtype=newdtype)
output.dtype = dtype
else:
output = np.array(seq, dtype=dtype)

return output
return output
finally:
if own_fh:
fh.close()



Expand Down Expand Up @@ -1092,8 +1141,10 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
raise TypeError(errmsg % type(user_converters))

# Initialize the filehandle, the LineSplitter and the NameValidator
own_fhd = False
if isinstance(fname, basestring):
fhd = np.lib._datasource.open(fname, 'U')
own_fhd = True
elif not hasattr(fname, 'read'):
raise TypeError("The input should be a string or a filehandle. "\
"(got %s instead)" % type(fname))
Expand Down Expand Up @@ -1354,6 +1405,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
append_to_masks(tuple([v.strip() in m
for (v, m) in zip(values, missing_values)]))

if own_fhd:
fhd.close()

# Upgrade the converters (if needed)
if dtype is None:
Expand Down

0 comments on commit 8630830

Please sign in to comment.