this takes care of a lot of the cleanup. So far less work than I was …

…anticipating. The one big annoyance is append mode: in Python 3, when you open a file in 'a' mode, it actually opens it in 'w' mode and then seeks to the end (if seek is available). This is done to normalize the differing behavior of system-level append mode across platforms, which is fine. But there doesn't seem to be any way of recovering whether or not the file was originally opened in append mode. I think whatever it does under the covers to handle the system differences should be *less* transparent. But anyway, I had to deal with that. git-svn-id: http://svn6.assembla.com/svn/pyfits/branches/file-io-refactoring@1106 ed100bfc-0583-0410-97f2-c26b58777a21
spacetelescope · Sep 15, 2011 · ea6df40 · ea6df40
1 parent fbeb932
commit ea6df40
Show file tree

Hide file tree

Showing 7 changed files with 141 additions and 150 deletions.
diff --git a/lib/pyfits/convenience.py b/lib/pyfits/convenience.py
@@ -9,7 +9,8 @@
 from pyfits.hdu.image import PrimaryHDU, ImageHDU
 from pyfits.hdu.table import BinTableHDU, _TableBaseHDU
 from pyfits.header import Header
-from pyfits.util import _with_extensions, deprecated
+from pyfits.util import (_with_extensions, deprecated, fileobj_closed,
+                         fileobj_name, isfile)
 
 
 __all__ = ['getheader', 'getdata', 'getval', 'setval', 'delval', 'writeto',
@@ -473,10 +474,7 @@ def update(filename, data, *ext, **extkeys):
 
     new_hdu = _makehdu(data, header)
 
-    if not isinstance(filename, file) and hasattr(filename, 'closed'):
-        closed = filename.closed
-    else:
-        closed = True
+    closed = fileobj_closed(filename)
 
     hdulist, _ext = _getext(filename, 'update', *ext, **extkeys)
     hdulist[_ext] = new_hdu
@@ -718,26 +716,8 @@ def _makehdu(data, header, classExtensions={}):
 
 
 def _stat_filename_or_fileobj(filename):
-    closed = True
-    name = ''
-
-    if isinstance(filename, file):
-        closed = filename.closed
-        name = filename.name
-    elif isinstance(filename, gzip.GzipFile):
-        if filename.fileobj is not None:
-            closed = filename.fileobj.closed
-        name = filename.filename
-    elif isinstance(filename, basestring):
-        name = filename
-    else:
-        if hasattr(filename, 'closed'):
-            closed = filename.closed
-
-        if hasattr(filename, 'name'):
-            name = filename.name
-        elif hasattr(filename, 'filename'):
-            name = filename.filename
+    closed = fileobj_closed(filename)
+    name = fileobj_name(filename) or ''
 
     try:
         loc = filename.tell()
@@ -751,6 +731,7 @@ def _stat_filename_or_fileobj(filename):
     return name, closed, noexist_or_empty
 
 
+# TODO: Replace this with fileobj_mode
 def _get_file_mode(filename, default='readonly'):
     """
     Allow file object to already be opened in any of the valid modes and
@@ -766,8 +747,8 @@ def _get_file_mode(filename, default='readonly'):
     elif hasattr(filename, 'fileobj') and filename.fileobj is not None:
         closed = filename.fileobj.closed
 
-    if (isinstance(filename, file) or
-       isinstance(filename, gzip.GzipFile)) and not closed:
+    if (isfile(filename) or
+        isinstance(filename, gzip.GzipFile) and not closed):
         if isinstance(filename, gzip.GzipFile):
             file_mode = filename.fileobj.mode
         else:

diff --git a/lib/pyfits/file.py b/lib/pyfits/file.py
@@ -11,8 +11,10 @@
 import numpy as np
 from numpy import memmap as Memmap
 
-from pyfits.util import (Extendable, isreadable, iswritable, _array_from_file,
-                         _array_to_file, _write_string, deprecated)
+from pyfits.util import (Extendable, isreadable, iswritable, isfile,
+                         fileobj_name, fileobj_closed, fileobj_mode,
+                         _array_from_file, _array_to_file, _write_string,
+                         deprecated)
 
 
 PYTHON_MODES = {'readonly': 'rb', 'copyonwrite': 'rb', 'update': 'rb+',
@@ -44,27 +46,17 @@ def __init__(self, fileobj=None, mode='copyonwrite', memmap=False):
         if mode not in PYTHON_MODES:
             raise ValueError("Mode '%s' not recognized" % mode)
 
-        # Determine what the _File object's name should be
-        if isinstance(fileobj, basestring):
-            if mode != 'append' and not os.path.exists(fileobj) and \
-               not os.path.splitdrive(fileobj)[0]:
+        if (isinstance(fileobj, basestring) and mode != 'append' and
+            not os.path.exists(fileobj) and
+            not os.path.splitdrive(fileobj)[0]):
                 #
                 # Not writing file and file does not exist on local machine and
                 # name does not begin with a drive letter (Windows), try to
                 # get it over the web.
                 #
-                self.name, fileheader = urllib.urlretrieve(fileobj)
-            else:
-                self.name = fileobj
+            self.name, _ = urllib.urlretrieve(fileobj)
         else:
-            if hasattr(fileobj, 'name'):
-                self.name = fileobj.name
-            elif hasattr(fileobj, 'filename'):
-                self.name = fileobj.filename
-            elif hasattr(fileobj, '__class__'):
-                self.name = str(fileobj.__class__)
-            else:
-                self.name = str(type(fileobj))
+            self.name = fileobj_name(fileobj)
 
         self.closed = False
         self.mode = mode
@@ -82,37 +74,34 @@ def __init__(self, fileobj=None, mode='copyonwrite', memmap=False):
 
         self.readonly = False
         self.writeonly = False
-        if mode in ('readonly', 'copyonwrite') or \
-                (isinstance(fileobj, gzip.GzipFile) and mode == 'update'):
+        if (mode in ('readonly', 'copyonwrite') or
+                (self.compression and mode == 'update')):
             self.readonly = True
-        elif mode == 'ostream' or \
-                (isinstance(fileobj, gzip.GzipFile) and mode == 'append'):
+        elif (mode == 'ostream' or
+                (self.compression and mode == 'append')):
             self.writeonly = True
 
         if memmap and mode not in ('readonly', 'copyonwrite', 'update'):
             raise ValueError(
                    "Memory mapping is not implemented for mode `%s`." % mode)
         else:
             # Initialize the internal self.__file object
-            if isinstance(fileobj, (file, gzip.GzipFile)):
-                if hasattr(fileobj, 'fileobj'):
-                    closed = fileobj.fileobj.closed
-                    fileobj_mode = fileobj.fileobj.mode
-                elif hasattr(fileobj, 'closed'):
-                    closed = fileobj.closed
-                    fileobj_mode = fileobj.mode
-                else:
-                    closed = True
-                    fileobj_mode = PYTHON_MODES[mode]
+            if isfile(fileobj) or isinstance(fileobj, gzip.GzipFile):
+                closed = fileobj_closed(fileobj)
+                fmode = fileobj_mode(fileobj) or PYTHON_MODES[mode]
 
                 if not closed:
-                    if PYTHON_MODES[mode] != fileobj_mode:
+                    # In some cases (like on Python 3) a file opened for
+                    # appending still shows a mode of 'r+', hence the extra
+                    # check for the append case
+                    if ((mode == 'append' and fmode not in ('ab+', 'rb+')) or
+                        (mode != 'append' and PYTHON_MODES[mode] != fmode)):
                         raise ValueError(
                             "Input mode '%s' (%s) does not match mode of the "
-                            "input file (%s)." % (mode, PYTHON_MODES[mode],
-                                                  fileobj_mode))
+                            "input file (%s)." %
+                            (mode, PYTHON_MODES[mode], fmode))
                     self.__file = fileobj
-                elif isinstance(fileobj, file):
+                elif isfile(fileobj):
                     self.__file = open(self.name, PYTHON_MODES[mode])
                     # Return to the beginning of the file--in Python 3 when
                     # opening in append mode the file pointer is at the end of
@@ -125,30 +114,32 @@ def __init__(self, fileobj=None, mode='copyonwrite', memmap=False):
                     # Handle gzip files
                     if mode in ['update', 'append']:
                         raise IOError(
-                              "Writing to gzipped fits files is not supported")
-                    zfile = gzip.GzipFile(self.name)
-                    self.__file = tempfile.NamedTemporaryFile(suffix='.fits')
-                    self.name = self.__file.name
-                    self.__file.write(zfile.read())
-                    zfile.close()
+                              "Writing to gzipped fits files is not "
+                              "currently supported")
+                    self.__file = gzip.open(self.name)
+                    self.compression = 'gzip'
                 elif os.path.splitext(self.name)[1] == '.zip':
                     # Handle zip files
                     if mode in ['update', 'append']:
                         raise IOError(
-                              "Writing to zipped fits files is not supported")
-                    zfile = zipfile.ZipFile(self.name)
-                    namelist = zfile.namelist()
+                              "Writing to zipped fits files is not currently "
+                              "supported")
+                    self._zfile = zipfile.ZipFile(self.name)
+                    namelist = self._zfile.namelist()
                     if len(namelist) != 1:
                         raise IOError(
                           "Zip files with multiple members are not supported.")
-                    self.__file = tempfile.NamedTemporaryFile(suffix='.fits')
-                    self.name = self.__file.name
-                    self.__file.write(zfile.read(namelist[0]))
-                    zfile.close()
+                    if hasattr(self._zfile, 'open'):
+                        self.__file = self._zfile.open(namelist[0])
+                    else:
+                        self.__file = tempfile.NamedTemporaryFile(suffix='.fits')
+                        self.__file.write(self._zfile.read(namelist[0]))
+                        self._zfile.close()
+                    self.compression = 'zip'
                 else:
                     self.__file = open(self.name, PYTHON_MODES[mode])
-                # Make certain we're back at the beginning of the file
-                self.__file.seek(0)
+                    # Make certain we're back at the beginning of the file
+                    self.__file.seek(0)
             else:
                 # We are dealing with a file like object.
                 # Assume it is open.
@@ -175,28 +166,16 @@ def __init__(self, fileobj=None, mode='copyonwrite', memmap=False):
 
             # For 'ab+' mode, the pointer is at the end after the open in
             # Linux, but is at the beginning in Solaris.
-            if mode == 'ostream':
+            if (mode == 'ostream' or self.compression or
+                not hasattr(self.__file, 'seek')):
                 # For output stream start with a truncated file.
+                # For compressed files we can't really guess at the size
                 self.size = 0
-            elif isinstance(self.__file, gzip.GzipFile):
-                # This gives the size of the actual file, but it's not too
-                # useful since the semantics of this really should be the size
-                # of the compressed file.  Unfortunately there's no way to get
-                # that with decompressing the file first.
-                # TODO: Make .size into a lazyproperty that, for compressed
-                # files, will just decompress the file and give the actual size
-                pos = self.__file.tell()
-                self.__file.fileobj.seek(0, 2)
-                self.size = self.__file.fileobj.tell()
-                self.__file.fileobj.seek(0)
-                self.__file.seek(pos)
-            elif hasattr(self.__file, 'seek'):
+            else:
                 pos = self.__file.tell()
                 self.__file.seek(0, 2)
                 self.size = self.__file.tell()
                 self.__file.seek(pos)
-            else:
-                self.size = 0
 
     def __repr__(self):
         return '<%s.%s %s>' % (self.__module__, self.__class__.__name__,
@@ -316,11 +295,7 @@ def seek(self, offset, whence=0):
         else:
             self.__file.seek(offset, whence)
 
-        if self.compression:
-            pos = self.__file.fileobj.tell()
-        else:
-            pos = self.__file.tell()
-        if pos > self.size:
+        if self.size and self.__file.tell() > self.size:
             warnings.warn('File may have been truncated: actual file length '
                           '(%i) is smaller than the expected size (%i)' %
                           (self.size, pos))
@@ -342,5 +317,8 @@ def close(self):
         if hasattr(self.__file, 'close'):
             self.__file.close()
 
+        if hasattr(self, '_zfile'):
+            self._zfile.close()
+
         self.closed = True
 
diff --git a/lib/pyfits/hdu/base.py b/lib/pyfits/hdu/base.py
@@ -465,6 +465,8 @@ def size(self):
         Returns the size (in bytes) of the HDU's data part.
         """
 
+        # Note: On compressed files this might report a negative size; but the
+        # file is corrupt anyways so I'm not too worried about it.
         return self._file.size - self._datLoc
 
     def _summary(self):

diff --git a/lib/pyfits/hdu/hdulist.py b/lib/pyfits/hdu/hdulist.py
@@ -18,8 +18,9 @@
 from pyfits.hdu.groups import GroupsHDU
 from pyfits.hdu.image import PrimaryHDU, ImageHDU
 from pyfits.hdu.table import _TableBaseHDU
-from pyfits.util import Extendable, _is_int, _tmp_name, _with_extensions, \
-                        _pad_length, BLOCK_SIZE
+from pyfits.util import (Extendable, _is_int, _tmp_name, _with_extensions,
+                         _pad_length, BLOCK_SIZE, isfile, fileobj_name,
+                         fileobj_closed, fileobj_mode)
 from pyfits.verify import _Verify, _ErrList
 
 
@@ -764,14 +765,14 @@ def update_extend(self):
                 hdr.update('extend', True, after='naxis' + str(n))
 
     @_with_extensions
-    def writeto(self, name, output_verify='exception', clobber=False,
+    def writeto(self, fileobj, output_verify='exception', clobber=False,
                 classExtensions={}, checksum=False):
         """
         Write the `HDUList` to a new file.
 
         Parameters
         ----------
-        name : file path, file object or file-like object
+        fileobj : file path, file object or file-like object
             File to write to.  If a file object, must be opened for
             append (ab+).
 
@@ -803,48 +804,21 @@ def writeto(self, name, output_verify='exception', clobber=False,
         self.verify(option=output_verify)
 
         # check if the file object is closed
-        closed = True
-        fileMode = 'ab+'
-
-        if isinstance(name, gzip.GzipFile):
-            if name.fileobj is not None:
-                closed = name.fileobj.closed
-            filename = name.filename
-
-            if not closed:
-                fileMode = name.fileobj.mode
-
-        elif isinstance(name, basestring):
-            filename = name
-        else:
-            if hasattr(name, 'closed'):
-                closed = name.closed
-
-            if hasattr(name, 'mode'):
-                fileMode = name.mode
-
-            if hasattr(name, 'name'):
-                filename = name.name
-            elif hasattr(name, 'filename'):
-                filename = name.filename
-            elif hasattr(name, '__class__'):
-                filename = str(name.__class__)
-            else:
-                filename = str(type(name))
+        closed = fileobj_closed(fileobj)
+        fmode = fileobj_mode(fileobj) or 'ab+'
+        filename = fileobj_name(fileobj)
 
         # check if the output file already exists
-        if isinstance(name, (basestring, file, gzip.GzipFile)):
+        if isfile(fileobj) or isinstance(fileobj, gzip.GzipFile):
             if (os.path.exists(filename) and os.path.getsize(filename) != 0):
                 if clobber:
                     warnings.warn("Overwriting existing file '%s'." % filename)
-                    if (isinstance(name, file) and not name.closed) or \
-                       (isinstance(name,gzip.GzipFile) and \
-                       name.fileobj is not None and not name.fileobj.closed):
-                        name.close()
+                    if not closed:
+                        fileobj.close()
                     os.remove(filename)
                 else:
                     raise IOError("File '%s' already exists." % filename)
-        elif (hasattr(name, 'len') and name.len > 0):
+        elif (hasattr(fileobj, 'len') and fileobj.len > 0):
             if clobber:
                 warnings.warn("Overwriting existing file '%s'." % filename)
                 name.truncate(0)
@@ -857,11 +831,11 @@ def writeto(self, name, output_verify='exception', clobber=False,
 
         mode = 'copyonwrite'
         for key, val in PYTHON_MODES.iteritems():
-            if val == fileMode:
+            if val == fmode:
                 mode = key
                 break
 
-        hdulist = fitsopen(name, mode=mode)
+        hdulist = fitsopen(fileobj, mode=mode)
 
         for hdu in self:
             # TODO: Fix this once new HDU writing API is settled on