Merge pull request #239 from lakshmanok/master

Read a gzipped netcdf file
pydata · Sep 23, 2014 · b69fe0d · b69fe0d
2 parents 97db2b7 + 4718c48
commit b69fe0d
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 6 deletions.
diff --git a/xray/core/dataset.py b/xray/core/dataset.py
@@ -2,6 +2,7 @@
 import functools
 from io import BytesIO
 import warnings
+import sys
 
 import numpy as np
 import pandas as pd
@@ -20,6 +21,7 @@
                     multi_index_from_product)
 from .pycompat import iteritems, itervalues, basestring, OrderedDict
 
+import gzip
 
 def open_dataset(nc, decode_cf=True, mask_and_scale=True, decode_times=True,
                  concat_characters=True, *args, **kwargs):
@@ -30,7 +32,7 @@ def open_dataset(nc, decode_cf=True, mask_and_scale=True, decode_times=True,
     nc : str or file
         Path to a netCDF4 file or an OpenDAP URL (opened with python-netCDF4)
         or a file object or string serialization of a netCDF3 file (opened with
-        scipy.io.netcdf).
+        scipy.io.netcdf). If the filename ends with .gz, the file is gunzipped.
     decode_cf : bool, optional
         Whether to decode these variables, assuming they were saved according
         to CF conventions.
@@ -58,11 +60,18 @@ def open_dataset(nc, decode_cf=True, mask_and_scale=True, decode_times=True,
     # move this to a classmethod Dataset.open?
     # TODO: this check has the unfortunate side-effect that
     # paths to files cannot start with 'CDF'.
-    if isinstance(nc, basestring) and not nc.startswith('CDF'):
-        # If the initialization nc is a string and it doesn't
-        # appear to be the contents of a netcdf file we load
-        # it using the netCDF4 package
-        store = backends.NetCDF4DataStore(nc, *args, **kwargs)
+    if isinstance(nc, basestring):
+        # nc is a string: decide how to open it based on what it looks like.
+        if nc.endswith('.gz'):
+           # The name ends with .gz: gunzip it and open the result as a netCDF file.
+           # FIXME: does ScipyDataStore handle NetCDF4 files?
+           if sys.version_info[:2] < (2, 7):
+              raise ValueError('reading a gzipped netCDF not supported on Python 2.6')
+           store = backends.ScipyDataStore(gzip.open(nc), *args, **kwargs)
+        elif not nc.startswith('CDF'):
+           # The string does not appear to be the contents of a netCDF file,
+           # so we treat it as a path or URL and load it using the netCDF4 package.
+           store = backends.NetCDF4DataStore(nc, *args, **kwargs)
     else:
         # If nc is a file-like object we read it using
         # the scipy.io.netcdf package

diff --git a/xray/test/data/example_1.nc.gz b/xray/test/data/example_1.nc.gz
diff --git a/xray/test/test_backends.py b/xray/test/test_backends.py
@@ -7,6 +7,7 @@
 import os.path
 import tempfile
 import unittest
+import sys
 
 import numpy as np
 import pandas as pd
@@ -174,6 +175,15 @@ def test_roundtrip_example_1_netcdf(self):
             with self.roundtrip(expected) as actual:
                 self.assertDatasetIdentical(expected, actual)
 
+    def test_roundtrip_example_1_netcdf_gz(self):
+        if sys.version_info[:2] < (2, 7):
+           with self.assertRaisesRegexp(ValueError, 'gzipped netCDF not supported'):
+                open_example_dataset('example_1.nc.gz')
+        else:
+           with open_example_dataset('example_1.nc.gz') as expected:
+                with open_example_dataset('example_1.nc') as actual:
+                    self.assertDatasetIdentical(expected, actual)
+
     def test_orthogonal_indexing(self):
         in_memory = create_test_data()
         with self.roundtrip(in_memory) as on_disk: