Skip to content

Commit

Permalink
Merge pull request #239 from lakshmanok/master
Browse files Browse the repository at this point in the history
Read a gzipped netcdf file
  • Loading branch information
shoyer committed Sep 23, 2014
2 parents 97db2b7 + 4718c48 commit b69fe0d
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 6 deletions.
21 changes: 15 additions & 6 deletions xray/core/dataset.py
Expand Up @@ -2,6 +2,7 @@
import functools
from io import BytesIO
import warnings
import sys

import numpy as np
import pandas as pd
Expand All @@ -20,6 +21,7 @@
multi_index_from_product)
from .pycompat import iteritems, itervalues, basestring, OrderedDict

import gzip

def open_dataset(nc, decode_cf=True, mask_and_scale=True, decode_times=True,
concat_characters=True, *args, **kwargs):
Expand All @@ -30,7 +32,7 @@ def open_dataset(nc, decode_cf=True, mask_and_scale=True, decode_times=True,
nc : str or file
Path to a netCDF4 file or an OpenDAP URL (opened with python-netCDF4)
or a file object or string serialization of a netCDF3 file (opened with
scipy.io.netcdf).
scipy.io.netcdf). If the filename ends with .gz, the file is gunzipped
and then opened with scipy.io.netcdf.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
Expand Down Expand Up @@ -58,11 +60,18 @@ def open_dataset(nc, decode_cf=True, mask_and_scale=True, decode_times=True,
# move this to a classmethod Dataset.open?
# TODO: this check has the unfortunate side-effect that
# paths to files cannot start with 'CDF'.
if isinstance(nc, basestring) and not nc.startswith('CDF'):
# If the initialization nc is a string and it doesn't
# appear to be the contents of a netcdf file we load
# it using the netCDF4 package
store = backends.NetCDF4DataStore(nc, *args, **kwargs)
if isinstance(nc, basestring):
# If the initialization nc is a string and
if nc.endswith('.gz'):
# the name ends with .gz, then gunzip and open as netcdf file
# FIXME: does ScipyDataStore handle NetCDF4 files?
if sys.version_info[:2] < (2, 7):
raise ValueError('reading a gzipped netCDF not supported on Python 2.6')
store = backends.ScipyDataStore(gzip.open(nc), *args, **kwargs)
elif not nc.startswith('CDF'):
# it does not appear to be the contents of a netcdf file we load
# it using the netCDF4 package
store = backends.NetCDF4DataStore(nc, *args, **kwargs)
else:
# If nc is a file-like object we read it using
# the scipy.io.netcdf package
Expand Down
Binary file added xray/test/data/example_1.nc.gz
Binary file not shown.
10 changes: 10 additions & 0 deletions xray/test/test_backends.py
Expand Up @@ -7,6 +7,7 @@
import os.path
import tempfile
import unittest
import sys

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -174,6 +175,15 @@ def test_roundtrip_example_1_netcdf(self):
with self.roundtrip(expected) as actual:
self.assertDatasetIdentical(expected, actual)

def test_roundtrip_example_1_netcdf_gz(self):
    """Check that the gzipped example file opens to the same dataset.

    On interpreters older than Python 2.7, opening ``example_1.nc.gz``
    is expected to raise a ``ValueError`` whose message matches
    'gzipped netCDF not supported'. Otherwise the dataset opened from
    ``example_1.nc.gz`` must be identical to the one opened from
    ``example_1.nc``.
    """
    gzip_supported = sys.version_info[:2] >= (2, 7)

    if not gzip_supported:
        # Old interpreters: only verify that the failure is reported clearly.
        with self.assertRaisesRegexp(ValueError, 'gzipped netCDF not supported'):
            open_example_dataset('example_1.nc.gz')
        return

    # Open the compressed file first, then the plain one, and compare them.
    with open_example_dataset('example_1.nc.gz') as from_gzip:
        with open_example_dataset('example_1.nc') as from_plain:
            self.assertDatasetIdentical(from_gzip, from_plain)

def test_orthogonal_indexing(self):
in_memory = create_test_data()
with self.roundtrip(in_memory) as on_disk:
Expand Down

0 comments on commit b69fe0d

Please sign in to comment.