Merge pull request #7 from rileyhales/mpoint

multipoint queries
rileyhales · Jun 28, 2021 · 9050b19 · 9050b19
2 parents b527dcb + f3ec00d
commit 9050b19
Show file tree

Hide file tree

Showing 4 changed files with 69 additions and 14 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -13,7 +13,7 @@
 author = 'Riley Hales'
 
 # The full version, including alpha/beta/rc tags
-release = '0.6'
+release = '0.7'
 master_doc = 'index'
 
 # -- General configuration ---------------------------------------------------

diff --git a/grids/__init__.py b/grids/__init__.py
@@ -4,4 +4,4 @@
 __author__ = 'Riley Chad Hales'
 __doi__ = 'https://doi.org/10.5281/zenodo.4560690'
 __license__ = 'BSD 3-Clause Clear License'
-__version__ = '0.6'
+__version__ = '0.7'
diff --git a/grids/_ts.py b/grids/_ts.py
@@ -9,7 +9,7 @@
 import warnings
 
 import affine
-import geopandas
+import geopandas as gpd
 import h5py
 import netCDF4 as nc
 import numpy as np
@@ -185,26 +185,28 @@ def __str__(self):
                 string += f'\n\t{p}: {self.__getattribute__(p)}'
         return string
 
+    def __repr__(self):
+        return str(self)  # the repr reuses the summary string built by __str__
+
     def point(self,
-              *coordinates: int or float or None, ) -> pd.DataFrame:
+              *coords: int or float or None, ) -> pd.DataFrame:
         """
         Extracts a time series at a point for a given series of coordinate values
 
         Args:
-            coordinates (int or float or None): provide a coordinate value (integer or float) for each dimension of the
+            coords (int or float or None): provide a coordinate value (integer or float) for each dimension of the
                 array which you are creating a time series for. You need to provide exactly the same number of
                 coordinates as there are dimensions
         Returns:
             pandas.DataFrame with an index, a column named datetime, and a column named values.
         """
-        assert len(self.dim_order) == len(coordinates), 'Specify 1 coordinate for each dimension of the array'
+        assert len(self.dim_order) == len(coords), 'Specify 1 coordinate for each dimension of the array'
 
         # make the return item
         results = dict(datetime=[], values=[])
 
         # map coordinates -> cell indices -> python slice() objects
-        slices = self._map_coords_to_slice(coordinates)
-
+        slices = self._map_coords_to_slice(coords)
         # iterate over each file extracting the value and time for each
         for file in self.files:
             # open the file
@@ -229,6 +231,58 @@ def point(self,
         # return the data stored in a dataframe
         return pd.DataFrame(results)
 
+    def multipoint(self,
+                   *coords: list,
+                   labels: list = None, ) -> pd.DataFrame:
+        """
+        Extracts a time series at many points for a given series of coordinate values
+
+        Args:
+            coords (list or tuple): one positional argument per point. Each point is a sequence holding a
+                coordinate value (integer or float) for each dimension of the array, e.g.
+                len(point) == len(dim_order). See TimeSeries.point for more explanation.
+            labels (list): an optional list of strings which label each of the points provided. len(labels) should
+                be equal to the number of points. Defaults to point_0, point_1, ... in the order given.
+        Returns:
+            pandas.DataFrame with an index, a column named datetime, and one column of values per label.
+        """
+        assert len(self.dim_order) == len(coords[0]), 'Specify 1 coordinate for each dimension of the array'
+        # auto numbering needs one label per point, i.e. len(coords), not one per dimension of the first point
+        labels = [f'point_{i}' for i in range(len(coords))] if labels is None else labels
+        assert len(labels) == len(coords), 'You must provide a label for each point or use auto numbering'
+
+        # make the return item: a datetime column followed by one column per label
+        results = {'datetime': [], **{label: [] for label in labels}}
+
+        # map coordinates -> cell indices -> python slice() objects
+        slices = [self._map_coords_to_slice(coord) for coord in coords]
+
+        # iterate over each file extracting the value and time for each
+        for file in self.files:
+            # open the file
+            opened_file = self._open_data(file)
+            results['datetime'] += list(self._handle_time_steps(opened_file, file))
+
+            for label, slc in zip(labels, slices):
+                # extract the appropriate values from the variable
+                vs = _array_by_engine(opened_file, self.var, slc)
+                # scalars are appended as-is; 1-d arrays contribute one value per element
+                if vs.ndim == 0:
+                    if vs == self.fill_value:
+                        vs = np.nan
+                    results[label].append(vs)
+                elif vs.ndim == 1:
+                    vs[vs == self.fill_value] = np.nan
+                    results[label].extend(vs)
+                else:
+                    raise ValueError('There are too many dimensions after slicing')
+            # files opened by every engine except pygrib are closed once all points have been read
+            if self.engine != 'pygrib':
+                opened_file.close()
+
+        # return the data stored in a dataframe
+        return pd.DataFrame(results)
+
     def bound(self,
               min_coordinates: tuple,
               max_coordinates: tuple,
@@ -287,7 +341,7 @@ def shape(self,
         Applicable only to source data with 2 spatial dimensions and, optionally, a time dimension.
 
         Args:
-            vector (str): path to any spatial polygon file, e.g. shapefile or geojson, which can be read by geopandas.
+            vector (str): path to any spatial polygon file, e.g. shapefile or geojson, which can be read by geopandas.
             behavior (str): determines how the vector data is used to mask the arrays. Options are: dissolve, features
                 - dissolve: treats all features as if they were 1 feature and masks the entire set of polygons in 1 grid
                 - features: treats each feature as a separate entity, must specify an attribute shared by each feature
@@ -528,27 +582,28 @@ def _create_spatial_mask_array(self, vector: str, ) -> np.ma:
             y = y[:, 0]
 
         # read the shapefile
-        vector_gdf = geopandas.read_file(vector)
+        vector_gdf = gpd.read_file(vector)
         vector_gdf = vector_gdf.to_crs(epsg=4326)
 
         # set up the variables to creating and storing masks
         masks = []
         gridshape = (y.shape[0], x.shape[0],)
         affinetransform = affine.Affine(np.abs(x[1] - x[0]), 0, x.min(), 0, np.abs(y[1] - y[0]), y.min())
 
-        # creates a binary, boolean mask of the shapefile
+        # creates a binary/boolean mask of the shapefile
         # in it's crs, over the affine transform area, for a certain masking behavior
         if self.behavior == 'dissolve':
             masks.append(
-                ('featuremask',
+                ('shape',
                  rasterio.features.geometry_mask(vector_gdf.geometry, gridshape, affinetransform, invert=True),)
             )
         elif self.behavior == 'features':
+            assert self.labelby in vector_gdf.keys(), 'labelby parameter not found in attributes of the vector data'
             for idx, row in vector_gdf.iterrows():
                 masks.append(
                     (row[self.labelby],
                      rasterio.features.geometry_mask(
-                         geopandas.GeoSeries(row.geometry), gridshape, affinetransform, invert=True),)
+                         gpd.GeoSeries(row.geometry), gridshape, affinetransform, invert=True),)
                 )
         return masks
 

diff --git a/setup.py b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup
 
 name = 'grids'
-version = '0.6'
+version = '0.7'
 description = 'Tools for extracting time series subsets from n-dimensional arrays in several storage formats.'
 classifiers = [
     'Development Status :: 5 - Production/Stable',