From 112060d6b3726526da80c4c264f9f3a7bdcc82a5 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 5 Mar 2020 21:57:19 -0500 Subject: [PATCH 01/18] pep8 fixes --- xrspatial/tiles.py | 109 ++++++++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/xrspatial/tiles.py b/xrspatial/tiles.py index 853a6f9a..0a754d8c 100644 --- a/xrspatial/tiles.py +++ b/xrspatial/tiles.py @@ -11,12 +11,10 @@ from PIL.Image import fromarray - __all__ = ['render_tiles', 'MercatorTileDefinition'] def _create_dir(path): - import os import errno @@ -39,7 +37,6 @@ def _get_super_tile_min_max(tile_info, load_data_func, rasterize_func): def calculate_zoom_level_stats(full_extent, level, load_data_func, rasterize_func, color_ranging_strategy='fullscan'): - if color_ranging_strategy == 'fullscan': b = db.from_sequence(list(gen_super_tiles(full_extent, level))) b = b.map(_get_super_tile_min_max, load_data_func, @@ -60,7 +57,6 @@ def render_tiles(full_extent, levels, load_data_func, rasterize_func, shader_func, post_render_func, output_path, color_ranging_strategy='fullscan'): - # TODO: get full extent once at beginning for all levels results = dict() for level in levels: @@ -70,29 +66,34 @@ def render_tiles(full_extent, levels, load_data_func, color_ranging_strategy=color_ranging_strategy) super_tiles = list(gen_super_tiles(full_extent, level, span)) - print(f'rendering {len(super_tiles)} supertiles for zoom level {level} with span={span}') + print(f'rendering {len(super_tiles)} supertiles for zoom level {level}' + f' with span={span}') b = db.from_sequence(super_tiles) - b.map(render_super_tile, output_path, load_data_func, rasterize_func, shader_func, post_render_func).compute() - results[level] = dict(success=True, stats=span, supertile_count=len(super_tiles)) + b.map(render_super_tile, output_path, load_data_func, rasterize_func, + shader_func, post_render_func).compute() + results[level] = dict(success=True, stats=span, + supertile_count=len(super_tiles)) return results def gen_super_tiles(extent, zoom_level, span=None): xmin, ymin, xmax, ymax = extent - super_tile_size = min(2**4 * 256, - (2 ** zoom_level) * 256) - super_tile_def = MercatorTileDefinition(x_range=(xmin, xmax), y_range=(ymin, ymax), tile_size=super_tile_size) + super_tile_size = min(2 ** 4 * 256, + (2 ** zoom_level) * 256) + super_tile_def = MercatorTileDefinition(x_range=(xmin, xmax), + y_range=(ymin, ymax), + tile_size=super_tile_size) super_tiles = super_tile_def.get_tiles_by_extent(extent, zoom_level) for s in super_tiles: st_extent = s[3] x_range = (st_extent[0], st_extent[2]) y_range = (st_extent[1], st_extent[3]) yield {'level': zoom_level, - 'x_range': x_range, - 'y_range': y_range, - 'tile_size': super_tile_def.tile_size, - 'span': span} + 'x_range': x_range, + 'y_range': y_range, + 'tile_size': super_tile_def.tile_size, + 'span': span} def render_super_tile(tile_info, output_path, load_data_func, @@ -108,11 +109,12 @@ def render_super_tile(tile_info, output_path, load_data_func, y_range=tile_info['y_range'], height=tile_size, width=tile_size) ds_img = shader_func(agg, span=tile_info['span']) - return create_sub_tiles(ds_img, level, tile_info, output_path, post_render_func) - + return create_sub_tiles(ds_img, level, tile_info, output_path, + post_render_func) -def create_sub_tiles(data_array, level, tile_info, output_path, post_render_func=None): +def create_sub_tiles(data_array, level, tile_info, output_path, + post_render_func=None): # validate / createoutput_dir _create_dir(output_path) @@ -126,7 +128,8 @@ def create_sub_tiles(data_array, level, tile_info, output_path, post_render_func renderer = S3TileRenderer(tile_def, output_location=output_path, post_render_func=post_render_func) else: - renderer = FileSystemTileRenderer(tile_def, output_location=output_path, + renderer = FileSystemTileRenderer(tile_def, + output_location=output_path, post_render_func=post_render_func) return renderer.render(data_array, level=level) @@ -146,32 +149,35 @@ class MercatorTileDefinition(object): ---------- x_range : tuple - full extent of x dimension in data units + full extent of x dimension in data units y_range : tuple - full extent of y dimension in data units + full extent of y dimension in data units max_zoom : int - A maximum zoom level for the tile layer. This is the most zoomed-in level. + A maximum zoom level for the tile layer. + This is the most zoomed-in level. min_zoom : int - A minimum zoom level for the tile layer. This is the most zoomed-out level. + A minimum zoom level for the tile layer. + This is the most zoomed-out level. max_zoom : int - A maximum zoom level for the tile layer. This is the most zoomed-in level. + A maximum zoom level for the tile layer. + This is the most zoomed-in level. x_origin_offset : int - An x-offset in plot coordinates. + An x-offset in plot coordinates. y_origin_offset : int - An y-offset in plot coordinates. + An y-offset in plot coordinates. initial_resolution : int - Resolution (plot_units / pixels) of minimum zoom level of tileset - projection. None to auto-compute. + Resolution (plot_units / pixels) of minimum zoom level of tileset + projection. None to auto-compute. format : int - An y-offset in plot coordinates. + An y-offset in plot coordinates. Output ------ @@ -179,7 +185,8 @@ class MercatorTileDefinition(object): ''' - def __init__(self, x_range, y_range, tile_size=256, min_zoom=0, max_zoom=30, + def __init__(self, x_range, y_range, tile_size=256, + min_zoom=0, max_zoom=30, x_origin_offset=20037508.34, y_origin_offset=20037508.34, initial_resolution=156543.03392804097): self.x_range = x_range @@ -190,7 +197,8 @@ def __init__(self, x_range, y_range, tile_size=256, min_zoom=0, max_zoom=30, self.x_origin_offset = x_origin_offset self.y_origin_offset = y_origin_offset self.initial_resolution = initial_resolution - self._resolutions = [self._get_resolution(z) for z in range(self.min_zoom, self.max_zoom+1)] + self._resolutions = [self._get_resolution(z) + for z in range(self.min_zoom, self.max_zoom + 1)] def to_ogc_tile_metadata(self, output_file_path): ''' @@ -198,14 +206,12 @@ def to_ogc_tile_metadata(self, output_file_path): ''' pass - def to_esri_tile_metadata(self, output_file_path): ''' Create ESRI tile metadata JSON ''' pass - def is_valid_tile(self, x, y, z): if x < 0 or x >= math.pow(2, z): @@ -216,18 +222,15 @@ def is_valid_tile(self, x, y, z): return True - # TODO ngjit? def _get_resolution(self, z): return self.initial_resolution / (2 ** z) - def get_resolution_by_extent(self, extent, height, width): x_rs = (extent[2] - extent[0]) / width y_rs = (extent[3] - extent[1]) / height return [x_rs, y_rs] - def get_level_by_extent(self, extent, height, width): x_rs = (extent[2] - extent[0]) / width y_rs = (extent[3] - extent[1]) / height @@ -242,8 +245,7 @@ def get_level_by_extent(self, extent, height, width): if i > 0: return i - 1 i += 1 - return (i-1) - + return (i - 1) def pixels_to_meters(self, px, py, level): res = self._get_resolution(level) @@ -251,14 +253,12 @@ def pixels_to_meters(self, px, py, level): my = (py * res) - self.y_origin_offset return (mx, my) - def meters_to_pixels(self, mx, my, level): res = self._get_resolution(level) px = (mx + self.x_origin_offset) / res py = (my + self.y_origin_offset) / res return (px, py) - def pixels_to_tile(self, px, py, level): tx = math.ceil(px / self.tile_size) tx = tx if tx == 0 else tx - 1 @@ -266,17 +266,14 @@ def pixels_to_tile(self, px, py, level): # convert from TMS y coordinate return (int(tx), invert_y_tile(int(ty), level)) - def pixels_to_raster(self, px, py, level): map_size = self.tile_size << level return (px, map_size - py) - def meters_to_tile(self, mx, my, level): px, py = self.meters_to_pixels(mx, my, level) return self.pixels_to_tile(px, py, level) - def get_tiles_by_extent(self, extent, level): # unpack extent and convert to tile coordinates @@ -295,11 +292,13 @@ def get_tiles_by_extent(self, extent, level): return tiles - def get_tile_meters(self, tx, ty, level): - ty = invert_y_tile(ty, level) # convert to TMS for conversion to meters - xmin, ymin = self.pixels_to_meters(tx * self.tile_size, ty * self.tile_size, level) - xmax, ymax = self.pixels_to_meters((tx + 1) * self.tile_size, (ty + 1) * self.tile_size, level) + # convert to TMS for conversion to meters + ty = invert_y_tile(ty, level) + xmin, ymin = self.pixels_to_meters(tx * self.tile_size, + ty * self.tile_size, level) + xmax, ymax = self.pixels_to_meters((tx + 1) * self.tile_size, + (ty + 1) * self.tile_size, level) return (xmin, ymin, xmax, ymax) @@ -331,7 +330,8 @@ def render(self, da, level): if 0 in arr.shape: continue - img = fromarray(np.flip(arr.data, 0), 'RGBA') # flip since y tiles go down (Google map tiles) + # flip since y tiles go down (Google map tiles) + img = fromarray(np.flip(arr.data, 0), 'RGBA') if self.post_render_func: extras = dict(x=x, y=y, z=z) @@ -362,7 +362,8 @@ def tile_previewer(full_extent, tileset_url, from bokeh.io import output_file, save from os import path except ImportError: - raise ImportError('conda install bokeh to enable creation of simple tile viewer') + raise ImportError('conda install bokeh to enable creation ' + 'of simple tile viewer') if output_dir: output_file(filename=path.join(output_dir, filename), @@ -399,13 +400,15 @@ def tile_previewer(full_extent, tileset_url, class FileSystemTileRenderer(TileRenderer): def render(self, da, level): - for img, x, y, z in super(FileSystemTileRenderer, self).render(da, level): + for img, x, y, z in super(FileSystemTileRenderer, self).render(da, + level): tile_file_name = '{}.{}'.format(y, self.tile_format.lower()) tile_directory = os.path.join(self.output_location, str(z), str(x)) output_file = os.path.join(tile_directory, tile_file_name) _create_dir(tile_directory) img.save(output_file, self.tile_format) + class S3TileRenderer(TileRenderer): def render(self, da, level): @@ -425,10 +428,14 @@ def render(self, da, level): client = boto3.client('s3') for img, x, y, z in super(S3TileRenderer, self).render(da, level): tile_file_name = '{}.{}'.format(y, self.tile_format.lower()) - key = os.path.join(s3_info.path, str(z), str(x), tile_file_name).lstrip('/') + key = os.path.join(s3_info.path, + str(z), + str(x), + tile_file_name).lstrip('/') output_buf = BytesIO() img.save(output_buf, self.tile_format) output_buf.seek(0) - client.put_object(Body=output_buf, Bucket=bucket, Key=key, ACL='public-read') + client.put_object(Body=output_buf, Bucket=bucket, + Key=key, ACL='public-read') return 'https://{}.s3.amazonaws.com/{}'.format(bucket, s3_info.path) From 2069e114b17075c55aa473daf88879593a5896fc Mon Sep 17 00:00:00 2001 From: thuydotm Date: Sat, 4 Apr 2020 19:27:48 -0400 Subject: [PATCH 02/18] wip: general focal analysis with distance, circular kernel --- xrspatial/focal.py | 87 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index d1220b11..899e05a5 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -1,8 +1,7 @@ import numpy as np - from xarray import DataArray - from xrspatial.utils import ngjit +from numba import stencil #TODO: Make convolution more generic with numba first-class functions. @@ -53,3 +52,87 @@ def mean(agg, passes=1, excludes=[np.nan]): return DataArray(out, name='mean', dims=agg.dims, coords=agg.coords, attrs=agg.attrs) + + +def _gen_ellipse_kernel(half_w, half_h): + # x values of interest + x = np.linspace(-half_w, half_w, 2 * half_w + 1) + # y values of interest, as a "column" array + y = np.linspace(-half_h, half_w, 2 * half_h + 1)[:, None] + + # True for points inside the ellipse + # (x / a)^2 + (y / b)^2 <= 1, avoid division to avoid rounding issue + ellipse = (x * half_h) ** 2 + (y * half_w) ** 2 <= (half_w * half_h) ** 2 + + return ellipse.astype(int) + + +def _apply_convolution(array, kernel): + kernel_half_h, kernel_half_w = kernel.shape + h = int(kernel_half_h / 2) + w = int(kernel_half_w / 2) + # in case the kernel presents a circular filter, + # h = w and are the radius of the kernel + + # return of the function + res = 0 + + # row id of the kernel + k_row = 0 + for i in range(-h, h + 1): + # column id of the kernel + k_col = 0 + for j in range(-w, w + 1): + res += array[i, j] * kernel[k_row, k_col] + k_col += 1 + k_row += 1 + + return res + + +def focal_analysis(raster, shape='circle', radius=1): + # check raster + if not isinstance(raster, DataArray): + raise TypeError("`raster` must be instance of DataArray") + + if raster.ndim != 2: + raise ValueError("`raster` must be 2D") + + if not (issubclass(raster.values.dtype.type, np.integer) or + issubclass(raster.values.dtype.type, np.float)): + raise ValueError( + "`raster` must be an array of integers or float") + + raster_values = raster.values + + cell_size_x = 1 + cell_size_y = 1 + + # calculate cell size from input `raster` + for dim in raster.dims: + if (dim.lower().count('x')) > 0 or (dim.lower().count('lon')) > 0: + # dimension of x-coordinates + if len(raster[dim]) > 1: + cell_size_x = raster[dim].values[1] - raster[dim].values[0] + elif (dim.lower().count('y')) > 0 or (dim.lower().count('lat')) > 0: + # dimension of y-coordinates + if len(raster[dim]) > 1: + cell_size_y = raster[dim].values[1] - raster[dim].values[0] + + # TODO: check coordinate unit, convert from lat-lon to meters + + # create kernel + if shape == 'circle': + # convert radius (meter) to pixel + kernel_half_w = int(radius / cell_size_x) + kernel_half_h = int(radius / cell_size_y) + kernel = _gen_ellipse_kernel(kernel_half_w, kernel_half_h) + + print(cell_size_x, cell_size_y, kernel) + # apply kernel to raster values + res = stencil(_apply_convolution, + standard_indexing=("kernel",), + neighborhood=((-kernel_half_h, kernel_half_h), + (-kernel_half_w, kernel_half_w)))(raster_values, kernel) + + return res From 02c434cf91e7f0f09b9463dbf1715fb4d22c847f Mon Sep 17 00:00:00 2001 From: thuydotm Date: Sat, 4 Apr 2020 23:06:36 -0400 Subject: [PATCH 03/18] add padding before convolution --- xrspatial/focal.py | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 899e05a5..93419480 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -29,6 +29,7 @@ def _mean(data, excludes): out[y, x] = data[y, x] return out + # TODO: add optional name parameter `name='mean'` def mean(agg, passes=1, excludes=[np.nan]): """ @@ -54,6 +55,12 @@ def mean(agg, passes=1, excludes=[np.nan]): dims=agg.dims, coords=agg.coords, attrs=agg.attrs) +def _zscores(array): + mean = np.nanmean(array) + std = np.nanstd(array) + return (array - mean) / std + + def _gen_ellipse_kernel(half_w, half_h): # x values of interest x = np.linspace(-half_w, half_w, 2 * half_w + 1) @@ -71,8 +78,9 @@ def _apply_convolution(array, kernel): kernel_half_h, kernel_half_w = kernel.shape h = int(kernel_half_h / 2) w = int(kernel_half_w / 2) - # in case the kernel presents a circular filter, - # h = w and are the radius of the kernel + + # number of pixels inside the kernel + num_pixels = 0 # return of the function res = 0 @@ -85,9 +93,11 @@ def _apply_convolution(array, kernel): for j in range(-w, w + 1): res += array[i, j] * kernel[k_row, k_col] k_col += 1 + if (kernel[k_row, k_col] == 1): + num_pixels += 1 k_row += 1 - return res + return res / num_pixels def focal_analysis(raster, shape='circle', radius=1): @@ -103,8 +113,6 @@ def focal_analysis(raster, shape='circle', radius=1): raise ValueError( "`raster` must be an array of integers or float") - raster_values = raster.values - cell_size_x = 1 cell_size_y = 1 @@ -128,11 +136,23 @@ def focal_analysis(raster, shape='circle', radius=1): kernel_half_h = int(radius / cell_size_y) kernel = _gen_ellipse_kernel(kernel_half_w, kernel_half_h) - print(cell_size_x, cell_size_y, kernel) - # apply kernel to raster values - res = stencil(_apply_convolution, - standard_indexing=("kernel",), - neighborhood=((-kernel_half_h, kernel_half_h), - (-kernel_half_w, kernel_half_w)))(raster_values, kernel) + # zero padding + height, width = raster.shape + padded_raster_val = np.zeros((height + 2*kernel_half_h, + width + 2*kernel_half_w)) + padded_raster_val[kernel_half_h:height + kernel_half_h, + kernel_half_w:width + kernel_half_w] = raster.values - return res + # apply kernel to raster values + padded_res = stencil(_apply_convolution, + standard_indexing=("kernel",), + neighborhood=((-kernel_half_h, kernel_half_h), + (-kernel_half_w, kernel_half_w)))(padded_raster_val, kernel) + + result = DataArray(padded_res[kernel_half_h:height + kernel_half_h, + kernel_half_w:width + kernel_half_w], + coords=raster.coords, + dims=raster.dims, + attrs=raster.attrs) + + return result From de957296d853223e49e35f3cf87718bcd3cec928 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Sun, 5 Apr 2020 15:07:48 -0400 Subject: [PATCH 04/18] handle distance unit --- xrspatial/focal.py | 115 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 3 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 93419480..21e655fe 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -2,9 +2,11 @@ from xarray import DataArray from xrspatial.utils import ngjit from numba import stencil +import re +DEFAULT_UNIT = 'meter' -#TODO: Make convolution more generic with numba first-class functions. +# TODO: Make convolution more generic with numba first-class functions. @ngjit @@ -55,6 +57,104 @@ def mean(agg, passes=1, excludes=[np.nan]): dims=agg.dims, coords=agg.coords, attrs=agg.attrs) +def is_number(s): + try: + float(s) + return True + except ValueError: + return False + + +# modified from https://stackoverflow.com/questions/3943752/the-dateutil-parser-parse-of-distance-strings +class Distance(object): + METER = 1 + FOOT = 0.3048 + KILOMETER = 1000 + MILE = 1609.344 + UNITS = {'meter': METER, + 'meters': METER, + 'm': METER, + 'feet': FOOT, + 'foot': FOOT, + 'ft': FOOT, + 'miles': MILE, + 'mls': MILE, + 'ml': MILE, + 'kilometer': KILOMETER, + 'kilometers': KILOMETER, + 'km': KILOMETER, + } + + def __init__(self, s): + self.number, unit = self._get_distance_unit(s) + self._convert(unit) + + def _get_distance_unit(self, s): + # spit string into numbers and text + splits = [x for x in re.split(r'(-?\d*\.?\d+)', s) if x != ''] + + if len(splits) not in [1, 2]: + raise ValueError("Invalid distance.") + + number = splits[0] + unit = DEFAULT_UNIT + if len(splits) == 1: + print("Distance unit not provided. Use meter as default.") + elif len(splits) == 2: + unit = splits[1] + + unit = unit.lower() + if unit not in self.UNITS: + raise ValueError( + "Invalid value.\n" + "Distance unit should be one of the following: \n" + "meter (meter, meters, m),\n" + "kilometer (kilometer, kilometers, km),\n" + "foot (foot, feet, ft),\n" + "mile (mile, miles, ml, mls)") + return number, unit + + def _convert(self, unit): + self.number = float(self.number) + if self.UNITS[unit] != 1: + self.number *= self.UNITS[unit] + + @property + def meters(self): + return self.number + + @meters.setter + def meters(self, v): + self.number = float(v) + + @property + def miles(self): + return self.number / self.MILE + + @miles.setter + def miles(self, v): + self.number = v + self._convert('miles') + + @property + def feet(self): + return self.number / self.FOOT + + @feet.setter + def feet(self, v): + self.number = v + self._convert('feet') + + @property + def kilometers(self): + return self.number / self.KILOMETER + + @kilometers.setter + def kilometers(self, v): + self.number = v + self._convert('KILOMETER') + + def _zscores(array): mean = np.nanmean(array) std = np.nanstd(array) @@ -128,12 +228,21 @@ def focal_analysis(raster, shape='circle', radius=1): cell_size_y = raster[dim].values[1] - raster[dim].values[0] # TODO: check coordinate unit, convert from lat-lon to meters + if 'unit' in raster.attrs: + unit = raster.attrs['unit'] + else: + unit = DEFAULT_UNIT + print("Raster distance unit not provided. Use meter as default.") + + sx = Distance(str(cell_size_x) + unit) + sy = Distance(str(cell_size_y) + unit) + sr = Distance(str(radius)) # create kernel if shape == 'circle': # convert radius (meter) to pixel - kernel_half_w = int(radius / cell_size_x) - kernel_half_h = int(radius / cell_size_y) + kernel_half_w = int(sr.meters / sx.meters) + kernel_half_h = int(sr.meters / sy.meters) kernel = _gen_ellipse_kernel(kernel_half_w, kernel_half_h) # zero padding From e8eeda331e6e00832082df35ae3cdf6f5e5bda39 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Sun, 5 Apr 2020 16:06:12 -0400 Subject: [PATCH 05/18] minor fix in _apply_convolution() --- xrspatial/focal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 21e655fe..63585cfb 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -192,9 +192,9 @@ def _apply_convolution(array, kernel): k_col = 0 for j in range(-w, w + 1): res += array[i, j] * kernel[k_row, k_col] - k_col += 1 if (kernel[k_row, k_col] == 1): num_pixels += 1 + k_col += 1 k_row += 1 return res / num_pixels From 44349d2de93844fa459aeac5aedb2cec95a457a7 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Sun, 5 Apr 2020 16:07:56 -0400 Subject: [PATCH 06/18] validate shape --- xrspatial/focal.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 63585cfb..d3e30e00 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -213,6 +213,11 @@ def focal_analysis(raster, shape='circle', radius=1): raise ValueError( "`raster` must be an array of integers or float") + # validate shape + if shape not in ['circle']: + raise ValueError( + "kernel shape must be one of the following: \'circle\'") + cell_size_x = 1 cell_size_y = 1 From bcc8824c1baed7673101698d69de81474b8906c5 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Sun, 5 Apr 2020 16:15:22 -0400 Subject: [PATCH 07/18] import numpy in utils --- xrspatial/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xrspatial/utils.py b/xrspatial/utils.py index 221c2a6d..1b28d893 100644 --- a/xrspatial/utils.py +++ b/xrspatial/utils.py @@ -1,4 +1,5 @@ import numba as nb +import numpy as np ngjit = nb.jit(nopython=True, nogil=True) From d3d8a036c00bc89cd11b1e3aa29be65f70cc4001 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Sun, 5 Apr 2020 16:18:22 -0400 Subject: [PATCH 08/18] cell size: lon-lat to meter --- xrspatial/focal.py | 79 ++++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 27 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index d3e30e00..e26f07ca 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -1,6 +1,6 @@ import numpy as np from xarray import DataArray -from xrspatial.utils import ngjit +from xrspatial.utils import ngjit, lnglat_to_meters from numba import stencil import re @@ -200,47 +200,72 @@ def _apply_convolution(array, kernel): return res / num_pixels -def focal_analysis(raster, shape='circle', radius=1): - # check raster - if not isinstance(raster, DataArray): - raise TypeError("`raster` must be instance of DataArray") - - if raster.ndim != 2: - raise ValueError("`raster` must be 2D") - - if not (issubclass(raster.values.dtype.type, np.integer) or - issubclass(raster.values.dtype.type, np.float)): - raise ValueError( - "`raster` must be an array of integers or float") - - # validate shape - if shape not in ['circle']: - raise ValueError( - "kernel shape must be one of the following: \'circle\'") +def _calc_cell_size(raster): + if 'unit' in raster.attrs: + unit = raster.attrs['unit'] + else: + unit = DEFAULT_UNIT + print("Raster distance unit not provided. Use meter as default.") cell_size_x = 1 cell_size_y = 1 # calculate cell size from input `raster` for dim in raster.dims: - if (dim.lower().count('x')) > 0 or (dim.lower().count('lon')) > 0: + if (dim.lower().count('x')) > 0: # dimension of x-coordinates if len(raster[dim]) > 1: cell_size_x = raster[dim].values[1] - raster[dim].values[0] - elif (dim.lower().count('y')) > 0 or (dim.lower().count('lat')) > 0: + elif (dim.lower().count('y')) > 0: # dimension of y-coordinates if len(raster[dim]) > 1: cell_size_y = raster[dim].values[1] - raster[dim].values[0] - # TODO: check coordinate unit, convert from lat-lon to meters - if 'unit' in raster.attrs: - unit = raster.attrs['unit'] - else: + lon0, lon1, lat0, lat1 = None, None, None, None + for dim in raster.dims: + if (dim.lower().count('lon')) > 0: + # dimension of x-coordinates + if len(raster[dim]) > 1: + lon0, lon1 = raster[dim].values[0], raster[dim].values[1] + elif (dim.lower().count('lat')) > 0: + # dimension of y-coordinates + if len(raster[dim]) > 1: + lat0, lat1 = raster[dim].values[0], raster[dim].values[1] + + # convert lat-lon to meters + if (lon0, lon1, lat0, lat1) != (None, None, None, None): + mx0, my0 = lnglat_to_meters(lon0, lat0) + mx1, my1 = lnglat_to_meters(lon1, lat1) + cell_size_x = mx1 - mx0 + cell_size_y = my1 - my0 unit = DEFAULT_UNIT - print("Raster distance unit not provided. Use meter as default.") sx = Distance(str(cell_size_x) + unit) sy = Distance(str(cell_size_y) + unit) + return sx, sy + + +def focal_analysis(raster, shape='circle', radius='10km'): + # validate raster + if not isinstance(raster, DataArray): + raise TypeError("`raster` must be instance of DataArray") + + if raster.ndim != 2: + raise ValueError("`raster` must be 2D") + + if not (issubclass(raster.values.dtype.type, np.integer) or + issubclass(raster.values.dtype.type, np.float)): + raise ValueError( + "`raster` must be an array of integers or float") + + # validate shape + if shape not in ['circle']: + raise ValueError( + "kernel shape must be one of the following: \'circle\'") + + # calculate cell size over the x and y axis + sx, sy = _calc_cell_size(raster) + # create Distance object of radius sr = Distance(str(radius)) # create kernel @@ -252,8 +277,8 @@ def focal_analysis(raster, shape='circle', radius=1): # zero padding height, width = raster.shape - padded_raster_val = np.zeros((height + 2*kernel_half_h, - width + 2*kernel_half_w)) + padded_raster_val = np.zeros((height + 2 * kernel_half_h, + width + 2 * kernel_half_w)) padded_raster_val[kernel_half_h:height + kernel_half_h, kernel_half_w:width + kernel_half_w] = raster.values From add95ec8d862687f0e4cc13fb7bbdf30bbbe25d6 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Sun, 5 Apr 2020 19:59:00 -0400 Subject: [PATCH 09/18] focal analysis: add tests --- xrspatial/tests/test_focal.py | 51 +++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index e1841a6d..9440976f 100644 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -2,6 +2,8 @@ import numpy as np from xrspatial import mean +from xrspatial.focal import focal_analysis +import pytest def _do_sparse_array(data_array): @@ -51,3 +53,52 @@ def test_mean_transfer_function(): da_mean[:, 0] = data_random[:, 0] da_mean[:, -1] = data_random[:, -1] assert abs(da_mean.mean() - data_random.mean()) < 10**-3 + + +def test_focal_invalid_input(): + invalid_raster_type = np.array([0, 1, 2, 3]) + with pytest.raises(Exception) as e_info: + focal_analysis(invalid_raster_type) + assert e_info + + invalid_raster_dtype = xr.DataArray(np.array([['cat', 'dog']])) + with pytest.raises(Exception) as e_info: + focal_analysis(invalid_raster_dtype) + assert e_info + + invalid_raster_shape = xr.DataArray(np.array([0, 0])) + with pytest.raises(Exception) as e_info: + focal_analysis(invalid_raster_shape) + assert e_info + + raster = xr.DataArray(np.ones((5, 5))) + invalid_kernel_shape = 'line' + with pytest.raises(Exception) as e_info: + focal_analysis(raster=raster, shape=invalid_kernel_shape) + assert e_info + + raster = xr.DataArray(np.ones((5, 5))) + invalid_radius = '10 inch' + with pytest.raises(Exception) as e_info: + focal_analysis(raster=raster, radius=invalid_radius) + assert e_info + + +def test_focal_default(): + raster = xr.DataArray(np.ones((10, 10)), dims=['x', 'y']) + raster['x'] = np.linspace(0, 9, 10) + raster['y'] = np.linspace(0, 9, 10) + + focal_stats = focal_analysis(raster, radius='1m') + + # check output's properties + # output must be an xarray DataArray + assert isinstance(focal_stats, xr.DataArray) + assert isinstance(focal_stats.values, np.ndarray) + # shape, dims, coords, attr preserved + assert raster.shape == focal_stats.shape + assert raster.dims == focal_stats.dims + assert raster.attrs == focal_stats.attrs + assert raster.coords == focal_stats.coords + + # TODO: validate output value From bc0875f66c03d306b8088477bccc2e869eeb0e0e Mon Sep 17 00:00:00 2001 From: thuydotm Date: Tue, 7 Apr 2020 00:59:14 -0400 Subject: [PATCH 10/18] move out stencil --- xrspatial/focal.py | 277 ++++++++++++++++++---------------- xrspatial/tests/test_focal.py | 132 +++++++++++----- 2 files changed, 249 insertions(+), 160 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index e26f07ca..a291f496 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -1,60 +1,16 @@ import numpy as np from xarray import DataArray from xrspatial.utils import ngjit, lnglat_to_meters -from numba import stencil +from numba import prange import re +import warnings -DEFAULT_UNIT = 'meter' - -# TODO: Make convolution more generic with numba first-class functions. - - -@ngjit -def _mean(data, excludes): - out = np.zeros_like(data) - rows, cols = data.shape - for y in range(1, rows-1): - for x in range(1, cols-1): - - exclude = False - for ex in excludes: - if data[y,x] == ex: - exclude = True - break - - if not exclude: - a,b,c,d,e,f,g,h,i = [data[y-1, x-1], data[y, x-1], data[y+1, x-1], - data[y-1, x], data[y, x], data[y+1, x], - data[y-1, x+1], data[y, x+1], data[y+1, x+1]] - out[y, x] = (a+b+c+d+e+f+g+h+i) / 9 - else: - out[y, x] = data[y, x] - return out - +warnings.simplefilter('default') -# TODO: add optional name parameter `name='mean'` -def mean(agg, passes=1, excludes=[np.nan]): - """ - Returns Mean filtered array using a 3x3 window +DEFAULT_UNIT = 'meter' - Parameters - ---------- - agg : DataArray - passes : int, number of times to run mean - Returns - ------- - data: DataArray - """ - out = None - for i in range(passes): - if out is None: - out = _mean(agg.data, tuple(excludes)) - else: - out = _mean(out, tuple(excludes)) - - return DataArray(out, name='mean', - dims=agg.dims, coords=agg.coords, attrs=agg.attrs) +# TODO: Make convolution more generic with numba first-class functions. def is_number(s): @@ -92,7 +48,6 @@ def __init__(self, s): def _get_distance_unit(self, s): # spit string into numbers and text splits = [x for x in re.split(r'(-?\d*\.?\d+)', s) if x != ''] - if len(splits) not in [1, 2]: raise ValueError("Invalid distance.") @@ -104,6 +59,8 @@ def _get_distance_unit(self, s): unit = splits[1] unit = unit.lower() + unit = unit.replace(' ', '') + print("unit", unit) if unit not in self.UNITS: raise ValueError( "Invalid value.\n" @@ -155,51 +112,6 @@ def kilometers(self, v): self._convert('KILOMETER') -def _zscores(array): - mean = np.nanmean(array) - std = np.nanstd(array) - return (array - mean) / std - - -def _gen_ellipse_kernel(half_w, half_h): - # x values of interest - x = np.linspace(-half_w, half_w, 2 * half_w + 1) - # y values of interest, as a "column" array - y = np.linspace(-half_h, half_w, 2 * half_h + 1)[:, None] - - # True for points inside the ellipse - # (x / a)^2 + (y / b)^2 <= 1, avoid division to avoid rounding issue - ellipse = (x * half_h) ** 2 + (y * half_w) ** 2 <= (half_w * half_h) ** 2 - - return ellipse.astype(int) - - -def _apply_convolution(array, kernel): - kernel_half_h, kernel_half_w = kernel.shape - h = int(kernel_half_h / 2) - w = int(kernel_half_w / 2) - - # number of pixels inside the kernel - num_pixels = 0 - - # return of the function - res = 0 - - # row id of the kernel - k_row = 0 - for i in range(-h, h + 1): - # column id of the kernel - k_col = 0 - for j in range(-w, w + 1): - res += array[i, j] * kernel[k_row, k_col] - if (kernel[k_row, k_col] == 1): - num_pixels += 1 - k_col += 1 - k_row += 1 - - return res / num_pixels - - def _calc_cell_size(raster): if 'unit' in raster.attrs: unit = raster.attrs['unit'] @@ -245,7 +157,146 @@ def _calc_cell_size(raster): return sx, sy -def focal_analysis(raster, shape='circle', radius='10km'): +def _gen_ellipse_kernel(half_w, half_h): + # x values of interest + x = np.linspace(-half_w, half_w, 2 * half_w + 1) + # y values of interest, as a "column" array + y = np.linspace(-half_h, half_w, 2 * half_h + 1)[:, None] + + # True for points inside the ellipse + # (x / a)^2 + (y / b)^2 <= 1, avoid division to avoid rounding issue + ellipse = (x * half_h) ** 2 + (y * half_w) ** 2 <= (half_w * half_h) ** 2 + + return ellipse.astype(int) + + +class Kernel: + def __init__(self, shape='circle', radius=10000): + self.shape = shape + self.radius = radius + self._validate_shape() + self._validate_radius() + + def _validate_shape(self): + # validate shape + if self.shape not in ['circle']: + raise ValueError( + "Kernel shape must be \'circle\'") + + def _validate_radius(self): + # try to convert into Distance object + d = Distance(str(self.radius)) + + def to_array(self, raster): + # calculate cell size over the x and y axis + sx, sy = _calc_cell_size(raster) + # create Distance object of radius + sr = Distance(str(self.radius)) + if self.shape == 'circle': + # convert radius (meter) to pixel + kernel_half_w = int(sr.meters / sx.meters) + kernel_half_h = int(sr.meters / sy.meters) + kernel = _gen_ellipse_kernel(kernel_half_w, kernel_half_h) + return kernel + + +@ngjit +def _mean(data, excludes): + out = np.zeros_like(data) + rows, cols = data.shape + for y in range(1, rows-1): + for x in range(1, cols-1): + + exclude = False + for ex in excludes: + if data[y,x] == ex: + exclude = True + break + + if not exclude: + a,b,c,d,e,f,g,h,i = [data[y-1, x-1], data[y, x-1], data[y+1, x-1], + data[y-1, x], data[y, x], data[y+1, x], + data[y-1, x+1], data[y, x+1], data[y+1, x+1]] + out[y, x] = (a+b+c+d+e+f+g+h+i) / 9 + else: + out[y, x] = data[y, x] + return out + + +# TODO: add optional name parameter `name='mean'` +def mean(agg, passes=1, excludes=[np.nan]): + """ + Returns Mean filtered array using a 3x3 window + + Parameters + ---------- + agg : DataArray + passes : int, number of times to run mean + + Returns + ------- + data: DataArray + """ + out = None + for i in range(passes): + if out is None: + out = _mean(agg.data, tuple(excludes)) + else: + out = _mean(out, tuple(excludes)) + + return DataArray(out, name='mean', + dims=agg.dims, coords=agg.coords, attrs=agg.attrs) + + +@ngjit +def calc_mean(array): + return np.nanmean(array) + + +@ngjit +def calc_sum(array): + return np.nansum(array) + + +@ngjit +def _calc_std(array): + return np.nanstd(array) + + +# @ngjit +# def _calc_zscores(array): +# arr_mean = calc_mean(array) +# arr_std = _calc_std(array) +# res = np.zeros_like(array, dtype=array.dtype) +# for i in prange(h): +# for j in prange(w): +# res[i, j] = (array[i, j] - arr_mean) / arr_std +# return res + + +@ngjit +def _apply(data, kernel_array, func): + out = np.zeros_like(data) + rows, cols = data.shape + krows, kcols = kernel_array.shape + hrows, hcols = int(krows / 2), int(kcols / 2) + kernel_values = np.zeros_like(kernel_array, dtype=data.dtype) + + for y in prange(rows): + for x in prange(cols): + # kernel values are all nans at the beginning of each step + kernel_values.fill(np.nan) + for ky in range(y - hrows, y + hrows + 1): + for kx in range(x - hcols, x + hcols + 1): + if ky >= 0 and kx >= 0: + if ky >= 0 and ky < rows and kx >= 0 and kx < cols: + if kernel_array[ky - (y - hrows), kx - (x - hcols)] == 1: + kernel_values[ky - (y - hrows), kx - (x - hcols)] = data[ky, kx] + out[y, x] = func(kernel_values) + return out + + +def apply(raster, kernel, func=calc_mean): # validate raster if not isinstance(raster, DataArray): raise TypeError("`raster` must be instance of DataArray") @@ -258,38 +309,12 @@ def focal_analysis(raster, shape='circle', radius='10km'): raise ValueError( "`raster` must be an array of integers or float") - # validate shape - if shape not in ['circle']: - raise ValueError( - "kernel shape must be one of the following: \'circle\'") - - # calculate cell size over the x and y axis - sx, sy = _calc_cell_size(raster) - # create Distance object of radius - sr = Distance(str(radius)) - - # create kernel - if shape == 'circle': - # convert radius (meter) to pixel - kernel_half_w = int(sr.meters / sx.meters) - kernel_half_h = int(sr.meters / sy.meters) - kernel = _gen_ellipse_kernel(kernel_half_w, kernel_half_h) - - # zero padding - height, width = raster.shape - padded_raster_val = np.zeros((height + 2 * kernel_half_h, - width + 2 * kernel_half_w)) - padded_raster_val[kernel_half_h:height + kernel_half_h, - kernel_half_w:width + kernel_half_w] = raster.values - + # create kernel mask array + kernel_values = kernel.to_array(raster) # apply kernel to raster values - padded_res = stencil(_apply_convolution, - standard_indexing=("kernel",), - neighborhood=((-kernel_half_h, kernel_half_h), - (-kernel_half_w, kernel_half_w)))(padded_raster_val, kernel) + out = _apply(raster.values, kernel_values, func) - result = DataArray(padded_res[kernel_half_h:height + kernel_half_h, - kernel_half_w:width + kernel_half_w], + result = DataArray(out, coords=raster.coords, dims=raster.dims, attrs=raster.attrs) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 9440976f..fc6f67dc 100644 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -2,7 +2,7 @@ import numpy as np from xrspatial import mean -from xrspatial.focal import focal_analysis +from xrspatial.focal import apply, Kernel, calc_mean, calc_sum import pytest @@ -55,50 +55,114 @@ def test_mean_transfer_function(): assert abs(da_mean.mean() - data_random.mean()) < 10**-3 -def test_focal_invalid_input(): - invalid_raster_type = np.array([0, 1, 2, 3]) +def test_kernel(): with pytest.raises(Exception) as e_info: - focal_analysis(invalid_raster_type) - assert e_info + invalid_shape_kernel = Kernel(shape='line') - invalid_raster_dtype = xr.DataArray(np.array([['cat', 'dog']])) with pytest.raises(Exception) as e_info: - focal_analysis(invalid_raster_dtype) - assert e_info + invalid_radius_kernel = Kernel(radius='10 inch') - invalid_raster_shape = xr.DataArray(np.array([0, 0])) + +def test_apply_invalid_input(): + kernel = Kernel(radius=1) + invalid_raster_type = np.array([0, 1, 2, 3]) with pytest.raises(Exception) as e_info: - focal_analysis(invalid_raster_shape) + apply(invalid_raster_type, kernel) assert e_info - raster = xr.DataArray(np.ones((5, 5))) - invalid_kernel_shape = 'line' + invalid_raster_dtype = xr.DataArray(np.array([['cat', 'dog']])) with pytest.raises(Exception) as e_info: - focal_analysis(raster=raster, shape=invalid_kernel_shape) + apply(invalid_raster_dtype, kernel) assert e_info - raster = xr.DataArray(np.ones((5, 5))) - invalid_radius = '10 inch' + invalid_raster_shape = xr.DataArray(np.array([0, 0])) with pytest.raises(Exception) as e_info: - focal_analysis(raster=raster, radius=invalid_radius) + apply(invalid_raster_shape, kernel) assert e_info -def test_focal_default(): - raster = xr.DataArray(np.ones((10, 10)), dims=['x', 'y']) - raster['x'] = np.linspace(0, 9, 10) - raster['y'] = np.linspace(0, 9, 10) - - focal_stats = focal_analysis(raster, radius='1m') - - # check output's properties - # output must be an xarray DataArray - assert isinstance(focal_stats, xr.DataArray) - assert isinstance(focal_stats.values, np.ndarray) - # shape, dims, coords, attr preserved - assert raster.shape == focal_stats.shape - assert raster.dims == focal_stats.dims - assert raster.attrs == focal_stats.attrs - assert raster.coords == focal_stats.coords - - # TODO: validate output value +def test_apply_mean(): + n, m = 10, 15 + raster = xr.DataArray(np.ones((n, m)), dims=['x', 'y']) + raster['x'] = np.linspace(0, n, n) + raster['y'] = np.linspace(0, m, m) + + for i in range(m): + kernel = Kernel(radius=i) + mean_output = apply(raster, kernel) + + # check output's properties + # output must be an xarray DataArray + assert isinstance(mean_output, xr.DataArray) + assert isinstance(mean_output.values, np.ndarray) + # shape, dims, coords, attr preserved + assert raster.shape == mean_output.shape + assert raster.dims == mean_output.dims + assert raster.attrs == mean_output.attrs + for coord in raster.coords: + assert np.all(raster[coord] == mean_output[coord]) + assert (np.all(mean_output.values == np.ones((n, m)))) + + +def test_apply_sum(): + n, m = 10, 15 + raster = xr.DataArray(np.ones((n, m)), dims=['x', 'y']) + raster['x'] = np.linspace(0, n, n) + raster['y'] = np.linspace(0, m, m) + + for i in range(m): + kernel = Kernel(radius=i) + + kernel_array = kernel.to_array(raster) + krows, kcols = kernel_array.shape + hrows, hcols = min(n, int(krows / 2)), min(m, int(kcols / 2)) + + sum_output = apply(raster, kernel, calc_sum) + + # check output's properties + # output must be an xarray DataArray + assert isinstance(sum_output, xr.DataArray) + assert isinstance(sum_output.values, np.ndarray) + # shape, dims, coords, attr preserved + assert raster.shape == sum_output.shape + assert raster.dims == sum_output.dims + assert raster.attrs == sum_output.attrs + for coord in raster.coords: + assert np.all(raster[coord] == sum_output[coord]) + + kernel_sum = np.sum(kernel_array) + raster_sum = np.sum(raster.values) + # in case the kernel smaller than the raster + # cells that fit the kernel has neighborhood sum equal to sum(kernel) + for y in range(hrows, n - hrows): + for x in range(hcols, m - hcols): + assert sum_output.values[y, x] == min(kernel_sum, raster_sum) + + # cell in border has sum less than np.sum(kernel) + for y in range(hrows): + for x in range(m): + assert sum_output.values[y, x] < kernel_sum + + for y in range(n-hrows, n): + for x in range(m): + assert sum_output.values[y, x] < kernel_sum + + for y in range(n): + for x in range(hcols): + assert sum_output.values[y, x] < kernel_sum + + for y in range(n): + for x in range(m-hcols, m): + assert sum_output.values[y, x] < kernel_sum + + +# def test_apply_with_nan(): +# n, m = 10, 10 +# raster = xr.DataArray(np.ones((n, m)), dims=['x', 'y']) +# raster['x'] = np.linspace(0, n, n) +# raster['y'] = np.linspace(0, m, m) +# +# nan_cells = [(i, i) for i in range(n)] +# for cell in nan_cells: +# raster[cell[0], cell[1]] = np.nan +# From fd733fefd68dd6823ea36c40ae0c71df23353424 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Tue, 7 Apr 2020 01:07:09 -0400 Subject: [PATCH 11/18] add warning if distance unit not provided --- xrspatial/focal.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index a291f496..c69a8d89 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -54,13 +54,13 @@ def _get_distance_unit(self, s): number = splits[0] unit = DEFAULT_UNIT if len(splits) == 1: - print("Distance unit not provided. Use meter as default.") + warnings.warn('Raster distance unit not provided. ' + 'Use meter as default.', Warning) elif len(splits) == 2: unit = splits[1] unit = unit.lower() unit = unit.replace(' ', '') - print("unit", unit) if unit not in self.UNITS: raise ValueError( "Invalid value.\n" @@ -117,7 +117,8 @@ def _calc_cell_size(raster): unit = raster.attrs['unit'] else: unit = DEFAULT_UNIT - print("Raster distance unit not provided. Use meter as default.") + warnings.warn('Raster distance unit not provided. ' + 'Use meter as default.', Warning) cell_size_x = 1 cell_size_y = 1 From e8856176b818addde6cf72f6157bfc147ce742f7 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Tue, 7 Apr 2020 01:42:38 -0400 Subject: [PATCH 12/18] add test with nan --- xrspatial/tests/test_focal.py | 59 +++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index fc6f67dc..c60a6688 100644 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -156,13 +156,52 @@ def test_apply_sum(): assert sum_output.values[y, x] < kernel_sum -# def test_apply_with_nan(): -# n, m = 10, 10 -# raster = xr.DataArray(np.ones((n, m)), dims=['x', 'y']) -# raster['x'] = np.linspace(0, n, n) -# raster['y'] = np.linspace(0, m, m) -# -# nan_cells = [(i, i) for i in range(n)] -# for cell in nan_cells: -# raster[cell[0], cell[1]] = np.nan -# +def test_apply_with_nan(): + n, m = 6, 6 + raster = xr.DataArray(np.ones((n, m)), dims=['x', 'y']) + raster['x'] = np.linspace(0, n, n) + raster['y'] = np.linspace(0, m, m) + + nan_cells = [(i, i) for i in range(n)] + for cell in nan_cells: + raster[cell[0], cell[1]] = np.nan + + kernel_1 = Kernel(radius=1) + # kernel array = [[1]] + sum_output_1 = apply(raster, kernel_1, calc_sum) + # np.nansum(np.array([np.nan])) = 0.0 + expected_out_sum_1 = np.array([[0., 1., 1., 1., 1., 1.], + [1., 0., 1., 1., 1., 1.], + [1., 1., 0., 1., 1., 1.], + [1., 1., 1., 0., 1., 1.], + [1., 1., 1., 1., 0., 1.], + [1., 1., 1., 1., 1., 0.]]) + assert np.all(sum_output_1.values == expected_out_sum_1) + + # np.nanmean(np.array([np.nan])) = nan + mean_output_1 = apply(raster, kernel_1, calc_mean) + for cell in nan_cells: + assert np.isnan(mean_output_1[cell[0], cell[1]]) + # remaining cells are 1s + for i in range(n): + for j in range(m): + if i != j: + assert mean_output_1[i, j] == 1 + + kernel_2 = Kernel(radius=2) + # kernel array: [[0, 1, 0], + # [1, 1, 1], + # [0, 1, 0]] + sum_output_2 = apply(raster, kernel_2, calc_sum) + expected_out_sum_2 = np.array([[2., 2., 4., 4., 4., 3.], + [2., 4., 3., 5., 5., 4.], + [4., 3., 4., 3., 5., 4.], + [4., 5., 3., 4., 3., 4.], + [4., 5., 5., 3., 4., 2.], + [3., 4., 4., 4., 2., 2.]]) + + assert np.all(sum_output_2.values == expected_out_sum_2) + + mean_output_2 = apply(raster, kernel_2, calc_mean) + expected_mean_output_2 = np.ones((n, m)) + assert np.all(mean_output_2.values == expected_mean_output_2) From 77ee55d66617c3bbea3dd1ad8dc9732c2f7dca47 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 8 Apr 2020 15:32:53 -0400 Subject: [PATCH 13/18] add hotspots --- xrspatial/focal.py | 89 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 12 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index c69a8d89..e74d9ec0 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -168,7 +168,7 @@ def _gen_ellipse_kernel(half_w, half_h): # (x / a)^2 + (y / b)^2 <= 1, avoid division to avoid rounding issue ellipse = (x * half_h) ** 2 + (y * half_w) ** 2 <= (half_w * half_h) ** 2 - return ellipse.astype(int) + return ellipse.astype(float) class Kernel: @@ -260,19 +260,47 @@ def calc_sum(array): @ngjit -def _calc_std(array): - return np.nanstd(array) +def calc_zscores(array): + arr_mean = np.nanmean(array) + arr_std = np.nanstd(array) + out = np.zeros_like(array, dtype=array.dtype) + h, w = array.shape + for i in prange(h): + for j in prange(w): + out[i, j] = (array[i, j] - arr_mean) / arr_std + return out + + +@ngjit +def upper_bound_p_value(zscore): + if abs(zscore) >= 2.33: + return 0.0099 + if abs(zscore) >= 1.65: + return 0.0495 + if abs(zscore) >= 1.29: + return 0.0985 + return 1 + + +@ngjit +def _hot_cold(zscore): + if zscore > 0: + return 1 + if zscore < 0: + return -1 + return 0 -# @ngjit -# def _calc_zscores(array): -# arr_mean = calc_mean(array) -# arr_std = _calc_std(array) -# res = np.zeros_like(array, dtype=array.dtype) -# for i in prange(h): -# for j in prange(w): -# res[i, j] = (array[i, j] - arr_mean) / arr_std -# return res +@ngjit +def _confidence(zscore): + p_value = upper_bound_p_value(zscore) + if abs(zscore) > 2.58 and p_value < 0.01: + return 99 + if abs(zscore) > 1.96 and p_value < 0.05: + return 95 + if abs(zscore) > 1.65 and p_value < 0.1: + return 90 + return 0 @ngjit @@ -321,3 +349,40 @@ def apply(raster, kernel, func=calc_mean): attrs=raster.attrs) return result + + +@ngjit +def _hotspots(z_array): + out = np.zeros_like(z_array) + rows, cols = z_array.shape + for y in prange(rows): + for x in prange(cols): + out[y, x] = _hot_cold(z_array[y, x]) * _confidence(z_array[y, x]) + return out + + +def hotspots(raster, kernel): + # validate raster + if not isinstance(raster, DataArray): + raise TypeError("`raster` must be instance of DataArray") + + if raster.ndim != 2: + raise ValueError("`raster` must be 2D") + + if not (issubclass(raster.values.dtype.type, np.integer) or + issubclass(raster.values.dtype.type, np.float)): + raise ValueError( + "`raster` must be an array of integers or float") + + # create kernel mask array + kernel_values = kernel.to_array(raster) + # apply kernel to raster values + z_array = calc_zscores(raster.values) + z_mean = _apply(z_array, kernel_values, calc_mean) + out = _hotspots(z_mean) + result = DataArray(out, + coords=raster.coords, + dims=raster.dims, + attrs=raster.attrs) + + return result From 3483c9047e381b41340046e3997ac7f74ce6d41c Mon Sep 17 00:00:00 2001 From: thuydotm Date: Wed, 8 Apr 2020 15:54:18 -0400 Subject: [PATCH 14/18] fix bug in _gen_ellipse_kernel --- xrspatial/focal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index e74d9ec0..676d05f2 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -162,7 +162,7 @@ def _gen_ellipse_kernel(half_w, half_h): # x values of interest x = np.linspace(-half_w, half_w, 2 * half_w + 1) # y values of interest, as a "column" array - y = np.linspace(-half_h, half_w, 2 * half_h + 1)[:, None] + y = np.linspace(-half_h, half_h, 2 * half_h + 1)[:, None] # True for points inside the ellipse # (x / a)^2 + (y / b)^2 <= 1, avoid division to avoid rounding issue From 61e78a952b2ff9e93c573f43827395c4bbda5eed Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 9 Apr 2020 14:03:15 -0400 Subject: [PATCH 15/18] correct zscores calculation --- xrspatial/focal.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 676d05f2..959e7cdb 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -259,18 +259,6 @@ def calc_sum(array): return np.nansum(array) -@ngjit -def calc_zscores(array): - arr_mean = np.nanmean(array) - arr_std = np.nanstd(array) - out = np.zeros_like(array, dtype=array.dtype) - h, w = array.shape - for i in prange(h): - for j in prange(w): - out[i, j] = (array[i, j] - arr_mean) / arr_std - return out - - @ngjit def upper_bound_p_value(zscore): if abs(zscore) >= 2.33: @@ -377,9 +365,17 @@ def hotspots(raster, kernel): # create kernel mask array kernel_values = kernel.to_array(raster) # apply kernel to raster values - z_array = calc_zscores(raster.values) - z_mean = _apply(z_array, kernel_values, calc_mean) - out = _hotspots(z_mean) + mean_array = _apply(raster.values, kernel_values, calc_mean) + + # calculate z-scores + global_mean = np.nanmean(raster.values) + global_std = np.nanstd(raster.values) + if global_std == 0: + raise ZeroDivisionError("Standard deviation " + "of the input raster values is 0.") + z_array = (mean_array - global_mean) / global_std + + out = _hotspots(z_array) result = DataArray(out, coords=raster.coords, dims=raster.dims, From b53def3097fca115e39afb5f7e04da2fd428518a Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 9 Apr 2020 14:53:46 -0400 Subject: [PATCH 16/18] avoid nan in int --- xrspatial/focal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 959e7cdb..9486b3a0 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -329,7 +329,7 @@ def apply(raster, kernel, func=calc_mean): # create kernel mask array kernel_values = kernel.to_array(raster) # apply kernel to raster values - out = _apply(raster.values, kernel_values, func) + out = _apply(raster.values.astype(float), kernel_values, func) result = DataArray(out, coords=raster.coords, @@ -365,7 +365,7 @@ def hotspots(raster, kernel): # create kernel mask array kernel_values = kernel.to_array(raster) # apply kernel to raster values - mean_array = _apply(raster.values, kernel_values, calc_mean) + mean_array = _apply(raster.values.astype(float), kernel_values, calc_mean) # calculate z-scores global_mean = np.nanmean(raster.values) From b0264e8cc21ebc06b7f66b8ffa653dd3ea44c8d1 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 9 Apr 2020 16:18:27 -0400 Subject: [PATCH 17/18] add hotspots tests --- xrspatial/tests/test_focal.py | 86 ++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index c60a6688..04b211bf 100644 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -2,7 +2,7 @@ import numpy as np from xrspatial import mean -from xrspatial.focal import apply, Kernel, calc_mean, calc_sum +from xrspatial.focal import apply, Kernel, calc_mean, calc_sum, hotspots import pytest @@ -205,3 +205,87 @@ def test_apply_with_nan(): mean_output_2 = apply(raster, kernel_2, calc_mean) expected_mean_output_2 = np.ones((n, m)) assert np.all(mean_output_2.values == expected_mean_output_2) + + +def test_hotspot_invalid(): + kernel = Kernel(radius=1) + invalid_raster_type = np.array([0, 1, 2, 3]) + with pytest.raises(Exception) as e_info: + hotspots(invalid_raster_type, kernel) + assert e_info + + invalid_raster_dtype = xr.DataArray(np.array([['cat', 'dog']])) + with pytest.raises(Exception) as e_info: + hotspots(invalid_raster_dtype, kernel) + assert e_info + + invalid_raster_shape = xr.DataArray(np.array([0, 0])) + with pytest.raises(Exception) as e_info: + hotspots(invalid_raster_shape, kernel) + assert e_info + + invalid_raster_std = xr.DataArray(np.ones((10, 10))) + # std of the raster is 0 + with pytest.raises(Exception) as e_info: + hotspots(invalid_raster_std, kernel) + assert e_info + + +def test_hotspot(): + n, m = 10, 10 + raster = xr.DataArray(np.zeros((n, m), dtype=float), dims=['x', 'y']) + raster['x'] = np.linspace(0, n, n) + raster['y'] = np.linspace(0, m, m) + + all_idx = zip(*np.where(raster.values == 0)) + + nan_cells = [(i, i) for i in range(m)] + for cell in nan_cells: + raster[cell[0], cell[1]] = np.nan + + # add some extreme values + hot_region = [(1, 1), (1, 2), (1, 3), + (2, 1), (2, 2), (2, 3), + (3, 1), (3, 2), (3, 3)] + cold_region = [(7, 7), (7, 8), (7, 9), + (8, 7), (8, 8), (8, 9), + (9, 7), (9, 8), (9, 9)] + for p in hot_region: + raster[p[0], p[1]] = 10000 + for p in cold_region: + raster[p[0], p[1]] = -10000 + + no_significant_region = [id for id in all_idx if id not in hot_region and + id not in cold_region] + + kernel = Kernel(radius=2) + hotspots_output = hotspots(raster, kernel) + + # check output's properties + # output must be an xarray DataArray + assert isinstance(hotspots_output, xr.DataArray) + assert isinstance(hotspots_output.values, np.ndarray) + assert issubclass(hotspots_output.values.dtype.type, np.float) + + # shape, dims, coords, attr preserved + assert raster.shape == hotspots_output.shape + assert raster.dims == hotspots_output.dims + assert raster.attrs == hotspots_output.attrs + for coord in raster.coords: + assert np.all(raster[coord] == hotspots_output[coord]) + + # no nan in output + assert not np.isnan(np.min(hotspots_output)) + + # output of extreme regions are non-zeros + # hot spots + hot_spot = np.asarray([hotspots_output[p] for p in hot_region]) + assert np.all(hot_spot >= 0) + assert np.sum(hot_spot) > 0 + # cold spots + cold_spot = np.asarray([hotspots_output[p] for p in cold_region]) + assert np.all(cold_spot <= 0) + assert np.sum(cold_spot) < 0 + # output of no significant regions are 0s + no_sign = np.asarray([hotspots_output[p] for p in no_significant_region]) + assert np.all(no_sign == 0) From 2d631e93108ab3dcc2634d147ae5802d59638fd3 Mon Sep 17 00:00:00 2001 From: thuydotm Date: Thu, 9 Apr 2020 16:32:45 -0400 Subject: [PATCH 18/18] add docstring; dtype of hotspots is int8 --- xrspatial/focal.py | 28 +++++++++++++++++++++++++++- xrspatial/tests/test_focal.py | 2 +- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 9486b3a0..06e90b48 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -341,7 +341,7 @@ def apply(raster, kernel, func=calc_mean): @ngjit def _hotspots(z_array): - out = np.zeros_like(z_array) + out = np.zeros_like(z_array, dtype=np.int8) rows, cols = z_array.shape for y in prange(rows): for x in prange(cols): @@ -350,6 +350,32 @@ def _hotspots(z_array): def hotspots(raster, kernel): + """Identify statistically significant hot spots and cold spots in an input + raster. To be a statistically significant hot spot, a feature will have a + high value and be surrounded by other features with high values as well. + Neighborhood of a feature defined by the input kernel, which currently + support a shape of circle and a radius in meters. + + The result should be a raster with the following 7 values: + 90 for 90% confidence high value cluster + 95 for 95% confidence high value cluster + 99 for 99% confidence high value cluster + -90 for 90% confidence low value cluster + -95 for 95% confidence low value cluster + -99 for 99% confidence low value cluster + 0 for no significance + + Parameters + ---------- + raster: xarray.DataArray + Input raster image with shape=(height, width) + kernel: Kernel + + Returns + ------- + hotspots: xarray.DataArray + """ + # validate raster if not isinstance(raster, DataArray): raise TypeError("`raster` must be instance of DataArray") diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 04b211bf..a9a766ec 100644 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -265,7 +265,7 @@ def test_hotspot(): # output must be an xarray DataArray assert isinstance(hotspots_output, xr.DataArray) assert isinstance(hotspots_output.values, np.ndarray) - assert issubclass(hotspots_output.values.dtype.type, np.float) + assert issubclass(hotspots_output.values.dtype.type, np.int8) # shape, dims, coords, attr preserved assert raster.shape == hotspots_output.shape