From 433e45ae781a0aae75226b2b2f9b73dd1a244f78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=AD=20Bosch?= Date: Wed, 11 Mar 2020 16:07:33 +0100 Subject: [PATCH] advanced topics docs, drop `num_blocks` arg --- detectree/filters.py | 16 +++++++--- detectree/image_descriptor.py | 58 +++++++++++++++++++++++++++++++++-- detectree/pixel_features.py | 51 +++++++++++++++++++++++++++--- detectree/pixel_response.py | 15 ++++++++- detectree/train_test_split.py | 10 +++--- docs/src/advanced_topics.rst | 56 +++++++++++++++++++++++++++++++++ docs/src/conf.py | 2 +- docs/src/index.rst | 6 +++- docs/src/references.bib | 14 ++++++++- tests/test_detectree.py | 10 +++--- 10 files changed, 213 insertions(+), 25 deletions(-) create mode 100644 docs/src/advanced_topics.rst diff --git a/detectree/filters.py b/detectree/filters.py index eb7657c..e674317 100644 --- a/detectree/filters.py +++ b/detectree/filters.py @@ -24,10 +24,12 @@ def get_texture_kernel(sigma): Parameters ---------- sigma : numeric + Scale parameter to build a texture kernel, based on a Gaussian on the + X dimension and a second-derivative Gaussian in the Y dimension Returns ------- - texture_kernel : + texture_kernel : array-like """ g0_kernel_arr = _get_gaussian_kernel1d(sigma, 0) g2_kernel_arr = _get_gaussian_kernel1d(sigma, 2) @@ -40,13 +42,19 @@ def get_gabor_filter_bank(frequencies, num_orientations): Parameters ---------- frequencies : list-like - + Set of frequencies used to build the Gabor filter bank. num_orientations : int or list-like - + Number of orientations used to build the Gabor filter bank. If an + integer is provided, the corresponding number of orientations will be + used for each scale (determined by `gabor_frequencies`). If a tuple is + provided, each element will determine the number of orientations that + must be used at its matching scale (determined by `gabor_frequencies`) + - thus the tuple must match the length of `frequencies`. 
Returns ------- - kernels : + kernels : list-like + List of kernel 2-D arrays that correspond to the filter bank """ kernels = [] diff --git a/detectree/image_descriptor.py b/detectree/image_descriptor.py index 17f0fc6..2cb2ad7 100644 --- a/detectree/image_descriptor.py +++ b/detectree/image_descriptor.py @@ -13,8 +13,34 @@ def compute_image_descriptor(img_rgb, kernels, response_bins_per_axis, - num_blocks, num_color_bins): + num_color_bins): + """ + Compute a GIST descriptor for `img_rgb`. See the `background + `_ example notebook for more details. + + Parameters + ---------- + img_rgb : array-like + The image in RGB format, i.e., in a 3-D array + kernels : list-like + List of kernel 2-D arrays that correspond to the filter bank + response_bins_per_axis : int + Number of spatial bins per axis into which the responses to the filter + bank will be aggregated. For example, a value of 2 will aggregate the + responses into the four quadrants of the image (i.e., 2x2, 2 bins in + each axis of the image). + num_color_bins : int + Number of color bins per axis of the L*a*b color space with which + the joint color histogram will be computed + + Returns + ------- + img_descr : array-like + Vector representing GIST descriptor of `img_rgb` + """ + # gist descriptor + num_blocks = response_bins_per_axis**2 gist_descr = np.zeros(len(kernels) * num_blocks) img_gray = color.rgb2gray(img_rgb) block_shape = tuple(size // response_bins_per_axis @@ -56,8 +82,34 @@ def compute_image_descriptor(img_rgb, kernels, response_bins_per_axis, def compute_image_descriptor_from_filepath(img_filepath, kernels, - response_bins_per_axis, num_blocks, + response_bins_per_axis, num_color_bins): + """ + Compute a GIST descriptor for `img_filepath`. See the `background + `_ example notebook for more details.
+ + Parameters + ---------- + img_filepath : str, file object or pathlib.Path object + Path to a file, URI, file object opened in binary ('rb') mode, or a + Path object representing the image for which a GIST descriptor will be + computed. The value will be passed to `rasterio.open`. + kernels : list-like + List of kernel 2-D arrays that correspond to the filter bank + response_bins_per_axis : int + Number of spatial bins per axis into which the responses to the filter + bank will be aggregated. For example, a value of 2 will aggregate the + responses into the four quadrants of the image (i.e., 2x2, 2 bins in + each axis of the image). + num_color_bins : int + Number of color bins per axis of the L*a*b color space with which + the joint color histogram will be computed + + Returns + ------- + img_descr : array-like + Vector representing GIST descriptor of `img_filepath` + """ img_rgb = utils.img_rgb_from_filepath(img_filepath) return compute_image_descriptor(img_rgb, kernels, response_bins_per_axis, - num_blocks, num_color_bins) + num_color_bins) diff --git a/detectree/pixel_features.py b/detectree/pixel_features.py index 4088f55..e5e3dde 100644 --- a/detectree/pixel_features.py +++ b/detectree/pixel_features.py @@ -32,6 +32,48 @@ class PixelFeaturesBuilder(object): def __init__(self, sigmas=None, num_orientations=None, neighborhood=None, min_neighborhood_range=None, num_neighborhoods=None): + """ + Class that customizes how the pixel features are computed. See the + `background `_ example notebook for more + details. + + Parameters + ---------- + sigmas : list-like, optional + The list of scale parameters (sigmas) to build the Gaussian filter + bank that will be used to compute the pixel-level features. The + provided argument will be passed to the initialization method of + the `PixelFeaturesBuilder` class. If no value is provided, the + default value set in `settings.GAUSS_DEFAULT_SIGMAS` will be taken.
+ num_orientations : int, optional + The number of equally-distributed orientations to build the + Gaussian filter bank that will be used to compute the pixel-level + features. The provided argument will be passed to the + initialization method of the `PixelFeaturesBuilder` class. If no + value is provided, the default value set in + `settings.GAUSS_DEFAULT_NUM_ORIENTATIONS` will be taken. + neighborhood : array-like, optional + The base neighborhood structure that will be used to compute the + entropy features. The provided argument will be passed to the + initialization method of the `PixelFeaturesBuilder` class. If no + value is provided, a square with a side size of + `2 * min_neighborhood_range + 1` will be used. + min_neighborhood_range : int, optional + The range (i.e., the square radius) of the smallest neighborhood + window that will be used to compute the entropy features. The + provided argument will be passed to the initialization method of + the `PixelFeaturesBuilder` class. If no value is provided, the + default value set in + `settings.ENTROPY_DEFAULT_MIN_NEIGHBORHOOD_RANGE` will be taken. + num_neighborhoods : int, optional + The number of neighborhood windows (whose size follows a geometric + progression starting at `min_neighborhood_range`) that will be + used to compute the entropy features. The provided argument will + be passed to the initialization method of the + `PixelFeaturesBuilder` class. If no value is provided, the default + value set in `settings.ENTROPY_DEFAULT_NUM_NEIGHBORHOODS` will be + taken.
+ """ # preprocess technical keyword arguments # texture features if sigmas is None: @@ -93,8 +135,8 @@ def build_features_from_arr(self, img_rgb): img_ill_vec = np.dot(A, np.log(np.dot(B, img_xyz_vec.transpose()) + 1)).transpose() X[:, :NUM_LAB_CHANNELS] = img_lab_vec - X[:, NUM_LAB_CHANNELS:NUM_LAB_CHANNELS + - NUM_ILL_CHANNELS] = img_ill_vec + X[:, + NUM_LAB_CHANNELS:NUM_LAB_CHANNELS + NUM_ILL_CHANNELS] = img_ill_vec # texture features # tpf.compute_texture_features(X_img[:, self.texture_slice], @@ -122,8 +164,9 @@ def build_features_from_arr(self, img_rgb): img = transform.resize( transform.downscale_local_mean(img_lab_l, (factor, factor)), img_lab_l.shape).astype(np.uint16) - X[:, entropy_start + i] = rank.entropy( - img, self.neighborhood).flatten() + X[:, + entropy_start + i] = rank.entropy(img, + self.neighborhood).flatten() return X diff --git a/detectree/pixel_response.py b/detectree/pixel_response.py index 9746b96..194de66 100644 --- a/detectree/pixel_response.py +++ b/detectree/pixel_response.py @@ -17,6 +17,18 @@ class PixelResponseBuilder(object): # It is really not necessary to use a class for this, but we do so for the # sake of API consistency with the `pixel_features` module def __init__(self, tree_val=None, nontree_val=None): + """ + Class that customizes how the pixel response (i.e., the tree/non-tree + labels of each pixel) is computed. See the `background + `_ example notebook for more details. + + Parameters + ---------- + tree_val : int, optional + The value that designates tree pixels in the response images. + nontree_val : int, optional + The value that designates non-tree pixels in the response images. 
+ """ if tree_val is None: tree_val = settings.RESPONSE_DEFAULT_TREE_VAL self.tree_val = tree_val @@ -53,7 +65,8 @@ def build_response(self, split_df=None, response_img_dir=None, response_img_filepaths=None, img_filename_pattern=None, method=None, img_cluster=None): """ - TODO + Build the pixel response (i.e., the tree/non-tree labels of each pixel) + for a list of images Parameters ------- diff --git a/detectree/train_test_split.py b/detectree/train_test_split.py index 337c92f..ec142c9 100644 --- a/detectree/train_test_split.py +++ b/detectree/train_test_split.py @@ -38,11 +38,11 @@ def __init__(self, img_filepaths=None, img_dir=None, `settings.IMG_DEFAULT_FILENAME_PATTERN` will be taken. Ignored if `img_filepaths` is provided. gabor_frequencies : tuple, optional - Set of frequencies used to build the Gabor filter. If no value is - provided (default), the value will be taken from + Set of frequencies used to build the Gabor filter bank. If no value + is provided (default), the value will be taken from `settings.GIST_DEFAULT_GABOR_FREQUENCIES`. gabor_num_orientations : int or tuple, optional - Number of orientations used to build the Gabor filter. If an + Number of orientations used to build the Gabor filter bank. If an integer is provided, the corresponding number of orientations will be used for each scale (determined by `gabor_frequencies`). 
If a tuple is provided, each element will determine the number of @@ -109,7 +109,7 @@ def descr_feature_matrix(self): frequencies=self.gabor_frequencies, num_orientations=self.gabor_num_orientations) - num_blocks = self.response_bins_per_axis**2 + # num_blocks = self.response_bins_per_axis**2 # feature_rows = [ # TrainingSelector._get_image_descr( @@ -121,7 +121,7 @@ def descr_feature_matrix(self): dask.delayed( image_descriptor.compute_image_descriptor_from_filepath)( img_filepath, kernels, self.response_bins_per_axis, - num_blocks, self.num_color_bins) + self.num_color_bins) for img_filepath in self.img_filepaths ] diff --git a/docs/src/advanced_topics.rst b/docs/src/advanced_topics.rst new file mode 100644 index 0000000..22afd98 --- /dev/null +++ b/docs/src/advanced_topics.rst @@ -0,0 +1,56 @@ +=============== +Advanced Topics +=============== + +Most use cases of DetecTree only make use of the `TrainingSelector`, `ClassifierTrainer` and `Classifier` classes and their respective methods. Nevertheless, the way in which these classes operate can be customized as described below. +See the `"background" example notebook `_ and the article of Yang et al. :cite:`b-yang2009tree` for more information. + +---------------- +Train/test split +---------------- + +In order to enhance the robustness of the classifier, it is important that the subset of pixels selected as training samples is representative of the whole dataset. Given the large variety of scenes that can be found in such a dataset of urban aerial imagery (e.g., lakes, buildings, parks, forests...), a random selection of training tiles might not be representative of such variety and therefore lead to a classifier with low overall accuracy. + +To overcome this problem, Yang et al. :cite:`b-yang2009tree` proposed a procedure of selecting training samples that intends to find the set of tiles that is most representative of the dataset.
The scene structure of an image can be represented by a Gist descriptor :cite:`b-oliva2001modeling`, a low-dimensional vector encoding which captures the high-level semantics of real-world aerial images. Following the approach of Yang et al. :cite:`b-yang2009tree`, the image descriptor is computed by: + +* convolving it with Gabor filters on 3 frequencies and 4, 8 and 8 orientations respectively, which accounts for 320 components +* computing an 8x8x8 joint color histogram in the Lab color space, which accounts for 512 components. The two components are normalized to unit L-1 norm separately and then concatenated to form an 832-component image descriptor. + +Nevertheless, the way in which such an image descriptor is computed can be customized by means of the arguments of `TrainingSelector.__init__`. Such arguments will then be forwarded to the following function in order to compute the GIST descriptor of the input image: + +.. autofunction:: detectree.image_descriptor.compute_image_descriptor_from_filepath + +The GIST descriptor might also be directly computed from an array with the RGB representation of the image: + +.. autofunction:: detectree.image_descriptor.compute_image_descriptor + +On the other hand, in order to obtain a Gabor filter bank (e.g., for the `kernels` argument), the following function can be used: + +.. autofunction:: detectree.filters.get_gabor_filter_bank + +-------------------- +Pixel classification +-------------------- + +In order to perform a binary pixel-level classification of tree/non-tree pixels, each pixel is transformed into a feature vector. In DetecTree, the way in which feature vectors are computed can be customized by means of the arguments of `Classifier.__init__`. With the default argument values, which follow the methods of Yang et al. :cite:`b-yang2009tree`, each pixel is transformed into a 27-feature vector where 6, 18 and 3 features capture characteristics of color, texture and entropy respectively.
Such arguments are forwarded to the following class: + +.. autoclass:: detectree.pixel_features.PixelFeaturesBuilder + :members: __init__, build_features + +The texture features are obtained by convolving the images with a filter bank, which is obtained by means of the following function: + +.. autofunction:: detectree.filters.get_texture_kernel + +The arguments of `Classifier.__init__` also serve to customize how the pixel response (i.e., tree/non-tree labels of each pixel) is computed, by forwarding them to the following class: + +.. autoclass:: detectree.pixel_response.PixelResponseBuilder + :members: __init__, build_response + +---------- +References +---------- + +.. bibliography:: references.bib + :style: plain + :labelprefix: B + :keyprefix: b- diff --git a/docs/src/conf.py b/docs/src/conf.py index 01a1045..22d6ed3 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -104,7 +104,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = [] # -- Options for HTMLHelp output --------------------------------------- diff --git a/docs/src/index.rst b/docs/src/index.rst index efe5232..1c055dc 100644 --- a/docs/src/index.rst +++ b/docs/src/index.rst @@ -1,7 +1,7 @@ DetecTree documentation ======================= -DetecTree is a Pythonic library to classify tree/non-tree pixels from aerial imagery, following the methods of Yang et al. :cite:`yang2009tree`. +DetecTree is a Pythonic library to classify tree/non-tree pixels from aerial imagery, following the methods of Yang et al. :cite:`a-yang2009tree`. .. toctree:: :maxdepth: 1 @@ -9,6 +9,8 @@ DetecTree is a Pythonic library to classify tree/non-tree pixels from aerial ima train_test_split pixel_classification + advanced_topics + utils .. toctree:: @@ -40,3 +42,5 @@ References .. 
bibliography:: references.bib :style: plain + :labelprefix: A + :keyprefix: a- diff --git a/docs/src/references.bib b/docs/src/references.bib index 7b84d20..3080333 100644 --- a/docs/src/references.bib +++ b/docs/src/references.bib @@ -1,8 +1,20 @@ +@article{oliva2001modeling, + title={Modeling the shape of the scene: A holistic representation of the spatial envelope}, + author={Oliva, Aude and Torralba, Antonio}, + journal={International journal of computer vision}, + volume={42}, + number={3}, + pages={145--175}, + year={2001}, + publisher={Springer}, + doi={10.1023/A:1011139631724} +} + @inproceedings{yang2009tree, title={Tree detection from aerial imagery}, author={Yang, Lin and Wu, Xiaqing and Praun, Emil and Ma, Xiaoxu}, booktitle={Proceedings of the 17th ACM SIGSPATIAL International Conference on Advances in Geographic Information Systems}, pages={131--137}, year={2009}, - organization={ACM} + doi={10.1145/1653771.1653792} } diff --git a/tests/test_detectree.py b/tests/test_detectree.py index 504f724..655c4c4 100644 --- a/tests/test_detectree.py +++ b/tests/test_detectree.py @@ -115,13 +115,13 @@ def test_image_descriptor(self): frequencies=gabor_frequencies, num_orientations=gabor_num_orientations) response_bins_per_axis = 4 - num_blocks = response_bins_per_axis**2 + # num_blocks = response_bins_per_axis**2 num_color_bins = 8 img_descr = image_descriptor.compute_image_descriptor_from_filepath( - self.img_filepath, kernels, response_bins_per_axis, num_blocks, - num_color_bins) - self.assertEqual(len(img_descr), - len(kernels) * num_blocks + num_color_bins**3) + self.img_filepath, kernels, response_bins_per_axis, num_color_bins) + self.assertEqual( + len(img_descr), + len(kernels) * response_bins_per_axis**2 + num_color_bins**3) # TODO: more technical test, e.g., passing an all-zero filter bank # should return an all-zero gist descriptor