Skip to content

Commit

Permalink
Enhance paper as recommended by @cmillion, issue #19
Browse files Browse the repository at this point in the history
Full implementation of parallel, fix issue #18 #21
  • Loading branch information
nkarasiak committed Feb 10, 2020
1 parent 6fe12ae commit d4a87ad
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 123 deletions.
4 changes: 2 additions & 2 deletions examples/processing/rasterMath_testBlockSize_3d_andNBands.py
Expand Up @@ -37,7 +37,7 @@

rM = RasterMath(raster,in_image_mask='/tmp/mask.tif',return_3d=return_3d)

rM.custom_block_size(128,128) # block of 128x128
rM.custom_block_size(10,10) # block of 128x128

# print(rM.get_random_block().shape)

Expand All @@ -53,7 +53,7 @@
rM.add_function(returnFlatten,'/tmp/x_flatten_{}.tif'.format(str(return_3d)))
t=time.time()

rM.run(1)
rM.run(2)
# rM.run()

print(time.time()-t)
Expand Down
2 changes: 1 addition & 1 deletion museotoolbox/__init__.py
Expand Up @@ -14,4 +14,4 @@
# =============================================================================
from . import ai, processing, cross_validation, datasets, stats

__version__ = "0.12.1-beta.1"
__version__ = "0.12.1-beta.2"
255 changes: 141 additions & 114 deletions museotoolbox/processing/__init__.py
Expand Up @@ -612,6 +612,7 @@ def __init__(self, in_image, in_image_mask=False, return_3d=False, block_size=[2
self.verbose = verbose
self.message = message
self.driver = gdal.GetDriverByName('GTiff')
self.itemsize = 0

# Load raster
self.opened_images = []
Expand All @@ -629,7 +630,6 @@ def __init__(self, in_image, in_image_mask=False, return_3d=False, block_size=[2
# Get block size and itemsize
band = self.opened_images[0].GetRasterBand(1)
self.input_block_sizes = band.GetBlockSize()
self.itemsize = band.ReadAsArray(0,0,1,1).itemsize*self.n_bands

# input block size
if block_size is False:
Expand Down Expand Up @@ -677,7 +677,7 @@ def add_image(
opened_raster = gdal.Open(in_image, gdal.GA_ReadOnly)
if opened_raster is None:
raise ReferenceError('Impossible to open image ' + in_image)

sameSize = True

if len(self.opened_images) > 0:
Expand All @@ -686,6 +686,13 @@ def add_image(
sameSize = False
push_feedback("raster {} doesn't have the same size (X and Y) as the initial raster.\n \
Museotoolbox can't add it as an input raster.".format(os.path.basename(in_image)))
n_bands = opened_raster.RasterCount

band = opened_raster.GetRasterBand(1)
mem_size = band.ReadAsArray(0,0,1,1).itemsize*n_bands
self.itemsize += mem_size
del band
# self.itemsize += opened_raster.GetRasterBand(0).ReadAsArray(0,0,1,1).itemsize*n_bands

if sameSize:
self.opened_images.append(opened_raster)
Expand Down Expand Up @@ -797,7 +804,8 @@ def add_function(
self._outputs[-1]['n_bands'] = out_n_bands
self._outputs[-1]['kwargs'] = kwargs
self._outputs[-1]['nodata'] = out_nodata

self._outputs[-1]['itemsize'] = randomBlock.itemsize*out_n_bands

def _init_raster_parameters(self, compress=True):

self._raster_options = []
Expand Down Expand Up @@ -1110,6 +1118,7 @@ def get_random_block(self, random_state=None):
if np.ma.is_masked(tmp[0]):
if np.all(tmp[0].mask == True):
size = 0

else:
if np.ma.is_masked(tmp):
if np.all(tmp.mask == True):
Expand Down Expand Up @@ -1224,7 +1233,7 @@ def _run_old(self):
-------
None
"""

self._position = 0
if self.verbose:
self.pb = ProgressBar(self.n_blocks, message=self.message)

Expand Down Expand Up @@ -1354,112 +1363,6 @@ def run(self, n_jobs = 1, size = '1G'):
-------
None
"""
# =============================================================================
# Begin_process_block function
# =============================================================================
def _process_block(fun,kwargs,block,n_bands,nodata,np_dtype,return_3d):
"""
Private function to compute external function per block.
In order to save with the size as the input block, this function need the input mask.
Parameters
----------
fun :
kwargs :
block :
nodata :
np_dtype :
return_3d :
Returns
-------
res : np.ndarray
The block to write
"""
tmp_arr = True

if isinstance(block,list):
mask_block=block[0].mask
else:
mask_block=block.mask

# if everything is masked
if np.all(mask_block == True):
mask = True
out_block = nodata
# if everything is not masked
elif np.all(mask_block == False):
mask = False

# if part masked, part unmasked
elif np.any(mask_block == False):
tmp_arr = mask_block.size
mask = mask_block[...,0]


# if not is fully masked
if mask is not True:

# if no mask, we send the np.ndarray only
if mask is False:
if isinstance(block,list):
block = [b.data for b in block]
else:
block = block.data
if kwargs:
out_block = fun(block,**kwargs)
else:
out_block = fun(block)

# if part is masked
else:
# if 3d we send the np.ma.MaskedArray
if return_3d:
if kwargs:
out_block = fun(block,**kwargs)
else:
out_block = fun(block)

if out_block.ndim == 1:
out_block = np.expand_dims(out_block,2)
out_block[mask,...] = nodata

# if 2d, we send only the np.ndarray without masked data
else:
# create empty array with nodata value
out_block_shape = list(mask_block.shape[:-1])
out_block_shape.append(n_bands)
out_block = np.full(out_block_shape,nodata,np_dtype)


if isinstance(block,list):
block = [b[~mask,...].data for b in block]
if kwargs:
tmp_arr = fun(block,**kwargs)
else:
tmp_arr = fun(block)
else:
if kwargs:
tmp_arr = fun(block[~mask].data,**kwargs)
else:
tmp_arr = fun(block[~mask,...].data)

if tmp_arr.ndim == 1:
tmp_arr = tmp_arr.reshape(-1,1)

out_block[~mask,...] = tmp_arr

return out_block

# =============================================================================
# End of _process_block function
# =============================================================================
# Initalize the run
self._position = 0
size_value=size[:-1]
Expand All @@ -1472,8 +1375,12 @@ def _process_block(fun,kwargs,block,n_bands,nodata,np_dtype,return_3d):
else :
raise ValueError(' {} is not a valid value. Use for example 100M, 10G or 1T.'.format(size))

# to keep some memory space (because of outputs array, we multiply by 2 the input array memory)
length = min(int(size_value/self.size*self.itemsize/2),self.n_blocks)
# Compute the size needed in memory for each output function and input block

items_size = self.itemsize
for i in self._outputs:
items_size += i['itemsize']
length = min(int(size_value/self.size*items_size),self.n_blocks)

if self.verbose:
self.pb = ProgressBar(self.n_blocks, message=self.message)
Expand All @@ -1493,11 +1400,11 @@ def _process_block(fun,kwargs,block,n_bands,nodata,np_dtype,return_3d):
kwargs = output['kwargs']

if n_jobs > 1 or n_jobs < 0:
res = Parallel(n_jobs)(delayed(_process_block)(function,kwargs,self.get_block(idx_block,return_with_mask=True),output['n_bands'],output['nodata'],output['np_type'],self.return_3d) for idx,idx_block in enumerate(idx_blocks))
res = Parallel(n_jobs)(delayed(_process_block)(function,kwargs,self.get_block(idx_block,return_with_mask=True),output['n_bands'],output['nodata'],output['np_type'],self.return_3d,idx_block) for idx,idx_block in enumerate(idx_blocks))
else:
res=[]
for idx,idx_block in enumerate(idx_blocks):
res.append(_process_block(function,kwargs,self.get_block(idx_block,return_with_mask=True),output['n_bands'],output['nodata'],output['np_type'],self.return_3d))
res.append(_process_block(function,kwargs,self.get_block(idx_block,return_with_mask=True),output['n_bands'],output['nodata'],output['np_type'],self.return_3d,idx_block))

for idx_block, block in enumerate(res):
self.write_block(block, idx_blocks[idx_block], idx_output)
Expand All @@ -1518,6 +1425,7 @@ def _process_block(fun,kwargs,block,n_bands,nodata,np_dtype,return_3d):
if self.verbose:
self.pb.add_position(self.n_blocks)


def write_block(self, block, idx_block, idx_func):
"""
Write a block at a position on a output image
Expand Down Expand Up @@ -1559,6 +1467,125 @@ def write_block(self, block, idx_block, idx_func):
curBand.FlushCache()


# =============================================================================
# Begin_process_block function for RasterMath
# Function is outside of class in order to be used with Parallel
# =============================================================================
def _process_block(fun,kwargs,block,n_bands,nodata,np_dtype,return_3d,idx_block):
"""
Private function to compute external function per block.
In order to save with the size as the input block, this function need the input mask.
Parameters
----------
fun : function
kwargs : kwargs
False, or dict of kwargs.
block : np.ndarray or np.ma.MaskedArray
The function will compute using the given block
nodata : nodata, int or float
No Data value is some pixels are masked
np_dtype : numpy datatype
Numpy datatype for the output array
return_3d : boolean
2d-array or 3d-array.
Returns
-------
res : np.ndarray
The block to write
"""
tmp_arr = True

if isinstance(block,list):
mask_block=block[0].mask
else:
mask_block=block.mask

# if everything is masked
if np.all(mask_block == True):
mask = True
out_block = nodata
# if everything is not masked
elif np.all(mask_block == False):
mask = False

# if part masked, part unmasked
elif np.any(mask_block == False):
tmp_arr = mask_block.size
if return_3d:
if mask_block.ndim > 2:
mask = mask_block[...,0]
elif mask_block.ndim == 1:
mask = mask_block
else:
mask = mask_block[...,0]

# if block is not fully masked
if mask is not True:

# if no mask, we send the np.ndarray only
if mask is False:
if isinstance(block,list):
block = [b.data for b in block]
else:
block = block.data
if kwargs:
out_block = fun(block,**kwargs)
else:
out_block = fun(block)

# if part is masked
else:
# if 3d we send the np.ma.MaskedArray
if return_3d:
if kwargs:
out_block = fun(block,**kwargs)
else:
out_block = fun(block)

if out_block.ndim == 1:
out_block = np.expand_dims(out_block,2)
out_block[mask,...] = nodata

# if 2d, we send only the np.ndarray without masked data
else:
# create empty array with nodata value
if mask_block.ndim > 1:
mbs = mask_block.shape[:-1]
else:
mbs = mask_block.shape

out_block_shape = list(mbs)
out_block_shape.append(n_bands)
out_block = np.full(out_block_shape,nodata,np_dtype)


if isinstance(block,list):
block = [b[~mask,...].data for b in block]
if kwargs:
tmp_arr = fun(block,**kwargs)
else:
tmp_arr = fun(block)
else:
if kwargs:
tmp_arr = fun(block[~mask].data,**kwargs)
else:
tmp_arr = fun(block[~mask,...].data)

if tmp_arr.ndim == 1:
tmp_arr = tmp_arr.reshape(-1,1)

out_block[~mask,...] = tmp_arr


return out_block

# =============================================================================
# End of _process_block function
# =============================================================================



Expand Down
2 changes: 0 additions & 2 deletions paper.bib
@@ -1,4 +1,3 @@

@article{karasiak_2019,
title = {Statistical stability and spatial unstability in prediction of forest tree species using satellite image time series},
journal = {Remote Sensing},
Expand Down Expand Up @@ -27,7 +26,6 @@ @article{olofsson_good_2014
volume = {148},
issn = {0034-4257},
doi = {10.1016/j.rse.2014.02.015},
abstract = {The remote sensing science and application communities have developed increasingly reliable, consistent, and robust approaches for capturing land dynamics to meet a range of information needs. Statistically robust and transparent approaches for assessing accuracy and estimating area of change are critical to ensure the integrity of land change information. We provide practitioners with a set of “good practice” recommendations for designing and implementing an accuracy assessment of a change map and estimating area based on the reference sample data. The good practice recommendations address the three major components: sampling design, response design and analysis. The primary good practice recommendations for assessing accuracy and estimating area are: (i) implement a probability sampling design that is chosen to achieve the priority objectives of accuracy and area estimation while also satisfying practical constraints such as cost and available sources of reference data; (ii) implement a response design protocol that is based on reference data sources that provide sufficient spatial and temporal representation to accurately label each unit in the sample (i.e., the “reference classification” will be considerably more accurate than the map classification being evaluated); (iii) implement an analysis that is consistent with the sampling design and response design protocols; (iv) summarize the accuracy assessment by reporting the estimated error matrix in terms of proportion of area and estimates of overall accuracy, user's accuracy (or commission error), and producer's accuracy (or omission error); (v) estimate area of classes (e.g., types of change such as wetland loss or types of persistence such as stable forest) based on the reference classification of the sample units; (vi) quantify uncertainty by reporting confidence intervals for accuracy and area parameters; (vii) evaluate variability and potential error in the reference classification; and (viii) document deviations from good practice that may substantially affect the results. An example application is provided to illustrate the recommended process.},
urldate = {2019-02-18},
journal = {Remote Sensing of Environment},
author = {Olofsson, Pontus and Foody, Giles M. and Herold, Martin and Stehman, Stephen V. and Woodcock, Curtis E. and Wulder, Michael A.},
Expand Down

0 comments on commit d4a87ad

Please sign in to comment.