/
raster.py
1892 lines (1466 loc) · 63.4 KB
/
raster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import print_function
import math
import tempfile
from collections import Counter
from collections import namedtuple
from copy import deepcopy
from functools import partial
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import rasterio.mask
import rasterio.plot
import concurrent.futures
from mpl_toolkits.axes_grid1 import make_axes_locatable
from rasterio.transform import Affine
from rasterio.warp import calculate_default_transform, reproject
from rasterio.windows import Window
from tqdm import tqdm
from .base import (BaseRaster, _file_path_tempfile, _get_nodata,
_get_num_workers)
from .indexing import _LocIndexer, _iLocIndexer
from .rasterlayer import RasterLayer
class Raster(BaseRaster):
"""
Flexible class that represents a collection of file-based GDAL-supported
raster datasets which share a common coordinate reference system and
geometry.
Raster objects encapsulate RasterLayer objects, which represent single band
raster datasets that can physically be represented by either separate
single-band raster files, multi-band raster files, or any combination of
individual bands from multi-band raster and single-band raster datasets.
Methods defined in a Raster class comprise those that would typically
applied to a stack of raster datasets. In addition, these methods always
return a new Raster object.
"""
def __init__(self, src=None, arr=None, crs=None, transform=None,
nodata=None, mode='r', file_path=None):
"""
Initiate a new Raster object.
Parameters
----------
src : file path, RasterLayer, or rasterio dataset (opt)
Initiate a Raster object from any combination of a file path or
list of file paths to GDAL-supported raster datasets, RasterLayer
objects, or directly from a rasterio dataset or band object that is
opened in 'r' or 'rw' mode.
arr : ndarray (opt)
Whether to initiate a Raster object from a numpy.ndarray.
Additional arguments `crs` and `transform` should also be provided
to supply spatial coordinate information. Parameters `arr` and
`src` are mutually-exclusive.
crs : rasterio.crs.CRS object (opt)
CRS object containing projection information for data if provided
by the associated `arr` parameter.
transform : affine.Affine object (opt)
Affine object containing transform information for data if provided
by the associated `arr` parameter.
nodata : any number (opt)
Assign a nodata value to the Raster dataset when `arr` is used
for initiation. If a nodata value is not specified then it is
determined based on the minimum permissible value for the array's
data type.
mode : str (opt). Default is 'r'
Mode used to open the raster datasets. Can be one of 'r', 'r+' or
'rw'.
file_path : str (opt)
Path to save new Raster object if created from `arr`.
Returns
-------
pyspatialml.Raster
Raster object containing the src layers stacked into a single
object
"""
self.loc = _LocIndexer(self)
self.iloc = _iLocIndexer(self, self.loc)
self.files = []
self.dtypes = []
self.nodatavals = []
self.count = 0
self.res = None
self.meta = None
self._block_shape = (256, 256)
self.max_memory = 5e+09
# some checks
if src and arr:
raise ValueError('Arguments src and arr are mutually exclusive')
if mode not in ['r', 'r+', 'w']:
raise ValueError("mode must be one of 'r', 'r+', or 'w'")
# initiate from array
if arr is not None:
if file_path is None:
file_path = tempfile.NamedTemporaryFile().name
with rasterio.open(
fp=file_path, mode='w', driver='GTiff', height=arr.shape[1],
width=arr.shape[2], count=arr.shape[0],
dtype=arr.dtype, crs=crs, transform=transform,
nodata=nodata) as dst:
dst.write(arr)
src = [file_path]
if not isinstance(src, list):
src = [src]
src_layers = []
# initiated from file paths
if all(isinstance(x, str) for x in src):
for f in src:
r = rasterio.open(f, mode=mode)
for i in range(r.count):
band = rasterio.band(r, i+1)
src_layers.append(RasterLayer(band))
# initiate from RasterLayer objects
elif all(isinstance(x, RasterLayer)
for x in src):
src_layers = src
# initiate from rasterio.io.datasetreader
elif all(isinstance(x, rasterio.io.DatasetReader) for x in src):
for r in src:
for i in range(r.count):
band = rasterio.band(r, i + 1)
src_layers.append(RasterLayer(band))
# initiate from rasterio.band objects
elif all(isinstance(x, rasterio.Band) for x in src):
for band in src:
src_layers.append(RasterLayer(band))
# otherwise raise error
elif all(isinstance(x, type(x[0])) for x in src):
raise ValueError(
'Cannot initiated a Raster from a list of different type '
'objects')
# call property with a list of rasterio.band objects
self._layers = src_layers
def __getitem__(self, key):
"""
Subset the Raster object using a label or list of labels.
Parameters
----------
key : str or list
Key-based indexing of RasterLayer objects within the Raster.
Returns
-------
Raster
A new Raster object only containing the subset of layers specified
in the label argument.
"""
if isinstance(key, str):
key = [key]
subset_layers = []
for i in key:
if i in self.names is False:
raise KeyError('layername not present in Raster object')
else:
subset_layers.append(self.loc[i])
subset_raster = Raster(subset_layers)
return subset_raster
def __setitem__(self, key, value):
"""
Replace a RasterLayer within the Raster object with a new
RasterLayer.
Note that this modifies the Raster object in place.
Parameters
----------
key : str
Key-based index of layer to be replaced.
value : pyspatialml.RasterLayer
RasterLayer object to use for replacement.
"""
if isinstance(value, RasterLayer):
self.loc[key] = value
self.iloc[self.names.index(key)] = value
setattr(self, key, value)
else:
raise ValueError('value is not a RasterLayer object')
def __iter__(self):
"""
Iterate over RasterLayers.
"""
return iter(self.loc.items())
def close(self):
"""
Close all of the RasterLayer objects in the Raster.
Note that this will cause any rasters based on temporary files to be
removed. This is intended as a method of clearing temporary files that
may have accumulated during an analysis session.
"""
for layer in self.iloc:
layer.close()
@staticmethod
def _check_alignment(layers):
"""
Check that a list of raster datasets are aligned with the same pixel
dimensions and geotransforms.
Parameters
----------
layers : list
List of pyspatialml.RasterLayer objects.
Returns
-------
dict or False
Dict of metadata if all layers are spatially aligned, otherwise
returns False.
"""
src_meta = []
for layer in layers:
src_meta.append(layer.ds.meta.copy())
if not all(i['crs'] == src_meta[0]['crs'] for i in src_meta):
Warning('crs of all rasters does not match, '
'possible unintended consequences')
if not all([i['height'] == src_meta[0]['height'] or
i['width'] == src_meta[0]['width'] or
i['transform'] == src_meta[0]['transform']
for i in src_meta]):
return False
else:
return src_meta[0]
@staticmethod
def _fix_names(combined_names):
"""
Adjusts the names of pyspatialml.RasterLayer objects within the
Raster when appending new layers. This avoids the Raster object
containing duplicated names in the case that multiple RasterLayer's are
appended with the same name.
In the case of duplicated names, the RasterLayer names are appended
with a `_n` with n = 1, 2, 3 .. n.
Parameters
----------
combined_names : list
List of str representing names of RasterLayers. Any duplicates with
have a suffix appended to them.
Returns
-------
list
List with adjusted names
"""
counts = Counter(combined_names)
for s, num in counts.items():
if num > 1:
for suffix in range(1, num + 1):
if s + "_" + str(suffix) not in combined_names:
combined_names[combined_names.index(s)] = (
s + "_" + str(suffix))
else:
i = 1
while s + "_" + str(i) in combined_names:
i += 1
combined_names[combined_names.index(s)] = (
s + "_" + str(i))
return combined_names
@property
def block_shape(self):
"""
Return the windows size used for raster calculations, specified as a
tuple (rows, columns).
Returns
-------
tuple
Block window shape that is currently set for the Raster as a tuple
in the format of (n_rows, n_columns) in pixels.
"""
return self._block_shape
@block_shape.setter
def block_shape(self, value):
"""
Set the windows size used for raster calculations, specified as a
tuple (rows, columns).
Parameters
----------
value : tuple
Tuple of integers for default block shape to read and write
data from the Raster object for memory-safe calculations.
Specified as (n_rows, n_columns).
"""
if not isinstance(value, tuple):
raise ValueError('block_shape must be set using an integer tuple '
'as (rows, cols)')
rows, cols = value
if not isinstance(rows, int) or not isinstance(cols, int):
raise ValueError('tuple must consist of integer values referring '
'to number of rows, cols')
self._block_shape = (rows, cols)
@property
def names(self):
"""
Return the names of the RasterLayers in the Raster object.
Returns
-------
list
List of names of RasterLayer objects
"""
return list(self.loc.keys())
@property
def _layers(self):
"""
Getter method.
Returns
-------
pyspatialml.indexing._LocIndexer
Returns a dict of key-value pairs of names and RasterLayers.
"""
return self.loc
@_layers.setter
def _layers(self, layers):
"""
Setter method for the files attribute in the Raster object.
Parameters
----------
layers : RasterLayer or list of RasterLayers
RasterLayers used to initiate a Raster object.
"""
# some checks
if isinstance(layers, RasterLayer):
layers = [layers]
if all(isinstance(x, type(layers[0])) for x in layers) is False:
raise ValueError(
'Cannot create a Raster object from a mixture of input types')
meta = self._check_alignment(layers)
if meta is False:
raise ValueError(
'Raster datasets do not all have the same dimensions or '
'transform')
# reset existing attributes
for name in self.names:
delattr(self, name)
self.loc = _LocIndexer(self)
self.iloc = _iLocIndexer(self, self.loc)
self.files = []
self.dtypes = []
self.nodatavals = []
# update global Raster object attributes with new values
self.count = len(layers)
self.width = meta['width']
self.height = meta['height']
self.shape = (self.height, self.width)
self.transform = meta['transform']
self.res = (abs(meta['transform'].a), abs(meta['transform'].e))
self.crs = meta['crs']
bounds = rasterio.transform.array_bounds(
self.height,
self.width,
self.transform)
BoundingBox = namedtuple(
'BoundingBox', ['left', 'bottom', 'right', 'top'])
self.bounds = BoundingBox(bounds[0], bounds[1], bounds[2], bounds[3])
names = [i.names[0] for i in layers]
names = self._fix_names(names)
# update attributes per dataset
for layer, name in zip(layers, names):
self.dtypes.append(layer.dtype)
self.nodatavals.append(layer.nodata)
self.files.append(layer.file)
layer.names = [name]
self.loc[name] = layer
setattr(self, name, self.loc[name])
self.meta = dict(crs=self.crs,
transform=self.transform,
width=self.width,
height=self.height,
count=self.count,
dtype=np.find_common_type(self.dtypes, []))
def read(self, masked=False, window=None, out_shape=None,
resampling='nearest', as_df=False, **kwargs):
"""
Reads data from the Raster object into a numpy array.
Overrides read BaseRaster class read method and replaces it with a
method that reads from multiple RasterLayer objects.
Parameters
----------
masked : bool (opt). Default is False
Read data into a masked array.
window : rasterio.window.Window object (opt)
Tuple of col_off, row_off, width, height of a window of data
to read a chunk of data into a ndarray.
out_shape : tuple (opt)
Shape of shape of array (rows, cols) to read data into using
decimated reads.
resampling : str (opt). Default is 'nearest'
Resampling method to use when applying decimated reads when
out_shape is specified. Supported methods are: 'average',
'bilinear', 'cubic', 'cubic_spline', 'gauss', 'lanczos',
'max', 'med', 'min', 'mode', 'q1', 'q3'.
as_df : bool (opt). Default is False
Whether to return the data as a pandas.DataFrame with columns
named by the RasterLayer names. Can be useful when using
scikit-learn ColumnTransformer to select columns based on
names rather than keeping track of indexes.
**kwargs : dict
Other arguments to pass to rasterio.DatasetReader.read method
Returns
-------
ndarray
Raster values in 3d ndarray with the dimensions in order of
(band, row, and column).
"""
dtype = self.meta['dtype']
# get window to read from window or height/width of dataset
if window is None:
width = self.width
height = self.height
else:
width = window.width
height = window.height
# decimated reads using nearest neighbor resampling
if out_shape:
height, width = out_shape
# read bands separately into numpy array
if masked is True:
arr = np.ma.zeros((self.count, height, width), dtype=dtype)
else:
arr = np.zeros((self.count, height, width), dtype=dtype)
for i, layer in enumerate(self.iloc):
arr[i, :, :] = layer.read(
masked=masked,
window=window,
out_shape=out_shape,
resampling=resampling,
**kwargs)
if masked is True:
arr[i, :, :] = np.ma.MaskedArray(
data=arr[i, :, :],
mask=np.isfinite(arr[i, :, :]).mask)
if as_df is True:
arr_flat = arr.reshape((arr.shape[1]*arr.shape[2], arr.shape[0]))
df = pd.DataFrame(data=arr_flat, columns=self.names)
return df
return arr
def write(self, file_path, driver="GTiff", dtype=None, nodata=None):
"""
Write the Raster object to a file.
Overrides the write RasterBase class method, which is a partial
function of the rasterio.DatasetReader.write method.
Parameters
----------
file_path : str
File path used to save the Raster object.
driver : str (opt). Default is 'GTiff'
Name of GDAL driver used to save Raster data.
dtype : str (opt)
Optionally specify a numpy compatible data type when saving to
file. If not specified, a data type is selected based on the data
types of RasterLayers in the Raster object.
nodata : any number (opt)
Optionally assign a new nodata value when saving to file. If not
specified a nodata value based on the minimum permissible value for
the data types of RasterLayers in the Raster object is used. Note
that this does not change the pixel nodata values of the raster, it
only changes the metadata of what value represents a nodata pixel.
Returns
-------
Raster
New Raster object from saved file.
"""
if dtype is None:
dtype = self.meta['dtype']
if nodata is None:
nodata = _get_nodata(dtype)
meta = self.meta
meta['driver'] = driver
meta['nodata'] = nodata
meta['dtype'] = dtype
with rasterio.open(file_path, mode='w', **meta) as dst:
for i, layer in enumerate(self.iloc):
arr = layer.read()
arr[arr == layer.nodata] = nodata
dst.write(arr.astype(dtype), i+1)
raster = self._newraster(file_path, self.names)
return raster
def to_pandas(self, max_pixels=50000, resampling='nearest'):
"""
Raster to pandas DataFrame.
Parameters
----------
max_pixels: int (opt). Default is 50000
Maximum number of pixels to sample.
resampling : str (opt). Default is 'nearest'
Resampling method to use when applying decimated reads when
out_shape is specified. Supported methods are: 'average',
'bilinear', 'cubic', 'cubic_spline', 'gauss', 'lanczos',
'max', 'med', 'min', 'mode', 'q1', 'q3'/
Returns
-------
pandas.DataFrame
DataFrame containing values of names of RasterLayers in the Raster
as columns, and pixel values as rows.
"""
n_pixels = self.shape[0] * self.shape[1]
scaling = max_pixels / n_pixels
# read dataset using decimated reads
out_shape = (round(self.shape[0] * scaling),
round(self.shape[1] * scaling))
arr = self.read(masked=True,
out_shape=out_shape,
resampling=resampling)
# x and y grid coordinate arrays
x_range = np.linspace(start=self.bounds.left,
stop=self.bounds.right,
num=arr.shape[2])
y_range = np.linspace(start=self.bounds.top,
stop=self.bounds.bottom,
num=arr.shape[1])
xs, ys = np.meshgrid(x_range, y_range)
arr = arr.reshape((arr.shape[0], arr.shape[1] * arr.shape[2]))
arr = arr.transpose()
df = pd.DataFrame(
data=np.column_stack((xs.flatten(), ys.flatten(), arr)),
columns=['x', 'y'] + self.names)
# set nodata values to nan
for i, col_name in enumerate(self.names):
df.loc[df[col_name] == self.nodatavals[i], col_name] = np.nan
return df
def predict_proba(self, estimator, file_path=None, indexes=None,
driver='GTiff', dtype=None, nodata=None,
progress=False):
"""
Apply class probability prediction of a scikit learn model to a
Raster.
Parameters
----------
estimator : estimator object implementing 'fit'
The object to use to fit the data.
file_path : str (opt)
Path to a GeoTiff raster for the prediction results. If not
specified then the output is written to a temporary file.
indexes : list of integers (opt)
List of class indices to export. In some circumstances, only a
subset of the class probability estimations are desired, for
instance when performing a binary classification only the
probabilities for the positive class may be desired.
driver : str (opt). Default is 'GTiff'
Named of GDAL-supported driver for file export.
dtype : str (opt)
Optionally specify a numpy compatible data type when saving to
file. If not specified, a data type is set based on the data type
of the prediction.
nodata : any number (opt)
Nodata value for file export. If not specified then the nodata
value is derived from the minimum permissible value for the given
data type.
progress : bool (opt). Default is False
Show progress bar for prediction.
Returns
-------
pyspatialml.Raster
Raster containing predicted class probabilities. Each predicted
class is represented by a RasterLayer object. The RasterLayers are
named `prob_n` for 1,2,3..n, with `n` based on the index position
of the classes, not the number of the class itself.
For example, a classification model predicting classes with integer
values of 1, 3, and 5 would result in three RasterLayers named
prob_1, prob_2 and prob_3.
"""
file_path, tfile = _file_path_tempfile(file_path)
predfun = self._probfun
# determine output count
if isinstance(indexes, int):
indexes = range(indexes, indexes + 1)
elif indexes is None:
window = Window(0, 0, self.width, 1)
img = self.read(masked=True, window=window)
n_features, rows, cols = img.shape[0], img.shape[1], img.shape[2]
n_samples = rows * cols
flat_pixels = img.transpose(
1, 2, 0).reshape((n_samples, n_features))
result = estimator.predict_proba(flat_pixels)
indexes = np.arange(0, result.shape[1])
if dtype is None:
dtype = result.dtype
if nodata is None:
nodata = _get_nodata(dtype)
# open output file with updated metadata
meta = deepcopy(self.meta)
meta.update(driver=driver, count=len(
indexes), dtype=dtype, nodata=nodata)
with rasterio.open(file_path, 'w', **meta) as dst:
# define windows
windows = [window for ij, window in dst.block_windows()]
# generator gets raster arrays for each window
data_gen = (self.read(window=window, masked=True)
for window in windows)
if progress is True:
for window, arr, pbar in zip(windows, data_gen, tqdm(windows)):
result = predfun(arr, estimator)
result = np.ma.filled(result, fill_value=nodata)
dst.write(result[indexes, :, :].astype(
dtype), window=window)
else:
for window, arr in zip(windows, data_gen):
result = predfun(arr, estimator)
result = np.ma.filled(result, fill_value=nodata)
dst.write(result[indexes, :, :].astype(
dtype), window=window)
# generate layer names
prefix = "prob_"
names = [prefix + str(i) for i in range(len(indexes))]
# create new raster object
new_raster = self._newraster(file_path, names)
if tfile is not None:
for layer in new_raster.iloc:
layer.close = tfile.close
return new_raster
def predict(self, estimator, file_path=None, driver='GTiff', dtype=None,
nodata=None, n_jobs=-1, progress=False):
"""
Apply prediction of a scikit learn model to a Raster. The model can
represent any scikit learn model or compatible api with a `fit` and
`predict` method. These can consist of classification or regression
models. Multi-class classifications and multi-target regressions are
also supported.
Parameters
----------
estimator : estimator object implementing 'fit'
The object to use to fit the data.
file_path : str (opt)
Path to a GeoTiff raster for the prediction results. If not
specified then the output is written to a temporary file.
driver : str (opt). Default is 'GTiff'
Named of GDAL-supported driver for file export
dtype : str (opt)
Optionally specify a numpy compatible data type when saving to file.
If not specified, a data type is set based on the data types of
the prediction.
nodata : any number (opt)
Nodata value for file export. If not specified then the nodata
value is derived from the minimum permissible value for the given
data type.
n_jobs : int
Number of processing cores to use for parallel execution. Default
is n_jobs=1. -1 is all cores; -2 is all cores -1.
progress : bool (opt). Default is False
Show progress bar for prediction.
Returns
-------
pyspatial.Raster
Raster object containing prediction results as a RasterLayers.
For classification and regression models, the Raster will contain a
single RasterLayer, unless the model is multi-class or
multi-target. Layers are named automatically as `pred_raw_n` with
n = 1, 2, 3 ..n.
"""
file_path, tfile = _file_path_tempfile(file_path)
n_jobs = _get_num_workers(n_jobs)
# determine output count for multi output cases
window = Window(0, 0, self.width, 1)
img = self.read(masked=True, window=window)
n_features, rows, cols = img.shape[0], img.shape[1], img.shape[2]
n_samples = rows * cols
flat_pixels = img.transpose(1, 2, 0).reshape((n_samples, n_features))
result = estimator.predict(flat_pixels)
if result.ndim > 1:
n_outputs = result.shape[result.ndim-1]
else:
n_outputs = 1
indexes = np.arange(0, n_outputs)
# chose prediction function
if len(indexes) == 1:
predfun = partial(self._predfun, estimator=estimator)
else:
predfun = partial(self._predfun_multioutput, estimator=estimator)
if dtype is None:
dtype = result.dtype
if nodata is None:
nodata = _get_nodata(dtype)
# open output file with updated metadata
meta = deepcopy(self.meta)
meta.update(driver=driver, count=len(
indexes), dtype=dtype, nodata=nodata)
with rasterio.open(file_path, 'w', **meta) as dst:
# define windows
# windows = [window for ij, window in dst.block_windows()]
windows = [window for window in
self.block_shapes(*self._block_shape)]
# generator gets raster arrays for each window
data_gen = (
self.read(window=window, masked=True)
for window in windows)
with concurrent.futures.ThreadPoolExecutor(max_workers=n_jobs) as executor:
if progress is True:
for window, result, pbar in zip(windows, executor.map(predfun, data_gen), tqdm(windows)):
result = np.ma.filled(result, fill_value=nodata)
dst.write(result[indexes, :, :].astype(
dtype), window=window)
else:
for window, result in zip(windows, executor.map(predfun, data_gen)):
result = np.ma.filled(result, fill_value=nodata)
dst.write(result[indexes, :, :].astype(
dtype), window=window)
# generate layer names
prefix = "pred_raw_"
names = [prefix + str(i) for i in range(len(indexes))]
# create new raster object
new_raster = self._newraster(file_path, names)
if tfile is not None:
for layer in new_raster.iloc:
layer.close = tfile.close
return new_raster
def _predfun(self, img, estimator):
"""
Prediction function for classification or regression response.
Parameters
----
img : numpy.ndarray
3d ndarray of raster data with the dimensions in order of
(band, rows, columns).
estimator : estimator object implementing 'fit'
The object to use to fit the data.
Returns
-------
numpy.ndarray
2d numpy array representing a single band raster containing the
classification or regression result.
"""
n_features, rows, cols = img.shape[0], img.shape[1], img.shape[2]
# reshape each image block matrix into a 2D matrix
# first reorder into rows,cols,bands(transpose)
# then resample into 2D array (rows=sample_n, cols=band_values)
n_samples = rows * cols
flat_pixels = img.transpose(1, 2, 0).reshape((n_samples, n_features))
# create mask for NaN values and replace with number
flat_pixels_mask = flat_pixels.mask.copy()
# predict and replace masl
result_cla = estimator.predict(flat_pixels)
result_cla = np.ma.masked_array(
data=result_cla, mask=flat_pixels_mask.any(axis=1))
# reshape the prediction from a 1D into 3D array [band, row, col]
result_cla = result_cla.reshape((1, rows, cols))
return result_cla
@staticmethod
def _probfun(img, estimator):
"""
Class probabilities function.
Parameters
----------
img : numpy.ndarray
3d numpy array of raster data with the dimensions in order of
(band, row, column).
estimator : estimator object implementing 'fit'
The object to use to fit the data.
Returns
-------
numpy.ndarray
Multi band raster as a 3d numpy array containing the probabilities
associated with each class. ndarray dimensions are in the order of
(class, row, column).
"""
n_features, rows, cols = img.shape[0], img.shape[1], img.shape[2]
mask2d = img.mask.any(axis=0)
# reshape each image block matrix into a 2D matrix
# first reorder into rows,cols,bands(transpose)
# then resample into 2D array (rows=sample_n, cols=band_values)
n_samples = rows * cols
flat_pixels = img.transpose(1, 2, 0).reshape((n_samples, n_features))
# predict probabilities
result_proba = estimator.predict_proba(flat_pixels)
# reshape class probabilities back to 3D image [iclass, rows, cols]
result_proba = result_proba.reshape(
(rows, cols, result_proba.shape[1]))
# reshape band into rasterio format [band, row, col]
result_proba = result_proba.transpose(2, 0, 1)
# repeat mask for n_bands
mask3d = np.repeat(a=mask2d[np.newaxis, :, :],
repeats=result_proba.shape[0], axis=0)
# convert proba to masked array
result_proba = np.ma.masked_array(
result_proba,
mask=mask3d,
fill_value=np.nan)
return result_proba
@staticmethod
def _predfun_multioutput(img, estimator):
"""
Multi-target prediction function.
Parameters
----------
img : numpy.ndarray
3d numpy array of raster data with the dimensions in order of
(band, row, column).
estimator : estimator object implementing 'fit'
The object to use to fit the data.
Returns
-------
numpy.ndarray
3d numpy array representing the multi-target prediction result with
the dimensions in the order of (target, row, column).
"""
n_features, rows, cols = img.shape[0], img.shape[1], img.shape[2]
mask2d = img.mask.any(axis=0)
# reshape each image block matrix into a 2D matrix
# first reorder into rows,cols,bands(transpose)
# then resample into 2D array (rows=sample_n, cols=band_values)
n_samples = rows * cols
flat_pixels = img.transpose(1, 2, 0).reshape((n_samples, n_features))
# predict probabilities
result = estimator.predict(flat_pixels)
# reshape class probabilities back to 3D image [iclass, rows, cols]
result = result.reshape((rows, cols, result.shape[1]))