/
hra.py
2304 lines (2031 loc) · 98.9 KB
/
hra.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Habitat risk assessment (HRA) model for InVEST."""
# -*- coding: UTF-8 -*-
import collections
import itertools
import logging
import math
import os
import re
import shutil
import numpy
import pandas
import pygeoprocessing
import taskgraph
from osgeo import gdal
from osgeo import ogr
from osgeo import osr
from . import datastack
from . import gettext
from . import spec_utils
from . import utils
from . import validation
from .model_metadata import MODEL_METADATA
from .spec_utils import u
LOGGER = logging.getLogger(__name__)
# Parameters to be used in dataframe and output stats CSV
_HABITAT_HEADER = 'HABITAT'
_STRESSOR_HEADER = 'STRESSOR'
_TOTAL_REGION_NAME = 'Total Region'
# Parameters for the spatially explicit criteria shapefiles
_RATING_FIELD = 'rating'
# Target cell type or values for raster files.
_TARGET_GDAL_TYPE_FLOAT32 = gdal.GDT_Float32
_TARGET_GDAL_TYPE_BYTE = gdal.GDT_Byte
_TARGET_NODATA_FLOAT32 = float(numpy.finfo(numpy.float32).min)
_TARGET_NODATA_BYTE = 255  # for unsigned 8-bit int
# EPSG code for warping rasters to WGS84 coordinate system.
_WGS84_SRS = osr.SpatialReference()
_WGS84_SRS.ImportFromEPSG(4326)
_WGS84_WKT = _WGS84_SRS.ExportToWkt()
# Resampling method for rasters.
_RESAMPLE_METHOD = 'near'
# An argument list that will be passed to the GTiff driver. Useful for
# blocksizes, compression, and more.
_DEFAULT_GTIFF_CREATION_OPTIONS = (
    'TILED=YES', 'BIGTIFF=YES', 'COMPRESS=DEFLATE',
    'BLOCKXSIZE=256', 'BLOCKYSIZE=256')
# Declarative specification of the model's arguments, consumed by the InVEST
# user interface and by ``validation`` to check user inputs before ``execute``.
ARGS_SPEC = {
    "model_name": MODEL_METADATA["habitat_risk_assessment"].model_title,
    "pyname": MODEL_METADATA["habitat_risk_assessment"].pyname,
    "userguide": MODEL_METADATA["habitat_risk_assessment"].userguide,
    "args": {
        "workspace_dir": spec_utils.WORKSPACE,
        "results_suffix": spec_utils.SUFFIX,
        "n_workers": spec_utils.N_WORKERS,
        "info_table_path": {
            "name": gettext("habitat stressor table"),
            "about": gettext("A table describing each habitat and stressor."),
            "type": "csv",
            "columns": {
                "name": {
                    "type": "freestyle_string",
                    "about": gettext(
                        "A unique name for each habitat or stressor. These "
                        "names must match the habitat and stressor names in "
                        "the Criteria Scores Table.")},
                "path": {
                    "type": {"vector", "raster"},
                    "bands": {1: {
                        "type": "number",
                        "units": u.none,
                        "about": gettext(
                            "Pixel values are 1, indicating presence of the "
                            "habitat/stressor, or 0 indicating absence. Any "
                            "values besides 0 or 1 will be treated as 0.")
                    }},
                    "fields": {},
                    "geometries": spec_utils.POLYGONS,
                    "about": gettext(
                        "Map of where the habitat or stressor exists. For "
                        "rasters, a pixel value of 1 indicates presence of "
                        "the habitat or stressor. 0 (or any other value) "
                        "indicates absence of the habitat or stressor. For "
                        "vectors, a polygon indicates an area where the "
                        "habitat or stressor is present.")
                },
                "type": {
                    "type": "option_string",
                    "options": {
                        "habitat": {"description": gettext("habitat")},
                        "stressor": {"description": gettext("stressor")}
                    },
                    "about": gettext(
                        "Whether this row is for a habitat or a stressor.")
                },
                "stressor buffer (meters)": {
                    "type": "number",
                    "units": u.meter,
                    "about": gettext(
                        "The desired buffer distance used to expand a given "
                        "stressor’s influence or footprint. This should be "
                        "left blank for habitats, but must be filled in for "
                        "stressors. Enter 0 if no buffering is desired for a "
                        "given stressor. The model will round down this "
                        "buffer distance to the nearest cell unit. e.g., a "
                        "buffer distance of 600m will buffer a stressor’s "
                        "footprint by two grid cells if the resolution of "
                        "analysis is 250m.")
                }
            },
            "excel_ok": True
        },
        "criteria_table_path": {
            "name": gettext("criteria scores table"),
            "about": gettext(
                "A table of criteria scores for all habitats and stressors."),
            "type": "csv",
            "excel_ok": True,
        },
        "resolution": {
            "name": gettext("resolution of analysis"),
            "about": gettext(
                "The resolution at which to run the analysis. The model "
                "outputs will have this resolution."),
            "type": "number",
            "units": u.meter,
            "expression": "value > 0",
        },
        "max_rating": {
            "name": gettext("maximum criteria score"),
            "about": gettext(
                "The highest possible criteria score in the scoring system."),
            "type": "number",
            "units": u.none,
            "expression": "value > 0"
        },
        "risk_eq": {
            "name": gettext("risk equation"),
            "about": gettext(
                "The equation to use to calculate risk from exposure and "
                "consequence."),
            "type": "option_string",
            "options": {
                "multiplicative": {"display_name": gettext("Multiplicative")},
                "euclidean": {"display_name": gettext("Euclidean")}
            }
        },
        "decay_eq": {
            "name": gettext("decay equation"),
            "about": gettext(
                "The equation to model effects of stressors in buffer areas."),
            "type": "option_string",
            "options": {
                "none": {
                    "display_name": gettext("None"),
                    "description": gettext(
                        "No decay. Stressor has full effect in the buffer "
                        "area.")},
                "linear": {
                    "display_name": gettext("Linear"),
                    "description": gettext(
                        "Stressor effects in the buffer area decay linearly "
                        "with distance from the stressor.")},
                "exponential": {
                    "display_name": gettext("Exponential"),
                    "description": gettext(
                        "Stressor effects in the buffer area decay "
                        "exponentially with distance from the stressor.")}
            }
        },
        "aoi_vector_path": {
            **spec_utils.AOI,
            "projected": True,
            "projection_units": u.meter,
            "fields": {
                "name": {
                    "required": False,
                    "type": "freestyle_string",
                    "about": gettext(
                        "Uniquely identifies each feature. Required if "
                        "the vector contains more than one feature.")
                }
            },
            "about": gettext(
                "A GDAL-supported vector file containing features "
                "representing one or more planning regions or subregions."),
        },
        "n_overlapping_stressors": {
            "name": gettext("Number of Overlapping Stressors"),
            "type": "number",
            "required": True,
            "about": gettext(
                "The number of overlapping stressors to consider as "
                "'maximum' when reclassifying risk scores into "
                "high/medium/low. Affects the breaks between risk "
                "classifications."),
            "units": u.none,
            "expression": "value > 0",
        },
        "visualize_outputs": {
            "name": gettext("Generate GeoJSONs"),
            "about": gettext(
                "Generate GeoJSON outputs for web visualization."),
            "type": "boolean",
            "required": False,
        }
    }
}
# Valid option keys for the risk and decay equations, derived from the spec
# so validation and execute() stay in sync with the UI choices.
_VALID_RISK_EQS = set(ARGS_SPEC['args']['risk_eq']['options'].keys())
_VALID_DECAY_TYPES = set(ARGS_SPEC['args']['decay_eq']['options'].keys())
def execute(args):
    """Habitat Risk Assessment.

    Args:
        args['workspace_dir'] (str): a path to the output workspace folder.
            It will overwrite any files that exist if the path already exists.
        args['results_suffix'] (str): a string appended to each output file
            path. (optional)
        args['info_table_path'] (str): a path to the CSV or Excel file that
            contains the name of the habitat (H) or stressor (s) on the
            ``NAME`` column that matches the names in criteria_table_path.
            Each H/S has its corresponding vector or raster path on the
            ``PATH`` column. The ``STRESSOR BUFFER (meters)`` column should
            have a buffer value if the ``TYPE`` column is a stressor.
        args['criteria_table_path'] (str): a path to the CSV or Excel file that
            contains the set of criteria ranking of each stressor on each
            habitat.
        args['resolution'] (int): a number representing the desired pixel
            dimensions of output rasters in meters.
        args['max_rating'] (str, int or float): a number representing the
            highest potential value that should be represented in rating in the
            criteria scores table.
        args['risk_eq'] (str): a string identifying the equation that should be
            used in calculating risk scores for each H-S overlap cell. This
            will be either 'Euclidean' or 'Multiplicative'.
        args['decay_eq'] (str): a string identifying the equation that should
            be used in calculating the decay of stressor buffer influence. This
            can be 'None', 'Linear', or 'Exponential'.
        args['aoi_vector_path'] (str): a path to the shapefile containing one
            or more planning regions used to get the average risk value for
            each habitat-stressor combination over each area. Optionally, if
            each of the shapefile features contain a 'name' field, it will
            be used as a way of identifying each individual shape.
        args['n_overlapping_stressors'] (number): This number will be used
            in risk reclassification instead of the calculated maximum
            number of stressor layers that overlap.
        args['n_workers'] (int): the number of worker processes to
            use for processing this model. If omitted, computation will take
            place in the current process. (optional)
        args['visualize_outputs'] (bool): if True, create output GeoJSONs and
            save them in a visualization_outputs folder, so users can visualize
            results on the web app. Defaults to False if not specified.
            (optional)

    Returns:
        None.
    """
    intermediate_dir = os.path.join(args['workspace_dir'],
                                    'intermediate_outputs')
    output_dir = os.path.join(args['workspace_dir'], 'outputs')
    taskgraph_working_dir = os.path.join(args['workspace_dir'], '.taskgraph')
    utils.make_directories([intermediate_dir, output_dir])
    suffix = utils.make_suffix_string(args, 'results_suffix')
    resolution = float(args['resolution'])
    max_rating = float(args['max_rating'])
    max_n_stressors = float(args['n_overlapping_stressors'])
    target_srs_wkt = pygeoprocessing.get_vector_info(
        args['aoi_vector_path'])['projection_wkt']
    if args['risk_eq'].lower() == 'multiplicative':
        max_pairwise_risk = max_rating * max_rating
    elif args['risk_eq'].lower() == 'euclidean':
        max_pairwise_risk = math.sqrt(
            ((max_rating - 1) ** 2) + ((max_rating - 1) ** 2))
    else:
        raise ValueError(
            "args['risk_eq'] must be either 'Multiplicative' or 'Euclidean' "
            f"not {args['risk_eq']}")
    LOGGER.info(
        f"The maximum pairwise risk score for {args['risk_eq'].lower()} "
        f"risk is {max_pairwise_risk}")
    try:
        n_workers = int(args['n_workers'])
    except (KeyError, ValueError, TypeError):
        # KeyError when n_workers is not present in args
        # ValueError when n_workers is an empty string.
        # TypeError when n_workers is None.
        n_workers = -1  # single process mode.
    graph = taskgraph.TaskGraph(taskgraph_working_dir, n_workers)
    # parse the info table and get info dicts for habitats, stressors.
    habitats_info, stressors_info = _parse_info_table(args['info_table_path'])
    # parse the criteria table to get the composite table
    composite_criteria_table_path = os.path.join(
        intermediate_dir, f'composite_criteria{suffix}.csv')
    criteria_habitats, criteria_stressors = _parse_criteria_table(
        args['criteria_table_path'], composite_criteria_table_path)
    # Validate that habitats and stressors match precisely.
    for label, info_table_set, criteria_table_set in [
            ('habitats', set(habitats_info.keys()), criteria_habitats),
            ('stressors', set(stressors_info.keys()), criteria_stressors)]:
        if info_table_set != criteria_table_set:
            missing_from_info_table = ", ".join(
                sorted(criteria_table_set - info_table_set))
            missing_from_criteria_table = ", ".join(
                sorted(info_table_set - criteria_table_set))
            raise ValueError(
                f"The {label} in the info and criteria tables do not match:\n"
                f"  Missing from info table: {missing_from_info_table}\n"
                f"  Missing from criteria table: {missing_from_criteria_table}"
            )
    criteria_df = pandas.read_csv(composite_criteria_table_path,
                                  index_col=False)
    # Because criteria may be spatial, we need to prepare those spatial inputs
    # as well.
    spatial_criteria_attrs = {}
    for (habitat, stressor, criterion, rating) in criteria_df[
            ['habitat', 'stressor', 'criterion',
             'rating']].itertuples(index=False):
        try:
            float(rating)  # numeric rating
            continue
        except ValueError:
            # If we can't cast it to a float, assume it's a string and
            # therefore spatial.
            pass
        # If the rating is non-numeric, it should be a spatial criterion.
        # this dict matches the structure of the outputs for habitat/stressor
        # dicts, from _parse_info_table
        name = f'{habitat}-{stressor}-{criterion}'
        spatial_criteria_attrs[name] = {
            'name': name,
            'path': rating,  # verified gdal file in _parse_criteria_table
        }
    # Preprocess habitat, stressor spatial criteria datasets.
    # All of these are spatial in nature but might be rasters or vectors.
    user_files_to_aligned_raster_paths = {}
    alignment_source_raster_paths = {}
    alignment_source_vector_paths = {}
    alignment_dependent_tasks = []
    aligned_habitat_raster_paths = {}
    aligned_stressor_raster_paths = {}
    habitat_stressor_vectors = set([])
    for name, attributes in itertools.chain(habitats_info.items(),
                                            stressors_info.items(),
                                            spatial_criteria_attrs.items()):
        source_filepath = attributes['path']
        gis_type = pygeoprocessing.get_gis_type(source_filepath)
        aligned_raster_path = os.path.join(
            intermediate_dir, f'aligned_{name}{suffix}.tif')
        user_files_to_aligned_raster_paths[
            source_filepath] = aligned_raster_path
        # If the input is already a raster, run it through raster_calculator to
        # ensure we know the nodata value and pixel values.
        if gis_type == pygeoprocessing.RASTER_TYPE:
            rewritten_raster_path = os.path.join(
                intermediate_dir, f'rewritten_{name}{suffix}.tif')
            alignment_source_raster_paths[
                rewritten_raster_path] = aligned_raster_path
            # Habitats/stressors must have pixel values of 0 or 1.
            # Criteria may be between [0, max_criteria_score]
            if name in spatial_criteria_attrs:
                # Spatial criteria rasters can represent any positive real
                # values, though they're supposed to be in the range [0, max
                # criteria score], inclusive.
                prep_raster_task = graph.add_task(
                    func=_prep_input_criterion_raster,
                    kwargs={
                        'source_raster_path': source_filepath,
                        'target_filepath': rewritten_raster_path,
                    },
                    task_name=(
                        f'Rewrite {name} criteria raster for consistency'),
                    target_path_list=[rewritten_raster_path],
                    dependent_task_list=[]
                )
            else:
                # habitat/stressor rasters represent presence/absence in the
                # form of 1 or 0 pixel values.
                prep_raster_task = graph.add_task(
                    func=_mask_binary_presence_absence_rasters,
                    kwargs={
                        'source_raster_paths': [source_filepath],
                        'target_mask_path': rewritten_raster_path,
                    },
                    task_name=(
                        f'Rewrite {name} habitat/stressor raster for '
                        'consistency'),
                    target_path_list=[rewritten_raster_path],
                    dependent_task_list=[]
                )
            alignment_dependent_tasks.append(prep_raster_task)
        # If the input is a vector, reproject to the AOI SRS and simplify.
        # Rasterization happens in the alignment step.
        elif gis_type == pygeoprocessing.VECTOR_TYPE:
            # Habitats and stressors are rasterized with ALL_TOUCHED=TRUE
            if name in habitats_info or name in stressors_info:
                habitat_stressor_vectors.add(source_filepath)
            # Using Shapefile here because its driver appears to not raise a
            # warning if a MultiPolygon geometry is inserted into a Polygon
            # layer, which was happening on a real-world sample dataset while
            # in development.
            target_reprojected_vector = os.path.join(
                intermediate_dir, f'reprojected_{name}{suffix}.shp')
            reprojected_vector_task = graph.add_task(
                pygeoprocessing.reproject_vector,
                kwargs={
                    'base_vector_path': source_filepath,
                    'target_projection_wkt': target_srs_wkt,
                    'target_path': target_reprojected_vector,
                },
                task_name=f'Reproject {name} to AOI',
                target_path_list=[target_reprojected_vector],
                dependent_task_list=[]
            )
            # Spatial habitats/stressors are rasterized as presence/absence, so
            # we don't need to preserve any columns.
            fields_to_preserve = None
            if name in spatial_criteria_attrs:
                # In spatial criteria vectors, the 'rating' field contains the
                # numeric rating that needs to be rasterized.
                fields_to_preserve = ['rating']
            target_simplified_vector = os.path.join(
                intermediate_dir, f'simplified_{name}{suffix}.gpkg')
            alignment_source_vector_paths[
                target_simplified_vector] = aligned_raster_path
            alignment_dependent_tasks.append(graph.add_task(
                func=_simplify,
                kwargs={
                    'source_vector_path': source_filepath,
                    'tolerance': resolution / 2,  # by the nyquist theorem.
                    'target_vector_path': target_simplified_vector,
                    'preserve_columns': fields_to_preserve,
                },
                task_name=f'Simplify {name}',
                target_path_list=[target_simplified_vector],
                dependent_task_list=[reprojected_vector_task]
            ))
        # Later operations make use of the habitats rasters or the stressors
        # rasters, so it's useful to collect those here now.
        if name in habitats_info:
            aligned_habitat_raster_paths[name] = aligned_raster_path
        elif name in stressors_info:
            aligned_stressor_raster_paths[name] = aligned_raster_path
    alignment_task = graph.add_task(
        func=_align,
        kwargs={
            'raster_path_map': alignment_source_raster_paths,
            'vector_path_map': alignment_source_vector_paths,
            'target_pixel_size': (resolution, -resolution),
            'target_srs_wkt': target_srs_wkt,
            'all_touched_vectors': habitat_stressor_vectors,
        },
        task_name='Align raster stack',
        target_path_list=list(itertools.chain(
            alignment_source_raster_paths.values(),
            alignment_source_vector_paths.values())),
        dependent_task_list=alignment_dependent_tasks,
    )
    # --> Create a binary mask of habitat pixels.
    all_habitats_mask_path = os.path.join(
        intermediate_dir, f'habitat_mask{suffix}.tif')
    all_habitats_mask_task = graph.add_task(
        _mask_binary_presence_absence_rasters,
        kwargs={
            'source_raster_paths':
                list(aligned_habitat_raster_paths.values()),
            'target_mask_path': all_habitats_mask_path,
        },
        task_name='Create mask of all habitats',
        target_path_list=[all_habitats_mask_path],
        dependent_task_list=[alignment_task]
    )
    # --> for stressor in stressors, do a decayed EDT.
    decayed_edt_paths = {}  # {stressor name: decayed EDT raster}
    decayed_edt_tasks = {}  # {stressor name: decayed EDT task}
    for stressor, stressor_path in aligned_stressor_raster_paths.items():
        decayed_edt_paths[stressor] = os.path.join(
            intermediate_dir, f'decayed_edt_{stressor}{suffix}.tif')
        decayed_edt_tasks[stressor] = graph.add_task(
            _calculate_decayed_distance,
            kwargs={
                'stressor_raster_path': stressor_path,
                'decay_type': args['decay_eq'],
                'buffer_distance': stressors_info[stressor]['buffer'],
                'target_edt_path': decayed_edt_paths[stressor],
            },
            task_name=f'Make decayed EDT for {stressor}',
            target_path_list=[decayed_edt_paths[stressor]],
            dependent_task_list=[alignment_task]
        )
    # Save this dataframe to make indexing in this loop a little cheaper
    # Resilience/recovery calculations are only done for Consequence criteria.
    reclassed_habitat_risk_paths = {}
    reclassed_habitat_risk_tasks = []
    cumulative_risk_to_habitat_paths = []
    cumulative_risk_to_habitat_tasks = []
    reclassified_rasters = []  # For visualization geojson, if requested
    pairwise_summary_data = []  # for the later summary statistics.
    all_pairwise_risk_tasks = []
    for habitat in habitats_info:
        pairwise_risk_tasks = []
        pairwise_risk_paths = []
        reclassified_pairwise_risk_paths = []
        reclassified_pairwise_risk_tasks = []
        for stressor in stressors_info:
            criteria_tasks = {}  # {criteria type: task}
            criteria_rasters = {}  # {criteria type: score raster path}
            summary_data = {
                'habitat': habitat,
                'stressor': stressor,
            }
            for criteria_type in ['E', 'C']:
                criteria_raster_path = os.path.join(
                    intermediate_dir,
                    f'{criteria_type}_{habitat}_{stressor}{suffix}.tif')
                criteria_rasters[criteria_type] = criteria_raster_path
                summary_data[
                    f'{criteria_type.lower()}_path'] = criteria_raster_path
                # This rather complicated filter just grabs the rows matching
                # this habitat, stressor and criteria type.  It's the pandas
                # equivalent of SELECT * FROM criteria_df WHERE the habitat,
                # stressor and criteria type match the current habitat,
                # stressor and criteria type.
                local_criteria_df = criteria_df[
                    (criteria_df['habitat'] == habitat) &
                    (criteria_df['stressor'] == stressor) &
                    (criteria_df['e/c'] == criteria_type)]
                # If we are doing consequence calculations, add in the
                # resilience/recovery parameters for this habitat as additional
                # criteria.
                # Note that if a user provides an E-type RESILIENCE criterion,
                # it will be ignored in all criteria calculations.
                if criteria_type == 'C':
                    local_resilience_df = criteria_df[
                        (criteria_df['habitat'] == habitat) &
                        (criteria_df['stressor'] == 'RESILIENCE') &
                        (criteria_df['e/c'] == 'C')]
                    local_criteria_df = pandas.concat(
                        [local_criteria_df, local_resilience_df])
                # This produces a list of dicts in the form:
                # [{'rating': (score), 'weight': (score), 'dq': (score)}],
                # which is what _calc_criteria() expects.
                attributes_list = []
                for attrs in local_criteria_df[
                        ['rating', 'weight', 'dq']].to_dict(orient='records'):
                    try:
                        float(attrs['rating'])
                    except ValueError:
                        # When attrs['rating'] is not a number, we should
                        # assume it's a spatial file.
                        attrs['rating'] = user_files_to_aligned_raster_paths[
                            attrs['rating']]
                    attributes_list.append(attrs)
                criteria_tasks[criteria_type] = graph.add_task(
                    _calc_criteria,
                    kwargs={
                        'attributes_list': attributes_list,
                        'habitat_mask_raster_path':
                            aligned_habitat_raster_paths[habitat],
                        'target_criterion_path':
                            criteria_rasters[criteria_type],
                        'decayed_edt_raster_path':
                            decayed_edt_paths[stressor],
                    },
                    task_name=(
                        f'Calculate {criteria_type} score for '
                        f'{habitat} / {stressor}'),
                    target_path_list=[criteria_rasters[criteria_type]],
                    dependent_task_list=[
                        decayed_edt_tasks[stressor],
                        all_habitats_mask_task
                    ])
            pairwise_risk_path = os.path.join(
                intermediate_dir, f'RISK_{habitat}_{stressor}{suffix}.tif')
            pairwise_risk_paths.append(pairwise_risk_path)
            summary_data['risk_path'] = pairwise_risk_path
            pairwise_risk_task = graph.add_task(
                _calculate_pairwise_risk,
                kwargs={
                    'habitat_mask_raster_path':
                        aligned_habitat_raster_paths[habitat],
                    'exposure_raster_path': criteria_rasters['E'],
                    'consequence_raster_path': criteria_rasters['C'],
                    'risk_equation': args['risk_eq'],
                    'target_risk_raster_path': pairwise_risk_path,
                },
                task_name=f'Calculate pairwise risk for {habitat}/{stressor}',
                target_path_list=[pairwise_risk_path],
                dependent_task_list=sorted(criteria_tasks.values())
            )
            pairwise_risk_tasks.append(pairwise_risk_task)
            reclassified_pairwise_risk_path = os.path.join(
                intermediate_dir, f'reclass_{habitat}_{stressor}{suffix}.tif')
            reclassified_pairwise_risk_paths.append(
                reclassified_pairwise_risk_path)
            reclassified_pairwise_risk_tasks.append(graph.add_task(
                pygeoprocessing.raster_calculator,
                kwargs={
                    'base_raster_path_band_const_list': [
                        (aligned_habitat_raster_paths[habitat], 1),
                        (max_pairwise_risk, 'raw'),
                        (pairwise_risk_path, 1)],
                    'local_op': _reclassify_score,
                    'target_raster_path': reclassified_pairwise_risk_path,
                    'datatype_target': _TARGET_GDAL_TYPE_BYTE,
                    'nodata_target': _TARGET_NODATA_BYTE
                },
                task_name=f'Reclassify risk for {habitat}/{stressor}',
                target_path_list=[reclassified_pairwise_risk_path],
                dependent_task_list=[pairwise_risk_task]
            ))
            summary_data['classification_path'] = (
                reclassified_pairwise_risk_path)
            pairwise_summary_data.append(summary_data)
        # Sum the pairwise risk scores to get cumulative risk to the habitat.
        cumulative_risk_path = os.path.join(
            output_dir, f'TOTAL_RISK_{habitat}{suffix}.tif')
        cumulative_risk_to_habitat_paths.append(cumulative_risk_path)
        cumulative_risk_task = graph.add_task(
            _sum_rasters,
            kwargs={
                'raster_path_list': pairwise_risk_paths,
                'target_nodata': _TARGET_NODATA_FLOAT32,
                'target_datatype': _TARGET_GDAL_TYPE_FLOAT32,
                'target_result_path': cumulative_risk_path,
                'normalize': False,
            },
            task_name=f'Cumulative risk to {habitat}',
            target_path_list=[cumulative_risk_path],
            dependent_task_list=pairwise_risk_tasks
        )
        cumulative_risk_to_habitat_tasks.append(cumulative_risk_task)
        reclassified_cumulative_risk_path = os.path.join(
            intermediate_dir, f'reclass_total_risk_{habitat}{suffix}.tif')
        reclassed_habitat_risk_paths[
            habitat] = reclassified_cumulative_risk_path
        reclassified_cumulative_risk_task = graph.add_task(
            pygeoprocessing.raster_calculator,
            kwargs={
                'base_raster_path_band_const_list': [
                    (aligned_habitat_raster_paths[habitat], 1),
                    (max_pairwise_risk * max_n_stressors, 'raw'),
                    (cumulative_risk_path, 1)],
                'local_op': _reclassify_score,
                'target_raster_path': reclassified_cumulative_risk_path,
                'datatype_target': _TARGET_GDAL_TYPE_BYTE,
                'nodata_target': _TARGET_NODATA_BYTE,
            },
            # NOTE: this task is per-habitat (cumulative across stressors);
            # the old name interpolated a stale ``stressor`` loop variable.
            task_name=f'Reclassify cumulative risk for {habitat}',
            target_path_list=[reclassified_cumulative_risk_path],
            dependent_task_list=[cumulative_risk_task]
        )
        reclassed_habitat_risk_tasks.append(reclassified_cumulative_risk_task)
        max_risk_classification_path = os.path.join(
            output_dir, f'RECLASS_RISK_{habitat}{suffix}.tif')
        reclassified_rasters.append(max_risk_classification_path)
        _ = graph.add_task(
            pygeoprocessing.raster_calculator,
            kwargs={
                'base_raster_path_band_const_list': [
                    (aligned_habitat_raster_paths[habitat], 1),
                    (reclassified_cumulative_risk_path, 1),
                ] + [(path, 1) for path in reclassified_pairwise_risk_paths],
                'local_op': _maximum_reclassified_score,
                'target_raster_path': max_risk_classification_path,
                'datatype_target': _TARGET_GDAL_TYPE_BYTE,
                'nodata_target': _TARGET_NODATA_BYTE,
            },
            task_name=f'Maximum reclassification for {habitat}',
            target_path_list=[max_risk_classification_path],
            dependent_task_list=[
                reclassified_cumulative_risk_task,
                *reclassified_pairwise_risk_tasks,
            ]
        )
        all_pairwise_risk_tasks.extend(pairwise_risk_tasks)
        all_pairwise_risk_tasks.extend(reclassified_pairwise_risk_tasks)
    # total risk to the ecosystem is the sum of all cumulative risk rasters
    # across all habitats.
    # InVEST 3.3.3 has this as the cumulative risk (a straight sum).
    # InVEST 3.10.2 has this as the mean risk per habitat.
    # This is currently implemented as mean risk per habitat.
    ecosystem_risk_path = os.path.join(
        output_dir, f'TOTAL_RISK_Ecosystem{suffix}.tif')
    ecosystem_risk_task = graph.add_task(
        _sum_rasters,
        kwargs={
            'raster_path_list': cumulative_risk_to_habitat_paths,
            'target_nodata': _TARGET_NODATA_FLOAT32,
            'target_datatype': _TARGET_GDAL_TYPE_FLOAT32,
            'target_result_path': ecosystem_risk_path,
            'normalize': True,
        },
        task_name='Cumulative risk to ecosystem.',
        target_path_list=[ecosystem_risk_path],
        dependent_task_list=[
            all_habitats_mask_task,
            *cumulative_risk_to_habitat_tasks,
        ]
    )
    # This represents the risk across all stressors.
    # I'm guessing about the risk break to use here, but since the
    # `ecosystem_risk_path` here is the sum across habitats, it makes sense to
    # use max_pairwise_risk * n_habitats.
    reclassified_ecosystem_risk_path = os.path.join(
        output_dir, f'RECLASS_RISK_Ecosystem{suffix}.tif')
    _ = graph.add_task(
        pygeoprocessing.raster_calculator,
        kwargs={
            'base_raster_path_band_const_list': [
                (all_habitats_mask_path, 1),
                (max_pairwise_risk * len(habitats_info), 'raw'),
                (ecosystem_risk_path, 1)],
            'local_op': _reclassify_score,
            'target_raster_path': reclassified_ecosystem_risk_path,
            'datatype_target': _TARGET_GDAL_TYPE_BYTE,
            'nodata_target': _TARGET_NODATA_BYTE,
        },
        task_name='Reclassify risk to the Ecosystem',
        # BUGFIX: this task writes reclassified_ecosystem_risk_path; the
        # previous target_path_list referenced a stale per-habitat path,
        # breaking taskgraph's avoided-recomputation bookkeeping.
        target_path_list=[reclassified_ecosystem_risk_path],
        dependent_task_list=[all_habitats_mask_task, ecosystem_risk_task]
    )
    # Recovery attributes are calculated with the same numerical method as
    # other criteria, but are unweighted by distance to a stressor.
    for habitat in habitats_info:
        resilience_criteria_df = criteria_df[
            (criteria_df['habitat'] == habitat) &
            (criteria_df['stressor'] == 'RESILIENCE')]
        criteria_attributes_list = []
        for attrs in resilience_criteria_df[
                ['rating', 'weight', 'dq']].to_dict(orient='records'):
            try:
                float(attrs['rating'])
            except ValueError:
                # When attrs['rating'] is not a number, we should assume it's a
                # spatial file.
                attrs['rating'] = user_files_to_aligned_raster_paths[
                    attrs['rating']]
            criteria_attributes_list.append(attrs)
        recovery_score_path = os.path.join(
            intermediate_dir, f'RECOVERY_{habitat}{suffix}.tif')
        _ = graph.add_task(
            _calc_criteria,
            kwargs={
                'attributes_list': criteria_attributes_list,
                'habitat_mask_raster_path':
                    aligned_habitat_raster_paths[habitat],
                'target_criterion_path': recovery_score_path,
                'decayed_edt_raster_path': None,  # not a stressor so no EDT
            },
            task_name=f'Calculate recovery score for {habitat}',
            target_path_list=[recovery_score_path],
            dependent_task_list=[all_habitats_mask_task]
        )
    simplified_aoi_path = os.path.join(
        intermediate_dir, 'simplified_aoi.gpkg')
    aoi_simplify_task = graph.add_task(
        func=_simplify,
        kwargs={
            'source_vector_path': args['aoi_vector_path'],
            'tolerance': resolution / 2,  # by the nyquist theorem
            'target_vector_path': simplified_aoi_path,
            'preserve_columns': ['name'],
        },
        task_name='Simplify AOI',
        target_path_list=[simplified_aoi_path],
        dependent_task_list=[]
    )
    summary_csv_path = os.path.join(
        output_dir, f'SUMMARY_STATISTICS{suffix}.csv')
    _ = graph.add_task(
        func=_create_summary_statistics_file,
        kwargs={
            'subregions_vector_path': simplified_aoi_path,
            'pairwise_raster_dicts': pairwise_summary_data,
            'per_habitat_classification_dict': reclassed_habitat_risk_paths,
            'target_summary_csv_path': summary_csv_path,
        },
        task_name='Create summary statistics table',
        target_path_list=[summary_csv_path],
        dependent_task_list=[
            aoi_simplify_task,
            *all_pairwise_risk_tasks,
            *reclassed_habitat_risk_tasks,
        ]
    )
    graph.join()
    if not args.get('visualize_outputs', False):
        LOGGER.info('HRA complete!')
        graph.close()
        return
    # Although the generation of visualization outputs could have more precise
    # task dependencies, the effort involved in tracking them precisely doesn't
    # feel worth it when the visualization steps are such a standalone
    # component and it isn't clear if we'll end up keeping this in HRA or
    # refactoring it out (should we end up doing more such visualizations).
    LOGGER.info('Generating visualization outputs')
    visualization_dir = os.path.join(args['workspace_dir'],
                                     'visualization_outputs')
    utils.make_directories([visualization_dir])
    shutil.copy(  # copy in the summary table.
        summary_csv_path,
        os.path.join(visualization_dir, os.path.basename(summary_csv_path)))
    # For each raster in reclassified risk rasters + Reclass ecosystem risk:
    #   convert to geojson with fieldname "Risk Score"
    reclassified_rasters.append(reclassified_ecosystem_risk_path)
    for raster_paths, fieldname, geojson_prefix in [
            (reclassified_rasters, 'Risk Score', 'RECLASS_RISK'),
            (aligned_stressor_raster_paths.values(), 'Stressor', 'STRESSOR')]:
        for source_raster_path in raster_paths:
            basename = os.path.splitext(
                os.path.basename(source_raster_path))[0]
            # clean up the filename to what the viz webapp expects.
            for pattern in (f'^{geojson_prefix}_',
                            '^aligned_'):
                basename = re.sub(pattern, '', basename)
            polygonize_mask_raster_path = os.path.join(
                intermediate_dir, f'polygonize_mask_{basename}.tif')
            rewrite_for_polygonize_task = graph.add_task(
                func=_create_mask_for_polygonization,
                kwargs={
                    'source_raster_path': source_raster_path,
                    'target_raster_path': polygonize_mask_raster_path,
                },
                task_name=f'Rewrite {basename} for polygonization',
                target_path_list=[polygonize_mask_raster_path],
                dependent_task_list=[]
            )
            polygonized_gpkg = os.path.join(
                intermediate_dir, f'polygonized_{basename}.gpkg')
            polygonize_task = graph.add_task(
                func=_polygonize,
                kwargs={
                    'source_raster_path': source_raster_path,
                    'mask_raster_path': polygonize_mask_raster_path,
                    'target_polygonized_vector': polygonized_gpkg,
                    'field_name': fieldname,
                    'layer_name': f'{geojson_prefix}_{basename}',
                },
                task_name=f'Polygonizing {basename}',
                target_path_list=[polygonized_gpkg],
                dependent_task_list=[rewrite_for_polygonize_task]
            )
            target_geojson_path = os.path.join(
                visualization_dir,
                f'{geojson_prefix}_{basename}.geojson')
            _ = graph.add_task(
                pygeoprocessing.reproject_vector,
                kwargs={
                    'base_vector_path': polygonized_gpkg,
                    'target_projection_wkt': _WGS84_WKT,
                    'target_path': target_geojson_path,
                    'driver_name': 'GeoJSON',
                },
                # NOTE: the old name interpolated a stale ``name`` variable
                # left over from the earlier alignment loop.
                task_name=f'Reproject {basename} to WGS84',
                target_path_list=[target_geojson_path],
                dependent_task_list=[polygonize_task]
            )
    LOGGER.info(
        'HRA model completed. Please visit http://marineapps.'
        'naturalcapitalproject.org/ to visualize your outputs.')
    graph.close()
    graph.join()
    LOGGER.info('HRA complete!')
def _create_mask_for_polygonization(source_raster_path, target_raster_path):
    """Create a mask of non-nodata pixels.

    This mask raster is intended to be used as a mask input for GDAL's
    polygonization function.

    Args:
        source_raster_path (string): The source raster from which the mask
            raster will be created. This raster is assumed to be an integer
            raster.
        target_raster_path (string): The path to where the target raster should
            be written.

    Returns:
        ``None``
    """
    nodata = pygeoprocessing.get_raster_info(source_raster_path)['nodata'][0]

    def _rewrite(raster_values):
        """Convert any non-nodata values to 1, all other values to 0.

        Args:
            raster_values (numpy.array): Integer pixel values from the source
                raster.

        Returns:
            out_array (numpy.array): An unsigned byte mask with pixel values of
                0 (on nodata pixels) or 1 (on non-nodata pixels).
        """
        if nodata is None:
            # The raster has no defined nodata value, so every pixel is
            # valid.  Avoid comparing against None: elementwise
            # ``array != None`` relies on numpy behavior that has warned or
            # changed across versions.
            return numpy.ones(raster_values.shape, dtype=numpy.uint8)
        return (raster_values != nodata).astype(numpy.uint8)

    pygeoprocessing.raster_calculator(
        [(source_raster_path, 1)], _rewrite, target_raster_path,
        gdal.GDT_Byte, 0)
def _polygonize(source_raster_path, mask_raster_path,
target_polygonized_vector, field_name, layer_name):
"""Polygonize a raster.
Args:
source_raster_path (string): The source raster to polygonize. This
raster must be an integer raster.
mask_raster_path (string): The path to a mask raster, where pixel
values are 1 where polygons should be created, 0 otherwise.
target_polygonized_vector (string): The path to a vector into which the
new polygons will be inserted. A new GeoPackage will be created at
this location.
field_name (string): The name of a field into which the polygonized
region's numerical value should be recorded. A new field with this
name will be created in ``target_polygonized_vector``, and with an
Integer field type.
layer_name (string): The layer name to use in the target vector.
Returns:
``None``
"""
LOGGER.info(f'Polygonizing {source_raster_path} --> '
f'{target_polygonized_vector} using mask {mask_raster_path}')
raster = gdal.OpenEx(source_raster_path, gdal.OF_RASTER)
band = raster.GetRasterBand(1)
mask_raster = gdal.OpenEx(mask_raster_path, gdal.OF_RASTER)
mask_band = mask_raster.GetRasterBand(1)
raster_srs = osr.SpatialReference()
raster_srs.ImportFromWkt(raster.GetProjectionRef())
driver = gdal.GetDriverByName('GPKG')
vector = driver.Create(
target_polygonized_vector, 0, 0, 0, gdal.GDT_Unknown)
layer = vector.CreateLayer(layer_name, raster_srs, ogr.wkbPolygon)
# Create an integer field that contains values from the raster
field_defn = ogr.FieldDefn(str(field_name), ogr.OFTInteger)
field_defn.SetWidth(3)
field_defn.SetPrecision(0)