In [55]:
import azure.batch
import azure.storage.blob
# Report the SDK versions in use so that results can be tied to them.
print(
    f"working on azure batch {azure.batch.__version__} "
    f"and azure storage blob {azure.storage.blob.__version__}"
)

working on azure batch 12.0.0 and azure storage blob 12.9.0


In [56]:
%load_ext autoreload
%autoreload 2

import datetime
import os
import sys
import json

import azure.storage.blob as azureblob
import azure.batch._batch_service_client as batch
import azure.batch.batch_auth as batch_auth
import azure.batch.models as batchmodels

# Make the project's local ``src`` folder importable from this notebook.
module_path = os.path.abspath('../src')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
from azbatch import main

from dotenv import load_dotenv
load_dotenv()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


True

In [57]:
# Taken once (second resolution) so that every id derived from it agrees.
start_time = datetime.datetime.now().replace(microsecond=0)

# Secrets and account settings are not hardcoded: config key ``X`` is read
# from the environment variable ``_X`` (``None`` when the variable is unset).
_ENV_BACKED_KEYS = (
    "BATCH_ACCOUNT_NAME",
    "BATCH_ACCOUNT_KEY",
    "BATCH_ACCOUNT_URL",
    "CR_PASSWORD",  # container registry
    "ADA_STORAGE_ACCOUNT_NAME",
    "ADA_STORAGE_ACCOUNT_KEY",
    "510_STORAGE_ACCOUNT_NAME",
    "510_STORAGE_ACCOUNT_KEY",
    "510_DLS_CONNECTION_STRING",
    "XCCTEST_CONNECTION_STRING",
)

config = dict(
    POOL_ID="ADA_pool_NC6",  # alternative: f"job_{start_time.strftime('%Y%m%d%H%M%S')}"
    JOB_ID="job_" + start_time.strftime('%Y%m%d%H%M'),
    POOL_NODE_COUNT=52,  # max 312 cores of NCpromo --> 26 x NC12 or 52 x NC6
    POOL_VM_SIZE="Standard_NC6_Promo",
    TASK_SLOTS_PER_NODE=1,  # keep <= cores per machine
)
config.update((key, os.environ.get(f"_{key}")) for key in _ENV_BACKED_KEYS)

print("working on batch account {}".format(config["BATCH_ACCOUNT_NAME"]))

working on batch account 510adagpu


### Connect to batch & storage accounts

In [58]:
# Batch service client, authenticated with the account's shared key.
_batch_credentials = batch_auth.SharedKeyCredentials(
    account_name=config["BATCH_ACCOUNT_NAME"],
    key=config["BATCH_ACCOUNT_KEY"],
)
batch_client = batch.BatchServiceClient(
    credentials=_batch_credentials,
    batch_url=config["BATCH_ACCOUNT_URL"],
)

# Blob service clients, one per storage account, built from connection strings.
_from_connection_string = azureblob.BlobServiceClient.from_connection_string
blob_client_xcctest = _from_connection_string(config["XCCTEST_CONNECTION_STRING"])
blob_client_510 = _from_connection_string(config["510_DLS_CONNECTION_STRING"])

### Create pool & job

In [59]:
# Reuse the pool of compute nodes when it is already there; otherwise create
# it. The tasks will execute on the nodes of this pool.
if batch_client.pool.exists(config['POOL_ID']):
    print(f"Pool {config['POOL_ID']} already exists.")
else:
    pool = main.create_pool(batch_client, config)
    print(f"Created pool {config['POOL_ID']}.")

Creating pool [ADA_pool_NC6]...
Created pool ADA_pool_NC6.


In [83]:
# Create the job that will run the tasks, unless a job with this id exists.
existing_job_ids = [job.id for job in batch_client.job.list()]
if config['JOB_ID'] in existing_job_ids:
    print(f"Job {config['JOB_ID']} already exists.")
else:
    main.create_job(batch_client, config)
    print(f"Created job {config['JOB_ID']}.")

Creating job [job_202203221521]...
Created job job_202203221521.


## Adding tasks

### Container & storage settings

In [84]:
# Settings shared by every task.

# Container the tasks run in.
# ipc=host needed for pytorch to share memory
# https://discuss.pytorch.org/t/unable-to-write-to-file-torch-18692-1954506624/9990
task_container_settings = batchmodels.TaskContainerSettings(
    image_name='ada510.azurecr.io/ada:latest',
    container_run_options='--rm --ipc=host',
)

# Admin auto-user: needed to create folders inside the running container.
_pool_admin = batchmodels.AutoUserSpecification(scope='pool', elevation_level='admin')
admin_identity = batchmodels.UserIdentity(auto_user=_pool_admin)

# Keyword arguments splatted into every TaskAddParameter.
task_common_args = dict(
    container_settings=task_container_settings,
    user_identity=admin_identity,
)

# Output files are uploaded only when the task succeeded.
upload_opts = batchmodels.OutputFileUploadOptions(
    upload_condition=batchmodels.OutputFileUploadCondition.task_success,
)

In [85]:
# Commonly used SAS tokens & urls.
_ada_account = (config["ADA_STORAGE_ACCOUNT_NAME"], config["ADA_STORAGE_ACCOUNT_KEY"])
_510_account = (config["510_STORAGE_ACCOUNT_NAME"], config["510_STORAGE_ACCOUNT_KEY"])

# "adafiles" container: separate tokens for reading/listing and for writing.
adafiles_read_token = main.create_sas_token(*_ada_account, "adafiles", ["read", "list"])
adafiles_write_token = main.create_sas_token(*_ada_account, "adafiles", ["write"])
_510_read_token = main.create_sas_token(*_510_account, "automated-damage-assessment", ["read", "list"])

# Container url (built with the write token) used as destination of task outputs.
adafiles_output_url = main.create_resource_url("xcctest", "adafiles", adafiles_write_token)

### Set data directory and date

In [86]:
# Folder holding the images and the tile index (relative to container).
data_dir = 'tropical-storm-ana'
# Date passed to the index creation, formatted YYYY-MM-DD.
date = '2022-01-22'
# Name of the tile index blob inside `data_dir`.
tile_index_filename = 'tile_index_maxar.geojson'

### Download images and create index

In [11]:
batch_name = datetime.datetime.now().strftime('%Y%m%d%H%M%S')  # necessary to match dependencies
tasks = []

# Both task definitions below are disabled and kept only as templates, so this
# cell currently adds nothing to `tasks`. The list literal is closed properly:
# previously the closing bracket was commented out as well, which made the
# whole cell a SyntaxError on a fresh run.
tasks += [
    # # download images ==> TO BE DONE MANUALLY
    # batchmodels.TaskAddParameter(
    #     id=f"download-images",
    #     depends_on=None,
    #     command_line=f'/bin/bash -c "load-images '\
    #                  f'--disaster {data_dir} '\
    #                  f'--maxthreads 4 '\
    #                  f'--dest {data_dir}"',
    #     output_files=[
    #         batchmodels.OutputFile(
    #             file_pattern=f"{data_dir}/**",
    #             destination=batchmodels.OutputFileDestination(
    #                 container=batchmodels.OutputFileBlobContainerDestination(
    #                     container_url=adafiles_output_url,
    #                     path=f"{data_dir}",
    #                 )
    #             ),
    #             upload_options=upload_opts,
    #         )
    #     ],
    #     **task_common_args,
    # ),

    # # create index
    # # (a space was added after `tile_index.geojson`; without it the two
    # # options were glued together as `tile_index.geojson--exte`)
    # batchmodels.TaskAddParameter(
    #     id=f"create-index",
    #     depends_on=None,
    #     command_line=f'/bin/bash -c "create-index '\
    #                  f'--data {data_dir} '\
    #                  f'--date {date} '\
    #                  f'--zoom 13 '\
    #                  f'--dest tile_index.geojson '\
    #                  f'--exte extents"',
    #     resource_files=[
    #         batchmodels.ResourceFile(
    #             storage_container_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token),
    #             blob_prefix=f"{data_dir}"
    #         )
    #     ],
    #     output_files=[
    #         batchmodels.OutputFile(
    #             file_pattern=f"tile_index.geojson",
    #             destination=batchmodels.OutputFileDestination(
    #                 container=batchmodels.OutputFileBlobContainerDestination(
    #                     container_url=adafiles_output_url,
    #                     path=f"{data_dir}/tile_index.geojson",
    #                 )
    #             ),
    #             upload_options=upload_opts,
    #         )
    #     ],
    #     **task_common_args,
    # ),
]


# # Add tasks to job
# res = batch_client.task.add_collection(config['JOB_ID'], tasks)

### Inspect index

In [87]:
# Fetch the tile index from the "adafiles" container into the working directory.
index_client = blob_client_xcctest.get_blob_client(container="adafiles", blob=f"{data_dir}/{tile_index_filename}")
try:
    # Download completely BEFORE opening the local file: opening it with "wb"
    # first would truncate an existing local copy even when the download fails.
    index_bytes = index_client.download_blob().readall()
except Exception as err:
    # Best effort, as before: report the problem and fall back to whatever
    # local copy exists. `Exception` (not a bare except) keeps Ctrl-C working,
    # and the message now shows the actual cause instead of assuming a
    # missing blob.
    print(f"No blob downloaded ({type(err).__name__}: {err}); using local copy of {tile_index_filename} if present.")
else:
    with open(tile_index_filename, "wb") as download_file:
        download_file.write(index_bytes)

with open(tile_index_filename) as file:
    index = json.load(file)

# remove duplicates: keep the first feature of every tile id, in original order
# (assumes tile ids are hashable, e.g. strings -- they are used as path parts later)
features = index.pop('features')
seen_tiles = set()
index['features'] = []
for feature in features:
    tile_id = feature['properties']['tile']
    if tile_id not in seen_tiles:
        seen_tiles.add(tile_id)
        index['features'].append(feature)

print(f"index length: {len(index['features'])}")

index length: 75


In [88]:
# Timestamp shared by all task ids of this submission -- necessary to match dependencies.
batch_name = f"{datetime.datetime.now():%Y%m%d%H%M%S}"
# One tile id per index feature, in index order.
unique_ids = [entry['properties']['tile'] for entry in index['features']]
# Start from an empty task list.
tasks = []
print("unique ids {}".format(len(unique_ids)))

unique ids 75


### Building detection tasks

In [89]:
def tasks_abd(batch_name, num_id, id_, dependencies_start, data="raw/pre-event/merged.tif", dest="pre-event"):
    """Build the chain of building-detection tasks for one tile and one event.

    The chain is cover -> tile -> predict -> vectorize -> filter-buildings.
    Every task depends on the previous one and hands its result over through
    the "adafiles" container, below ``{id_}/{dest}``.

    Relies on the notebook-level names ``adafiles_read_token``,
    ``adafiles_output_url``, ``upload_opts`` and ``task_common_args``.

    Args:
        batch_name: string embedded in every task id.
        num_id: tile number embedded in every task id.
        id_: blob path prefix of the tile inside the container.
        dependencies_start: passed unchanged as ``depends_on`` of the first
            (cover) task.
        data: blob path of the input raster, relative to ``id_``.
        dest: sub-folder of ``id_`` that receives the outputs; also the
            suffix of every task id.

    Returns:
        List with the five ``batchmodels.TaskAddParameter`` objects, in
        execution order.
    """
    id_suffix = f"{batch_name}-{num_id}-{dest}"

    def after(step):
        # Dependency on the task of this chain named `step`.
        return batchmodels.TaskDependencies(task_ids=[f"{step}-{id_suffix}"])

    def read_url(*path, **path_kw):
        # Read url of the container, or of one blob in it when a path is given.
        return main.create_resource_url("xcctest", "adafiles", adafiles_read_token, *path, **path_kw)

    def single_file(url, local_name):
        # Download one blob into the task working directory as `local_name`.
        return batchmodels.ResourceFile(http_url=url, file_path=local_name)

    def blob_folder(prefix):
        # Download every blob whose name starts with `prefix`.
        return batchmodels.ResourceFile(storage_container_url=read_url(), blob_prefix=prefix)

    def upload(pattern, blob_path):
        # Upload the files matching `pattern` to `blob_path` in the output container.
        return batchmodels.OutputFile(
            file_pattern=pattern,
            destination=batchmodels.OutputFileDestination(
                container=batchmodels.OutputFileBlobContainerDestination(
                    container_url=adafiles_output_url,
                    path=blob_path,
                )
            ),
            upload_options=upload_opts,
        )

    # abd cover: create cover file with metadata of mini-tiles
    cover_task = batchmodels.TaskAddParameter(
        id=f"cover-{id_suffix}",
        depends_on=dependencies_start,
        command_line='/bin/bash -c "abd cover --raster merged.tif --zoom 17 --out cover.csv"',
        resource_files=[
            single_file(read_url(container_path=f"{id_}/{data}"), 'merged.tif'),
        ],
        output_files=[upload("cover.csv", f"{id_}/{dest}/cover.csv")],
        **task_common_args,
    )

    # abd tile: split tiles in mini-tiles
    tile_task = batchmodels.TaskAddParameter(
        id=f"tile-{id_suffix}",
        depends_on=after("cover"),
        command_line='/bin/bash -c "abd tile --raster merged.tif --zoom 17 --cover cover.csv '
                     '--config config.toml --out images --format tif --keep_borders"',
        resource_files=[
            single_file(read_url(container_path=f"{id_}/{data}"), 'merged.tif'),
            single_file(read_url("config.toml"), 'config.toml'),
            single_file(read_url(container_path=f"{id_}/{dest}/cover.csv"), 'cover.csv'),
        ],
        output_files=[upload("images/**/*.tif", f"{id_}/{dest}/images")],
        **task_common_args,
    )

    # abd predict: predict buildings on mini-tiles -- only runnable on a GPU instance !!!
    predict_task = batchmodels.TaskAddParameter(
        id=f"predict-{id_suffix}",
        depends_on=after("tile"),
        command_line=f'/bin/bash -c "abd predict --config config.toml --cover cover.csv --dataset {id_}/{dest} '
                     '--checkpoint neat-fullxview-epoch75.pth --out predictions --metatiles --keep_borders"',
        resource_files=[
            single_file(read_url("neat-fullxview-epoch75.pth"), 'neat-fullxview-epoch75.pth'),
            blob_folder(f"{id_}/{dest}/"),
            single_file(read_url("config.toml"), 'config.toml'),
            single_file(read_url(container_path=f"{id_}/{dest}/cover.csv"), 'cover.csv'),
        ],
        output_files=[upload("predictions/**/*.png", f"{id_}/{dest}/predictions")],
        **task_common_args,
    )

    # abd vectorize: convert pixel-level predictions into polygons (.geojson)
    vectorize_task = batchmodels.TaskAddParameter(
        id=f"vectorize-{id_suffix}",
        depends_on=after("predict"),
        command_line=f'/bin/bash -c "abd vectorize --config config.toml --masks {id_}/{dest}/predictions '
                     '--out buildings.geojson --type Building"',
        resource_files=[
            blob_folder(f"{id_}/{dest}/predictions/"),
            single_file(read_url("config.toml"), 'config.toml'),
        ],
        output_files=[upload("buildings.geojson", f"{id_}/{dest}/buildings.geojson")],
        **task_common_args,
    )

    # filter buildings
    filter_task = batchmodels.TaskAddParameter(
        id=f"filter-buildings-{id_suffix}",
        depends_on=after("vectorize"),
        command_line='/bin/bash -c "filter-buildings --data buildings.geojson '
                     '--dest buildings-clean.geojson --waterbodies hydropolys.gpkg"',
        resource_files=[
            single_file(read_url(container_path=f"{id_}/{dest}/buildings.geojson"), 'buildings.geojson'),
            single_file(read_url("hydropolys.gpkg"), 'hydropolys.gpkg'),
        ],
        output_files=[upload("buildings-clean.geojson", f"{id_}/{dest}/buildings-clean.geojson")],
        **task_common_args,
    )

    return [cover_task, tile_task, predict_task, vectorize_task, filter_task]
    
    

### Prepare workdir, building detection, filter and align

In [90]:
# add tasks separately for each tile
for num_id, id_ in enumerate(unique_ids): #  39, 40, 41

    # `unique_ids` was built from index['features'] in order, so position
    # `num_id` is the feature that describes tile `id_`.
    tile_properties = index['features'][num_id]['properties']
    images_to_process = list(tile_properties['pre-event'].values()) + list(tile_properties['post-event'].values())
    # every image of the tile is downloaded to the same relative path it has in the container
    images_to_process_resource_files = [
        batchmodels.ResourceFile(
            http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token, container_path=f"{data_dir}/{image}"),
            file_path=f'{data_dir}/{image}'
        )
        for image in images_to_process
    ]

    tasks += [

        # set up working directory and create raster mosaic (--> merged.tif)
        batchmodels.TaskAddParameter(
            id=f"setup-{batch_name}-{num_id}",
            depends_on=None,
            command_line=f'/bin/bash -c "setup-wd --data {data_dir} --index tile_index.geojson --id {id_} --dest raw --maxar-tiling"',
            resource_files=[
                batchmodels.ResourceFile(
                    http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                      container_path=f"{data_dir}/{tile_index_filename}"),
                    file_path='tile_index.geojson'
                )
            ] + images_to_process_resource_files,
            output_files=[
                batchmodels.OutputFile(
                    file_pattern="raw/**/*.tif",
                    destination=batchmodels.OutputFileDestination(
                        container=batchmodels.OutputFileBlobContainerDestination(
                            container_url=adafiles_output_url,
                            path=f"temp/{id_}/raw",
                        )
                    ),
                    upload_options=upload_opts,
                )
            ],
            **task_common_args,
        )
    ]

    # adding prefix to id_ path: from here on `id_` is the tile's folder in the container
    id_ = f"temp/{id_}"

    # detect buildings in pre-disaster raster
    tasks += tasks_abd(batch_name,
                       num_id,
                       id_,
                       dependencies_start=batchmodels.TaskDependencies(task_ids=[f"setup-{batch_name}-{num_id}"]),
                       data="raw/pre-event/merged.tif",
                       dest="pre-event")

    # check if buildings already exist, if yes substitute
    # NOTE(review): the storage connection string is embedded in the task
    # command line via --secret; command lines are stored with the task, so
    # consider handing the secret over through task environment settings.
    tasks += [
        batchmodels.TaskAddParameter(
            id=f"check-alternative-buildings-{batch_name}-{num_id}",
            depends_on=batchmodels.TaskDependencies(task_ids=[f"filter-buildings-{batch_name}-{num_id}-pre-event"]),
            command_line=f'/bin/bash -c "check-alternative-buildings '\
                         f'--ext extents.geojson '\
                         f'--builds google-africa-buildings-split '\
                         f'--container adafiles '\
                         f'--raster pre-merged.tif '\
                         f'--refbuilds pre-buildings.geojson '\
                         f'--dest new-pre-buildings.geojson ' \
                         f'--secret {config["XCCTEST_CONNECTION_STRING"]}"',
            resource_files=[
                batchmodels.ResourceFile(
                    http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                      container_path=f"google-africa-buildings-split/extents.geojson"),
                    file_path='extents.geojson'
                ),
                batchmodels.ResourceFile(
                    http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                      container_path=f"{id_}/pre-event/buildings-clean.geojson"),
                    file_path='pre-buildings.geojson'
                ),
                batchmodels.ResourceFile(
                    http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                      container_path=f"{id_}/raw/pre-event/merged.tif"),
                    file_path='pre-merged.tif'
                )
            ],
            output_files=[
                # overwrites the detected buildings with the substituted ones
                batchmodels.OutputFile(
                    file_pattern="new-pre-buildings.geojson",
                    destination=batchmodels.OutputFileDestination(
                        container=batchmodels.OutputFileBlobContainerDestination(
                            container_url=adafiles_output_url,
                            path=f"{id_}/pre-event/buildings-clean.geojson",
                        )
                    ),
                    upload_options=upload_opts,
                )
            ],
            **task_common_args,
        )
    ]

    # detect buildings in post-disaster raster
    tasks += tasks_abd(batch_name,
                       num_id, id_,
                       dependencies_start=batchmodels.TaskDependencies(task_ids=[f"check-alternative-buildings-{batch_name}-{num_id}"]),
                       data="raw/post-event/merged.tif",
                       dest="post-event")

    tasks += [
        # align post-disaster raster
        batchmodels.TaskAddParameter(
            id=f"align-raster-{batch_name}-{num_id}",
            depends_on=batchmodels.TaskDependencies(task_ids=[f"filter-buildings-{batch_name}-{num_id}-post-event"]),
            # The aligned raster is written to post-merged-aligned.tif, the name
            # the second output file below looks for. It used to be written to
            # post-merged.tif, so that upload pattern never matched and the
            # aligned raster was never stored.
            # NOTE(review): assumes align-raster writes exactly the path given
            # to --alignedraster -- confirm against the tool.
            command_line=f'/bin/bash -c "align-raster '\
                         f'--targetbuild post-buildings.geojson '\
                         f'--referencebuild pre-buildings.geojson '\
                         f'--alignedbuild post-buildings-aligned.geojson '\
                         f'--targetraster post-merged.tif '\
                         f'--alignedraster post-merged-aligned.tif"',
            resource_files=[
                batchmodels.ResourceFile(
                    http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                      container_path=f"{id_}/post-event/buildings-clean.geojson"),
                    file_path='post-buildings.geojson'
                ),
                batchmodels.ResourceFile(
                    http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                      container_path=f"{id_}/pre-event/buildings-clean.geojson"),
                    file_path='pre-buildings.geojson'
                ),
                batchmodels.ResourceFile(
                    http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                      container_path=f"{id_}/raw/post-event/merged.tif"),
                    file_path='post-merged.tif'
                )
            ],
            output_files=[
                batchmodels.OutputFile(
                    file_pattern="post-buildings-aligned.geojson",
                    destination=batchmodels.OutputFileDestination(
                        container=batchmodels.OutputFileBlobContainerDestination(
                            container_url=adafiles_output_url,
                            path=f"{id_}/post-event/buildings-clean-aligned.geojson",
                        )
                    ),
                    upload_options=upload_opts,
                ),
                # the aligned raster replaces the post-event mosaic in the container
                batchmodels.OutputFile(
                    file_pattern="post-merged-aligned.tif",
                    destination=batchmodels.OutputFileDestination(
                        container=batchmodels.OutputFileBlobContainerDestination(
                            container_url=adafiles_output_url,
                            path=f"{id_}/raw/post-event/merged.tif",
                        )
                    ),
                    upload_options=upload_opts,
                ),
            ],
            **task_common_args,
        )
    ]

### Damage classification

In [91]:
# Model variant handed to caladrius through --model-type; it is also part of
# the predictions file name that the final-layer task downloads.
model_type = "attentive"

In [92]:
def _blob_upload(file_pattern, blob_path):
    """Describe the upload of files matching `file_pattern` to `blob_path` in the output container."""
    return batchmodels.OutputFile(
        file_pattern=file_pattern,
        destination=batchmodels.OutputFileDestination(
            container=batchmodels.OutputFileBlobContainerDestination(
                container_url=adafiles_output_url,
                path=blob_path,
            )
        ),
        upload_options=upload_opts,
    )


# Three chained tasks per tile: prepare-data -> run-caladrius -> final-layer.
for num_id, id_ in enumerate(unique_ids):

    # adding prefix to id_ path
    id_ = f"temp/{id_}"

    prepare_task_id = f"prepare-data-{batch_name}-{num_id}"
    caladrius_task_id = f"run-caladrius-{batch_name}-{num_id}"
    final_task_id = f"final-layer-{batch_name}-{num_id}"

    # blob holding the buildings detected on the pre-event raster
    pre_buildings_blob = f"{id_}/pre-event/buildings-clean.geojson"
    # blob where caladrius leaves the damage predictions
    predictions_blob = (
        f"{id_}/caladrius/run-input_size_32-learning_rate_0.001-batch_size_2/predictions/"
        f"run-split_inference-epoch_001-model_{model_type}-predictions.txt"
    )

    caladrius_steps = [
        'source ~/.bashrc && source activate cal &&',
        'CUDA_VISIBLE_DEVICES="0" python /caladrius/caladrius/run.py --run-name run',
        f'--data-path {id_}/caladrius',
        f'--model-type {model_type}',
        '--model-path model_weights.pkl',
        '--batch-size 2',
        '--classification-loss-type f1',
        '--checkpoint-path caladrius --output-type classification --inference',
    ]

    # prepare for caladrius
    prepare_task = batchmodels.TaskAddParameter(
        id=prepare_task_id,
        depends_on=batchmodels.TaskDependencies(task_ids=[f"align-raster-{batch_name}-{num_id}"]),
        command_line=f'/bin/bash -c "prepare-data --data {id_}/raw --datapre merged --datapost merged '
                     '--buildings buildings-clean.geojson --dest caladrius"',
        resource_files=[
            batchmodels.ResourceFile(
                storage_container_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token),
                blob_prefix=f"{id_}/raw/",
            ),
            batchmodels.ResourceFile(
                http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                  container_path=pre_buildings_blob),
                file_path='buildings-clean.geojson',
            ),
        ],
        output_files=[_blob_upload("caladrius/**/*.png", f"{id_}/caladrius")],
        **task_common_args,
    )

    # run caladrius
    caladrius_task = batchmodels.TaskAddParameter(
        id=caladrius_task_id,
        depends_on=batchmodels.TaskDependencies(task_ids=[prepare_task_id]),
        command_line='/bin/bash -c "' + ' '.join(caladrius_steps) + '"',
        resource_files=[
            batchmodels.ResourceFile(
                storage_container_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token),
                blob_prefix=f"{id_}/caladrius",
            ),
            batchmodels.ResourceFile(
                http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token, "caladrius_att_effnet4_v1.pkl"),
                file_path='model_weights.pkl',
            ),
        ],
        output_files=[_blob_upload("caladrius/**/*", f"{id_}/caladrius")],
        **task_common_args,
    )

    # merge buildings and damage labels
    final_task = batchmodels.TaskAddParameter(
        id=final_task_id,
        depends_on=batchmodels.TaskDependencies(task_ids=[caladrius_task_id]),
        command_line='/bin/bash -c "final-layer --builds buildings-clean.geojson '
                     '--damage damage-labels.txt --out buildings-predictions.geojson"',
        resource_files=[
            batchmodels.ResourceFile(
                http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                  container_path=pre_buildings_blob),
                file_path='buildings-clean.geojson',
            ),
            batchmodels.ResourceFile(
                http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token,
                                                  container_path=predictions_blob),
                file_path='damage-labels.txt',
            ),
        ],
        output_files=[_blob_upload("buildings-predictions.geojson", f"{id_}/buildings-predictions.geojson")],
        **task_common_args,
    )

    tasks.extend([prepare_task, caladrius_task, final_task])


In [93]:
# Submit every task collected in `tasks` to the job; `res` keeps the service's
# answer for inspection.
target_job_id = config['JOB_ID']
res = batch_client.task.add_collection(target_job_id, tasks)
# res.as_dict()

In [94]:
# Number of tasks in the list.
task_count = len(tasks)
print(task_count)

1200


## Merge all outputs
### N.B. Wait until all tasks above have finished before running this step!

In [70]:
# Merge all outputs: a single task that downloads everything stored under
# "temp" and uploads the merged result into `data_dir`.
_merge_inputs = batchmodels.ResourceFile(
    storage_container_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token),
    blob_prefix="temp",
)
_merge_upload = batchmodels.OutputFile(
    file_pattern="buildings-predictions.geojson",
    destination=batchmodels.OutputFileDestination(
        container=batchmodels.OutputFileBlobContainerDestination(
            container_url=adafiles_output_url,
            path=f"{data_dir}/buildings-predictions.geojson",
        )
    ),
    upload_options=upload_opts,
)
task = [
    batchmodels.TaskAddParameter(
        id="merge-all-outputs",
        depends_on=None,
        command_line='/bin/bash -c "merge-output --dir temp --dest buildings-predictions.geojson"',
        resource_files=[_merge_inputs],
        output_files=[_merge_upload],
        **task_common_args,
    )
]


# Add the merge task to the job
res = batch_client.task.add_collection(config['JOB_ID'], task)

## Clean up Batch resources
### N.B. Wait until all tasks have finished before deleting anything!

In [95]:
# Delete the job this notebook works on (config['JOB_ID']).
# Run only once its tasks have finished.
batch_client.job.delete(config['JOB_ID'])

In [96]:
# Delete the pool this notebook works on (config['POOL_ID']).
# Run only once its tasks have finished.
batch_client.pool.delete(config['POOL_ID'])

In [76]:
# CAUTION: removes EVERY job and EVERY pool returned by the batch client's
# listings, not only the ones named in `config`.

# delete all jobs
for job_id in (job.id for job in batch_client.job.list()):
    batch_client.job.delete(job_id)

# delete all pools
for pool_id in (pool.id for pool in batch_client.pool.list()):
    batch_client.pool.delete(pool_id)