In [None]:
%load_ext autoreload
%autoreload 2

import datetime
import os
import sys

import azure.storage.blob as azureblob
import azure.batch.batch_service_client as batch
import azure.batch.batch_auth as batch_auth
import azure.batch.models as batchmodels

from azbatch import main
from azbatch.utils import query_yes_no, print_batch_exception, wait_for_tasks_to_complete, print_task_output, \
    upload_file_to_container

from dotenv import load_dotenv
load_dotenv()

In [None]:
# Wall-clock start of this run, truncated to whole seconds. It can be used to
# build unique, timestamped pool/job ids (see the alternatives noted below).
start_time = datetime.datetime.now().replace(microsecond=0)

# Config keys whose value comes from the environment: key "X" is read from the
# environment variable "_X". A variable that is not set yields None.
_ENV_BACKED_KEYS = (
    "BATCH_ACCOUNT_NAME",
    "BATCH_ACCOUNT_KEY",
    "BATCH_ACCOUNT_URL",
    "STORAGE_ACCOUNT_NAME",
    "STORAGE_ACCOUNT_KEY",
    "CR_PASSWORD",
    "510_DLS_CONNECTION_STRING",
)

config = {
    # Hard-coded ids; swap in the timestamped alternative for a fresh id per run.
    "POOL_ID": "pool_20200921103445",  # or f"pool_{start_time.strftime('%Y%m%d%H%M%S')}"
    "JOB_ID": "job_20200928112316",  # or f"job_{start_time.strftime('%Y%m%d%H%M%S')}"
    "POOL_NODE_COUNT": 1,
    "POOL_VM_SIZE": "STANDARD_D1_V2",  # or "Standard_NC6"
    "STANDARD_OUT_FILE_NAME": "stdout.txt",

    # Credentials and endpoints are never written in the notebook itself.
    **{key: os.environ.get(f"_{key}") for key in _ENV_BACKED_KEYS},
}

In [None]:
# Create a Batch service client. We'll now be interacting with the Batch
# service in addition to Storage.
credentials = batch_auth.SharedKeyCredentials(
    config["BATCH_ACCOUNT_NAME"], config["BATCH_ACCOUNT_KEY"]
)
batch_client = batch.BatchServiceClient(
    credentials, batch_url=config["BATCH_ACCOUNT_URL"]
)

# Blob client for the storage account configured via STORAGE_ACCOUNT_NAME/KEY.
blob_client = azureblob.BlockBlobService(
    account_name=config["STORAGE_ACCOUNT_NAME"],
    account_key=config["STORAGE_ACCOUNT_KEY"]
)

# Second blob client, built from the "510_DLS_CONNECTION_STRING" connection
# string, used to sign a SAS for the "automated-damage-assessment" container.
blob_client2 = azureblob.BlockBlobService(connection_string=config["510_DLS_CONNECTION_STRING"])

# Read + list SAS for that container, valid for one day.
# The private `_str` argument is deliberately NOT passed: the SDK scans it
# character by character, so a value such as "read_list" (which contains "d")
# would silently add delete permission to this read-only token.
container_sas_token2 = blob_client2.generate_container_shared_access_signature(
    "automated-damage-assessment",
    permission=azureblob.ContainerPermissions(read=True, list=True),
    expiry=datetime.datetime.utcnow() + datetime.timedelta(days=1)
)

In [None]:
# Create the pool that will contain the compute nodes that will execute the
# tasks. The Batch client and the whole `config` dict are handed to
# `main.create_pool`; presumably it reads the POOL_* entries from `config`
# (its implementation is not shown in this notebook).
# NOTE(review): POOL_ID is hard-coded in `config` -- confirm how `create_pool`
# behaves when a pool with that id already exists.
main.create_pool(batch_client, config)

In [None]:
# Create the job that will run the tasks. The Batch client and the whole
# `config` dict are handed to `main.create_job`; presumably it uses JOB_ID and
# POOL_ID from `config` (its implementation is not shown in this notebook).
# NOTE(review): JOB_ID is hard-coded in `config` -- confirm how `create_job`
# behaves when a job with that id already exists.
main.create_job(batch_client, config)

## Adding tasks

The tasks added below mirror the steps of this command-line pipeline:

```bash
neo cover --raster ~/datalake/maxar/typhoon-mangkhut/processed/pre-event/*-ntl.tif --zoom 17 --out ~/datalake/maxar/typhoon-mangkhut/neo/cover.csv

neo tile --raster ~/datalake/maxar/typhoon-mangkhut/processed/pre-event/*-ntl.tif --zoom 17 --cover ~/datalake/maxar/typhoon-mangkhut/neo/cover.csv --config ~/neateo/config.toml --out ~/datalake/maxar/typhoon-mangkhut/neo/images --format tif

mkdir ~/datalake/maxar/typhoon-mangkhut/neo/predictions

neo predict --config ~/neateo/config.toml --dataset ~/datalake/maxar/typhoon-mangkhut/neo --cover ~/datalake/maxar/typhoon-mangkhut/neo/cover.csv --checkpoint ~/datalake/neateo-models/neat-fullxview-epoch75.pth --out ~/datalake/maxar/typhoon-mangkhut/neo/predictions --metatiles --keep_borders

neo vectorize --masks ~/datalake/maxar/typhoon-mangkhut/neo/predictions --type Building --config ~/neateo/config.toml --out ~/datalake/maxar/typhoon-mangkhut/processed/buildings.geojson

python filter_buildings.py
```

In [None]:
# Set up container: every task runs inside the NEO image, with /ada_tools as
# the container's working directory.
task_container_settings = batchmodels.TaskContainerSettings(
    image_name=main.NEO_IMAGE,
    container_run_options='--rm --workdir /ada_tools'  #  --gpus all
)
# Tasks run as a pool-scoped auto-user with admin elevation.
admin_identity = batchmodels.UserIdentity(
    auto_user=batchmodels.AutoUserSpecification(
        scope='pool',
        elevation_level='admin',
    )
)

# Set up blob client: make sure the working container exists, then sign two
# one-day SAS tokens for it -- one for writing, one for reading/listing.
input_container_name = 'adafiles'
blob_client.create_container(input_container_name, fail_on_exist=False)
sas_expiry = datetime.datetime.utcnow() + datetime.timedelta(days=1)
container_sas_token = blob_client.generate_container_shared_access_signature(
    input_container_name,
    permission=azureblob.BlobPermissions.WRITE,
    expiry=sas_expiry)
# The private `_str` argument is deliberately NOT passed: the SDK scans it
# character by character, so "read_list" (which contains "d") would silently
# add delete permission to this read-only token.
container_sas_token_read = blob_client.generate_container_shared_access_signature(
    input_container_name,
    permission=azureblob.ContainerPermissions(read=True, list=True),
    expiry=sas_expiry)

# Write-only URL of the working container (used as upload destination).
container_url = "https://{}.blob.core.windows.net/{}?{}".format(
        config["STORAGE_ACCOUNT_NAME"], input_container_name, container_sas_token)

# Read/list URL of the same container. It must point at the account that
# signed `container_sas_token_read`, so it is derived from the configuration
# instead of a hard-coded account name.
adafiles_url = "https://{}.blob.core.windows.net/{}?{}".format(
        config["STORAGE_ACCOUNT_NAME"], input_container_name, container_sas_token_read)

# Read-only URLs into the "automated-damage-assessment" data lake container.
datalake_url = "https://510datalakestorage.blob.core.windows.net/automated-damage-assessment"
processed_url = f"{datalake_url}/maxar/typhoon-mangkhut/processed/pre-event/103001007E413300-3013213-ntl.tif?{container_sas_token2}"
model_pth_url = f"{datalake_url}/neateo-models/neat-fullxview-epoch75.pth?{container_sas_token2}"
# NOTE(review): this path says "typhoon-mangkhut-2" while `processed_url` uses
# "typhoon-mangkhut" -- confirm that the difference is intended.
predictions_url = f"{datalake_url}/maxar/typhoon-mangkhut-2/neo/predictions/?{container_sas_token2}"

In [None]:
# A) create test tasks - for linking with blob storage
#
# Both helpers are defined here so that this cell runs on a fresh kernel
# (it previously relied on names that were undefined at this point).
upload_opts = batchmodels.OutputFileUploadOptions(
    upload_condition=batchmodels.OutputFileUploadCondition.task_success
)
output_file_dest = batchmodels.OutputFileBlobContainerDestination(
    container_url=container_url
)

tasks = [
    # Creates test.txt in the task working directory and uploads every *.txt
    # file to the working container when the task succeeds.
    batchmodels.TaskAddParameter(
        id='001-create-file',
        # Batch does not run command lines under a shell, so the environment
        # variable is only expanded when bash is invoked explicitly.
        command_line='/bin/bash -c "touch $AZ_BATCH_TASK_WORKING_DIR/test.txt"',
        container_settings=task_container_settings,
        user_identity=admin_identity,
        output_files=[batchmodels.OutputFile(
            file_pattern="**/*.txt",
            destination=batchmodels.OutputFileDestination(
                container=output_file_dest
            ),
            upload_options=upload_opts
        )]
    ),
    # Downloads the working container's blobs as resource files and prints
    # test.txt from the task working directory.
    # NOTE(review): nothing here makes this task wait for 001-create-file.
    batchmodels.TaskAddParameter(
        id='002-check-file',
        command_line='/bin/bash -c "cat $AZ_BATCH_TASK_WORKING_DIR/test.txt"',
        container_settings=task_container_settings,
        user_identity=admin_identity,
        # `adafiles_url` carries the read/list token; `container_url` is
        # write-only and cannot be used to download resource files.
        resource_files=[batchmodels.ResourceFile(storage_container_url=adafiles_url)],
        # output_files=[batchmodels.OutputFile(file_pattern="/*.txt",
        #                                      destination=output_file_dest)]
    ),
]

In [None]:
# Upload task outputs only when the task succeeded.
upload_opts = batchmodels.OutputFileUploadOptions(
    upload_condition=batchmodels.OutputFileUploadCondition.task_success
)


def _adafiles_blob_url(blob_name):
    """Return a read URL for `blob_name` inside the working container.

    The URL is signed with the freshly generated `container_sas_token_read`,
    so no SAS signature has to be pasted into the notebook (the previously
    hard-coded one was both a committed secret and long expired).
    """
    return "https://{}.blob.core.windows.net/{}/{}?{}".format(
        config["STORAGE_ACCOUNT_NAME"],
        input_container_name,
        blob_name,
        container_sas_token_read,
    )


# create neo related tasks
#
# Only the "vectorize" stage is active. The earlier stages (cover, tile,
# predict) are kept below as commented-out templates.
tasks = [
#     batchmodels.TaskAddParameter(
#         id=f"cover-{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}",
#         command_line='/bin/bash -c "neo cover --raster $AZ_BATCH_TASK_WORKING_DIR/processed.tif --zoom 17 --out $AZ_BATCH_TASK_WORKING_DIR/cover.csv"',
#         container_settings=task_container_settings,
#         user_identity=admin_identity,
#         resource_files=[batchmodels.ResourceFile(
#             http_url=processed_url,
#             file_path='processed.tif'
#         )],
#         output_files=[batchmodels.OutputFile(
#             file_pattern="cover.csv",
#             destination=batchmodels.OutputFileDestination(
#                 container=batchmodels.OutputFileBlobContainerDestination(
#                     container_url=container_url,
#                     path="cover.csv",
#                 )
#             ),
#             upload_options=upload_opts,
#         )]
#     ),

#     # neo tile --raster processed.tif --zoom 17 --cover cover.csv --config ~/neateo/config.toml --out ~/datalake/maxar/typhoon-mangkhut/neo/images --format tif
#     batchmodels.TaskAddParameter(
#         id=f"tile-{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}",
#         command_line='/bin/bash -c "wd=$AZ_BATCH_TASK_WORKING_DIR && neo tile --raster $wd/processed.tif --zoom 17 --cover $wd/cover.csv --config $wd/config.toml --out $wd/images --format tif"',
#         container_settings=task_container_settings,
#         user_identity=admin_identity,
#         resource_files=[
#             batchmodels.ResourceFile(
#                 http_url=processed_url,
#                 file_path='processed.tif'
#             ),
#             batchmodels.ResourceFile(
#                 http_url=_adafiles_blob_url('config.toml'),
#                 file_path='config.toml'
#             ),
#             batchmodels.ResourceFile(
#                 http_url=_adafiles_blob_url('cover.csv'),
#                 file_path='cover.csv'
#             ),
#         ],
#         output_files=[batchmodels.OutputFile(
#             file_pattern="images/**/*.tif",
#             destination=batchmodels.OutputFileDestination(
#                 container=batchmodels.OutputFileBlobContainerDestination(
#                     container_url=container_url,
#                     path="images",
#                 )
#             ),
#             upload_options=upload_opts,
#         )]
#     ),

#     # neo predict --config config.toml --dataset ~/datalake/maxar/typhoon-mangkhut/neo --cover /cover.csv --checkpoint ~/datalake/neateo-models/neat-fullxview-epoch75.pth --out ~/datalake/maxar/typhoon-mangkhut/neo/predictions --metatiles --keep_borders
#     batchmodels.TaskAddParameter(
#         id=f"predict-{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}",
#         command_line='/bin/bash -c "wd=$AZ_BATCH_TASK_WORKING_DIR && neo predict --config $wd/config.toml --cover $wd/cover.csv --dataset XXXX --checkpoint $wd/neat-fullxview-epoch75.pth --out $wd/predictions  --metatiles --keep_borders"',
#         container_settings=task_container_settings,
#         user_identity=admin_identity,
#         resource_files=[
#             batchmodels.ResourceFile(
#                 http_url=processed_url,
#                 file_path='processed.tif'
#             ),
#             batchmodels.ResourceFile(
#                 http_url=model_pth_url,
#                 file_path='neat-fullxview-epoch75.pth'
#             ),
#             batchmodels.ResourceFile(
#                 storage_container_url=adafiles_url,
#                 blob_prefix='images/',
#                 file_path='images/',
#             ),
#             batchmodels.ResourceFile(
#                 http_url=_adafiles_blob_url('config.toml'),
#                 file_path='config.toml'
#             ),
#             batchmodels.ResourceFile(
#                 http_url=_adafiles_blob_url('cover.csv'),
#                 file_path='cover.csv'
#             ),
#         ],
#         output_files=[batchmodels.OutputFile(
#             file_pattern="predictions/*",
#             destination=batchmodels.OutputFileDestination(
#                 container=batchmodels.OutputFileBlobContainerDestination(
#                     container_url=container_url,
#                     path="predictions",
#                 )
#             ),
#             upload_options=upload_opts,
#         )]
#     ),

    # neo vectorize --masks /predictions --type Building --config config.toml --out ~/buildings.geojson
    batchmodels.TaskAddParameter(
        # Timestamped id, so re-running the cell yields a new task id.
        id=f"vectorize-{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}",
        command_line='/bin/bash -c "wd=$AZ_BATCH_TASK_WORKING_DIR && neo vectorize --config $wd/config.toml --masks $wd/predictions --out $wd/buildings.geojson --type Building"',
        container_settings=task_container_settings,
        user_identity=admin_identity,
        resource_files=[
            # NOTE(review): `predictions_url` has a blob path after the
            # container name; confirm Batch accepts that as a container URL
            # (a `blob_prefix` may be the intended mechanism).
            batchmodels.ResourceFile(
                storage_container_url=predictions_url,
                file_path='predictions/',
            ),
            batchmodels.ResourceFile(
                http_url=_adafiles_blob_url('config.toml'),
                file_path='config.toml'
            ),
        ],
        # Upload the resulting GeoJSON to the working container.
        output_files=[batchmodels.OutputFile(
            file_pattern="buildings.geojson",
            destination=batchmodels.OutputFileDestination(
                container=batchmodels.OutputFileBlobContainerDestination(
                    container_url=container_url,
                    path="buildings.geojson",
                )
            ),
            upload_options=upload_opts,
        )]
    ),
]


In [None]:
# Add tasks to job: submit every entry of `tasks` to the job identified by
# config['JOB_ID'] in a single `add_collection` call.
res = batch_client.task.add_collection(config['JOB_ID'], tasks)

# Last expression of the cell: display the service response as a plain dict.
res.as_dict()

In [None]:
# Clean up Batch resources (if the user so chooses).
#
# Each deletion is confirmed interactively, so running the whole notebook does
# not tear down the job and pool right after the tasks were submitted.

# delete current job / pool
if input(f"Delete job {config['JOB_ID']}? [y/N] ").strip().lower() in ("y", "yes"):
    batch_client.job.delete(config['JOB_ID'])

if input(f"Delete pool {config['POOL_ID']}? [y/N] ").strip().lower() in ("y", "yes"):
    batch_client.pool.delete(config['POOL_ID'])

In [None]:
# Delete ALL jobs and ALL pools of the Batch account -- not only the ones this
# notebook created. Because this is destructive, the full word "yes" must be
# typed; anything else skips the clean-up.
delete_all_answer = input(
    "Delete ALL jobs and pools in this Batch account? Type 'yes' to confirm: "
)

if delete_all_answer.strip().lower() == "yes":
    # delete all jobs
    for job in batch_client.job.list():
        batch_client.job.delete(job.id)

    # delete all pools
    for pool in batch_client.pool.list():
        batch_client.pool.delete(pool.id)
else:
    print("Skipped: no jobs or pools were deleted.")