In [None]:
%load_ext autoreload
%autoreload 2

import datetime
import os
import sys

import azure.storage.blob as azureblob
import azure.batch.batch_service_client as batch
import azure.batch.batch_auth as batch_auth
import azure.batch.models as batchmodels

from azbatch import main

from dotenv import load_dotenv
load_dotenv()

In [None]:
# Wall-clock time of this run, truncated to whole seconds; the job ID below is
# derived from it.
start_time = datetime.datetime.now().replace(microsecond=0)

# Central settings dictionary handed to the `azbatch.main` helpers.
# Every secret is read from the environment (populated by `load_dotenv()`);
# a variable that is not set yields `None` rather than raising here.
config = {
    # --- pool / job --------------------------------------------------------
    # Pinned pool ID. A per-run, timestamped ID could be used instead if a
    # fresh pool is wanted for every run.
    "POOL_ID": "pool_20200921103445",
    "JOB_ID": f"job_{start_time:%Y%m%d%H%M%S}",
    "POOL_NODE_COUNT": 1,
    "POOL_VM_SIZE": "STANDARD_D2_V2",  # or "Standard_NC6"

    # --- Batch account -----------------------------------------------------
    "BATCH_ACCOUNT_NAME": os.getenv("_BATCH_ACCOUNT_NAME"),
    "BATCH_ACCOUNT_KEY": os.getenv("_BATCH_ACCOUNT_KEY"),
    "BATCH_ACCOUNT_URL": os.getenv("_BATCH_ACCOUNT_URL"),

    # --- container registry ------------------------------------------------
    "CR_PASSWORD": os.getenv("_CR_PASSWORD"),

    # --- storage -----------------------------------------------------------
    "STORAGE_ACCOUNT_NAME": os.getenv("_STORAGE_ACCOUNT_NAME"),
    "STORAGE_ACCOUNT_KEY": os.getenv("_STORAGE_ACCOUNT_KEY"),
    "510_DLS_CONNECTION_STRING": os.getenv("_510_DLS_CONNECTION_STRING"),
    "XCCTEST_CONNECTION_STRING": os.getenv("_XCCTEST_CONNECTION_STRING"),
}

### Connect to batch & storage accounts

In [None]:
# Batch service client, authenticated with the account's shared key.
# All pool / job / task operations below go through this object.
batch_client = batch.BatchServiceClient(
    batch_url=config["BATCH_ACCOUNT_URL"],
    credentials=batch_auth.SharedKeyCredentials(
        key=config["BATCH_ACCOUNT_KEY"],
        account_name=config["BATCH_ACCOUNT_NAME"],
    ),
)

# One blob client per storage account, each built from its connection string.
blob_client_xcctest = azureblob.BlockBlobService(
    connection_string=config["XCCTEST_CONNECTION_STRING"],
)
blob_client_510 = azureblob.BlockBlobService(
    connection_string=config["510_DLS_CONNECTION_STRING"],
)

### Create pool & job

In [None]:
# Make sure the pool of compute nodes that executes the tasks is available:
# an existing pool with this ID is left untouched, otherwise it is created.
if batch_client.pool.exists(config["POOL_ID"]):
    print(f"Pool {config['POOL_ID']} already exists.")
else:
    main.create_pool(batch_client, config)
    print(f"Created pool {config['POOL_ID']}.")

In [None]:
# Create the job that will run the tasks, unless a job with this ID is
# already registered on the Batch account.
# `any(...)` over the lazily iterated job listing stops at the first match
# instead of materialising the ID of every job first.
if not any(job.id == config["JOB_ID"] for job in batch_client.job.list()):
    main.create_job(batch_client, config)
    print(f"Created job {config['JOB_ID']}.")
else:
    print(f"Job {config['JOB_ID']} already exists.")

## Adding tasks

Replicating the following steps from the pipeline:
```
neo cover --raster ~/datalake/maxar/typhoon-mangkhut/processed/pre-event/*-ntl.tif --zoom 17 --out ~/datalake/maxar/typhoon-mangkhut/neo/cover.csv

neo tile --raster ~/datalake/maxar/typhoon-mangkhut/processed/pre-event/*-ntl.tif --zoom 17 --cover ~/datalake/maxar/typhoon-mangkhut/neo/cover.csv --config ~/neateo/config.toml --out ~/datalake/maxar/typhoon-mangkhut/neo/images --format tif

mkdir ~/datalake/maxar/typhoon-mangkhut/neo/predictions

neo predict --config ~/neateo/config.toml --dataset ~/datalake/maxar/typhoon-mangkhut/neo --cover ~/datalake/maxar/typhoon-mangkhut/neo/cover.csv --checkpoint ~/datalake/neateo-models/neat-fullxview-epoch75.pth --out ~/datalake/maxar/typhoon-mangkhut/neo/predictions --metatiles --keep_borders

neo vectorize --masks ~/datalake/maxar/typhoon-mangkhut/neo/predictions --type Building --config ~/neateo/config.toml --out ~/datalake/maxar/typhoon-mangkhut/processed/buildings.geojson
```

### Container & storage settings

In [None]:
# Settings shared by every task submitted from this notebook.

# Output files are uploaded only when the task finished successfully.
upload_opts = batchmodels.OutputFileUploadOptions(
    upload_condition=batchmodels.OutputFileUploadCondition.task_success,
)

# Tasks run as the pool-scoped auto-user with admin elevation.
admin_identity = batchmodels.UserIdentity(
    auto_user=batchmodels.AutoUserSpecification(
        elevation_level="admin",
        scope="pool",
    ),
)

# Tasks execute inside the image named by `main.NEO_IMAGE`.
# TODO: `--gpus all` may be needed in the run options for GPU tasks.
task_container_settings = batchmodels.TaskContainerSettings(
    container_run_options="--rm",
    image_name=main.NEO_IMAGE,
)

# Keyword arguments splatted into every `TaskAddParameter`.
task_common_args = dict(
    container_settings=task_container_settings,
    user_identity=admin_identity,
)

In [None]:
# SAS tokens and URLs reused by the task definitions.

# "adafiles" container (xcctest account): one token for reading/listing
# inputs, a separate write-only token for uploading task output.
adafiles_read_token = main.create_sas_token(
    blob_client_xcctest, "adafiles", ["read", "list"]
)
adafiles_write_token = main.create_sas_token(
    blob_client_xcctest, "adafiles", ["write"]
)

# "automated-damage-assessment" container (510 client): read/list only.
_510_read_token = main.create_sas_token(
    blob_client_510, "automated-damage-assessment", ["read", "list"]
)

# Container URL carrying the write token; used as the upload destination.
adafiles_output_url = main.create_resource_url(
    "xcctest", "adafiles", adafiles_write_token
)

### Actual task specification

In [None]:
# Timestamp suffix shared by all task IDs of this submission, so that
# dependency references between tasks can name their siblings.
batch_name = datetime.datetime.now().strftime('%Y%m%d%H%M%S')  # necessary to match dependencies

# Blob path, inside the "adafiles" container, of the pre-event raster that
# feeds the pipeline.
PRE_EVENT_RASTER_BLOB = "taskout/ada/pre-event/103001007E413300-3013212.tif"


def _raster_input():
    """Return a ResourceFile that stages the pre-event raster as ``processed.tif``."""
    return batchmodels.ResourceFile(
        http_url=main.create_resource_url(
            "xcctest", "adafiles", adafiles_read_token,
            container_path=PRE_EVENT_RASTER_BLOB,
        ),
        file_path='processed.tif',
    )


def _adafiles_input(blob_name):
    """Return a ResourceFile staging ``blob_name`` from "adafiles" under the same file name.

    Args:
        blob_name: value passed as the fourth positional argument to
            ``main.create_resource_url``; also used as the task-side file path.
    """
    return batchmodels.ResourceFile(
        http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token, blob_name),
        file_path=blob_name,
    )


def _adafiles_output(file_pattern, path):
    """Return an OutputFile uploading ``file_pattern`` to ``path`` in the "adafiles" container.

    Args:
        file_pattern: pattern selecting the task's files to upload.
        path: destination path inside the container.

    The upload uses the shared ``adafiles_output_url`` and ``upload_opts``.
    """
    return batchmodels.OutputFile(
        file_pattern=file_pattern,
        destination=batchmodels.OutputFileDestination(
            container=batchmodels.OutputFileBlobContainerDestination(
                container_url=adafiles_output_url,
                path=path,
            )
        ),
        upload_options=upload_opts,
    )


tasks = [
    # neo cover
    batchmodels.TaskAddParameter(
        id=f"cover-{batch_name}",
        depends_on=None,
        command_line='/bin/bash -c "wd=$AZ_BATCH_TASK_WORKING_DIR && neo cover --raster $wd/processed.tif --zoom 17 --out $wd/cover.csv"',
        resource_files=[_raster_input()],
        output_files=[_adafiles_output("cover.csv", "cover.csv")],
        **task_common_args,
    ),

    # neo tile
    # NOTE(review): this task downloads `cover.csv` from "adafiles", which is
    # where the cover task uploads it. With the dependency below disabled the
    # two tasks are not ordered, so an older (or missing) cover file may be
    # picked up. TODO confirm the job enables task dependencies before
    # re-enabling.
    batchmodels.TaskAddParameter(
        id=f"tile-{batch_name}",
#         depends_on=batchmodels.TaskDependencies(task_ids=[f"cover-{batch_name}"]),
        command_line='/bin/bash -c "wd=$AZ_BATCH_TASK_WORKING_DIR && neo tile --raster $wd/processed.tif --zoom 17 --cover $wd/cover.csv --config $wd/config.toml --out $wd/images --format tif"',
        resource_files=[
            _raster_input(),
            _adafiles_input("config.toml"),
            _adafiles_input("cover.csv"),
        ],
        output_files=[_adafiles_output("images/**/*.tif", "images")],
        **task_common_args,
    ),

    # The predict step is kept disabled (and verbatim) below: per its own note
    # it is only runnable on a GPU instance.
#     # neo predict -- !!! only runnable on a GPU instance
#     batchmodels.TaskAddParameter(
#         id=f"predict-{batch_name}",
#         depends_on=batchmodels.TaskDependencies(task_ids=[f"tile-{batch_name}"]),
#         command_line='/bin/bash -c "wd=$AZ_BATCH_TASK_WORKING_DIR && neo predict --config $wd/config.toml --cover $wd/cover.csv --dataset XXXX --checkpoint $wd/neat-fullxview-epoch75.pth --out $wd/predictions  --metatiles --keep_borders"',
#         resource_files=[
#             batchmodels.ResourceFile(
#                 http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token, container_path="taskout/ada/pre-event/103001007E413300-3013212.tif"),
#                 file_path='processed.tif'
#             ),
#             batchmodels.ResourceFile(
#                 http_url=main.create_resource_url("510datalakestorage", "automated-damage-assessment", _510_read_token, container_path="neateo-models/neat-fullxview-epoch75.pth"),
#                 file_path='neat-fullxview-epoch75.pth'
#             ),
#             batchmodels.ResourceFile(
#                 storage_container_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token),
#                 blob_prefix='images/',
#                 file_path='images/',
#             ),
#             batchmodels.ResourceFile(
#                 http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token, "config.toml"),
#                 file_path='config.toml'
#             ),
#             batchmodels.ResourceFile(
#                 http_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token, "cover.csv"),
#                 file_path='cover.csv'
#             ),
#         ],
#         output_files=[batchmodels.OutputFile(
#             file_pattern="predictions/*",
#             destination=batchmodels.OutputFileDestination(
#                 container=batchmodels.OutputFileBlobContainerDestination(
#                     container_url=adafiles_output_url,
#                     path="predictions",
#                 )
#             ),
#             upload_options=upload_opts,
#         )],
#         **task_common_args,
#     ),

    # neo vectorize
    # Reads masks from the "testpredict/predictions/" prefix in "adafiles"
    # rather than from the disabled predict task, hence no dependency.
    batchmodels.TaskAddParameter(
        id=f"vectorize-{batch_name}",
        depends_on=None,  # batchmodels.TaskDependencies(task_ids=[f"predict-{batch_name}"]),
        command_line='/bin/bash -c "wd=$AZ_BATCH_TASK_WORKING_DIR && neo vectorize --config $wd/config.toml --masks $wd/testpredict/predictions --out $wd/buildings.geojson --type Building"',
        resource_files=[
            batchmodels.ResourceFile(
#                 storage_container_url=main.create_resource_url("510datalakestorage", "automated-damage-assessment", _510_read_token),
#                 blob_prefix="maxar/typhoon-mangkhut-2/neo/predictions/",
                storage_container_url=main.create_resource_url("xcctest", "adafiles", adafiles_read_token),
                blob_prefix="testpredict/predictions/",
            ),
            _adafiles_input("config.toml"),
        ],
        output_files=[_adafiles_output("buildings.geojson", "buildings.geojson")],
        **task_common_args,
    ),
]

# Add tasks to job; the cell's last expression displays the per-task result.
res = batch_client.task.add_collection(config['JOB_ID'], tasks)
res.as_dict()

## Clean up Batch resources

In [None]:
# Remove only the job and the pool named in `config`.
# NOTE(review): running this right after submission would delete the job
# before its tasks complete — presumably meant to be run manually afterwards.
batch_client.job.delete(config["JOB_ID"])
batch_client.pool.delete(config["POOL_ID"])

In [None]:
# CAUTION: account-wide cleanup. This removes every job and every pool the
# Batch client can list, not just the ones created by this notebook.

# Jobs first ...
for existing_job in batch_client.job.list():
    batch_client.job.delete(existing_job.id)

# ... then pools.
for existing_pool in batch_client.pool.list():
    batch_client.pool.delete(existing_pool.id)