# Setup instructions

Before running this notebook:
* Add the credentials for your Azure account to the `configuration.json` file. The configuration file must be in the same directory as this Jupyter notebook.

# Connect

In [None]:
from __future__ import print_function

import sys
from os import listdir
from os.path import isfile, join

import azure.mgmt.batchai.models as models
from azure.storage.blob import BlockBlobService
from azure.storage.file import FileService

import utilities as utils
from utilities.job_factory import ParameterSweep, NumericParameter, DiscreteParameter

import numpy as np
import matplotlib.pyplot as plt

# Load the settings from configuration.json (expected next to this notebook) and
# build the Batch AI client from them. `cfg` and `client` are read by the cells below.
cfg = utils.config.Configuration('configuration.json')
client = utils.config.create_batchai_client(cfg)

# Create resource group & Batch AI workspace

In [None]:
# Create the resource group described by cfg, then the Batch AI workspace in
# cfg.location. The create call presumably returns a long-running-operation
# handle; .result() waits for it and its return value is discarded.
utils.config.create_resource_group(cfg)
_ = client.workspaces.create(cfg.resource_group, cfg.workspace, cfg.location).result()

# Create file share 

In [None]:
# Azure file share used for the problem set, the algorithm archive and (via the
# job mount configured later) the job output.
# fail_on_exist=False is intended to keep this cell re-runnable when the share
# already exists.
azure_file_share_name = 'sat-solver'
file_service = FileService(cfg.storage_account_name, cfg.storage_account_key)
file_service.create_share(azure_file_share_name, fail_on_exist=False)

# Upload problem set

In [None]:
# Local folder holding the problem files, and the directory inside the file share
# that receives them. Both names are reassigned by the "Upload algorithm" cell,
# so the values seen by later cells depend on which cell ran last.
local_dir = 'max3sat-problems'
azure_file_share_prefix_path = 'max3sat-problems'

# Create the directory path in the cluster file share if necessary.
if azure_file_share_prefix_path:
    file_service.create_directory(
        azure_file_share_name, azure_file_share_prefix_path, fail_on_exist=False)
    
def upload_all_files(directory):
    """Upload every regular file found directly in ``directory`` to the file share.

    Files go to the share ``azure_file_share_name`` under the directory
    ``azure_file_share_prefix_path`` (both notebook globals, read at call time),
    keeping their local file names. Sub-directories are skipped. Each file name
    is printed as it is uploaded, followed by a final newline.

    Args:
        directory: Path of the local directory whose files are uploaded.
    """
    for filename in listdir(directory):
        # Build the source path from the argument, not from the global
        # `local_dir`, so the function works for any directory it is given.
        local_path = join(directory, filename)
        if not isfile(local_path):
            continue
        print(filename, end=' ')
        file_service.create_file_from_path(
            azure_file_share_name, azure_file_share_prefix_path, filename, local_path)
    print()

# Upload the problem set: every regular file directly inside local_dir.
upload_all_files(local_dir)

# Upload algorithm

In [None]:
# Local folder holding the packaged solver. The share prefix is empty, so no
# directory is created below and the archive is uploaded with an empty directory
# argument (presumably the share root). The job template cell reads
# azure_file_share_prefix_path to locate the archive, so this cell must be the
# last one to assign it.
local_dir = 'algorithm'
azure_file_share_prefix_path = ''

# Create the directory path in the cluster file share if necessary.
# Skipped while the prefix is the empty string (falsy).
if azure_file_share_prefix_path:
    file_service.create_directory(
        azure_file_share_name, azure_file_share_prefix_path, fail_on_exist=False)

def upload_file(filename):
    """Upload ``local_dir/filename`` to the file share under the same file name.

    The target share, the target directory and the local source folder are the
    notebook globals ``azure_file_share_name``, ``azure_file_share_prefix_path``
    and ``local_dir``, read at call time.
    """
    source_path = local_dir + '/' + filename
    file_service.create_file_from_path(
        azure_file_share_name,
        azure_file_share_prefix_path,
        filename,
        source_path,
    )

# Upload the packaged solver; the job command line built later extracts
# algorithm.tar.gz from the mounted share.
upload_file('algorithm.tar.gz')

# Configure compute cluster

In [None]:
# Cluster sizing and naming.
nodes_count = 10
cluster_name = 'sat-solver'
vm_type = 'STANDARD_D1'
setup_output_prefix = '/node-setup-logs'

# Manually scaled cluster with a fixed target node count.
cluster_scale_settings = models.ScaleSettings(
    manual=models.ManualScaleSettings(target_node_count=nodes_count)
)

# Admin account on the nodes; `or None` turns an empty/falsy password or SSH key
# from the configuration into None.
cluster_user_account_settings = models.UserAccountSettings(
    admin_user_name=cfg.admin,
    admin_user_password=cfg.admin_password or None,
    admin_user_ssh_public_key=cfg.admin_ssh_key or None,
)

# Node setup task: installs the build tools; its stdout/stderr go under
# setup_output_prefix.
cluster_node_setup = models.NodeSetup(
    setup_task=models.SetupTask(
        command_line='apt -y install gcc make bc',
        std_out_err_path_prefix=setup_output_prefix
    )
)

# NOTE: the name `parameters` is rebound to a ParameterSweep by the
# sweep-definition cell further down.
parameters = models.ClusterCreateParameters(
    vm_size=vm_type,
    scale_settings=cluster_scale_settings,
    user_account_settings=cluster_user_account_settings,
    node_setup=cluster_node_setup,
)

# Create Compute Cluster

In [None]:
# Submit the cluster definition and wait for the operation via .result().
# NOTE: `parameters` must still be the ClusterCreateParameters built in the
# "Configure compute cluster" cell. The sweep-definition cell later rebinds the
# same name to a ParameterSweep, so re-run the configuration cell before
# re-running this one.
_ = client.clusters.create(cfg.resource_group, cfg.workspace, cluster_name, parameters).result()

# Monitor Cluster Creation

In [None]:
# Fetch the current cluster description and print its status; re-run this cell
# to refresh it. `cluster.id` is read later when the job template is built.
cluster = client.clusters.get(cfg.resource_group, cfg.workspace, cluster_name)
utils.cluster.print_cluster_status(cluster)

# Create experiment

In [None]:
# Create the experiment under which the sweep jobs are submitted; .result()
# waits for the create operation. `experiment_name` is reused at submission time.
experiment_name = 'sweep001'
experiment = client.experiments.create(cfg.resource_group, cfg.workspace, experiment_name).result()

# Define sweep parameters

In [None]:
# Disabled alternative: sweep a numeric parameter "N" instead of DURATION.
# param_specs = [
#     NumericParameter(
#         parameter_name="N",
#         data_type="INTEGER",
#         start=1,
#         end=2,
#         step=2,
#         scale="LINEAR"
#     )
# ]

# DURATION values to sweep: 20000 up to and including 110000, in steps of 10000.
duration_values = list(range(20000, 110000 + 1, 10000))

param_specs = [
    DiscreteParameter(parameter_name="DURATION", values=duration_values),
]

# NOTE: this rebinds `parameters`, which previously held the cluster creation
# parameters.
parameters = ParameterSweep(param_specs)

# Create job template

In [None]:
# Where the file share is mounted (relative to $AZ_BATCHAI_JOB_MOUNT_ROOT) and
# where the code archive and problem files live inside that mount.
# relative_code_path uses azure_file_share_prefix_path as last assigned by the
# upload cells.
azure_file_share_mount_path = 'afs'
relative_code_path = f'{azure_file_share_mount_path}/{azure_file_share_prefix_path}'
relative_data_path = f'{azure_file_share_mount_path}/max3sat-problems'
# Alternative problem set: 'v90c700'
problem_set_selector = 'v90c900'

# The cluster looked up in the "Monitor Cluster Creation" cell runs the jobs.
job_cluster = models.ResourceId(id=cluster.id)

# Stdout/stderr and the 'ALL' output directory both land on the mounted share.
job_share_root = f'$AZ_BATCHAI_JOB_MOUNT_ROOT/{azure_file_share_mount_path}'

job_file_share = models.AzureFileShareReference(
    account_name=cfg.storage_account_name,
    credentials=models.AzureStorageCredentialsInfo(
        account_key=cfg.storage_account_key),
    azure_file_url=f'https://{cfg.storage_account_name}.file.core.windows.net/{azure_file_share_name}',
    relative_mount_path=azure_file_share_mount_path)

# Unpack the solver into the task working directory, then run it on the selected
# problem set with the swept DURATION value, writing into the 'ALL' output dir.
job_command_line = f'tar xzf $AZ_BATCHAI_JOB_MOUNT_ROOT/{relative_code_path}/algorithm.tar.gz -C $AZ_BATCH_TASK_WORKING_DIR ; ./run.sh $AZ_BATCHAI_JOB_MOUNT_ROOT/{relative_data_path} {problem_set_selector} {parameters["DURATION"]} $AZ_BATCHAI_OUTPUT_ALL'

# No container_settings are configured for the job.
jcp = models.JobCreateParameters(
    cluster=job_cluster,
    node_count=1,
    std_out_err_path_prefix=job_share_root,
    output_directories=[
        models.OutputDirectory(id='ALL', path_prefix=job_share_root),
    ],
    mount_volumes=models.MountVolumes(azure_file_shares=[job_file_share]),
    custom_toolkit_settings=models.CustomToolkitSettings(command_line=job_command_line),
)

# Create jobs with specific parameters

In [None]:
# Bump the submission counter on every run of this cell so each batch of jobs
# gets a fresh name prefix; the first run in a kernel starts at 1.
try:
    job_count += 1
except NameError:
    job_count = 1

job_prefix = 'job{:03d}'.format(job_count)

# Expand the job template into one job per parameter combination.
jobs_to_submit, param_combinations = parameters.generate_jobs(jcp)

# Show which combinations were generated (numbered from 1).
for position, combination in enumerate(param_combinations, start=1):
    print(f"Parameters {position}: {combination}")

# Submit all jobs to the experiment and wait for the submission to finish.
experiment_utils = utils.experiment.ExperimentUtils(
    client, cfg.resource_group, cfg.workspace, experiment_name)
jobs = experiment_utils.submit_jobs(jobs_to_submit, job_prefix).result()

# Extract metric

In [None]:
# Wait for all jobs to complete before any metric is read from their logs.
experiment_utils.wait_all_jobs()

def print_metrics(metrics, metric_name='metric-val'):
    """Print one summary line per metric entry.

    Each entry is indexed by "job_name", "metric_value" and "job"; the job's
    ``environment_variables`` are rendered as a list of "name: value" strings.

    Args:
        metrics: Iterable of metric entries.
        metric_name: Label printed in front of the metric value.
    """
    for entry in metrics:
        job_name = entry["job_name"]
        value = entry["metric_value"]
        params = [f"{ev.name}: {ev.value}" for ev in entry["job"].environment_variables]
        print(f'name: {job_name}, {metric_name}: {value}, params: {params}')

def get_metric_values(metrics):
    """Return the "metric_value" of every metric entry, in input order."""
    values = []
    for entry in metrics:
        values.append(entry["metric_value"])
    return values

def get_metric_parameter_values(metrics, param_name='PARAM_DURATION'):
    """Collect the values of the environment variable ``param_name`` across jobs.

    Walks the metric entries in order and, within each entry's job, its
    ``environment_variables`` in order; every variable whose name equals
    ``param_name`` contributes its value unchanged (no type conversion).
    """
    return [
        ev.value
        for entry in metrics
        for ev in entry["job"].environment_variables
        if ev.name == param_name
    ]
              
def parameter_key(metric_instance, param_name='PARAM_DURATION'):
    """Sort key: the integer value of ``param_name`` for one metric entry.

    Every environment variable of the entry's job named ``param_name`` is
    converted with ``int``; the first converted value is returned. Raises
    IndexError when the job has no such variable.
    """
    matches = []
    for ev in metric_instance["job"].environment_variables:
        if ev.name == param_name:
            matches.append(int(ev.value))
    return matches[0]

In [None]:
# Print success ratios
# Pull every 'instance-success-ratio: <number>' occurrence out of each job's
# stdout.txt. The pattern is a raw string so that `\.` reaches the regex engine
# without relying on Python's handling of an invalid string escape.
success_ratios_metric = experiment_utils.get_metrics_for_jobs(jobs, 
            utils.job.MetricExtractor(
                output_dir_id='stdouterr',
                logfile='stdout.txt',
                regex=r'instance-success-ratio: ([0-9]+\.?[0-9]*)',
                calculate_method='all'
            )
        )

# Order the entries by their integer PARAM_DURATION value before printing.
success_ratios_metric.sort(key=parameter_key)
print_metrics(success_ratios_metric, 'success-ratios')

In [None]:
# Print scores
# Pull every 'instance-score: <number>' occurrence out of each job's stdout.txt.
# The pattern is a raw string so that `\.` reaches the regex engine without
# relying on Python's handling of an invalid string escape.
scores_metric = experiment_utils.get_metrics_for_jobs(jobs, 
            utils.job.MetricExtractor(
                output_dir_id='stdouterr',
                logfile='stdout.txt',
                regex=r'instance-score: ([0-9]+\.?[0-9]*)',
                calculate_method='all'
            )
        )

# Order the entries by their integer PARAM_DURATION value before printing.
scores_metric.sort(key=parameter_key)
print_metrics(scores_metric, 'scores')


In [None]:
# Extract data to plot from metrics
# Each *_data list holds one metric value per job and each *_durations list the
# matching PARAM_DURATION values, both in the sorted order of the metric lists.
success_ratios_data = get_metric_values(success_ratios_metric)
print(success_ratios_data)

success_ratios_durations = get_metric_parameter_values(success_ratios_metric)
print(success_ratios_durations)


scores_data = get_metric_values(scores_metric)
print(scores_data)

scores_durations = get_metric_parameter_values(scores_metric)
print(scores_durations)

# Plots

In [None]:
def standard_errors(stds):
    """Scale each standard deviation by ``1 / sqrt(len(stds))``.

    Returns a list with one scaled value per input element. Note that the
    divisor is derived from the number of entries in ``stds``.
    """
    divisor = np.sqrt(len(stds))
    return [deviation / divisor for deviation in stds]

def std_err(data):
    """Return the standard error of the mean for each row of ``data``.

    Args:
        data: 2-D array-like with one row per group and one column per sample.

    Returns:
        A list with one value per row: the row's population standard deviation
        (``np.std`` default, ddof=0) divided by the square root of the number
        of samples in the row.
    """
    samples = np.asarray(data)
    stds = np.std(samples, axis=1)
    # The divisor is the sample count per row (number of columns), not the
    # number of rows: the error of a mean shrinks with the observations
    # averaged, regardless of how many groups are plotted.
    return list(stds / np.sqrt(samples.shape[1]))

def mean(data):
    """Return the arithmetic mean of each row of ``data`` (reduction over axis 1)."""
    row_means = np.mean(data, axis=1)
    return row_means

def plot_mean_with_stderr(xrange, data, title, xlabel='', ylabel=''):
    """Plot per-row means of ``data`` against ``xrange`` with error bars.

    Args:
        xrange: X positions, one per row of ``data``.
        data: 2-D array-like; ``mean`` and ``std_err`` are applied to its rows.
        title: Figure title.
        xlabel: Optional x-axis label; skipped when empty.
        ylabel: Optional y-axis label; skipped when empty.
    """
    # Compute both statistics before a figure is created.
    centers, spreads = mean(data), std_err(data)

    plt.figure(figsize=(13.0, 6.0))
    plt.errorbar(xrange, centers, yerr=spreads, fmt='o')
    for text, apply_label in ((xlabel, plt.xlabel), (ylabel, plt.ylabel)):
        if text:
            apply_label(text)
    plt.title(title)
    
    

In [None]:
# Mean success ratio per duration, with standard-error bars.
plot_mean_with_stderr(
    success_ratios_durations,
    success_ratios_data,
    f"Success ratio vs. Duration for problem set '{problem_set_selector}'",
    'Durations',
    'Success ratio',
)

# Mean score per duration, with standard-error bars.
plot_mean_with_stderr(
    scores_durations,
    scores_data,
    f"Score vs. Duration for problem set '{problem_set_selector}'",
    'Durations',
    'Scores',
)
