In [None]:
# td: parse args, refactor
from azureml.core import Workspace, Experiment, Run

# Azure subscription, resource group and AzureML workspace that own the run
sub_id = "6560575d-fa06-4e7d-95fb-f962e74efd7a"
resource_group = "UW-Embeddings"
workspace_name = "TxtsumDemo"

# experiment and run whose details are collected for the model card
experiment_name = "hf-pytorch-demo"
run_id = "bart-samsum-pytorch"

# resolve workspace -> experiment -> run, then fetch the run's details dict
ws = Workspace.get(name=workspace_name, subscription_id=sub_id, resource_group=resource_group)
experiment = Experiment(workspace=ws, name=experiment_name)
run = Run(experiment=experiment, run_id=run_id)
run_details = run.get_details()

In [139]:
# launch command recorded in the run definition
# NOTE(review): assumes a single space-separated string of `--flag value` pairs — confirm
run_definition = run_details.get("runDefinition")
command = run_definition.get("command")

# every space becomes ": ", then the string is cut at each ": --",
# so "--flag value" ends up as the entry "flag: value"
labelled_command = command.rstrip("\n").replace(" ", ": ")
command_args = labelled_command.split(": --")

# temp args to ignore (an entry is dropped when any of these is a substring of it)
ignore_args = (
    "python",
    "deepspeed",
    "model_name_or_path",
    "config_name",
    "dataset_name",
    "dataset_path",
    "evaluation_strategy",
    "logging_strategy",
    "save_strategy",
    "do_train",
    "do_eval",
    "do_predict",
    "predict_with_generate",
    "overwrite_output_dir",
    "output_dir",
    "logging_dir",
    "ddp_find_unused_parameters"
)

# markdown bullet list of the remaining arguments
hyperparam_bullets = []
for arg in command_args:
    if any(ignored in arg for ignored in ignore_args):
        continue
    hyperparam_bullets.append(f"- {arg}")
hyperparams = "\n".join(hyperparam_bullets)

In [69]:
# VM size of the run's compute target plus the node count from the run definition
compute_target_name = run_details.get("target")
compute_details = dict(
    size=ws.compute_targets.get(compute_target_name).vm_size,
    node_count=run_details.get("runDefinition").get("nodeCount"),
)

# region = westus2
# SKU -> (gpu device, dedicated $/hr, low priority $/hr)
sku_mapping = {
    "STANDARD_ND96ASR_V4": ("8 x NVIDIA A100 40GB (NVLink 3.0)", 27.20, 5.44),
    "STANDARD_ND40RS_V2": ("8 x NVIDIA V100 32GB (NVLink)", 22.03, 4.41),
    "STANDARD_NC24S_V3": ("4 x NVIDIA V100 16GB", 12.24, 2.45),
    "STANDARD_NC6": ("1 x NVIDIA K80 12GB", 0.90, 0.18)
}

# markdown rows describing the compute used; each row ends with a newline
sku_name = compute_details["size"]
sku_gpu_device = sku_mapping.get(sku_name)[0]
compute_table = (
    "| Region | US West 2 |\n"
    f"| AzureML Compute SKU | {sku_name} |\n"
    f"| Compute SKU GPU Device | {sku_gpu_device} |\n"
    f"| Compute Node Count | {compute_details['node_count']} |\n"
)

In [70]:
#%pip install azure-identity
#%pip install azure-mgmt-monitor
import datetime
from azure.mgmt.monitor import MonitorManagementClient
from azure.identity import AzureCliCredential#, DefaultAzureCredential

# ARM resource ID of the AzureML workspace; the metrics below are queried against it.
# Uses `workspace_name` from the setup cell (the previous `ws_name` was never defined
# and raised NameError on a fresh kernel).
ws_resource_id = (
    f"subscriptions/{sub_id}/"
    f"resourceGroups/{resource_group}/"
    f"providers/Microsoft.MachineLearningServices/workspaces/{workspace_name}"
)

# monitor client for the subscription, using Azure CLI credentials
monitor_client = MonitorManagementClient(AzureCliCredential(), sub_id)

In [71]:
# run start/end timestamps as recorded in the run details
run_time_format = "%Y-%m-%dT%H:%M:%S.%fZ"
start_datetime = datetime.datetime.strptime(run_details.get("startTimeUtc"), run_time_format)
end_datetime = datetime.datetime.strptime(run_details.get("endTimeUtc"), run_time_format)

run_duration = end_datetime - start_datetime

# run duration in minutes, seconds
# total_seconds() covers the full duration; timedelta.seconds only holds the
# sub-day remainder, so runs of 24h or longer were under-reported (and under-billed).
ts = int(run_duration.total_seconds())
m, s = divmod(ts, 60)

# cost = sku_price * nodes * run_duration
# sku_mapping entries are (gpu device, dedicated $/hr, low priority $/hr)
sku_prices = sku_mapping.get(compute_details["size"])[1:3]
compute_cost = [price * compute_details["node_count"] * ts / 3600 for price in sku_prices]

# markdown rows for duration and estimated cost; each row ends with a newline
cost_table = (
    f"| Run Duration | {m}m {s}s |\n"
    f"| Compute Cost (Dedicated/LowPriority) | ${compute_cost[0]:,.2f} / ${compute_cost[1]:,.2f} USD |\n"
)

In [72]:
# metrics to retrieve & aggregation
monitor_metrics = {
    "CpuUtilizationPercentage": "Average",
    "GpuUtilizationPercentage": "Average",
    "GpuMemoryUtilizationMegabytes": "Average",
    "GpuEnergyJoules": "Total"
}

# add 1 min buffer to end time, td: hr/d depending on interval
query_timespan = f"{start_datetime}/{end_datetime + datetime.timedelta(minutes=1)}"

# Values are collected in query order; the table below indexes them positionally.
# NOTE(review): this assumes each metric yields exactly one data point — confirm.
monitor_results = []
for metric_name, aggregation in monitor_metrics.items():
    response = monitor_client.metrics.list(
        ws_resource_id,
        timespan=query_timespan,
        interval="P1D",
        metricnames=metric_name,
        aggregation=aggregation,
        filter=f"RunID eq '{run_id}'"
    )

    # each data point exposes the aggregated value under the lower-cased aggregation name
    value_attr = aggregation.lower()
    monitor_results.extend(
        getattr(point, value_attr)
        for metric in response.value
        for series in metric.timeseries
        for point in series.data
    )

# markdown rows for utilization and energy; each row ends with a newline
monitor_table = (
    f"| Average CPU Utilization | {monitor_results[0]:,.1f}% |\n"
    f"| Average GPU Utilization | {monitor_results[1]:,.1f}% |\n"
    f"| Average GPU Memory Usage | {monitor_results[2]/1000:,.2f} GB |\n"
    f"| Total GPU Energy Usage | {monitor_results[3]/1000:,.2f} kJ |\n"
)

In [73]:
# compute, cost and monitor rows back to back (each part already ends with a newline)
resource_table = "".join([compute_table, cost_table, monitor_table])

In [None]:
from transformers import (
    #AutoConfig,
    AutoModelForSeq2SeqLM,
    AutoTokenizer
)

In [None]:
# Local folder holding the run's outputs; later cells read all_results.json,
# trainer_state.json and emissions.csv from it and write README.md into it.
model_path = f"./ExperimentRun/dcid.{run_id}/outputs"
# load tokenizer and seq2seq model from that folder
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

In [137]:
# name the model is pushed under; also used as the model id in the model card
model_name = "bart-large-samsum"
# namespace prefix: the model card references the model as "{repo_id}/{model_id}"
repo_id = "linydub"

In [76]:
# td: retrieve metrics from mlflow/aml instead, log predict/test metrics to mlflow
import json

# read trainer results and split them into ROUGE scores and all other metrics
with open(f"{model_path}/all_results.json") as f:
    results_raw = json.load(f)

rouge_results = {name: value for name, value in results_raw.items() if "rouge" in name}
metric_results = {name: value for name, value in results_raw.items() if "rouge" not in name}

# read step / FLOP totals from the trainer state
with open(f"{model_path}/trainer_state.json") as f:
    trainer_state_raw = json.load(f)

ts_results = {
    "total_steps": trainer_state_raw["max_steps"],
    "total_flops": trainer_state_raw["total_flos"],
}

In [77]:
# td: check if report_to codecarbon, log codecarbon metrics/params to mlflow
import csv
# read codecarbon results (only the columns listed here are kept)
carbon_keys = [
    "timestamp",
    "duration",
    "emissions",
    "energy_consumed",
    "country_name",
    "region",
    "cloud_provider",
    "cloud_region",
]
with open(f"{model_path}/emissions.csv") as f:
    emissions_raw = csv.DictReader(f)
    emissions_results = {}
    # every row overwrites the previous one, so the last row of the file wins
    for row in emissions_raw:
        emissions_results.update((key, row[key]) for key in carbon_keys)

In [127]:
# logged metric name format: {dataset_set}_{metric}
def md_metrics(k, v, metrics):
  """Render one logged metric as a YAML entry for the model card's metrics list.

  Args:
    k: Logged metric key, expected as `{dataset_set}_{metric}` (e.g. `eval_rouge1`).
    v: Metric value; interpolated into the entry as-is.
    metrics: Metric-name substrings to report. Keys containing none of them
      are skipped.

  Returns:
    A three-line YAML snippet (`name`, `type`, `value`) indented for nesting
    under `metrics:`, or "" when `k` matches none of `metrics`.
  """
  if not any(i in k for i in metrics):
    return ""

  # Split on the first underscore only: metric names that contain underscores
  # themselves (e.g. `train_gen_len`) previously raised ValueError on unpacking.
  dataset_set, sep, metric = k.partition("_")
  if not sep:
    # no dataset prefix at all: the whole key is the metric name
    dataset_set, metric = "", k

  if "train" in dataset_set:
    metric_name = "Training "
  elif any(i in dataset_set for i in ("eval", "val")):
    metric_name = "Validation "
  elif any(i in dataset_set for i in ("test", "predict")):
    metric_name = "Test "
  else:
    metric_name = ""

  # "rouge1" -> type "rouge-1", display name "ROUGE-1". Both replaces are no-ops
  # for other metrics, so their name is still appended instead of leaving the
  # display name as just the split prefix.
  metric = metric.replace("rouge", "rouge-")
  metric_name += metric.replace("rouge", "ROUGE")

  return (
    f"    - name: {metric_name}\n"
    f"      type: {metric}\n"
    f"      value: {v}"
  )

In [129]:
# td: generate arm template in outputs after run
# td: generate & upload arm template to repo
import urllib.parse

def generate_arm_buttons(url):
    """Build the markdown "Deploy to Azure" and "Visualize" badges for an ARM template.

    Args:
        url: Public URL of the ARM template JSON.

    Returns:
        Both badge links as one markdown string, separated by a space.
    """
    # Percent-encode everything (safe='') because the template URL is embedded as a
    # parameter inside another URL. The armviz link previously received the raw URL,
    # so a template URL containing '?', '&' or spaces produced a broken load parameter.
    encoded_url = urllib.parse.quote(url, safe='')

    arm_deploy = f"[![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/{encoded_url})"
    arm_visualize = f"[![Visualize](https://raw.githubusercontent.com/Azure/azure-quickstart-templates/master/1-CONTRIBUTION-GUIDE/images/visualizebutton.svg?sanitize=true)](http://armviz.io/#/?load={encoded_url})"

    return f"{arm_deploy} {arm_visualize}"

In [80]:
def md_table(results):
    """Format a mapping as markdown table rows, one `| key | value |` line per item.

    Returns the rows joined by newlines (no header, no trailing newline);
    an empty mapping gives an empty string.
    """
    rows = []
    for key, value in results.items():
        rows.append(f"| {key} | {value} |")
    return "\n".join(rows)

In [None]:
# td: retrieve from config path or ds params
# Hard-coded markdown section describing the DeepSpeed setup: a heading, a
# one-line optimizer/scheduler/offload summary and the zero_optimization
# settings in a fenced json block.
deepspeed_config = """### DeepSpeed Config
Optimizer = `AdamW`, Scheduler = `WarmupDecayLR`, Offload = `none`
```json
  "zero_optimization": {
    "stage": 2,
    "allgather_partitions": true,
    "allgather_bucket_size": 1000000000,
    "overlap_comm": true,
    "reduce_scatter": true,
    "reduce_bucket_size": 1000000000,
    "contiguous_gradients": true
  }
```
"""

In [142]:
# generate model card (summarization)
# str.format template for the model card README: a YAML front matter block
# (between the two `---` lines) followed by the markdown body. Every `{name}`
# is a placeholder filled by MODEL_CARD_TEMPLATE.format(...); `{sample_text}`
# appears twice (widget text and usage example). The trailing lstrip("\n")
# drops the leading newline so the text starts directly with `---`.
MODEL_CARD_TEMPLATE = """
---
language:
- en
license: apache-2.0
tags:
{tag_list}
datasets:
- {dataset_id}
metrics:
{metric_list}
model-index:
- name: {model_id}
  results:
  - task: 
      name: Abstractive Text Summarization
      type: abstractive-text-summarization
    dataset:
      name: "{dataset_name}" 
      type: {dataset_id}
    metrics:
{metric_details}
widget:
- text: | 
{sample_text}
---

## `{model_id}`
This model was trained using Microsoft's [`Azure Machine Learning Service`](https://azure.microsoft.com/en-us/services/machine-learning). It was fine-tuned on the [`{dataset_id}`](https://huggingface.co/datasets/{dataset_id}) corpus from [`{base_model_id}`](https://huggingface.co/{base_model_id}) checkpoint.

## Usage (Inference)
```python
from transformers import pipeline
summarizer = pipeline("summarization", model="{repo_id}/{model_id}")

input_text = '''
{sample_text}
'''
summarizer(input_text)
```

## Reproduce this model on AzureML
{arm_buttons}

More information about the fine-tuning process (including samples and benchmarks):  
**[Preview]** https://github.com/linydub/azureml-greenai-txtsum

## Resource Usage
These results were retrieved from [`Azure Monitor Metrics`](https://docs.microsoft.com/en-us/azure/azure-monitor/essentials/data-platform-metrics). All experiments were ran on AzureML low priority compute clusters.

| Key | Value |
| --- | ----- |
{resource_table}

*Compute cost ($) is estimated from the run duration, number of compute nodes utilized, and SKU's price per hour. Updated SKU pricing could be found [here](https://azure.microsoft.com/en-us/pricing/details/machine-learning).  

### Carbon Emissions
These results were obtained using [`CodeCarbon`](https://github.com/mlco2/codecarbon). The carbon emissions are estimated from training runtime only (excl. setup and evaluation runtimes).  

| Key | Value |
| --- | ----- |
{emissions_table}

## Hyperparameters

{hyperparams}

{deepspeed_config}

## Results
| ROUGE | Score |
| ----- | ----- |
{rouge_table}

| Metric | Value |
| ------ | ----- |
{results_table}
{ts_table}
""".lstrip("\n")

# front-matter tags and the metric families reported in the card
tags = ["summarization", "azureml", "azure", "codecarbon", "bart"]
metrics = ["rouge"]
arm_template_url = f"https://raw.githubusercontent.com/linydub/azureml-greenai-txtsum/main/.cloud/template-hub/{repo_id}/arm-{model_name}.json"

# sample dialogue used for the widget and the usage example; each line keeps a
# four-space indent
sample_dialogue = "\n".join([
    "    Henry: Hey, is Nate coming over to watch the movie tonight?",
    "    Kevin: Yea, he said he'll be arriving a bit later at around 7 since he gets off of work at 6. Have you taken out the garbage yet?",
    "    Henry: Oh I forgot. I'll do that once I'm finished with my assignment for my math class.",
    "    Kevin: Yea, you should take it out as soon as possible. And also, Nate is bringing his girlfriend.",
    "    Henry: Nice, I'm really looking forward to seeing them again.",
])

# values for every placeholder in MODEL_CARD_TEMPLATE
card_fields = {
    "rouge_table": md_table(rouge_results),
    "results_table": md_table(metric_results),
    "ts_table": md_table(ts_results),
    "emissions_table": md_table(emissions_results),
    "tag_list": "\n".join(f"- {tag}" for tag in tags),
    "metric_list": "\n".join(f"- {metric}" for metric in metrics),
    "metric_details": "\n".join(md_metrics(k, v, metrics) for k, v in rouge_results.items()),
    "sample_text": sample_dialogue,
    "repo_id": repo_id,
    "model_id": model_name,
    "base_model_id": "facebook/bart-large",
    "dataset_name": "SAMSum Corpus: A Human-annotated Dialogue Dataset for Abstractive Summarization",
    "dataset_id": "samsum",
    "arm_buttons": generate_arm_buttons(arm_template_url),
    "resource_table": resource_table,
    "hyperparams": hyperparams,
    # the DeepSpeed section is left empty for this card
    "deepspeed_config": "",
}

model_card = MODEL_CARD_TEMPLATE.format(**card_fields)

# write the rendered card next to the model files
readme_path = f"{model_path}/README.md"
with open(readme_path, "w") as f:
    f.write(model_card)

In [None]:
#%curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
#%sudo apt-get install git-lfs
#%git lfs install
#%transformers-cli login

In [4]:
#model.push_to_hub(model_name, organization=repo_id)
#tokenizer.push_to_hub(model_name, organization=repo_id)

# push model and tokenizer under `model_name`
# NOTE(review): presumably requires a prior Hugging Face login (see the commented-out setup cell) — confirm
model.push_to_hub(model_name)
tokenizer.push_to_hub(model_name)