In [None]:
# One-time environment setup: install Git LFS via apt and enable it for git.
# The Hugging Face login below is left commented out.
# NOTE(review): presumably it has to be run once (interactively) before the push_to_hub()
# calls later in this notebook -- confirm, and keep any token out of the saved outputs.
#!transformers-cli login
# Fetch the packagecloud repository setup script for git-lfs and pipe it into a root shell.
!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
!sudo apt-get install git-lfs
# Set up the Git LFS hooks/filters in the git configuration.
!git lfs install

In [None]:
# Download the `outputs` folder of one AzureML run from the `txtsumstorage_azureml`
# datastore, using `../` as the download target.
# NOTE(review): the prefix here is ExperimentRun/dcid.<run-id>/outputs, while the loading cell
# reads ../ExperimentRun/t5-3b-samsum-deepspeed/outputs -- presumably the folder is renamed
# by hand in between; confirm.
!az ml datastore download -n txtsumstorage_azureml -d ../ -p ExperimentRun/dcid.5f86907b-2602-4a2a-b547-4aa8d5493368/outputs

In [1]:
from transformers import (
    AutoConfig,
    AutoModelForSeq2SeqLM,
    AutoTokenizer
)

In [2]:
# Local directory of the fine-tuned run; later cells also read metric files from it
# and write the generated README.md into it.
model_path = "../ExperimentRun/t5-3b-samsum-deepspeed/outputs"

# The seq2seq model and its tokenizer are both restored from that same directory.
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [4]:
# Name passed to push_to_hub(); it is also substituted into the model card as {model_name}.
model_n = "t5-3b-samsum-deepspeed"
# Upload the model first, then the tokenizer, under the same name.
# NOTE(review): README.md is generated further down (after these pushes) and is written into
# model_path -- it looks like the card is not part of this upload; confirm how it gets published.
model.push_to_hub(model_n)
tokenizer.push_to_hub(model_n)

In [21]:
import json


def _read_json(path):
    """Return the decoded content of the JSON file at ``path``.

    The file is opened explicitly as UTF-8 so the result does not depend on the
    platform's default text encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def _select(raw, keys):
    """Return a new dict holding ``raw[key]`` for every key in ``keys``, in that order.

    Raises:
        KeyError: naming the first requested key that is missing from ``raw``.
    """
    return {key: raw[key] for key in keys}


# ROUGE variants reported for both the evaluation and the prediction split.
_ROUGE_VARIANTS = ("rouge1", "rouge2", "rougeL", "rougeLsum")

# Entries copied from all_results.json; the order is the row order of the
# metrics table in the model card.
_ALL_RESULT_KEYS = (
    "eval_gen_len",
    "predict_gen_len",
    "train_loss",
    "eval_loss",
    "predict_loss",
    "train_runtime",
    "train_samples",
    "train_samples_per_second",
    "train_steps_per_second",
    "eval_runtime",
    "eval_samples",
    "eval_samples_per_second",
    "eval_steps_per_second",
    "predict_runtime",
    "predict_samples",
    "predict_samples_per_second",
    "predict_steps_per_second",
)

# read trainer results
eval_results_raw = _read_json(f"{model_path}/eval_results.json")
eval_results = _select(eval_results_raw, [f"eval_{name}" for name in _ROUGE_VARIANTS])

test_results_raw = _read_json(f"{model_path}/predict_results.json")
test_results = _select(test_results_raw, [f"predict_{name}" for name in _ROUGE_VARIANTS])

results_raw = _read_json(f"{model_path}/all_results.json")
all_results = _select(results_raw, _ALL_RESULT_KEYS)

trainer_state_raw = _read_json(f"{model_path}/trainer_state.json")
trainer_state = {
    # The trainer state's "max_steps" entry is reported under the label "total_steps".
    "total_steps": trainer_state_raw["max_steps"],
    "total_flos": trainer_state_raw["total_flos"],
}

In [22]:
import csv

# Columns of emissions.csv that are shown in the model card, in display order.
_EMISSION_FIELDS = (
    "timestamp",
    "duration",
    "emissions",
    "energy_consumed",
    "country_name",
    "region",
    "cloud_provider",
    "cloud_region",
)

# read emissions results
emissions = {}
# newline="" leaves newline handling to the csv module, as its documentation asks for;
# the encoding is pinned so parsing does not depend on the platform default.
with open(f"{model_path}/emissions.csv", newline="", encoding="utf-8") as f:
    emissions_raw = csv.DictReader(f)
    for row in emissions_raw:
        # Every row replaces the previous values, so only the last row of the file is
        # reported; a file without data rows leaves `emissions` empty.
        emissions = {field: row[field] for field in _EMISSION_FIELDS}

In [24]:
# Architecture tag and base checkpoint advertised in the card.
# NOTE(review): inferred from the repository name "t5-3b-samsum-deepspeed"; the template used
# to hard-code `bart` / `facebook/bart-large`, which contradicts that name -- confirm the
# exact base checkpoint before publishing.
ARCH_TAG = "t5"
BASE_CHECKPOINT = "t5-3b"

# README.md template. The trailing backslash makes the string start directly with the
# YAML front-matter delimiter instead of an empty line.
# Placeholders: {model_name}, {arch_tag}, {base_checkpoint} and the five *_table blocks.
MODEL_CARD_TEMPLATE = """\
---
language: en
tags:
- azureml
- {arch_tag}
- summarization
license: apache-2.0
datasets:
- samsum
model-index:
- name: {model_name}
  results:
  - task: 
      name: Abstractive Text Summarization
      type: abstractive-text-summarization
    dataset:
      name: "SAMSum Corpus: A Human-annotated Dialogue Dataset for Abstractive Summarization" 
      type: samsum
widget:
- text: | 
    Henry: Hey, is Nate coming over to watch the movie tonight?
    Kevin: Yea, he said he'll be arriving a bit later at around 7 since he gets off of work at 6. Have you taken out the garbage yet?
    Henry: Oh I forgot. I'll do that once I'm finished with my assignment for my math class.
    Kevin: Yea, you should take it out as soon as possible. And also, Nate is bringing his girlfriend.
    Henry: Nice, I'm really looking forward to seeing them again.
---

## `{model_name}`
This model was trained using Microsoft's `AzureML`. It was fine-tuned on the `SAMSum` corpus from `{base_checkpoint}` checkpoint.

More information on the fine-tuning process (includes samples and benchmarks):  
*(currently still WIP, major updates coming soon: 7/6/21~7/9/21)*

## Resource Usage
These results are retrieved from AzureML Studio's resource monitoring module. All experiments were ran on AzureML's low priority clusters.

| key | value |
| --- | ----- |
| AzureML SKU | ND40rs_v2 (8 X V100 32GB) |
| Region | US West 2 |
| Run Duration | X |
| Compute Cost (LowPriority/Dedicated) | $X/$X (USD) |
| Average CPU Utilization | X |
| Average GPU Utilization | X |
| GPU Memory Usage (Avg/Peak) | X/X (GB) |
| Total GPU Energy Usage | X (kJ) |

*Compute cost is calculated from run duration and SKU's price per hour. Updated SKU pricing could be found here: https://azure.microsoft.com/en-us/pricing/details/machine-learning/  
*Peak memory usage is calculated from average peak across all utilized GPUs.  

### Carbon Emissions
These results are obtained using `codecarbon`. The carbon emission is estimated from training runtime only (excluding setup and evaluation runtime).  
CodeCarbon: https://github.com/mlco2/codecarbon  

| key | value |
| --- | ----- |
{carbon_table}

## Hyperparameters
```yaml
fp16: True
per device batch size: 16
effective batch size: 128
epoch: 3.0
learning rate: 5e-5
weight decay: 0.1
seed: 1
```

## Usage
```python
from transformers import pipeline
summarizer = pipeline("summarization", model="henryu-lin/{model_name}")

conversation = '''Henry: Hey, is Nate coming over to watch the movie tonight?
    Kevin: Yea, he said he'll be arriving a bit later at around 7 since he gets off of work at 6. Have you taken out the garbage yet?
    Henry: Oh I forgot. I'll do that once I'm finished with my assignment for my math class.
    Kevin: Yea, you should take it out as soon as possible. And also, Nate is bringing his girlfriend.
    Henry: Nice, I'm really looking forward to seeing them again.
'''
summarizer(conversation)
```

## Results
| ROUGE | Score |
| ----- | ----- |
{eval_table}
{test_table}

| Metric | Value |
| ------ | ----- |
{all_table}
{state_table}

"""


def _markdown_rows(mapping):
    """Render ``mapping`` as Markdown table rows, one ``| key | value |`` line per item.

    Rows follow the mapping's iteration order; an empty mapping yields an empty string.
    """
    return "\n".join(f"| {k} | {v} |" for k, v in mapping.items())


# Generate model card
model_card = MODEL_CARD_TEMPLATE.format(
    model_name=model_n,
    arch_tag=ARCH_TAG,
    base_checkpoint=BASE_CHECKPOINT,
    eval_table=_markdown_rows(eval_results),
    test_table=_markdown_rows(test_results),
    all_table=_markdown_rows(all_results),
    state_table=_markdown_rows(trainer_state),
    carbon_table=_markdown_rows(emissions),
)

# Written as UTF-8 so the card's content does not depend on the platform default encoding.
with open(f"{model_path}/README.md", "w", encoding="utf-8") as f:
    f.write(model_card)