In [None]:
# Install pinned versions of the extra packages this notebook uses (s3fs and datasets).
%pip install s3fs==2024.5.0 datasets==2.20.0

In [1]:
import lumigator_demo as ld
from datasets import load_dataset

In [2]:
# allow groups to provide a team name so we can track their jobs more easily;
# this value is passed as the experiment name when the evaluation jobs are submitted
team_name = "testing_lumigator"

# Running the Experiment

## Dataset Preprocessing

### Loading data
The following dataset is already in the format that we need as input: 
- one field called `examples` containing the text to summarize
- one field called `ground_truth` containing the reference summaries to compare the models' outputs against

Even though the data comes from a single CSV file, we still specify `split="train"`, as this is the default split name for datasets that are not split.

Note that you can load many different types of file formats in a similar way (see https://huggingface.co/docs/datasets/loading#local-and-remote-files)

In [3]:
# Read the local CSV file with the generic "csv" loader and take its "train" split.
ds = load_dataset(
    "csv",
    data_files="dialogsum.csv",
    split="train",
)

# Display the loaded records as a pandas DataFrame.
ds.to_pandas()

Unnamed: 0,examples,ground_truth
0,"#Person1#: Hello, how are you doing today?\n#P...",#Person2# has trouble breathing. The doctor as...
1,#Person1#: Hey Jimmy. Let's go workout later t...,#Person1# invites Jimmy to go workout and pers...
2,#Person1#: I need to stop eating such unhealth...,#Person1# plans to stop eating unhealthy foods...
3,#Person1#: Do you believe in UFOs?\n#Person2#:...,#Person2# believes in UFOs and can see them in...
4,#Person1#: Did you go to school today?\n#Perso...,#Person1# didn't go to school today. #Person2#...
...,...,...
495,"#Person1#: Now that it's the new year, I've de...",#Person1# decides to stop smoking and come out...
496,"#Person1#: You married Joe, didn't you? \n#Per...",#Person1# thought #Person2# married Joe. #Pers...
497,#Person1#: How can I help you mam?\n#Person2#:...,#Person2#'s car makes noises. #Person1# thinks...
498,"#Person1#: Hello, Amazon's customer service. H...",#Person2# calls Amazon's customer service beca...


### Converting data

The following dataset *does not* contain fields in the expected format. The code below shows how you can convert them to make the dataset compatible.

In this specific case we load the `validation` split as it contains fewer samples (500 instead of 12.5K).

In [4]:
# Prepare a dataset (with ground truth) for the LM-Buddy job.
# Dataset identifier passed to load_dataset; only the validation split is loaded.
dataset = "knkarthick/dialogsum"
ds = load_dataset(
    dataset,
    split="validation",
)

# Show the raw dataset before any conversion.
ds.to_pandas()

Unnamed: 0,id,dialogue,summary,topic
0,dev_0,"#Person1#: Hello, how are you doing today?\n#P...",#Person2# has trouble breathing. The doctor as...,see a doctor
1,dev_1,#Person1#: Hey Jimmy. Let's go workout later t...,#Person1# invites Jimmy to go workout and pers...,do exercise
2,dev_2,#Person1#: I need to stop eating such unhealth...,#Person1# plans to stop eating unhealthy foods...,healthy foods
3,dev_3,#Person1#: Do you believe in UFOs?\n#Person2#:...,#Person2# believes in UFOs and can see them in...,UFOs and aliens
4,dev_4,#Person1#: Did you go to school today?\n#Perso...,#Person1# didn't go to school today. #Person2#...,go to school
...,...,...,...,...
495,dev_495,"#Person1#: Now that it's the new year, I've de...",#Person1# decides to stop smoking and come out...,the new year
496,dev_496,"#Person1#: You married Joe, didn't you? \n#Per...",#Person1# thought #Person2# married Joe. #Pers...,fall in love
497,dev_497,#Person1#: How can I help you mam?\n#Person2#:...,#Person2#'s car makes noises. #Person1# thinks...,noises
498,dev_498,"#Person1#: Hello, Amazon's customer service. H...",#Person2# calls Amazon's customer service beca...,a missing page


In [5]:
# Drop the fields that are not needed ("id", "topic") and rename the remaining
# ones to the expected field names: "dialogue" -> "examples" and
# "summary" -> "ground_truth".
ds = (
    ds.remove_columns(["id", "topic"])
    .rename_column("dialogue", "examples")
    .rename_column("summary", "ground_truth")
)

# Display the converted dataset.
ds.to_pandas()

Unnamed: 0,examples,ground_truth
0,"#Person1#: Hello, how are you doing today?\n#P...",#Person2# has trouble breathing. The doctor as...
1,#Person1#: Hey Jimmy. Let's go workout later t...,#Person1# invites Jimmy to go workout and pers...
2,#Person1#: I need to stop eating such unhealth...,#Person1# plans to stop eating unhealthy foods...
3,#Person1#: Do you believe in UFOs?\n#Person2#:...,#Person2# believes in UFOs and can see them in...
4,#Person1#: Did you go to school today?\n#Perso...,#Person1# didn't go to school today. #Person2#...
...,...,...
495,"#Person1#: Now that it's the new year, I've de...",#Person1# decides to stop smoking and come out...
496,"#Person1#: You married Joe, didn't you? \n#Per...",#Person1# thought #Person2# married Joe. #Pers...
497,#Person1#: How can I help you mam?\n#Person2#:...,#Person2#'s car makes noises. #Person1# thinks...
498,"#Person1#: Hello, Amazon's customer service. H...",#Person2# calls Amazon's customer service beca...


### Saving the dataset

Once the data is converted, you can save the dataset locally so that it is ready to be uploaded.

In [6]:
# File name for the converted dataset; the same name is reused for the upload
# and in the experiment descriptions.
dataset_name = "dialogsum_converted.csv"
# Write the dataset as CSV to this relative path; the cell displays to_csv's return value.
ds.to_csv(dataset_name)

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

430961

## Dataset Upload

In [7]:
# Upload the CSV file saved in the previous step and keep the response.
r = ld.dataset_upload(dataset_name)
# Extract the resource id from the response; later cells refer to the dataset by this id.
dataset_id = ld.get_resource_id(r)

{
  "id": "104d4f1e-c6ce-4d55-9258-4341111daf7c",
  "filename": "dialogsum_converted.csv",
  "format": "experiment",
  "size": 430961,
  "created_at": "2024-07-24T11:29:48.321797Z"
}


### Check dataset info

At any point, one can get dataset info by just providing its UUID:

In [8]:
# Request the dataset's info using the id obtained from the upload step.
r = ld.dataset_info(dataset_id)

{
  "id": "104d4f1e-c6ce-4d55-9258-4341111daf7c",
  "filename": "dialogsum_converted.csv",
  "format": "experiment",
  "size": 430961,
  "created_at": "2024-07-24T11:29:48.321797Z"
}


## Model Choice

For now we just provide the models as a list; later we'll have an API endpoint that returns the models available for a given task (still as a list).

In [9]:
# Models kept in the `enc_dec_models` list; this is the list evaluated by default.
enc_dec_models = [
    "hf://facebook/bart-large-cnn",
    "hf://mikeadimech/longformer-qmsum-meeting-summarization",
    "hf://mrm8488/t5-base-finetuned-summarize-news",
    "hf://Falconsai/text_summarization",
]

# Models kept in the `dec_models` list; not part of the default selection.
dec_models = [
    "hf://mistralai/Mistral-7B-v0.1",
    "hf://meta-llama/Meta-Llama-3-8B",
    "hf://microsoft/Phi-3-mini-4k-instruct",
]

# Selection used by the rest of the notebook.
# To evaluate every model instead, use: models = enc_dec_models + dec_models
models = enc_dec_models

In [10]:
# Display the list of models selected for evaluation.
models

['hf://facebook/bart-large-cnn',
 'hf://mikeadimech/longformer-qmsum-meeting-summarization',
 'hf://mrm8488/t5-base-finetuned-summarize-news',
 'hf://Falconsai/text_summarization']

## Run Evaluations

In [11]:
# Value passed as the last argument of every experiment submission
# (presumably caps the number of dataset samples evaluated; confirm against ld.experiments_submit).
max_samples = 10

# Submit one experiment per selected model and keep each response,
# so that the job ids can be extracted from them afterwards.
responses = []
for model in models:
    # every experiment shares the team name; the description tells the jobs apart
    name = team_name
    descr = f"Testing {model} summarization model on {dataset_name}"
    responses.append(ld.experiments_submit(model, name, descr, dataset_id, max_samples))

{
  "id": "639b0430-b66d-4604-8347-17a58132e5aa",
  "name": "testing_lumigator",
  "description": "Testing hf://facebook/bart-large-cnn summarization model on dialogsum_converted.csv",
  "status": "created",
  "created_at": "2024-07-24T11:29:52.646691Z",
  "updated_at": null
}
{
  "id": "8a48753d-9486-4782-a163-aa840f206700",
  "name": "testing_lumigator",
  "description": "Testing hf://mikeadimech/longformer-qmsum-meeting-summarization summarization model on dialogsum_converted.csv",
  "status": "created",
  "created_at": "2024-07-24T11:29:53.219070Z",
  "updated_at": null
}
{
  "id": "11c74d42-988c-409f-851f-5d3602a27d11",
  "name": "testing_lumigator",
  "description": "Testing hf://mrm8488/t5-base-finetuned-summarize-news summarization model on dialogsum_converted.csv",
  "status": "created",
  "created_at": "2024-07-24T11:29:53.791749Z",
  "updated_at": null
}
{
  "id": "51f5751b-6d8d-40c0-bfd5-4253e7b16cf9",
  "name": "testing_lumigator",
  "description": "Testing hf://Falconsai/

### Track evaluation jobs

You can track the jobs at the following URLs:

In [12]:
# Collect the resource id of every submitted experiment, in submission order.
job_ids = [ld.get_resource_id(response) for response in responses]

# Print one link per job, built from the job id and the configured Ray server URL.
for job_id in job_ids:
    ray_link = ld.get_ray_link(job_id, ld.RAY_SERVER_URL)
    print(ray_link)

http://10.144.20.102:8265/#/jobs/639b0430-b66d-4604-8347-17a58132e5aa
http://10.144.20.102:8265/#/jobs/8a48753d-9486-4782-a163-aa840f206700
http://10.144.20.102:8265/#/jobs/11c74d42-988c-409f-851f-5d3602a27d11
http://10.144.20.102:8265/#/jobs/51f5751b-6d8d-40c0-bfd5-4253e7b16cf9


## See / Compare Evaluation Results

In [13]:
# Run this cell once the jobs have completed: download the evaluation result
# of every job, keeping the same order as `job_ids`.
eval_results = [ld.experiments_result_download(job_id) for job_id in job_ids]

# Build a pandas DataFrame from the selected models and their downloaded results.
eval_table = ld.eval_results_to_table(models, eval_results)

In [14]:
# Display the evaluation metrics table built in the previous cell.
eval_table

Unnamed: 0,Model,Meteor,BERT Precision,BERT Recall,BERT F1,ROUGE-1,ROUGE-2,ROUGE-L
0,facebook/bart-large-cnn,0.327604,0.867442,0.864527,0.865518,0.291195,0.140441,0.232568
1,mikeadimech/longformer-qmsum-meeting-summariza...,0.252314,0.879634,0.871928,0.875584,0.287025,0.126523,0.27449
2,mrm8488/t5-base-finetuned-summarize-news,0.33607,0.855851,0.881096,0.868124,0.23831,0.112606,0.213369
3,Falconsai/text_summarization,0.350005,0.858572,0.884689,0.871319,0.290522,0.089349,0.225717


# Observability Tools

This section contains tools to inspect the system, e.g. check the filesystem, the DB, etc.

## S3

### Check (local) AWS credentials

In [None]:
import os


def mask_secret(value, visible=4):
    """Return `value` with all but its last `visible` characters replaced by '*'.

    Values no longer than `visible` are masked entirely, so short secrets are
    never shown in full.
    """
    if len(value) <= visible:
        return "*" * len(value)
    hidden = len(value) - visible
    # slice from `hidden` (not `-visible`) so that visible=0 reveals nothing
    return "*" * hidden + value[hidden:]


# Report whether each AWS-related variable is set. The two key values are masked
# so that credentials do not end up in the saved notebook output; the endpoint
# URL is printed as-is. A missing variable is reported instead of raising KeyError.
for k in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_ENDPOINT_URL"]:
    value = os.environ.get(k)
    if value is None:
        print(f"[{k}]: <not set>")
    elif k == "AWS_ENDPOINT_URL":
        print(f"[{k}]: {value}")
    else:
        print(f"[{k}]: {mask_secret(value)}")

### Look into S3 storage

In [18]:
# List the top level of the lumigator-storage bucket with s5cmd, using the endpoint given by AWS_ENDPOINT_URL.
!s5cmd --endpoint-url $AWS_ENDPOINT_URL ls s3://lumigator-storage

                                  DIR  datasets/
                                  DIR  experiments/
