## Setup

In [None]:
# Start from a clean checkout. -f keeps a fresh runtime (where biotldr/ does not exist yet)
# from reporting an error before the clone.
! rm -rf biotldr/
! git clone https://github.com/mahta-r/biotldr.git

In [None]:
# Start from a clean checkout. -f keeps a fresh runtime (where fairseq/ does not exist yet)
# from reporting an error before the clone.
! rm -rf fairseq/
! git clone https://github.com/pytorch/fairseq

In [None]:
# Editable install of the fairseq checkout. && (instead of ;) stops pip from running
# against the wrong directory when the cd fails, e.g. because the clone did not succeed.
! cd fairseq/ && pip install --editable .

In [None]:
# Install the biotldr requirements. && (instead of ;) stops pip from looking for
# requirements.txt in the wrong directory when the cd fails.
! cd biotldr/ && pip install -r requirements.txt

In [None]:
# Install pyrouge straight from its GitHub repository (-U upgrades an existing install).
# NOTE(review): no commit or tag is pinned, so the installed version can change between runs.
! pip install -U git+https://github.com/pltrdy/pyrouge

In [None]:
# Fetch files2rouge and run its two setup scripts from inside the checkout.
! git clone https://github.com/pltrdy/files2rouge.git
# Line magics take no space after the percent sign: current IPython parses "% cd" as a
# magic with an empty name and fails with "Line magic function `%` not found".
%cd files2rouge
! python setup_rouge.py
! python setup.py install
# Go back to /content (presumably the Colab working directory); later cells use paths
# such as biotldr/ relative to it.
%cd /content

In [None]:
# -y answers apt-get's confirmation prompt; a notebook shell cannot answer it
# interactively, so without it the install can abort.
! sudo apt-get install -y libxml-parser-perl

In [None]:
# Install the transformers and datasets packages.
# NOTE(review): versions are not pinned, so results may differ between runs as releases change.
! pip install transformers datasets

## Data Preprocessing - SciTLDR

In [None]:
%%bash

# Run make_datafiles.sh once per SciTLDR task. The task name is handed to the script
# through the exported TASK environment variable.
cd biotldr/SciTLDR-Data
chmod +x make_datafiles.sh

for TASK in SciTLDR-A SciTLDR-AIC; do
  export TASK
  ./make_datafiles.sh
done

## Data Preprocessing - BioTLDR

In [None]:
%%bash

# Create the BioTLDR data folders from BioTLDR.csv, then run make_datafiles.sh once per
# task. The task name is handed to the script through the exported TASK variable.
cd biotldr/BioTLDR-Data

python create_data_folders.py BioTLDR.csv

chmod +x make_datafiles.sh

for TASK in BioTLDR-A BioTLDR-I BioTLDR-C BioTLDR-AIC; do
  export TASK
  ./make_datafiles.sh
done

## Baselines

In [None]:
%%bash

# -p makes the cell re-runnable: plain mkdir reports an error once biotldr/outputs exists.
mkdir -p biotldr/outputs
cd biotldr/
# Run the baseline script; the ROUGE cells below read *_baseline.hypo files from outputs/.
python scripts/baselines.py

In [None]:
%%bash 

# Run cal-rouge.py on the bart-large baseline hypotheses for task A against
# BioTLDR-A/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart-large_a_baseline.hypo BioTLDR-Data/BioTLDR-A/test.jsonl --workers 1

In [None]:
%%bash 

# Run cal-rouge.py on the bart-large baseline hypotheses for task I against
# BioTLDR-I/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart-large_i_baseline.hypo BioTLDR-Data/BioTLDR-I/test.jsonl --workers 1

In [None]:
%%bash 

# Run cal-rouge.py on the bart-large baseline hypotheses for task C against
# BioTLDR-C/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart-large_c_baseline.hypo BioTLDR-Data/BioTLDR-C/test.jsonl --workers 1

In [None]:
%%bash 

# Run cal-rouge.py on the bart-large baseline hypotheses for task AIC against
# BioTLDR-AIC/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart-large_aic_baseline.hypo BioTLDR-Data/BioTLDR-AIC/test.jsonl --workers 1

In [None]:
%%bash 

# Run cal-rouge.py on the bart-large-xsum baseline hypotheses for task A against
# BioTLDR-A/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart-large-xsum_a_baseline.hypo BioTLDR-Data/BioTLDR-A/test.jsonl --workers 1

In [None]:
%%bash 

# Run cal-rouge.py on the bart-large-xsum baseline hypotheses for task I against
# BioTLDR-I/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart-large-xsum_i_baseline.hypo BioTLDR-Data/BioTLDR-I/test.jsonl --workers 1

In [None]:
%%bash 

# Run cal-rouge.py on the bart-large-xsum baseline hypotheses for task C against
# BioTLDR-C/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart-large-xsum_c_baseline.hypo BioTLDR-Data/BioTLDR-C/test.jsonl --workers 1

In [None]:
%%bash 

# Run cal-rouge.py on the bart-large-xsum baseline hypotheses for task AIC against
# BioTLDR-AIC/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart-large-xsum_aic_baseline.hypo BioTLDR-Data/BioTLDR-AIC/test.jsonl --workers 1

## Experiments - Domain Transfer

In [None]:
# Directory that receives the downloaded checkpoints. -p makes the cell re-runnable:
# plain mkdir reports an error once biotldr/models exists.
! mkdir -p biotldr/models

### BART-large ▶ fine-tuned on SciTLDR (abstract only):

In [None]:
# -n (no-clobber) skips the copy when the destination file already exists, so re-running
# the notebook does not download the 3.8 GiB checkpoint again.
# Delete the local file first to force a fresh download.
! gsutil cp -n gs://skiff-models/scitldr/bart.tldr-ao.pt biotldr/models/bart.tldr-ao.pt

Copying gs://skiff-models/scitldr/bart.tldr-ao.pt...
- [1 files][  3.8 GiB/  3.8 GiB]   50.1 MiB/s                                   
Operation completed over 1 objects/3.8 GiB.                                      


In [None]:
%%bash

cd biotldr/
# Copy SciTLDR-A-bin next to the BioTLDR-A data; the recorded output shows generate.py
# loading BioTLDR-Data/BioTLDR-A-bin as an archive.
# NOTE(review): if the destination already exists, cp -r nests the copy inside it
# (BioTLDR-A-bin/SciTLDR-A-bin/) instead of replacing it — confirm this is acceptable on re-runs.
cp -r SciTLDR-Data/SciTLDR-A-bin/ BioTLDR-Data/BioTLDR-A-bin/
# Generate with checkpoint bart.tldr-ao.pt from models/; the ROUGE cell below reads
# outputs/bart_ao_domain_transfer.hypo.
# --beam / --lenpen are presumably beam size and length penalty — verify in scripts/generate.py.
python scripts/generate.py models/ BioTLDR-Data/BioTLDR-A ./outputs/ --checkpoint_file bart.tldr-ao.pt --beam 6 --lenpen 1.0 --test_fname bart_ao_domain_transfer.hypo

loading archive file models/
loading archive file BioTLDR-Data/BioTLDR-A-bin
| [source] dictionary: 50264 types
| [target] dictionary: 50264 types
Time to run script: 59.78709292411804 sec


0it [00:00, ?it/s]32it [00:09,  3.21it/s]64it [00:17,  3.47it/s]96it [00:25,  3.56it/s]128it [00:33,  3.75it/s]152it [00:33,  4.56it/s]


In [None]:
%%bash 

# Run cal-rouge.py on the bart.tldr-ao domain-transfer hypotheses against
# BioTLDR-A/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart_ao_domain_transfer.hypo BioTLDR-Data/BioTLDR-A/test.jsonl --workers 1

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.16667 (95%-conf.int. 0.16667 - 0.16667)
1 ROUGE-1 Average_P: 0.41176 (95%-conf.int. 0.41176 - 0.41176)
1 ROUGE-1 Average_F: 0.23729 (95%-conf.int. 0.23729 - 0.23729)
---------------------------------------------
1 ROUGE-2 Average_R: 0.02439 (95%-conf.int. 0.02439 - 0.02439)
1 ROUGE-2 Average_P: 0.06250 (95%-conf.int. 0.06250 - 0.06250)
1 ROUGE-2 Average_F: 0.03509 (95%-conf.int. 0.03509 - 0.03509)
---------------------------------------------
1 ROUGE-L Average_R: 0.14286 (95%-conf.int. 0.14286 - 0.14286)
1 ROUGE-L Average_P: 0.35294 (95%-conf.int. 0.35294 - 0.35294)
1 ROUGE-L Average_F: 0.20339 (95%-conf.int. 0.20339 - 0.20339)

Elapsed time: 0.132 seconds
Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.21053 (95%-conf.int. 0.21053 - 0.21053)
1 ROUGE-1 Average_P: 0.38095 (95%-conf.in

  0%|          | 0/153 [00:00<?, ?it/s]  1%|          | 1/153 [00:00<00:20,  7.48it/s]  2%|▏         | 3/153 [00:00<00:18,  8.19it/s]  3%|▎         | 5/153 [00:00<00:16,  8.73it/s]  5%|▍         | 7/153 [00:00<00:15,  9.21it/s]  6%|▌         | 9/153 [00:00<00:14,  9.66it/s]  7%|▋         | 11/153 [00:01<00:14,  9.82it/s]  8%|▊         | 13/153 [00:01<00:13, 10.13it/s] 10%|▉         | 15/153 [00:01<00:13, 10.40it/s] 11%|█         | 17/153 [00:01<00:13, 10.38it/s] 12%|█▏        | 19/153 [00:01<00:12, 10.50it/s] 14%|█▎        | 21/153 [00:02<00:12, 10.51it/s] 15%|█▌        | 23/153 [00:02<00:12, 10.63it/s] 16%|█▋        | 25/153 [00:02<00:11, 10.78it/s] 18%|█▊        | 27/153 [00:02<00:11, 10.80it/s] 19%|█▉        | 29/153 [00:02<00:11, 10.79it/s] 20%|██        | 31/153 [00:02<00:11, 10.79it/s] 22%|██▏       | 33/153 [00:03<00:11, 10.69it/s] 23%|██▎       | 35/153 [00:03<00:10, 10.77it/s] 24%|██▍       | 37/153 [00:03<00:10, 10.79it/s] 25%|██▌       | 39/153 [00:03<00

### BART-large ▶ fine-tuned on SciTLDR (abstract, introduction, conclusion):

In [None]:
# -n (no-clobber) skips the copy when the destination file already exists, so re-running
# the notebook does not download the 3.8 GiB checkpoint again.
# Delete the local file first to force a fresh download.
! gsutil cp -n gs://skiff-models/scitldr/bart.tldr-aic.pt biotldr/models/bart.tldr-aic.pt

Copying gs://skiff-models/scitldr/bart.tldr-aic.pt...
| [1 files][  3.8 GiB/  3.8 GiB]   43.4 MiB/s                                   
Operation completed over 1 objects/3.8 GiB.                                      


In [None]:
%%bash

cd biotldr/
# Copy SciTLDR-AIC-bin next to the BioTLDR-AIC data; the recorded output shows generate.py
# loading BioTLDR-Data/BioTLDR-AIC-bin as an archive.
# NOTE(review): if the destination already exists, cp -r nests the copy inside it
# (BioTLDR-AIC-bin/SciTLDR-AIC-bin/) instead of replacing it — confirm this is acceptable on re-runs.
cp -r SciTLDR-Data/SciTLDR-AIC-bin/ BioTLDR-Data/BioTLDR-AIC-bin/
# Generate with checkpoint bart.tldr-aic.pt from models/; the ROUGE cell below reads
# outputs/bart_aic_domain_transfer.hypo.
# --beam / --lenpen are presumably beam size and length penalty — verify in scripts/generate.py.
python scripts/generate.py models/ BioTLDR-Data/BioTLDR-AIC ./outputs/ --checkpoint_file bart.tldr-aic.pt --beam 2 --lenpen 0.2 --test_fname bart_aic_domain_transfer.hypo

loading archive file models/
loading archive file BioTLDR-Data/BioTLDR-AIC-bin
| [source] dictionary: 50264 types
| [target] dictionary: 50264 types
Time to run script: 46.255047082901 sec


0it [00:00, ?it/s]32it [00:05,  5.39it/s]64it [00:11,  5.44it/s]96it [00:17,  5.50it/s]128it [00:22,  5.60it/s]151it [00:22,  6.61it/s]


In [None]:
%%bash 

# Run cal-rouge.py on the bart.tldr-aic domain-transfer hypotheses against
# BioTLDR-AIC/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart_aic_domain_transfer.hypo BioTLDR-Data/BioTLDR-AIC/test.jsonl --workers 1

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.16667 (95%-conf.int. 0.16667 - 0.16667)
1 ROUGE-1 Average_P: 0.33333 (95%-conf.int. 0.33333 - 0.33333)
1 ROUGE-1 Average_F: 0.22222 (95%-conf.int. 0.22222 - 0.22222)
---------------------------------------------
1 ROUGE-2 Average_R: 0.02439 (95%-conf.int. 0.02439 - 0.02439)
1 ROUGE-2 Average_P: 0.05000 (95%-conf.int. 0.05000 - 0.05000)
1 ROUGE-2 Average_F: 0.03279 (95%-conf.int. 0.03279 - 0.03279)
---------------------------------------------
1 ROUGE-L Average_R: 0.09524 (95%-conf.int. 0.09524 - 0.09524)
1 ROUGE-L Average_P: 0.19048 (95%-conf.int. 0.19048 - 0.19048)
1 ROUGE-L Average_F: 0.12699 (95%-conf.int. 0.12699 - 0.12699)

Elapsed time: 0.094 seconds
Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.21053 (95%-conf.int. 0.21053 - 0.21053)
1 ROUGE-1 Average_P: 0.34783 (95%-conf.in

  0%|          | 0/152 [00:00<?, ?it/s]  1%|▏         | 2/152 [00:00<00:14, 10.28it/s]  3%|▎         | 4/152 [00:00<00:14, 10.33it/s]  4%|▍         | 6/152 [00:00<00:13, 10.43it/s]  5%|▍         | 7/152 [00:00<00:14, 10.26it/s]  6%|▌         | 9/152 [00:00<00:13, 10.40it/s]  7%|▋         | 11/152 [00:01<00:13, 10.47it/s]  9%|▊         | 13/152 [00:01<00:13, 10.47it/s] 10%|▉         | 15/152 [00:01<00:13, 10.49it/s] 11%|█         | 17/152 [00:01<00:12, 10.43it/s] 12%|█▎        | 19/152 [00:01<00:12, 10.54it/s] 14%|█▍        | 21/152 [00:01<00:12, 10.64it/s] 15%|█▌        | 23/152 [00:02<00:12, 10.72it/s] 16%|█▋        | 25/152 [00:02<00:11, 10.77it/s] 18%|█▊        | 27/152 [00:02<00:11, 10.70it/s] 19%|█▉        | 29/152 [00:02<00:11, 10.55it/s] 20%|██        | 31/152 [00:02<00:11, 10.61it/s] 22%|██▏       | 33/152 [00:03<00:11, 10.65it/s] 23%|██▎       | 35/152 [00:03<00:11, 10.56it/s] 24%|██▍       | 37/152 [00:03<00:10, 10.60it/s] 26%|██▌       | 39/152 [00:03<00

### BART-large ▶ fine-tuned on XSum ▶ fine-tuned on SciTLDR (abstract only):


In [None]:
# -n (no-clobber) skips the copy when the destination file already exists, so re-running
# the notebook does not download the 3.8 GiB checkpoint again.
# Delete the local file first to force a fresh download.
! gsutil cp -n gs://skiff-models/scitldr/bart-xsum.tldr-ao.pt biotldr/models/bart-xsum.tldr-ao.pt

Copying gs://skiff-models/scitldr/bart-xsum.tldr-ao.pt...
- [1 files][  3.8 GiB/  3.8 GiB]   66.3 MiB/s                                   
Operation completed over 1 objects/3.8 GiB.                                      


In [None]:
%%bash

cd biotldr/
# Same copy as in the earlier bart.tldr-ao cell.
# NOTE(review): on a top-to-bottom run the destination already exists at this point, so
# cp -r nests the copy as BioTLDR-A-bin/SciTLDR-A-bin/ — confirm whether this line is still needed.
cp -r SciTLDR-Data/SciTLDR-A-bin/ BioTLDR-Data/BioTLDR-A-bin/
# Generate with checkpoint bart-xsum.tldr-ao.pt from models/; the ROUGE cell below reads
# outputs/bart_xsum_ao_domain_transfer.hypo.
# --beam / --lenpen are presumably beam size and length penalty — verify in scripts/generate.py.
python scripts/generate.py models/ BioTLDR-Data/BioTLDR-A ./outputs/ --checkpoint_file bart-xsum.tldr-ao.pt --beam 2 --lenpen 0.8 --test_fname bart_xsum_ao_domain_transfer.hypo

loading archive file models/
loading archive file BioTLDR-Data/BioTLDR-A-bin
| [source] dictionary: 50264 types
| [target] dictionary: 50264 types
Time to run script: 39.34453225135803 sec


0it [00:00, ?it/s]32it [00:04,  6.82it/s]64it [00:08,  7.44it/s]96it [00:11,  7.69it/s]128it [00:15,  8.21it/s]152it [00:15, 10.00it/s]


In [None]:
%%bash 

# Run cal-rouge.py on the bart-xsum.tldr-ao domain-transfer hypotheses against
# BioTLDR-A/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart_xsum_ao_domain_transfer.hypo BioTLDR-Data/BioTLDR-A/test.jsonl --workers 1

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.19048 (95%-conf.int. 0.19048 - 0.19048)
1 ROUGE-1 Average_P: 0.33333 (95%-conf.int. 0.33333 - 0.33333)
1 ROUGE-1 Average_F: 0.24243 (95%-conf.int. 0.24243 - 0.24243)
---------------------------------------------
1 ROUGE-2 Average_R: 0.02439 (95%-conf.int. 0.02439 - 0.02439)
1 ROUGE-2 Average_P: 0.04348 (95%-conf.int. 0.04348 - 0.04348)
1 ROUGE-2 Average_F: 0.03125 (95%-conf.int. 0.03125 - 0.03125)
---------------------------------------------
1 ROUGE-L Average_R: 0.09524 (95%-conf.int. 0.09524 - 0.09524)
1 ROUGE-L Average_P: 0.16667 (95%-conf.int. 0.16667 - 0.16667)
1 ROUGE-L Average_F: 0.12121 (95%-conf.int. 0.12121 - 0.12121)

Elapsed time: 0.095 seconds
Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.21053 (95%-conf.int. 0.21053 - 0.21053)
1 ROUGE-1 Average_P: 0.34783 (95%-conf.in

  0%|          | 0/153 [00:00<?, ?it/s]  1%|▏         | 2/153 [00:00<00:14, 10.68it/s]  3%|▎         | 4/153 [00:00<00:13, 10.71it/s]  4%|▍         | 6/153 [00:00<00:13, 10.58it/s]  5%|▌         | 8/153 [00:00<00:13, 10.65it/s]  7%|▋         | 10/153 [00:00<00:13, 10.72it/s]  8%|▊         | 12/153 [00:01<00:13, 10.83it/s]  9%|▉         | 14/153 [00:01<00:12, 10.82it/s] 10%|█         | 16/153 [00:01<00:12, 10.89it/s] 12%|█▏        | 18/153 [00:01<00:12, 10.73it/s] 13%|█▎        | 20/153 [00:01<00:12, 10.81it/s] 14%|█▍        | 22/153 [00:02<00:12, 10.84it/s] 16%|█▌        | 24/153 [00:02<00:11, 10.88it/s] 17%|█▋        | 26/153 [00:02<00:11, 10.90it/s] 18%|█▊        | 28/153 [00:02<00:11, 10.66it/s] 20%|█▉        | 30/153 [00:02<00:11, 10.76it/s] 21%|██        | 32/153 [00:02<00:11, 10.87it/s] 22%|██▏       | 34/153 [00:03<00:10, 10.87it/s] 24%|██▎       | 36/153 [00:03<00:10, 10.89it/s] 25%|██▍       | 38/153 [00:03<00:10, 10.97it/s] 26%|██▌       | 40/153 [00:03<0

### BART-large ▶ fine-tuned on XSum ▶ fine-tuned on SciTLDR (abstract, introduction, conclusion):

In [None]:
# -n (no-clobber) skips the copy when the destination file already exists, so re-running
# the notebook does not download the 3.8 GiB checkpoint again.
# Delete the local file first to force a fresh download.
! gsutil cp -n gs://skiff-models/scitldr/bart-xsum.tldr-aic.pt biotldr/models/bart-xsum.tldr-aic.pt

Copying gs://skiff-models/scitldr/bart-xsum.tldr-aic.pt...
\ [1 files][  3.8 GiB/  3.8 GiB]   91.4 MiB/s                                   
Operation completed over 1 objects/3.8 GiB.                                      


In [None]:
%%bash

cd biotldr/
# Same copy as in the earlier bart.tldr-aic cell.
# NOTE(review): on a top-to-bottom run the destination already exists at this point, so
# cp -r nests the copy as BioTLDR-AIC-bin/SciTLDR-AIC-bin/ — confirm whether this line is still needed.
cp -r SciTLDR-Data/SciTLDR-AIC-bin/ BioTLDR-Data/BioTLDR-AIC-bin/
# Generate with checkpoint bart-xsum.tldr-aic.pt from models/; the ROUGE cell below reads
# outputs/bart_xsum_aic_domain_transfer.hypo.
# --beam / --lenpen are presumably beam size and length penalty — verify in scripts/generate.py.
python scripts/generate.py models/ BioTLDR-Data/BioTLDR-AIC ./outputs/ --checkpoint_file bart-xsum.tldr-aic.pt --beam 5 --lenpen 0.8 --test_fname bart_xsum_aic_domain_transfer.hypo

loading archive file models/
loading archive file BioTLDR-Data/BioTLDR-AIC-bin
| [source] dictionary: 50264 types
| [target] dictionary: 50264 types
Time to run script: 68.50383019447327 sec


0it [00:00, ?it/s]32it [00:10,  3.09it/s]64it [00:20,  3.10it/s]96it [00:30,  3.10it/s]128it [00:40,  3.13it/s]151it [00:40,  3.69it/s]


In [None]:
%%bash 

# Run cal-rouge.py on the bart-xsum.tldr-aic domain-transfer hypotheses against
# BioTLDR-AIC/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/bart_xsum_aic_domain_transfer.hypo BioTLDR-Data/BioTLDR-AIC/test.jsonl --workers 1

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.16667 (95%-conf.int. 0.16667 - 0.16667)
1 ROUGE-1 Average_P: 0.41176 (95%-conf.int. 0.41176 - 0.41176)
1 ROUGE-1 Average_F: 0.23729 (95%-conf.int. 0.23729 - 0.23729)
---------------------------------------------
1 ROUGE-2 Average_R: 0.02439 (95%-conf.int. 0.02439 - 0.02439)
1 ROUGE-2 Average_P: 0.06250 (95%-conf.int. 0.06250 - 0.06250)
1 ROUGE-2 Average_F: 0.03509 (95%-conf.int. 0.03509 - 0.03509)
---------------------------------------------
1 ROUGE-L Average_R: 0.14286 (95%-conf.int. 0.14286 - 0.14286)
1 ROUGE-L Average_P: 0.35294 (95%-conf.int. 0.35294 - 0.35294)
1 ROUGE-L Average_F: 0.20339 (95%-conf.int. 0.20339 - 0.20339)

Elapsed time: 0.085 seconds
Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.13158 (95%-conf.int. 0.13158 - 0.13158)
1 ROUGE-1 Average_P: 0.27778 (95%-conf.in

  0%|          | 0/152 [00:00<?, ?it/s]  1%|▏         | 2/152 [00:00<00:13, 11.27it/s]  3%|▎         | 4/152 [00:00<00:12, 11.44it/s]  4%|▍         | 6/152 [00:00<00:12, 11.53it/s]  5%|▌         | 8/152 [00:00<00:12, 11.46it/s]  7%|▋         | 10/152 [00:00<00:12, 11.61it/s]  8%|▊         | 12/152 [00:01<00:11, 11.69it/s]  9%|▉         | 14/152 [00:01<00:11, 11.77it/s] 11%|█         | 16/152 [00:01<00:11, 11.80it/s] 12%|█▏        | 18/152 [00:01<00:11, 11.76it/s] 13%|█▎        | 20/152 [00:01<00:11, 11.66it/s] 14%|█▍        | 22/152 [00:01<00:11, 11.76it/s] 16%|█▌        | 24/152 [00:02<00:10, 11.82it/s] 17%|█▋        | 26/152 [00:02<00:10, 11.88it/s] 18%|█▊        | 28/152 [00:02<00:10, 11.88it/s] 20%|█▉        | 30/152 [00:02<00:10, 11.84it/s] 21%|██        | 32/152 [00:02<00:10, 11.73it/s] 22%|██▏       | 34/152 [00:02<00:10, 11.79it/s] 24%|██▎       | 36/152 [00:03<00:09, 11.88it/s] 25%|██▌       | 38/152 [00:03<00:09, 11.89it/s] 26%|██▋       | 40/152 [00:03<0

### BART-large ▶ fine-tuned on SciTLDR using CATTS framework (abstract only):

In [None]:
# -n (no-clobber) skips the copy when the destination file already exists, so re-running
# the notebook does not download the 3.8 GiB checkpoint again.
# Delete the local file first to force a fresh download.
! gsutil cp -n gs://skiff-models/scitldr/catts.tldr-ao.pt biotldr/models/catts.tldr-ao.pt

Copying gs://skiff-models/scitldr/catts.tldr-ao.pt...
| [1 files][  3.8 GiB/  3.8 GiB]   47.0 MiB/s                                   
Operation completed over 1 objects/3.8 GiB.                                      


In [None]:
%%bash

cd biotldr/
# Copy SciTLDR-A/ctrl-bin into the BioTLDR-A folder; the recorded output shows generate.py
# loading BioTLDR-Data/BioTLDR-A/ctrl-bin as an archive.
# NOTE(review): if the destination already exists, cp -r nests the copy inside it
# (ctrl-bin/ctrl-bin/) instead of replacing it — confirm this is acceptable on re-runs.
cp -r SciTLDR-Data/SciTLDR-A/ctrl-bin/ BioTLDR-Data/BioTLDR-A/ctrl-bin/
# Generate with checkpoint catts.tldr-ao.pt from models/; the ROUGE cell below reads
# outputs/catts_ao_domain_transfer.hypo.
# --beam / --lenpen are presumably beam size and length penalty — verify in scripts/generate.py.
python scripts/generate.py models/ BioTLDR-Data/BioTLDR-A/ctrl ./outputs/ --checkpoint_file catts.tldr-ao.pt --beam 2 --lenpen 0.4 --test_fname catts_ao_domain_transfer.hypo

loading archive file models/
loading archive file BioTLDR-Data/BioTLDR-A/ctrl-bin
| [source] dictionary: 50264 types
| [target] dictionary: 50264 types
Time to run script: 41.834041118621826 sec


0B [00:00, ?B/s]4303B [00:00, 21893.48B/s]56709B [00:00, 30255.01B/s]126260B [00:00, 41686.44B/s]297351B [00:00, 58395.43B/s]613571B [00:00, 82117.89B/s]1042301B [00:01, 1017566.28B/s]
0B [00:00, ?B/s]5593B [00:00, 29231.28B/s]57955B [00:00, 39913.45B/s]127411B [00:00, 54402.86B/s]284054B [00:00, 75575.96B/s]440665B [00:00, 103822.33B/s]456318B [00:00, 472602.10B/s]
0it [00:00, ?it/s]32it [00:04,  6.81it/s]64it [00:08,  7.44it/s]96it [00:11,  7.70it/s]128it [00:15,  8.28it/s]152it [00:15, 10.09it/s]


In [61]:
%%bash 

# Run cal-rouge.py on the catts.tldr-ao domain-transfer hypotheses against
# BioTLDR-A/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/catts_ao_domain_transfer.hypo BioTLDR-Data/BioTLDR-A/test.jsonl --workers 1

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.19048 (95%-conf.int. 0.19048 - 0.19048)
1 ROUGE-1 Average_P: 0.33333 (95%-conf.int. 0.33333 - 0.33333)
1 ROUGE-1 Average_F: 0.24243 (95%-conf.int. 0.24243 - 0.24243)
---------------------------------------------
1 ROUGE-2 Average_R: 0.02439 (95%-conf.int. 0.02439 - 0.02439)
1 ROUGE-2 Average_P: 0.04348 (95%-conf.int. 0.04348 - 0.04348)
1 ROUGE-2 Average_F: 0.03125 (95%-conf.int. 0.03125 - 0.03125)
---------------------------------------------
1 ROUGE-L Average_R: 0.09524 (95%-conf.int. 0.09524 - 0.09524)
1 ROUGE-L Average_P: 0.16667 (95%-conf.int. 0.16667 - 0.16667)
1 ROUGE-L Average_F: 0.12121 (95%-conf.int. 0.12121 - 0.12121)

Elapsed time: 0.101 seconds
Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.21053 (95%-conf.int. 0.21053 - 0.21053)
1 ROUGE-1 Average_P: 0.34783 (95%-conf.in

  0%|          | 0/153 [00:00<?, ?it/s]  1%|          | 1/153 [00:00<00:15,  9.71it/s]  2%|▏         | 3/153 [00:00<00:14, 10.00it/s]  3%|▎         | 5/153 [00:00<00:14, 10.19it/s]  5%|▍         | 7/153 [00:00<00:14, 10.42it/s]  5%|▌         | 8/153 [00:00<00:14, 10.26it/s]  7%|▋         | 10/153 [00:00<00:13, 10.38it/s]  8%|▊         | 12/153 [00:01<00:13, 10.58it/s]  9%|▉         | 14/153 [00:01<00:13, 10.67it/s] 10%|█         | 16/153 [00:01<00:12, 10.81it/s] 12%|█▏        | 18/153 [00:01<00:12, 10.75it/s] 13%|█▎        | 20/153 [00:01<00:12, 10.70it/s] 14%|█▍        | 22/153 [00:02<00:12, 10.69it/s] 16%|█▌        | 24/153 [00:02<00:12, 10.75it/s] 17%|█▋        | 26/153 [00:02<00:11, 10.75it/s] 18%|█▊        | 28/153 [00:02<00:11, 10.79it/s] 20%|█▉        | 30/153 [00:02<00:11, 10.75it/s] 21%|██        | 32/153 [00:02<00:11, 10.82it/s] 22%|██▏       | 34/153 [00:03<00:11, 10.81it/s] 24%|██▎       | 36/153 [00:03<00:10, 10.84it/s] 25%|██▍       | 38/153 [00:03<00

### BART-large ▶ fine-tuned on SciTLDR using CATTS framework (abstract, introduction, conclusion):

In [None]:
# -n (no-clobber) skips the copy when the destination file already exists, so re-running
# the notebook does not download the 3.8 GiB checkpoint again.
# Delete the local file first to force a fresh download.
! gsutil cp -n gs://skiff-models/scitldr/catts.tldr-aic.pt biotldr/models/catts.tldr-aic.pt

Copying gs://skiff-models/scitldr/catts.tldr-aic.pt...
\ [1 files][  3.8 GiB/  3.8 GiB]   23.2 MiB/s                                   
Operation completed over 1 objects/3.8 GiB.                                      


In [None]:
%%bash

cd biotldr/
# Copy SciTLDR-AIC/ctrl-bin into the BioTLDR-AIC folder; the recorded output shows generate.py
# loading BioTLDR-Data/BioTLDR-AIC/ctrl-bin as an archive.
# NOTE(review): if the destination already exists, cp -r nests the copy inside it
# (ctrl-bin/ctrl-bin/) instead of replacing it — confirm this is acceptable on re-runs.
cp -r SciTLDR-Data/SciTLDR-AIC/ctrl-bin/ BioTLDR-Data/BioTLDR-AIC/ctrl-bin/
# Generate with checkpoint catts.tldr-aic.pt from models/; the ROUGE cell below reads
# outputs/catts_aic_domain_transfer.hypo.
# --beam / --lenpen are presumably beam size and length penalty — verify in scripts/generate.py.
python scripts/generate.py models/ BioTLDR-Data/BioTLDR-AIC/ctrl ./outputs/ --checkpoint_file catts.tldr-aic.pt --beam 4 --lenpen 0.4 --test_fname catts_aic_domain_transfer.hypo

loading archive file models/
loading archive file BioTLDR-Data/BioTLDR-AIC/ctrl-bin
| [source] dictionary: 50264 types
| [target] dictionary: 50264 types
Time to run script: 58.41555333137512 sec


0it [00:00, ?it/s]32it [00:08,  3.75it/s]64it [00:16,  3.78it/s]96it [00:24,  3.84it/s]128it [00:32,  3.91it/s]151it [00:32,  4.62it/s]


In [62]:
%%bash 

# Run cal-rouge.py on the catts.tldr-aic domain-transfer hypotheses against
# BioTLDR-AIC/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/catts_aic_domain_transfer.hypo BioTLDR-Data/BioTLDR-AIC/test.jsonl --workers 1

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.19048 (95%-conf.int. 0.19048 - 0.19048)
1 ROUGE-1 Average_P: 0.33333 (95%-conf.int. 0.33333 - 0.33333)
1 ROUGE-1 Average_F: 0.24243 (95%-conf.int. 0.24243 - 0.24243)
---------------------------------------------
1 ROUGE-2 Average_R: 0.02439 (95%-conf.int. 0.02439 - 0.02439)
1 ROUGE-2 Average_P: 0.04348 (95%-conf.int. 0.04348 - 0.04348)
1 ROUGE-2 Average_F: 0.03125 (95%-conf.int. 0.03125 - 0.03125)
---------------------------------------------
1 ROUGE-L Average_R: 0.09524 (95%-conf.int. 0.09524 - 0.09524)
1 ROUGE-L Average_P: 0.16667 (95%-conf.int. 0.16667 - 0.16667)
1 ROUGE-L Average_F: 0.12121 (95%-conf.int. 0.12121 - 0.12121)

Elapsed time: 0.094 seconds
Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.07895 (95%-conf.int. 0.07895 - 0.07895)
1 ROUGE-1 Average_P: 0.33333 (95%-conf.in

  0%|          | 0/152 [00:00<?, ?it/s]  1%|▏         | 2/152 [00:00<00:14, 10.58it/s]  3%|▎         | 4/152 [00:00<00:13, 10.64it/s]  4%|▍         | 6/152 [00:00<00:13, 10.65it/s]  5%|▌         | 8/152 [00:00<00:13, 10.71it/s]  7%|▋         | 10/152 [00:00<00:13, 10.80it/s]  8%|▊         | 12/152 [00:01<00:12, 10.82it/s]  9%|▉         | 14/152 [00:01<00:12, 10.79it/s] 11%|█         | 16/152 [00:01<00:12, 10.72it/s] 12%|█▏        | 18/152 [00:01<00:12, 10.79it/s] 13%|█▎        | 20/152 [00:01<00:12, 10.85it/s] 14%|█▍        | 22/152 [00:02<00:12, 10.74it/s] 16%|█▌        | 24/152 [00:02<00:11, 10.68it/s] 17%|█▋        | 26/152 [00:02<00:11, 10.59it/s] 18%|█▊        | 28/152 [00:02<00:11, 10.43it/s] 20%|█▉        | 30/152 [00:02<00:11, 10.48it/s] 21%|██        | 32/152 [00:03<00:11, 10.50it/s] 22%|██▏       | 34/152 [00:03<00:11, 10.54it/s] 24%|██▎       | 36/152 [00:03<00:10, 10.59it/s] 25%|██▌       | 38/152 [00:03<00:10, 10.43it/s] 26%|██▋       | 40/152 [00:03<0

### BART-large ▶ fine-tuned on XSum ▶ fine-tuned on SciTLDR using CATTS framework (abstract only):

In [None]:
# -n (no-clobber) skips the copy when the destination file already exists, so re-running
# the notebook does not download the 3.8 GiB checkpoint again.
# Delete the local file first to force a fresh download.
! gsutil cp -n gs://skiff-models/scitldr/catts-xsum.tldr-ao.pt biotldr/models/catts-xsum.tldr-ao.pt

Copying gs://skiff-models/scitldr/catts-xsum.tldr-ao.pt...
- [1 files][  3.8 GiB/  3.8 GiB]   45.7 MiB/s                                   
Operation completed over 1 objects/3.8 GiB.                                      


In [None]:
%%bash

cd biotldr/
# Same copy as in the earlier catts.tldr-ao cell.
# NOTE(review): on a top-to-bottom run the destination already exists at this point, so
# cp -r nests the copy as ctrl-bin/ctrl-bin/ — confirm whether this line is still needed.
cp -r SciTLDR-Data/SciTLDR-A/ctrl-bin/ BioTLDR-Data/BioTLDR-A/ctrl-bin/
# Generate with checkpoint catts-xsum.tldr-ao.pt from models/; the ROUGE cell below reads
# outputs/catts_xsum_ao_domain_transfer.hypo.
# --beam / --lenpen are presumably beam size and length penalty — verify in scripts/generate.py.
python scripts/generate.py models/ BioTLDR-Data/BioTLDR-A/ctrl ./outputs/ --checkpoint_file catts-xsum.tldr-ao.pt --beam 4 --lenpen 0.2 --test_fname catts_xsum_ao_domain_transfer.hypo

loading archive file models/
loading archive file BioTLDR-Data/BioTLDR-A/ctrl-bin
| [source] dictionary: 50264 types
| [target] dictionary: 50264 types
Time to run script: 49.01143288612366 sec


0it [00:00, ?it/s]32it [00:07,  4.44it/s]64it [00:12,  4.81it/s]96it [00:18,  4.92it/s]128it [00:23,  5.23it/s]152it [00:23,  6.34it/s]


In [63]:
%%bash 

# Run cal-rouge.py on the catts-xsum.tldr-ao domain-transfer hypotheses against
# BioTLDR-A/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/catts_xsum_ao_domain_transfer.hypo BioTLDR-Data/BioTLDR-A/test.jsonl --workers 1

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.19048 (95%-conf.int. 0.19048 - 0.19048)
1 ROUGE-1 Average_P: 0.33333 (95%-conf.int. 0.33333 - 0.33333)
1 ROUGE-1 Average_F: 0.24243 (95%-conf.int. 0.24243 - 0.24243)
---------------------------------------------
1 ROUGE-2 Average_R: 0.02439 (95%-conf.int. 0.02439 - 0.02439)
1 ROUGE-2 Average_P: 0.04348 (95%-conf.int. 0.04348 - 0.04348)
1 ROUGE-2 Average_F: 0.03125 (95%-conf.int. 0.03125 - 0.03125)
---------------------------------------------
1 ROUGE-L Average_R: 0.09524 (95%-conf.int. 0.09524 - 0.09524)
1 ROUGE-L Average_P: 0.16667 (95%-conf.int. 0.16667 - 0.16667)
1 ROUGE-L Average_F: 0.12121 (95%-conf.int. 0.12121 - 0.12121)

Elapsed time: 0.097 seconds
Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.13158 (95%-conf.int. 0.13158 - 0.13158)
1 ROUGE-1 Average_P: 0.29412 (95%-conf.in

  0%|          | 0/153 [00:00<?, ?it/s]  1%|▏         | 2/153 [00:00<00:14, 10.54it/s]  2%|▏         | 3/153 [00:00<00:14, 10.37it/s]  3%|▎         | 5/153 [00:00<00:14, 10.39it/s]  5%|▍         | 7/153 [00:00<00:13, 10.47it/s]  6%|▌         | 9/153 [00:00<00:13, 10.62it/s]  7%|▋         | 11/153 [00:01<00:13, 10.61it/s]  8%|▊         | 13/153 [00:01<00:13, 10.75it/s] 10%|▉         | 15/153 [00:01<00:12, 10.67it/s] 11%|█         | 17/153 [00:01<00:12, 10.67it/s] 12%|█▏        | 19/153 [00:01<00:12, 10.81it/s] 14%|█▎        | 21/153 [00:01<00:12, 10.77it/s] 15%|█▌        | 23/153 [00:02<00:11, 10.85it/s] 16%|█▋        | 25/153 [00:02<00:11, 10.76it/s] 18%|█▊        | 27/153 [00:02<00:11, 10.81it/s] 19%|█▉        | 29/153 [00:02<00:11, 10.84it/s] 20%|██        | 31/153 [00:02<00:11, 10.84it/s] 22%|██▏       | 33/153 [00:03<00:11, 10.89it/s] 23%|██▎       | 35/153 [00:03<00:10, 10.90it/s] 24%|██▍       | 37/153 [00:03<00:10, 10.74it/s] 25%|██▌       | 39/153 [00:03<00

### BART-large ▶ fine-tuned on XSum ▶ fine-tuned on SciTLDR using CATTS framework (abstract, introduction, conclusion):

In [None]:
# -n (no-clobber) skips the copy when the destination file already exists, so re-running
# the notebook does not download the 3.8 GiB checkpoint again.
# Delete the local file first to force a fresh download.
! gsutil cp -n gs://skiff-models/scitldr/catts-xsum.tldr-aic.pt biotldr/models/catts-xsum.tldr-aic.pt

Copying gs://skiff-models/scitldr/catts-xsum.tldr-aic.pt...
- [1 files][  3.8 GiB/  3.8 GiB]   46.5 MiB/s                                   
Operation completed over 1 objects/3.8 GiB.                                      


In [None]:
%%bash

cd biotldr/
# Same copy as in the earlier catts.tldr-aic cell.
# NOTE(review): on a top-to-bottom run the destination already exists at this point, so
# cp -r nests the copy as ctrl-bin/ctrl-bin/ — confirm whether this line is still needed.
cp -r SciTLDR-Data/SciTLDR-AIC/ctrl-bin/ BioTLDR-Data/BioTLDR-AIC/ctrl-bin/
# Generate with checkpoint catts-xsum.tldr-aic.pt from models/; the ROUGE cell below reads
# outputs/catts_xsum_aic_domain_transfer.hypo.
# --beam / --lenpen are presumably beam size and length penalty — verify in scripts/generate.py.
python scripts/generate.py models/ BioTLDR-Data/BioTLDR-AIC/ctrl ./outputs/ --checkpoint_file catts-xsum.tldr-aic.pt --beam 2 --lenpen 0.2 --test_fname catts_xsum_aic_domain_transfer.hypo

loading archive file models/
loading archive file BioTLDR-Data/BioTLDR-AIC/ctrl-bin
| [source] dictionary: 50264 types
| [target] dictionary: 50264 types
Time to run script: 46.40085506439209 sec


0it [00:00, ?it/s]32it [00:05,  5.53it/s]64it [00:11,  5.57it/s]96it [00:16,  5.64it/s]128it [00:22,  5.74it/s]151it [00:22,  6.77it/s]


In [64]:
%%bash 

# Run cal-rouge.py on the catts-xsum.tldr-aic domain-transfer hypotheses against
# BioTLDR-AIC/test.jsonl, using a single worker.
cd biotldr/
python scripts/cal-rouge.py outputs/catts_xsum_aic_domain_transfer.hypo BioTLDR-Data/BioTLDR-AIC/test.jsonl --workers 1

Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.21429 (95%-conf.int. 0.21429 - 0.21429)
1 ROUGE-1 Average_P: 0.40909 (95%-conf.int. 0.40909 - 0.40909)
1 ROUGE-1 Average_F: 0.28125 (95%-conf.int. 0.28125 - 0.28125)
---------------------------------------------
1 ROUGE-2 Average_R: 0.02439 (95%-conf.int. 0.02439 - 0.02439)
1 ROUGE-2 Average_P: 0.04762 (95%-conf.int. 0.04762 - 0.04762)
1 ROUGE-2 Average_F: 0.03226 (95%-conf.int. 0.03226 - 0.03226)
---------------------------------------------
1 ROUGE-L Average_R: 0.11905 (95%-conf.int. 0.11905 - 0.11905)
1 ROUGE-L Average_P: 0.22727 (95%-conf.int. 0.22727 - 0.22727)
1 ROUGE-L Average_F: 0.15625 (95%-conf.int. 0.15625 - 0.15625)

Elapsed time: 0.097 seconds
Preparing documents... 0 line(s) ignored
Running ROUGE...
---------------------------------------------
1 ROUGE-1 Average_R: 0.07895 (95%-conf.int. 0.07895 - 0.07895)
1 ROUGE-1 Average_P: 0.60000 (95%-conf.in

  0%|          | 0/152 [00:00<?, ?it/s]  1%|▏         | 2/152 [00:00<00:14, 10.59it/s]  3%|▎         | 4/152 [00:00<00:13, 10.62it/s]  4%|▍         | 6/152 [00:00<00:13, 10.59it/s]  5%|▌         | 8/152 [00:00<00:13, 10.66it/s]  7%|▋         | 10/152 [00:00<00:13, 10.66it/s]  8%|▊         | 12/152 [00:01<00:13, 10.61it/s]  9%|▉         | 14/152 [00:01<00:12, 10.63it/s] 11%|█         | 16/152 [00:01<00:12, 10.69it/s] 12%|█▏        | 18/152 [00:01<00:12, 10.73it/s] 13%|█▎        | 20/152 [00:01<00:12, 10.74it/s] 14%|█▍        | 22/152 [00:02<00:12, 10.67it/s] 16%|█▌        | 24/152 [00:02<00:11, 10.76it/s] 17%|█▋        | 26/152 [00:02<00:11, 10.80it/s] 18%|█▊        | 28/152 [00:02<00:11, 10.79it/s] 20%|█▉        | 30/152 [00:02<00:11, 10.85it/s] 21%|██        | 32/152 [00:02<00:10, 10.93it/s] 22%|██▏       | 34/152 [00:03<00:10, 10.79it/s] 24%|██▎       | 36/152 [00:03<00:10, 10.82it/s] 25%|██▌       | 38/152 [00:03<00:10, 10.89it/s] 26%|██▋       | 40/152 [00:03<0

## Human Evaluation

In [None]:
# xlsxwriter is the engine passed to pd.ExcelWriter further down in this section.
# NOTE(review): xlwt is not referenced by any code visible in this notebook — confirm it is still needed.
! pip install xlsxwriter
! pip install xlwt



In [1]:
from os.path import join
import pandas as pd

def prepare_human_evaluation_sheet(gen_fname, data_dir, exp, excel_writer):
  """Write one sheet that lines up generated TLDRs with gold TLDRs and model inputs.

  Row i of the sheet holds line i of each file, with surrounding whitespace stripped,
  in the columns ``predicted``, ``gold`` and ``input``.

  Args:
    gen_fname: Path to the file of generated TLDRs, one per line.
    data_dir: Directory containing ``test.source`` (inputs) and ``test.target`` (gold TLDRs).
    exp: Name of the sheet to write.
    excel_writer: Target passed straight to ``DataFrame.to_excel`` (e.g. a ``pd.ExcelWriter``).

  Raises:
    ValueError: If the three files do not contain the same number of lines.
  """
  def read_stripped_lines(path):
    # Explicit UTF-8 so the text does not depend on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as file:
      return [line.strip() for line in file]

  generated_TLDRs = read_stripped_lines(gen_fname)
  inputs = read_stripped_lines(join(data_dir, 'test.source'))
  gold_TLDRs = read_stripped_lines(join(data_dir, 'test.target'))

  # Rows are paired purely by line number, so unequal lengths mean misaligned data.
  # Fail here with the counts instead of letting pandas raise a generic length error.
  if not (len(generated_TLDRs) == len(gold_TLDRs) == len(inputs)):
    raise ValueError(
        'Line counts differ for sheet %r: %d predicted (%s), %d gold, %d input (%s)'
        % (exp, len(generated_TLDRs), gen_fname, len(gold_TLDRs), len(inputs), data_dir))

  df = pd.DataFrame({
      'predicted': generated_TLDRs,
      'gold': gold_TLDRs,
      'input': inputs,
  })
  df.to_excel(excel_writer, sheet_name=exp, index=False)

In [None]:
# One entry per sheet: (hypothesis file, data directory holding test.source/test.target, sheet name).
human_eval_experiments = [
    ('biotldr/outputs/catts_ao_domain_transfer.hypo',
     'biotldr/BioTLDR-Data/BioTLDR-A/',
     'catts-ao'),
    ('biotldr/outputs/catts_aic_domain_transfer.hypo',
     'biotldr/BioTLDR-Data/BioTLDR-AIC/',
     'catts-aic'),
    ('biotldr/outputs/catts_xsum_ao_domain_transfer.hypo',
     'biotldr/BioTLDR-Data/BioTLDR-A/',
     'catts-xsum-ao'),
    ('biotldr/outputs/catts_xsum_aic_domain_transfer.hypo',
     'biotldr/BioTLDR-Data/BioTLDR-AIC/',
     'catts-xsum-aic'),
]

# ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0. The context manager
# closes the writer on exit, which is what writes human-evaluation.xlsx to disk.
with pd.ExcelWriter('human-evaluation.xlsx', engine='xlsxwriter') as writer:
  for gen_fname, data_dir, exp in human_eval_experiments:
    prepare_human_evaluation_sheet(gen_fname, data_dir, exp, writer)