# Load the generation results

## Original BART-SWiPE models

In [1]:
import pandas as pd

# BART-SWiPE: generated predictions, one frame per test set
# (SWiPE-clean test, then ASSET test).
df_bs_swipe_clean = pd.read_csv(
    filepath_or_buffer="../data/gen_predictions/predictions_bart-large-swipe-bart-tokenizer-fast-adjust-num-beams4-no-use-fast!_swipe-clean-test.csv",
)
df_bs_asset = pd.read_csv(
    filepath_or_buffer="../data/gen_predictions/predictions_bart-large-swipe_asset-test.csv",
)

# BART-SWiPE-cleaned: generated predictions on the same two test sets.
df_bs_clean_swipe_clean = pd.read_csv(
    filepath_or_buffer="../data/gen_predictions/predictions_bart-large-swipe-clean-bart-tokenizer-fast-adjust-num-beams4-no-use-fast!_swipe-clean-test.csv",
)
df_bs_clean_asset = pd.read_csv(
    filepath_or_buffer="../data/gen_predictions/predictions_bart-large-swipe-clean_asset-test.csv",
)

In [2]:
# Preview the first five rows of the BART-SWiPE predictions on SWiPE-clean.
df_bs_swipe_clean.head(n=5)

Unnamed: 0,text,prediction
0,The term jargon may have the following meaning...,Jargon is a word that can mean a few different...
1,"Russian (Russkij yazyk, Русский язык) is the m...","Russian (Russkij yazyk, Русский язык) is the m..."
2,"Great Britain, also called Britain, is an isla...","Great Britain, also called Britain, is an isla..."
3,"Transport, or transportation (as it is called ...","Transport, or transportation (as it is called ..."
4,Stockholm (help·info) (IPA: ['stɔkhɔlm]; UN/LO...,Stockholm is the capital city of Sweden. It ha...


In [11]:
# Preview the first five rows of the BART-SWiPE predictions on ASSET.
df_bs_asset.head(n=5)

Unnamed: 0,text,prediction
0,One side of the armed conflicts is composed ma...,"The Sudanese military and the Janjaweed, a Sud..."
1,"Jeddah is the principal gateway to Mecca, Isla...","Jeddah is the main gateway to Mecca, Islam's h..."
2,The Great Dark Spot is thought to represent a ...,The Great Dark Spot is thought to represent a ...
3,"His next work, Saturday, follows an especially...","In medicine, Saturday is the name of the next ..."
4,"The tarantula, the trickster character, spun a...","A tarantula, the trickster character, spun a b..."


## Fine-tuned BART models

In [40]:
import pandas as pd

# BART-SWiPE-ft (full): generated predictions on SWiPE-clean and ASSET.
# The SWiPE-full predictions are currently not loaded:
# df_bsft_full_swipe_full = pd.read_csv("../data/gen_predictions/predictions_bart-swipe-ft-full_swipe-full-test.csv")
df_bsft_full_swipe_clean = pd.read_csv(
    filepath_or_buffer="../data/gen_predictions/predictions_bart-swipe-ft-full_swipe-clean-test.csv",
)
df_bsft_full_asset = pd.read_csv(
    filepath_or_buffer="../data/gen_predictions/predictions_bart-swipe-ft-full_asset-test.csv",
)

# BART-SWiPE-ft (cleaned): generated predictions on the same two test sets.
df_bsft_clean_swipe_clean = pd.read_csv(
    filepath_or_buffer="../data/gen_predictions/predictions_bart-swipe-ft-clean-bart-tokenizer-512_swipe-clean-test.csv",
)
df_bsft_clean_asset = pd.read_csv(
    filepath_or_buffer="../data/gen_predictions/predictions_bart-swipe-ft-clean-bart-tokenizer-512_asset-test.csv",
)

# Load test datasets

## SWiPE - full

In [2]:
from datasets import Dataset, DatasetDict
import pandas as pd

# Read the SWiPE test file and keep only the four columns that go into the
# Hugging Face dataset.
test_id_df = pd.read_json(path_or_buf='../data/swipe/swipe_test_id.json')
test_id_dataset = Dataset.from_pandas(
    test_id_df.loc[:, ['r_content', 's_content', 'annotations', 'edits']]
)

# Expose it under a single 'test' split so the scoring cells can use
# dataset['test'] for every test set.
swipe_full_dataset = DatasetDict({'test': test_id_dataset})

swipe_full_dataset

DatasetDict({
    test: Dataset({
        features: ['r_content', 's_content', 'annotations', 'edits'],
        num_rows: 484
    })
})

## SWiPE - clean 

In [2]:
from datasets import load_from_disk, DatasetDict
import pandas as pd

# Load the cleaned SWiPE dataset from disk and re-expose its 'test_id' split
# under the key 'test', the key the scoring cells index with.
swipe_clean_dataset = DatasetDict(
    {'test': load_from_disk("../data/swipe_clean")['test_id']}
)
swipe_clean_dataset

DatasetDict({
    test: Dataset({
        features: ['r_content', 's_content'],
        num_rows: 483
    })
})

## ASSET

In [41]:
from datasets import load_dataset

# ASSET, "simplification" configuration; the scoring cells use its 'test' split.
asset_dataset = load_dataset(path="facebook/asset", name="simplification")
asset_dataset

DatasetDict({
    validation: Dataset({
        features: ['original', 'simplifications'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['original', 'simplifications'],
        num_rows: 359
    })
})

# Calculate scores

In [3]:
# Choose which (predictions, references) pair the scoring cells below work on.
# Both names must refer to the same test set: the SWiPE cells read the
# reference column 's_content', the ASSET cells read 'simplifications', so
# switch to an ASSET pair (e.g. df_bs_asset / asset_dataset) before running
# the ASSET cells.
df_gen, dataset = df_bs_swipe_clean, swipe_clean_dataset

## SARI scores - EASSE package

### SWiPE

In [4]:
from tqdm.notebook import tqdm
from easse.sari import corpus_sari


# Per-example SARI on a SWiPE-style test set.
#
# Every call to corpus_sari receives exactly one source, one system output and
# one reference, so each entry of `sari_scores_easse` is the score of a single
# example rather than a corpus-level figure.
#
# Predictions and references are paired by position. `df_gen` and `dataset`
# are selected by hand in an earlier cell, so validate the pairing up front:
# a mismatched pair (for example 483 SWiPE-clean predictions scored against
# the 484-row SWiPE-full set) would otherwise be scored silently misaligned.
test_split = dataset['test']
if 's_content' not in test_split.column_names:
    raise ValueError(
        "dataset['test'] has no 's_content' column "
        f"(found {test_split.column_names}); select a SWiPE dataset before running this cell."
    )
if len(df_gen) != len(test_split):
    raise ValueError(
        f"df_gen has {len(df_gen)} rows but dataset['test'] has {len(test_split)}; "
        "predictions and references are not from the same test set."
    )

sari_scores_easse = []

# zip pairs the i-th prediction row with the i-th reference row, independent
# of the DataFrame's index labels.
for (_, row), reference in tqdm(zip(df_gen.iterrows(), test_split), total=len(df_gen)):
    sari_scores_easse.append(
        corpus_sari(
            orig_sents=[row['text']],
            sys_sents=[row['prediction']],
            # One reference set containing the single simplified text.
            refs_sents=[[reference['s_content']]],
        )
    )

  0%|          | 0/483 [00:00<?, ?it/s]

### ASSET

In [43]:
from easse.sari import corpus_sari
# Imported here as well so the cell does not depend on the SWiPE cell having
# been run first.
from tqdm.notebook import tqdm

# Per-example SARI on ASSET.
#
# Every call to corpus_sari receives exactly one source and one system output,
# scored against all of that example's simplifications, so each entry of
# `sari_scores_easse` is the score of a single example.
#
# Predictions and references are paired by position. `df_gen` and `dataset`
# are selected by hand in an earlier cell, so validate the pairing up front
# instead of failing with a bare KeyError or scoring misaligned rows.
test_split = dataset['test']
if 'simplifications' not in test_split.column_names:
    raise ValueError(
        "dataset['test'] has no 'simplifications' column "
        f"(found {test_split.column_names}); set df_gen / dataset to an ASSET pair "
        "before running this cell."
    )
if len(df_gen) != len(test_split):
    raise ValueError(
        f"df_gen has {len(df_gen)} rows but dataset['test'] has {len(test_split)}; "
        "predictions and references are not from the same test set."
    )

sari_scores_easse = []

# zip pairs the i-th prediction row with the i-th reference row, independent
# of the DataFrame's index labels.
for (_, row), reference in tqdm(zip(df_gen.iterrows(), test_split), total=len(df_gen)):
    sari_scores_easse.append(
        corpus_sari(
            orig_sents=[row['text']],
            sys_sents=[row['prediction']],
            # Each simplification becomes its own one-sentence reference set.
            refs_sents=[[simp] for simp in reference['simplifications']],
        )
    )

  0%|          | 0/359 [00:00<?, ?it/s]

## Operation scores

### SWiPE

In [5]:
from easse.sari import get_corpus_sari_operation_scores
from tqdm.notebook import tqdm

# Per-example SARI operation scores (add / keep / delete) on a SWiPE-style
# test set. Each call scores one source, one system output and one reference,
# and the three returned values are appended to their respective lists.
#
# Predictions and references are paired by position. `df_gen` and `dataset`
# are selected by hand in an earlier cell, so validate the pairing up front:
# a mismatched pair would otherwise be scored silently misaligned.
test_split = dataset['test']
if 's_content' not in test_split.column_names:
    raise ValueError(
        "dataset['test'] has no 's_content' column "
        f"(found {test_split.column_names}); select a SWiPE dataset before running this cell."
    )
if len(df_gen) != len(test_split):
    raise ValueError(
        f"df_gen has {len(df_gen)} rows but dataset['test'] has {len(test_split)}; "
        "predictions and references are not from the same test set."
    )

add_scores = []
keep_scores = []
del_scores = []

# zip pairs the i-th prediction row with the i-th reference row, independent
# of the DataFrame's index labels.
for (_, row), reference in tqdm(zip(df_gen.iterrows(), test_split), total=len(df_gen)):
    add_score, keep_score, del_score = get_corpus_sari_operation_scores(
        orig_sents=[row['text']],
        sys_sents=[row['prediction']],
        # One reference set containing the single simplified text.
        refs_sents=[[reference['s_content']]],
    )

    add_scores.append(add_score)
    keep_scores.append(keep_score)
    del_scores.append(del_score)

  0%|          | 0/483 [00:00<?, ?it/s]

### ASSET

In [44]:
from easse.sari import get_corpus_sari_operation_scores
from tqdm.notebook import tqdm

# Per-example SARI operation scores (add / keep / delete) on ASSET. Each call
# scores one source and one system output against all of that example's
# simplifications, and the three returned values are appended to their lists.
#
# Predictions and references are paired by position. `df_gen` and `dataset`
# are selected by hand in an earlier cell, so validate the pairing up front
# instead of failing with a bare KeyError or scoring misaligned rows.
test_split = dataset['test']
if 'simplifications' not in test_split.column_names:
    raise ValueError(
        "dataset['test'] has no 'simplifications' column "
        f"(found {test_split.column_names}); set df_gen / dataset to an ASSET pair "
        "before running this cell."
    )
if len(df_gen) != len(test_split):
    raise ValueError(
        f"df_gen has {len(df_gen)} rows but dataset['test'] has {len(test_split)}; "
        "predictions and references are not from the same test set."
    )

add_scores = []
keep_scores = []
del_scores = []

# zip pairs the i-th prediction row with the i-th reference row, independent
# of the DataFrame's index labels.
for (_, row), reference in tqdm(zip(df_gen.iterrows(), test_split), total=len(df_gen)):
    add_score, keep_score, del_score = get_corpus_sari_operation_scores(
        orig_sents=[row['text']],
        sys_sents=[row['prediction']],
        # Each simplification becomes its own one-sentence reference set.
        refs_sents=[[simp] for simp in reference['simplifications']],
    )

    add_scores.append(add_score)
    keep_scores.append(keep_score)
    del_scores.append(del_score)

  0%|          | 0/359 [00:00<?, ?it/s]

# Save results

In [6]:
# Put the source text and prediction next to the per-example scores computed
# above. The score lists are attached positionally and must have one entry
# per row of df_gen.
df_results = df_gen[['text', 'prediction']].assign(
    sari_score_easse=sari_scores_easse,
    add_score=add_scores,
    keep_score=keep_scores,
    del_score=del_scores,
)

df_results.head()

Unnamed: 0,text,prediction,sari_score_easse,add_score,keep_score,del_score
0,The term jargon may have the following meaning...,Jargon is a word that can mean many different ...,38.241908,7.894737,10.0,96.830986
1,"Russian (Russkij yazyk, Русский язык) is the m...","Russian (Russkij yazyk, Русский язык) is the m...",48.181582,0.0,61.326832,83.217914
2,"Great Britain, also called Britain, is an isla...","Great Britain, also called Britain, is an isla...",38.613451,20.625,16.718774,78.49658
3,"Transport, or transportation (as it is called ...","Transport, or transportation (as it is called ...",35.605954,0.0,23.963654,82.854207
4,Stockholm (help·info) (IPA: ['stɔkhɔlm]; UN/LO...,Stockholm is the capital city of Sweden. It is...,52.413688,37.276316,26.177545,93.787203


In [7]:
# Write the scores without the DataFrame index.
# NOTE(review): the file name is hard-coded and appears to mirror the
# prediction file chosen as df_gen; update it by hand when df_gen changes.
df_results.to_csv(
    path_or_buf="../data/scores/scores_bart-large-swipe-bart-tokenizer-fast-adjust-num-beams4-no-use-fast!_swipe-clean-test.csv",
    index=False,
)