In [None]:
! pip install py-readability-metrics

import nltk
from readability import Readability

nltk.download('punkt_tab')

In [None]:
! pip install transformers

In [None]:
%%bash
# Abort on the first failing command: otherwise a failed clone/cd would let
# `pip install .` run against whatever happens to be in the current directory.
set -euo pipefail
# Clone only when the checkout is missing so the cell can be re-run.
[ -d bleurt ] || git clone https://github.com/google-research/bleurt.git
cd bleurt
pip install .

In [None]:
!wget https://storage.googleapis.com/bleurt-oss-21/BLEURT-20.zip .
!unzip BLEURT-20.zip

In [None]:
from transformers import AutoTokenizer, pipeline

In [None]:
# Ordinal rank of each CEFR label (A1 -> 0 ... C2 -> 5) so that predicted
# labels can be averaged numerically.
cefr_map = {
    level: rank
    for rank, level in enumerate(('A1', 'A2', 'B1', 'B2', 'C1', 'C2'))
}

# Text-classification pipeline; its predicted 'label' values are looked up in
# cefr_map by get_stats.
cefr_labeler = pipeline(
    task="text-classification",
    model="AbdullahBarayan/ModernBERT-base-doc_sent_en-Cefr",
    batch_size=16,
)

In [None]:
# Fetch the EASSE sources (provides the `sari` module imported later); the
# clone is created as ./easse.
! git clone https://github.com/feralvam/easse.git

In [None]:
# Move the top-level entries of the clone into the working directory, then
# install the project found there in editable mode.
# NOTE(review): `*` does not match dot-files, and the repository's own `easse`
# sub-directory presumably cannot be moved onto ./easse itself, so both likely
# stay behind inside ./easse -- confirm.
! mv easse/* .
! pip install -e .

In [None]:
# Move/rename the remaining ./easse directory to `eaase2` so the name `easse`
# becomes free in the working directory.
!mv easse eaase2/

In [None]:
# Bring `eaase2/easse` back as ./easse. Later cells import `easse` and read
# files under /content/easse/resources/..., so this final layout is relied on.
# NOTE(review): assumes the working directory is /content -- confirm.
!mv eaase2/easse .

In [None]:
from easse import sari

In [None]:
! git clone https://github.com/ZurichNLP/BLESS.git

In [None]:
import pickle

In [None]:
! git clone https://github.com/rstodden/DEPlain.git

In [None]:
import csv

# Parse the test split once and derive both columns from the same rows
# (previously the file was opened and parsed twice).
# newline='' is what the csv module expects so that embedded line breaks are
# handled by the reader; the explicit encoding avoids depending on the locale.
with open('/content/DEPlain/E__Sentence-level_Corpus/DEplain-web-sent/manual/open/test.csv',
          newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # skip the header row
    deplain_rows = list(reader)

# NOTE(review): column 0 is used as source and column 1 as reference --
# confirm against the CSV header.
deplain_src = [row[0] for row in deplain_rows]
deplain_ref = [row[1] for row in deplain_rows]

# NOTE: unpickling can execute arbitrary code; only load files you produced
# yourself. The context manager closes the handle after loading.
with open('/content/DEPlain_tgt.pkl', 'rb') as pkl_file:
    deplain_sys = pickle.load(pkl_file)


In [None]:
# Parse the tab-separated file once and derive both columns from the same rows
# (previously the file was opened and parsed twice).
# newline='' is what the csv module expects so that embedded line breaks are
# handled by the reader; the explicit encoding avoids depending on the locale.
with open('/content/claramed_synt_simp_aligned.tsv', newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile, delimiter="\t")
    next(reader)  # skip the header row
    claramed_rows = list(reader)

# NOTE(review): column 1 is used as source and column 3 as reference --
# confirm against the TSV header.
claramed_src = [row[1] for row in claramed_rows]
claramed_ref = [row[3] for row in claramed_rows]

# NOTE: unpickling can execute arbitrary code; only load files you produced
# yourself. The context manager closes the handle after loading.
with open('/content/CLARAMeD_tgt.pkl', 'rb') as pkl_file:
    claramed_sys = pickle.load(pkl_file)

In [None]:
from bleurt import score

# Checkpoint location handed to the scorer, given as a relative path.
# NOTE(review): presumably the folder produced by unzipping BLEURT-20.zip in
# an earlier cell -- confirm.
checkpoint = "BLEURT-20"
# Single scorer instance used by get_bleurt_score.
bleurt_scorer = score.BleurtScorer(checkpoint)

In [None]:
from tqdm.notebook import tqdm

def get_bleurt_score(refs,sys):
  """Return the BLEURT score averaged over sentences and reference sets.

  Args:
    refs: iterable of reference lists; each one is scored against ``sys``.
    sys: list of candidate (system output) sentences.

  Returns:
    The mean over reference sets of the mean per-sentence score returned by
    ``bleurt_scorer.score`` (called with ``batch_size=32``).

  Raises:
    ZeroDivisionError: if ``refs`` is empty or a scorer call returns no scores.
  """
  # Phase 1: score the candidates against every reference set.
  raw_scores = []
  for reference_set in tqdm(refs):
    raw_scores.append(
        bleurt_scorer.score(references=reference_set, candidates=sys, batch_size=32)
    )

  # Phase 2: average within each reference set, then across the sets.
  per_reference_means = [sum(values)/len(values) for values in raw_scores]
  return sum(per_reference_means)/len(per_reference_means)

def get_stats(text):
  """Compute readability statistics for a list of sentences.

  Args:
    text: list of strings; joined with newlines for the readability formulas
      and passed as-is to ``cefr_labeler``.

  Returns:
    Tuple ``(fkgl, cefr, ari)``: the Flesch-Kincaid score, the mean CEFR rank
    (predicted labels mapped through ``cefr_map``), and the ARI score.
  """
  readability = Readability("\n".join(text))
  fkgl_result = readability.flesch_kincaid()
  ari_result = readability.ari()

  # Map every predicted label to its ordinal rank and average the ranks.
  predictions = cefr_labeler(text)
  ranks = []
  for prediction in predictions:
    ranks.append(cefr_map[prediction['label']])
  mean_rank = sum(ranks)/len(ranks)

  return (fkgl_result.score, mean_rank, ari_result.score)

def get_dataset_stats(src, refs, output):
  """Print readability, SARI and BLEURT figures for one system on one dataset.

  Args:
    src: list of source sentences.
    refs: list of reference lists (one inner list per reference set).
    output: list of system output sentences.

  Prints, in order: three lines (FKGL, CEFR, ARI) each holding the source,
  system-output and reference-averaged value; the SARI operation scores; the
  aggregate SARI; the BLEURT score. Returns None.
  """
  src_stats = get_stats(src)

  # Average each of the three statistics over the reference sets.
  per_ref_stats = [get_stats(ref) for ref in refs]
  n_refs = len(per_ref_stats)
  refs_stats = tuple(
      sum(stats[i] for stats in per_ref_stats)/n_refs for i in range(3)
  )

  tgt_stats = get_stats(output)
  bleurt = get_bleurt_score(refs,output)
  sari_scores = sari.get_corpus_sari_operation_scores(src,output,refs)
  sari_agg = sari.corpus_sari(orig_sents=src,sys_sents=output,refs_sents=refs)

  # One row per statistic: source, system output, references.
  for i in range(3):
    print(src_stats[i], tgt_stats[i], refs_stats[i])
  for value in (sari_scores, sari_agg, bleurt):
    print(value)


In [None]:
# Load the pickled system outputs for ASSET; they are evaluated with
# get_dataset_stats against the sources/references shipped with EASSE.
import pickle

# NOTE: unpickling can execute arbitrary code; only load files you produced
# yourself. The context manager closes the handle after loading.
with open('/content/asset_tgt.pkl','rb') as pkl_file:
    asset_sys_tsse = pickle.load(pkl_file)

In [None]:
# Read the ASSET test sources and the reference files suffixed .0 to .9.
# Context managers close every file right after it has been read (the
# previous bare open() calls left all eleven handles open).
# NOTE(review): readlines() keeps the trailing "\n" of each sentence, unlike
# the JSON-loaded Med-EASi data -- confirm the metrics are insensitive to it.
with open('/content/easse/resources/data/test_sets/asset/asset.test.orig','r') as orig_file:
    asset_src = orig_file.readlines()

asset_ref = []
for i in range(10):
    with open('/content/easse/resources/data/test_sets/asset/asset.test.simp.%d' % i,'r') as simp_file:
        asset_ref.append(simp_file.readlines())

In [None]:
import json

# Ground-truth file with one JSON object per line; the handle is closed as
# soon as the lines are read.
with open('/content/BLESS/model_outputs_and_evals/ground_truth/med-easi-test.jsonl','r') as jsonl_file:
    medEASi = jsonl_file.readlines()

# Decode every line once and reuse the records for both columns.
medEASi_records = [json.loads(line) for line in medEASi]
medEASi_src = [record['source'] for record in medEASi_records]
# Only the first entry of each 'references' list is used.
medEASi_ref = [record['references'][0] for record in medEASi_records]

# NOTE: unpickling can execute arbitrary code; only load files you produced
# yourself.
# NOTE(review): relative path, unlike the other /content/... pickle paths --
# confirm the working directory.
with open('MedEASi_tgt.pkl', 'rb') as pkl_file:
    medEASi_sys_tsse = pickle.load(pkl_file)



In [None]:
# TSSE system outputs evaluated on ASSET
get_dataset_stats(src=asset_src, refs=asset_ref, output=asset_sys_tsse)

In [None]:
# TSSE system outputs evaluated on Med-EASi; the single reference list is
# wrapped so that `refs` is a list of reference sets.
get_dataset_stats(src=medEASi_src, refs=[medEASi_ref], output=medEASi_sys_tsse)

In [None]:
# SEQ2SEQ - ASSET
# Read the MUSS outputs (one JSON object per line) and close the file
# afterwards; the previous bare open() never released the handle.
with open('/content/BLESS/model_outputs_and_evals/muss/asset-test_default_muss_en_mined.jsonl','r') as asset_s2s:
    asset_sys_s2s = [json.loads(line)['model_output'] for line in asset_s2s]
get_dataset_stats(asset_src, asset_ref, asset_sys_s2s)

In [None]:
# SEQ2SEQ - MedEASi
# Read the MUSS outputs (one JSON object per line) and close the file
# afterwards; the previous bare open() never released the handle.
with open('/content/BLESS/model_outputs_and_evals/muss/med-easi-test_default_muss_en_mined.jsonl','r') as medEASi_s2s:
    medEASi_sys_s2s = [json.loads(line)['model_output'] for line in medEASi_s2s]
get_dataset_stats(medEASi_src, [medEASi_ref], medEASi_sys_s2s)

In [None]:
# LLM - ASSET
# The GPT-3.5 outputs get their own names instead of overwriting the
# seq2seq variables (asset_s2s / asset_sys_s2s), and the file is closed
# once it has been read.
with open('/content/BLESS/model_outputs_and_evals/openai-gpt-3.5-turbo/asset-test_asset-valid_p2_random_fs3_nr1_s723.jsonl','r') as asset_llm:
    asset_sys_llm = [json.loads(line)['model_output'] for line in asset_llm]
get_dataset_stats(asset_src, asset_ref, asset_sys_llm)

In [None]:
# LLM - MedEASi
# The GPT-3.5 outputs get their own names instead of overwriting the
# seq2seq variables (medEASi_s2s / medEASi_sys_s2s), and the file is closed
# once it has been read.
with open('/content/BLESS/model_outputs_and_evals/openai-gpt-3.5-turbo/med-easi-test_med-easi-validation_p2_random_fs3_nr1_s723.jsonl','r') as medEASi_llm:
    medEASi_sys_llm = [json.loads(line)['model_output'] for line in medEASi_llm]
get_dataset_stats(medEASi_src, [medEASi_ref], medEASi_sys_llm)

In [None]:
# TSSE system outputs evaluated on DEPlain; the single reference list is
# wrapped so that `refs` is a list of reference sets.
get_dataset_stats(src=deplain_src, refs=[deplain_ref], output=deplain_sys)

In [None]:
# TSSE system outputs evaluated on CLARA-MeD; the single reference list is
# wrapped so that `refs` is a list of reference sets.
get_dataset_stats(src=claramed_src, refs=[claramed_ref], output=claramed_sys)