In [3]:
import pandas as pd
import numpy as np

In [4]:
from transform_annotations_functions import unify_salsa_processing
from transform_annotations_functions import Family, Quality

In [5]:
def run_on_file_dict(file_dict, dataset_name, compute_metrics=True):
    sentences_dataset = []
    edits_dataset = []

    for filepath, system_name in file_dict.items():
        df_sent, df_edits = unify_salsa_processing(
            file_path_or_data=filepath,
            dataset_name=dataset_name,
            system_name=system_name,
            compute_metrics=compute_metrics
        )
        sentences_dataset.append(df_sent)
        edits_dataset.append(df_edits)

    sentences_dataset = pd.concat(sentences_dataset)
    edits_dataset = pd.concat(edits_dataset)

    print("DEBUG COLS: ", edits_dataset.columns)

    # Convert enum objects to strings
    enum_objects = ["Family", "Quality"]
    for enum_object in enum_objects:
        if enum_object in edits_dataset.columns:
            edits_dataset[enum_object] = edits_dataset[enum_object].apply(
                lambda x: x.value if isinstance(x, (Family, Quality)) else x
            )

    # Add new columns for input and output text spans
    edits_dataset['Input Text'] = edits_dataset.apply(
        lambda row: get_text_from_indices(row['Source'], eval(row['Input Index']) if isinstance(row['Input Index'], str) else row['Input Index']), 
        axis=1
    )
    
    edits_dataset['Output Text'] = edits_dataset.apply(
        lambda row: get_text_from_indices(row['Target'], eval(row['Output Index']) if isinstance(row['Output Index'], str) else row['Output Index']), 
        axis=1
    )

    # Convert lists to strings for better readability
    edits_dataset['Input Text'] = edits_dataset['Input Text'].apply(lambda x: ' | '.join(x) if x else '')
    edits_dataset['Output Text'] = edits_dataset['Output Text'].apply(lambda x: ' | '.join(x) if x else '')
            
    return sentences_dataset, edits_dataset


# Add helper function to get text spans from indices
# (to show actual texts affected in output tables)
def get_text_from_indices(text: str, indices: list) -> list:
    """Extract text spans from a string based on list of index pairs."""
    if not indices or not isinstance(indices, list):
        return []
    
    text_spans = []
    for start, end in indices:
        if isinstance(start, int) and isinstance(end, int):
            text_spans.append(text[start:end])
    
    return text_spans

#  Analysis Execution - Metric Calculation and Analysis per Dataset

## Load, transform and combine data (ready for analysis)

In [None]:
global_compute_metrics = True

### Dataset 01: WikiEN (English)

In [7]:
# load data

file_dict = {
    '../data/salsa_annotations/final_annotated/data_f_wikiEN_ChatGPT.json': 'ChatGPT',
    '../data/salsa_annotations/final_annotated/data_f_wikiEN_LLAMA.json': 'LLAMA',
    '../data/salsa_annotations/final_annotated/data_f_wikiEN_MUSS.json': 'MUSS',
     '../data/salsa_annotations/final_annotated/data_f_wikiEN_DisSim.json': 'DisSim'
}

df_sentences_wikiEN, df_edits_wikiEN = run_on_file_dict(
    file_dict, 
    "wikiEN", 
    compute_metrics=global_compute_metrics)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  3.13it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 12.78it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:

DEBUG COLS:  Index(['Sentence ID', 'Source', 'Target', 'System', 'Dataset', 'Family',
       'Edit Type', 'Edit Sub-Type', 'Edit Classification',
       'Structure Sub-Type', 'Quality', 'Information Impact', 'Grammar Error',
       'Significance', 'Input Index', 'Output Index', 'Constituent Edits',
       'Is Constituent Edit', 'Parent Edit Type', 'Constituent Input Index',
       'Constituent Output Index'],
      dtype='object')





In [8]:
df_sentences_wikiEN.head(5)

Unnamed: 0,Sentence ID,Source,Target,System,Dataset,Total Edits,substitution_count,insertion_count,split_count,structure_count,...,Lexical_Diversity_Difference,Syntactic_Complexity_Source,Syntactic_Complexity_Target,Syntactic_Complexity_Difference,BERTScore_Precision,BERTScore_Recall,BERTScore_F1,GLEU,LENS_SALSA,no_edit_count
0,ChatGPT_001,"The N1 road also designated as RN1, is a road ...","The N1 road, also called RN1, is a road in the...",ChatGPT,wikiEN,4,1,1,1,1,...,0.040909,2,1,-1,0.965502,0.956754,0.961108,0.37234,68.790299,
1,ChatGPT_002,"Originating in the capital city of Bangui, the...",The road starts in Bangui and goes northwest t...,ChatGPT,wikiEN,7,1,0,0,1,...,0.166667,3,2,-1,0.963212,0.928761,0.945673,0.141509,73.989666,
2,ChatGPT_003,As a key component of the country's road netwo...,The N1 is an important part of the country's r...,ChatGPT,wikiEN,5,2,0,1,1,...,0.088889,2,2,0,0.956089,0.954746,0.955417,0.309524,76.867175,
3,ChatGPT_004,"The N1 route commences in the heart of Bangui,...","The N1 route starts in the center of Bangui, t...",ChatGPT,wikiEN,4,3,0,0,0,...,-0.007527,1,1,0,0.987401,0.983464,0.985429,0.708955,60.895467,
4,ChatGPT_005,The terrain along this stretch is characterize...,The land here is mostly flat or gently sloping...,ChatGPT,wikiEN,10,5,1,0,2,...,0.024074,3,0,-3,0.936264,0.917814,0.926947,0.145455,76.670897,


In [11]:
df_edits_wikiEN.head(5)

Unnamed: 0,Sentence ID,Source,Target,System,Dataset,Family,Edit Type,Edit Sub-Type,Edit Classification,Structure Sub-Type,...,Significance,Input Index,Output Index,Constituent Edits,Is Constituent Edit,Parent Edit Type,Constituent Input Index,Constituent Output Index,Input Text,Output Text
0,ChatGPT_001,"The N1 road also designated as RN1, is a road ...","The N1 road, also called RN1, is a road in the...",ChatGPT,wikiEN,Conceptual,substitution,,Good Substitution,,...,1,"[[17, 27]]","[[18, 24]]",0,False,,[],[],designated,called
1,ChatGPT_001,"The N1 road also designated as RN1, is a road ...","The N1 road, also called RN1, is a road in the...",ChatGPT,wikiEN,Conceptual,insertion,,Trivial Insertion,,...,0,,"[[43, 46]]",0,False,,[],[],,the
2,ChatGPT_001,"The N1 road also designated as RN1, is a road ...","The N1 road, also called RN1, is a road in the...",ChatGPT,wikiEN,Syntax,split,Sentence Split,Good Split,,...,0,,,1,False,,"[[[74, 78]]]","[[[76, 78]]]",,
3,ChatGPT_001,"The N1 road also designated as RN1, is a road ...","The N1 road, also called RN1, is a road in the...",ChatGPT,wikiEN,Syntax,structure,Part of Speech,Trivial Structure,Part of Speech,...,0,,,1,False,,"[[[79, 104]]]","[[[76, 78], [97, 101]]]",,
4,ChatGPT_002,"Originating in the capital city of Bangui, the...",The road starts in Bangui and goes northwest t...,ChatGPT,wikiEN,Conceptual,deletion,Bad Deletion,Bad Deletion,,...,0,"[[19, 34]]",,0,False,,[],[],capital city of,


In [12]:
# Verify whether all edits are being properly transformed: 
# Calculate number of edits directly

import json

df_valuecounts = df_edits_wikiEN['System'].value_counts()
df_valuecounts

def count_edits_per_file(file_dict):
    edits_per_file = {}
    
    # Iterate through each file in the dictionary
    for file_path, label in file_dict.items():
        try:
            # Load the JSON file
            with open(file_path, 'r') as file:
                data = json.load(file)

            total_edits = 0
            
            # Iterate through each entry in the JSON file
            for entry in data:
                if 'edits' in entry:
                    # Count the number of edits for the current entry
                    total_edits += len(entry['edits'])
            
            # Store the result in the edits_per_file dictionary with the label
            edits_per_file[label] = total_edits
        
        except FileNotFoundError:
            # If the file is not found, handle the error
            edits_per_file[label] = 'File not found'
        except json.JSONDecodeError:
            # If the file is not a valid JSON, handle the error
            edits_per_file[label] = 'Invalid JSON format'

    return edits_per_file

def verify_edits_transformations(df_edits, file_dict):
    # get all systems in df_edits
    systems = df_edits['System'].unique()
    
    # count edits per system
    # first remove all "Is Constituent Edit" = True entries
    df_edits_no_constituent = df_edits[df_edits['Is Constituent Edit'] == False]

    # also remove all rows where edit_type is "no_edit"
    df_edits_no_constituent = df_edits_no_constituent[df_edits_no_constituent['Edit Type'] != 'no_edit']

    edits_count_df = df_edits_no_constituent['System'].value_counts()
    
    # compare edits_count with df_valuecounts
    edits_count_file = count_edits_per_file(file_dict)
    print(edits_count_file)
    
    for system in systems:
        if edits_count_df[system] != edits_count_file[system]:
            print(f"**************************************************")
            print(f"ATTENTION Mismatch for system {system}")
            print(f"   edits in source file: {edits_count_file[system]}")
            print(f"   edits in transformed DF: {edits_count_df[system]}")
            print(f"**************************************************")
        else:
            print(f"OK for system {system}")


verify_edits_transformations(df_edits_wikiEN, file_dict)

{'ChatGPT': 213, 'LLAMA': 278, 'MUSS': 220, 'DisSim': 293}
OK for system ChatGPT
OK for system LLAMA
OK for system MUSS
OK for system DisSim


### Dataset 02: Cochrane Medical (English) 

In [13]:
# load data cochrane data

dataset_name = 'cochrane'

file_dict = {
    '../data/salsa_annotations/final_annotated/data_f_Cochrane_ChatGPT.json': 'ChatGPT',
    '../data/salsa_annotations/final_annotated/data_f_Cochrane_LLAMA.json': 'LLAMA',
    '../data/salsa_annotations/final_annotated/data_f_Cochrane_MUSS.json': 'MUSS',
    '../data/salsa_annotations/final_annotated/data_f_Cochrane_DisSim.json': 'DisSim'
}

df_sentences_cochrane, df_edits_cochrane = run_on_file_dict(
    file_dict, 
    dataset_name,
    compute_metrics=global_compute_metrics
    )

df_sentences_cochrane.tail()

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  2.44it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 12.63it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.32it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 13.00it/s]
GPU available: True (mps), used: Fal

DEBUG COLS:  Index(['Sentence ID', 'Source', 'Target', 'System', 'Dataset', 'Family',
       'Edit Type', 'Edit Sub-Type', 'Edit Classification',
       'Structure Sub-Type', 'Quality', 'Information Impact', 'Grammar Error',
       'Significance', 'Input Index', 'Output Index', 'Constituent Edits',
       'Is Constituent Edit', 'Parent Edit Type', 'Constituent Input Index',
       'Constituent Output Index'],
      dtype='object')





Unnamed: 0,Sentence ID,Source,Target,System,Dataset,Total Edits,substitution_count,deletion_count,reorder_count,structure_count,...,Lexical_Diversity_Difference,Syntactic_Complexity_Source,Syntactic_Complexity_Target,Syntactic_Complexity_Difference,BERTScore_Precision,BERTScore_Recall,BERTScore_F1,GLEU,LENS_SALSA,no_edit_count
45,DisSim_046,Low‐certainty evidence did not detect a differ...,Low‐certainty evidence did not detect a differ...,DisSim,cochrane,4,0,0,0,,...,-0.041667,2,2,0,0.946639,0.978627,0.962367,0.527273,57.879913,0.0
46,DisSim_047,No studies assessed SAEs for the following: be...,No studies assessed SAEs for the following. ||...,DisSim,cochrane,6,0,0,0,,...,-0.294643,1,4,3,0.935131,0.954521,0.944727,0.4,54.451776,0.0
47,DisSim_048,Repetitive transcranial magnetic stimulation (...,Repetitive transcranial magnetic stimulation (...,DisSim,cochrane,0,0,0,0,,...,0.0,1,1,0,1.0,1.0,1.0,1.0,62.297642,1.0
48,DisSim_049,Current pharmacological and psychotherapeutic ...,Current pharmacological and psychotherapeutic ...,DisSim,cochrane,0,0,0,0,,...,0.0,1,1,0,1.0,1.0,1.0,1.0,68.792772,1.0
49,DisSim_050,The evidence is very uncertain about the effec...,The evidence is very uncertain about the effec...,DisSim,cochrane,0,0,0,0,,...,0.0,0,0,0,1.0,1.0,1.0,1.0,71.432281,1.0


In [14]:
df_edits_cochrane.tail(5)

Unnamed: 0,Sentence ID,Source,Target,System,Dataset,Family,Edit Type,Edit Sub-Type,Edit Classification,Structure Sub-Type,...,Significance,Input Index,Output Index,Constituent Edits,Is Constituent Edit,Parent Edit Type,Constituent Input Index,Constituent Output Index,Input Text,Output Text
283,DisSim_047,No studies assessed SAEs for the following: be...,No studies assessed SAEs for the following. ||...,DisSim,cochrane,Conceptual,insertion,Repetition,Bad Insertion,,...,0,,"[[167, 186]]",0,False,,[],[],,No studies assessed
284,DisSim_047,No studies assessed SAEs for the following: be...,No studies assessed SAEs for the following. ||...,DisSim,cochrane,Syntax,split,Trivial Change,Trivial Split,,...,0,,,1,False,,[None],"[[[164, 166]]]",,
285,DisSim_048,Repetitive transcranial magnetic stimulation (...,Repetitive transcranial magnetic stimulation (...,DisSim,cochrane,,no_edit,,,,...,0,,,0,False,,,,,
286,DisSim_049,Current pharmacological and psychotherapeutic ...,Current pharmacological and psychotherapeutic ...,DisSim,cochrane,,no_edit,,,,...,0,,,0,False,,,,,
287,DisSim_050,The evidence is very uncertain about the effec...,The evidence is very uncertain about the effec...,DisSim,cochrane,,no_edit,,,,...,0,,,0,False,,,,,


In [15]:
verify_edits_transformations(df_edits_cochrane, file_dict)

{'ChatGPT': 270, 'LLAMA': 302, 'MUSS': 182, 'DisSim': 281}
OK for system ChatGPT
OK for system LLAMA
OK for system MUSS
OK for system DisSim


### Dataset 03: SCOTUS (Legal, English)

In [16]:
# load data cochrane data

dataset_name = 'scotus'

file_dict = {
    '../data/salsa_annotations/final_annotated/data_f_SCOTUS_ChatGPT.json': 'ChatGPT',
    '../data/salsa_annotations/final_annotated/data_f_SCOTUS_LLAMA.json': 'LLAMA',
    '../data/salsa_annotations/final_annotated/data_f_SCOTUS_MUSS.json': 'MUSS',
    '../data/salsa_annotations/final_annotated/data_f_SCOTUS_DisSim.json': 'DisSim'
}

df_sentences_scotus, df_edits_scotus = run_on_file_dict(
    file_dict, 
    dataset_name,
    compute_metrics=global_compute_metrics
    )

df_sentences_scotus.tail()

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.72it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.74it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.27it/s]
GPU available: True (mps), used: Fal

DEBUG COLS:  Index(['Sentence ID', 'Source', 'Target', 'System', 'Dataset', 'Family',
       'Edit Type', 'Edit Sub-Type', 'Edit Classification',
       'Structure Sub-Type', 'Quality', 'Information Impact', 'Grammar Error',
       'Significance', 'Input Index', 'Output Index', 'Constituent Edits',
       'Is Constituent Edit', 'Parent Edit Type', 'Constituent Input Index',
       'Constituent Output Index'],
      dtype='object')


Unnamed: 0,Sentence ID,Source,Target,System,Dataset,Total Edits,substitution_count,split_count,structure_count,deletion_count,...,Lexical_Diversity_Difference,Syntactic_Complexity_Source,Syntactic_Complexity_Target,Syntactic_Complexity_Difference,BERTScore_Precision,BERTScore_Recall,BERTScore_F1,GLEU,LENS_SALSA,no_edit_count
45,DisSim_046,Asencio-Cordero cannot invoke the exception hi...,Asencio-Cordero can not invoke the exception h...,DisSim,scotus,2,0,2,,0,...,0.005926,4,4,0,0.966063,0.974282,0.970155,0.484127,58.0832,0.0
46,DisSim_047,Thus in United States ex rel. Knauff v. Shaugh...,... forbid aliens or classes of aliens. || Thi...,DisSim,scotus,17,0,6,,0,...,-0.096471,4,4,0,0.893951,0.910212,0.902008,0.510638,16.050099,0.0
47,DisSim_048,The District Court granted summary judgment to...,The District Court granted summary judgment. |...,DisSim,scotus,6,0,4,,0,...,-0.214554,4,4,0,0.928381,0.963657,0.94569,0.405941,54.102457,0.0
48,DisSim_049,"Thus in Apprendi v. New Jersey, 530 U. S. 466,...",A novel `` sentencing enhancement '' was uncon...,DisSim,scotus,10,0,3,,1,...,-0.066287,5,5,0,0.909108,0.934313,0.921538,0.466443,36.632434,0.0
49,DisSim_050,But a sentencing procedure followed by a few S...,But a sentencing procedure followed by a few S...,DisSim,scotus,1,0,1,,0,...,0.005109,3,3,0,0.953538,0.951542,0.952539,0.674603,61.412752,0.0


In [17]:
verify_edits_transformations(df_edits_scotus, file_dict)

{'ChatGPT': 298, 'LLAMA': 299, 'MUSS': 206, 'DisSim': 277}
OK for system ChatGPT
OK for system LLAMA
OK for system MUSS
OK for system DisSim


### Dataset 04: wikiDE (Wikipedia, German)

In [18]:
# load wikiDE data
dataset_name = 'wikiDE'

file_dict = {
    '../data/salsa_annotations/final_annotated/data_f_wikiDE_ChatGPT.json': 'ChatGPT',
    '../data/salsa_annotations/final_annotated/data_f_wikiDE_LLAMA.json': 'LLAMA',
    '../data/salsa_annotations/final_annotated/data_f_wikiDE_MBART.json': 'MUSS',
}

df_sentences_wikiDE, df_edits_wikiDE = run_on_file_dict(
    file_dict, 
    dataset_name,
    compute_metrics=global_compute_metrics
)

# Display the first few rows
df_sentences_wikiDE.head()

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  6.58it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.37it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  6.86it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.58it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.30it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.95it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.27it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.66it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.07it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.03it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.56it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.65it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.51it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.26it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 14.29it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.20it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.69it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 13.88it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.73it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 12.49it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.34it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.27it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.50it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.83it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.62it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.47it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  6.69it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.56it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.26it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.33it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.35it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.65it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.75it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 12.08it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.83it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.95it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Debug: calculating german BERTSCORE


  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.64it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.55it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.48it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.06it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.62it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.83it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.58it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.39it/s]

Debug: calculating german BERTSCORE



GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.26it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.42it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.58it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  6.63it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.53it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.56it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.27it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.57it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.06it/s]

Debug: calculating german BERTSCORE



GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.23it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.81it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.91it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.64it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.36it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.75it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Debug: calculating german BERTSCORE


  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.42it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.33it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.65it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.56it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.72it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 13.02it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.31it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.59it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 14.40it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.31it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.26it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.81it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.55it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.60it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.43it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.72it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.26it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  6.95it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.50it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.65it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.52it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.86it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.39it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.84it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.42it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.37it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.70it/s]

Debug: calculating german BERTSCORE



GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.70it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.18it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.90it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.28it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.93it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.34it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.21it/s]

Debug: calculating german BERTSCORE



GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.15it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.50it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.68it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.11it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.22it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.50it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Debug: calculating german BERTSCORE


  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.63it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.58it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.80it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.50it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.00it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.52it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.10it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.56it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.91it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.05it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.69it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.42it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.23it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.07it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 13.50it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.64it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.72it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 14.82it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.68it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.26it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.53it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.87it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.60it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.89it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.32it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.46it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.11it/s]

Debug: calculating german BERTSCORE



GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.68it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.08it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.77it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.26it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.59it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.83it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.94it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.89it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.86it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.87it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.41it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.48it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.95it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.95it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.63it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.22it/s]

Debug: calculating german BERTSCORE



GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 13.27it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.67it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.82it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  7.88it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 11.24it/s]
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Debug: calculating german BERTSCORE


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  9.44it/s]

DEBUG COLS:  Index(['Sentence ID', 'Source', 'Target', 'System', 'Dataset', 'Family',
       'Edit Type', 'Edit Sub-Type', 'Edit Classification',
       'Structure Sub-Type', 'Quality', 'Information Impact', 'Grammar Error',
       'Significance', 'Input Index', 'Output Index', 'Constituent Edits',
       'Is Constituent Edit', 'Parent Edit Type', 'Constituent Input Index',
       'Constituent Output Index'],
      dtype='object')





Unnamed: 0,Sentence ID,Source,Target,System,Dataset,Total Edits,reorder_count,deletion_count,substitution_count,split_count,...,Lexical_Diversity_Difference,Syntactic_Complexity_Source,Syntactic_Complexity_Target,Syntactic_Complexity_Difference,BERTScore_Precision,BERTScore_Recall,BERTScore_F1,GLEU,LENS_SALSA,no_edit_count
0,ChatGPT_001,Am 22. März 1985 unterzeichnete die Europäisch...,Die Europäische Gemeinschaft unterzeichnete am...,ChatGPT,wikiDE,5,2,3,0,0,...,-0.053623,1,1,0,0.963163,0.904386,0.932849,0.507937,58.915269,
1,ChatGPT_002,Im folgenden Jahr wurde die VERORDNUNG (EWG) N...,Im nächsten Jahr wurde die Verordnung (EWG) Nr...,ChatGPT,wikiDE,4,1,0,1,1,...,-0.095238,3,3,0,0.876339,0.877588,0.876963,0.385965,59.443283,
2,ChatGPT_003,Zur Anpassung an die Änderungen des Montrealer...,"Um das Montrealer Protokoll anzupassen, ergänz...",ChatGPT,wikiDE,7,0,0,3,2,...,0.081818,2,4,2,0.848119,0.868368,0.858124,0.321918,51.670396,
3,ChatGPT_004,Unternehmen müssen die Emissionen von geregelt...,"Unternehmen müssen verhindern, dass schädliche...",ChatGPT,wikiDE,4,0,0,2,0,...,-0.0375,1,2,1,0.801097,0.792511,0.796781,0.151163,59.764546,
4,ChatGPT_005,Francesco Calzolari stammte aus einer alteinge...,Francesco Calzolari kam aus einer alten Famili...,ChatGPT,wikiDE,6,1,0,5,0,...,-0.05,2,1,-1,0.939942,0.880234,0.909109,0.393617,63.699377,


In [19]:
# replace LENS-SALSA scores with NaN, since it can only handle English
df_sentences_wikiDE['LENS-SALSA'] = np.nan

In [20]:
# Display edit level data
df_edits_wikiDE.head()

Unnamed: 0,Sentence ID,Source,Target,System,Dataset,Family,Edit Type,Edit Sub-Type,Edit Classification,Structure Sub-Type,...,Significance,Input Index,Output Index,Constituent Edits,Is Constituent Edit,Parent Edit Type,Constituent Input Index,Constituent Output Index,Input Text,Output Text
0,ChatGPT_001,Am 22. März 1985 unterzeichnete die Europäisch...,Die Europäische Gemeinschaft unterzeichnete am...,ChatGPT,wikiDE,Syntax,reorder,Component-level Reorder,Good Reorder,,...,0,"[[32, 60]]","[[0, 28]]",0,False,,[],[],die Europäische Gemeinschaft,Die Europäische Gemeinschaft
1,ChatGPT_001,Am 22. März 1985 unterzeichnete die Europäisch...,Die Europäische Gemeinschaft unterzeichnete am...,ChatGPT,wikiDE,Syntax,reorder,Trivial Change,Good Reorder,,...,0,"[[17, 31]]","[[29, 43]]",0,False,,[],[],unterzeichnete,unterzeichnete
2,ChatGPT_001,Am 22. März 1985 unterzeichnete die Europäisch...,Die Europäische Gemeinschaft unterzeichnete am...,ChatGPT,wikiDE,Conceptual,deletion,,Good Deletion,,...,1,"[[61, 99]]",,0,False,,[],[],gemeinsam mit mehreren Mitgliedstaaten,
3,ChatGPT_001,Am 22. März 1985 unterzeichnete die Europäisch...,Die Europäische Gemeinschaft unterzeichnete am...,ChatGPT,wikiDE,Conceptual,deletion,,Good Deletion,,...,0,"[[156, 170]]",,0,False,,[],[],darauf folgend,
4,ChatGPT_001,Am 22. März 1985 unterzeichnete die Europäisch...,Die Europäische Gemeinschaft unterzeichnete am...,ChatGPT,wikiDE,Conceptual,deletion,,Good Deletion,,...,0,"[[193, 197]]",,0,False,,[],[],auch,


In [21]:
# Verify the edit transformations
verify_edits_transformations(df_edits_wikiDE, file_dict)

{'ChatGPT': 272, 'LLAMA': 393, 'MUSS': 154}
OK for system ChatGPT
OK for system LLAMA
OK for system MUSS


### Combine Datasets

In [22]:
# combine the datasets (append)

#df_sentences = df_sentences_wikiEN
#df_edits = df_edits_wikiEN

df_sentences = pd.concat([
    df_sentences_wikiEN, 
    df_sentences_cochrane,
    df_sentences_scotus,
    df_sentences_wikiDE
], axis=0)

df_edits = pd.concat([
    df_edits_wikiEN, 
    df_edits_cochrane,
    df_edits_scotus,
    df_edits_wikiDE
], axis=0)


print(df_sentences.shape)
print(df_edits.shape)

(750, 31)
(3954, 23)


In [23]:
# get all column names for df_edits
df_edits.columns

Index(['Sentence ID', 'Source', 'Target', 'System', 'Dataset', 'Family',
       'Edit Type', 'Edit Sub-Type', 'Edit Classification',
       'Structure Sub-Type', 'Quality', 'Information Impact', 'Grammar Error',
       'Significance', 'Input Index', 'Output Index', 'Constituent Edits',
       'Is Constituent Edit', 'Parent Edit Type', 'Constituent Input Index',
       'Constituent Output Index', 'Input Text', 'Output Text'],
      dtype='object')

In [24]:
# get all unique values for columns:
cols = [ 'Family',
       'Edit Type', 'Edit Sub-Type',
       'Edit Classification', 'Quality', 'Information Impact',
       'Grammar Error', 'Significance']

for col in cols:
    print(f"Unique values for column {col}:")
    print(df_edits[col].unique())
    print("\n")

Unique values for column Family:
['Conceptual' 'Syntax' 'Lexical' None]


Unique values for column Edit Type:
['substitution' 'insertion' 'split' 'structure' 'deletion' 'reorder'
 'no_edit']


Unique values for column Edit Sub-Type:
[None 'Sentence Split' 'Part of Speech' 'Bad Deletion' 'Trivial Change'
 'Clausal Structure' 'Component-level Reorder' 'Information Rewrite'
 'Voice' 'Tense' 'Grammatical Number' 'Bad Component Reorder'
 'Unknown Structure Change' 'Complex Wrong' 'Factual Error' 'Repetition'
 'Bad Split' 'Contradiction' 'Coreference']


Unique values for column Edit Classification:
['Good Substitution' 'Trivial Insertion' 'Good Split' 'Trivial Structure'
 'Bad Deletion' 'Good Deletion' 'Trivial Deletion' 'Good Structure'
 'Good Reorder' 'Bad Substitution' 'Trivial Substitution' 'Good Insertion'
 'Bad Reorder' 'Bad Structure' 'Bad Insertion' 'Trivial Split' 'Bad Split'
 None]


Unique values for column Quality:
['No Error' 'Trivial' 'Error' None]


Unique values for column I

### Minor additional transformations

In [25]:
# Increase Signifiance by 1 to have levels [1, 2, 3] instead of [0, 1, 2]
df_edits['Significance'] = df_edits['Significance'] + 1

# set when Quality == Trivial  Significance to 0 (back, as it has been increased by 1)
df_edits.loc[df_edits['Quality'] == 'Trivial', 'Significance'] = 0

# Signifiance unique values
df_edits['Significance'].unique()

array([2, 1, 0, 3])

### Enhance with more metadata

In [26]:
# load csv with additional information about the sentences
df_sentences_add_metadata = pd.read_csv("../data/TS_datasets/allSets_SpecializedTermsAndEntities.csv")
# drop "Original Sentence" column
df_sentences_add_metadata = df_sentences_add_metadata.drop(columns=['Original Sentence'])

df_sentences_add_metadata.head(1)

Unnamed: 0,dataset,sentence_id,sentence_has_specialized_terms,sentence_contains_named_entity
0,scotus,1,True,True


In [27]:
# read df_edits metadata
df_edits_add_metadata = pd.read_csv("../data/TS_datasets/allEdits_SpecializedTermsAndEntities.csv")
df_edits_add_metadata

Unnamed: 0,Sentence ID,System,Dataset,Edit Type,Input Text,Output Text,Input Index,Output Index,has_specialized_terms,contains_named_entity
0,ChatGPT_001,ChatGPT,wikiEN,substitution,designated,called,"[[17, 27]]","[[18, 24]]",False,False
1,ChatGPT_001,ChatGPT,wikiEN,insertion,,the,,"[[43, 46]]",False,False
2,ChatGPT_001,ChatGPT,wikiEN,split,,,,,False,False
3,ChatGPT_001,ChatGPT,wikiEN,structure,,,,,False,False
4,ChatGPT_002,ChatGPT,wikiEN,deletion,capital city of,,"[[19, 34]]",,False,True
...,...,...,...,...,...,...,...,...,...,...
3949,MUSS_049,MUSS,wikiDE,deletion,eine Gegendemonstration statt.,,"[[109, 139]]",,False,False
3950,MUSS_049,MUSS,wikiDE,deletion,fand,,"[[86, 90]]",,False,False
3951,MUSS_050,MUSS,wikiDE,deletion,Als Reaktion auf die anhaltenden Unruhen,,"[[0, 40]]",,False,False
3952,MUSS_050,MUSS,wikiDE,structure,,,,,False,False


In [28]:
print(df_edits_add_metadata.shape)

# drop "Input Text" and "Output Text" cols
df_edits_add_metadata = df_edits_add_metadata.drop(columns=['Input Text', 'Output Text'])


# remove all Edit Type = split or structure or no_edit
df_edits_add_metadata = df_edits_add_metadata[
    ~df_edits_add_metadata['Edit Type'].isin(['split', 'structure', 'no_edit'])
]

print(df_edits_add_metadata.shape)
# get unique values for Edit Type
df_edits_add_metadata['Edit Type'].unique()

(3954, 10)
(3111, 8)


array(['substitution', 'insertion', 'deletion', 'reorder'], dtype=object)

In [31]:
# Create a copy of df_edits to avoid modifying the original
df_edits_temp = df_edits.copy()

# Convert list columns to strings for merging
list_columns = ['Input Index', 'Output Index']
for col in list_columns:
    df_edits_temp[col] = df_edits_temp[col].apply(lambda x: str(x) if isinstance(x, list) else x)

# Perform the merge
df_edits = pd.merge(
    df_edits_temp, 
    df_edits_add_metadata,
    on=['Sentence ID', 'System', 'Dataset', 'Edit Type', 
         'Input Index', 'Output Index'],
    how='left'
)

# Convert string representation of lists back to actual lists if needed
for col in list_columns:
    df_edits[col] = df_edits[col].apply(lambda x: eval(x) if isinstance(x, str) and x != 'nan' and x != 'None' else None)

In [32]:
# set has_specialized_terms and contains_named_entity to False if Nan if Edit Type in split, structure, no_edit

df_edits.loc[
    df_edits['Edit Type'].isin(['split', 'structure', 'no_edit']),
    ['has_specialized_terms', 'contains_named_entity']
] = False

# count number of rows with either has_specialized_terms or contains_named_entity NaN
print(df_edits[
    df_edits['has_specialized_terms'].isna() | df_edits['contains_named_entity'].isna()
].shape)

df_edits

(0, 25)


Unnamed: 0,Sentence ID,Source,Target,System,Dataset,Family,Edit Type,Edit Sub-Type,Edit Classification,Structure Sub-Type,...,Output Index,Constituent Edits,Is Constituent Edit,Parent Edit Type,Constituent Input Index,Constituent Output Index,Input Text,Output Text,has_specialized_terms,contains_named_entity
0,ChatGPT_001,"The N1 road also designated as RN1, is a road ...","The N1 road, also called RN1, is a road in the...",ChatGPT,wikiEN,Conceptual,substitution,,Good Substitution,,...,"[[18, 24]]",0,False,,[],[],designated,called,False,False
1,ChatGPT_001,"The N1 road also designated as RN1, is a road ...","The N1 road, also called RN1, is a road in the...",ChatGPT,wikiEN,Conceptual,insertion,,Trivial Insertion,,...,"[[43, 46]]",0,False,,[],[],,the,False,False
2,ChatGPT_001,"The N1 road also designated as RN1, is a road ...","The N1 road, also called RN1, is a road in the...",ChatGPT,wikiEN,Syntax,split,Sentence Split,Good Split,,...,,1,False,,"[[[74, 78]]]","[[[76, 78]]]",,,False,False
3,ChatGPT_001,"The N1 road also designated as RN1, is a road ...","The N1 road, also called RN1, is a road in the...",ChatGPT,wikiEN,Syntax,structure,Part of Speech,Trivial Structure,Part of Speech,...,,1,False,,"[[[79, 104]]]","[[[76, 78], [97, 101]]]",,,False,False
4,ChatGPT_002,"Originating in the capital city of Bangui, the...",The road starts in Bangui and goes northwest t...,ChatGPT,wikiEN,Conceptual,deletion,Bad Deletion,Bad Deletion,,...,,0,False,,[],[],capital city of,,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3949,MUSS_049,"Auch in Bristol, das einige Tage zuvor Schaupl...",Am 7. August 2024 war Schauplatz gewalttätiger...,MUSS,wikiDE,Conceptual,deletion,Bad Deletion,Bad Deletion,,...,,0,False,,[],[],eine Gegendemonstration statt.,,False,False
3950,MUSS_049,"Auch in Bristol, das einige Tage zuvor Schaupl...",Am 7. August 2024 war Schauplatz gewalttätiger...,MUSS,wikiDE,Conceptual,deletion,Bad Deletion,Bad Deletion,,...,,0,False,,[],[],fand,,False,False
3951,MUSS_050,Als Reaktion auf die anhaltenden Unruhen veröf...,Am 5. August wurden Reisewarnungen für das Ver...,MUSS,wikiDE,Conceptual,deletion,Bad Deletion,Bad Deletion,,...,,0,False,,[],[],Als Reaktion auf die anhaltenden Unruhen,,False,False
3952,MUSS_050,Als Reaktion auf die anhaltenden Unruhen veröf...,Am 5. August wurden Reisewarnungen für das Ver...,MUSS,wikiDE,Syntax,structure,Voice,Trivial Structure,Voice,...,,1,False,,"[[[41, 57]]]","[[[13, 19], [65, 80]]]",,,False,False


### Pivot Good/Bad Data based on df_edits

In [34]:
# Pivot the sentence level data to have Good / Bad counts
from helper_functions import pivot_key_columns

processed_df_sentences = pivot_key_columns(df_edits, df_sentences)

df_sentences = processed_df_sentences.copy()

df_sentences.columns

Index(['Sentence ID', 'Source', 'Target', 'System', 'Dataset', 'Total Edits',
       'Substitution_count', 'Insertion_count', 'Split_count',
       'Structure_count', 'Deletion_count', 'Reorder_count', 'FKGL_Source',
       'FKGL_Target', 'FKGL_Difference', 'ARI_Source', 'ARI_Target',
       'ARI_Difference', 'Lexical_Diversity_Source',
       'Lexical_Diversity_Target', 'Lexical_Diversity_Difference',
       'Syntactic_Complexity_Source', 'Syntactic_Complexity_Target',
       'Syntactic_Complexity_Difference', 'BERTScore_Precision',
       'BERTScore_Recall', 'BERTScore_F1', 'GLEU', 'LENS_SALSA',
       'No_edit_count', 'LENS-SALSA', 'Deletion (Bad)', 'Insertion (Bad)',
       'Reorder (Bad)', 'Split (Bad)', 'Structure (Bad)', 'Substitution (Bad)',
       'Deletion (Good)', 'Insertion (Good)', 'Reorder (Good)', 'Split (Good)',
       'Structure (Good)', 'Substitution (Good)', 'Deletion (Neutral)',
       'Insertion (Neutral)', 'Reorder (Neutral)', 'Split (Neutral)',
       'Structure 

## Export to CSV for further processing

In [None]:
# export to csv

# disabled after run
# df_sentences.to_csv("../data/salsa_annotations/final_annotated/df_sentences.csv", index=False)
# df_edits.to_csv("../data/salsa_annotations/final_annotated/df_edits.csv", index=False)