# Import libraries

In [1]:
# Import libraries
import os
import pandas as pd
import json
import sacrebleu

# Set Paths

In [2]:
# Base path
base_path = os.path.abspath(os.path.join(os.getcwd(), '..', '..', '..', '..'))

# Model Parameters
unsloth_model_name = 'unsloth/Phi-3.5-mini-instruct-bnb-4bit'
company_name = 'microsoft'
model_name = unsloth_model_name.split('/')[1]

# Results path
results_path_file = os.path.join(base_path, 'results', company_name, model_name, 'merged_results.json')

# Print paths
print('Base path:', base_path)
print('Results path:', results_path_file)

Base path: /cs/student/msc/csml/2023/ngriessh/historical_mt
Results path: /cs/student/msc/csml/2023/ngriessh/historical_mt/results/microsoft/Phi-3.5-mini-instruct-bnb-4bit/merged_results.json


# Load Results File

In [3]:
# Load the JSON file
with open(results_path_file, 'r') as f:
    data = [json.loads(line) for line in f]

# Create a DataFrame based on the JSON file
merged_df = pd.DataFrame(data)

# Check Columns

In [4]:
# Check column names
merged_df.columns

Index(['Early Modern Bohemian German', 'English', 'DE_to_EN_finetuning',
       'EN_to_DE_finetuning'],
      dtype='object')

# BLEU Evaluation

In [6]:
# Define references (the human-provided English translations by Professor Sheilagh Ogilvie)
references = [merged_df['Early Modern Bohemian German'].tolist()]

# List of columns containing the different LLM-generated candidate translations
icl_DE_to_EN_candidate_columns = []#['DE_to_EN_000_example_prompt', 'DE_to_EN_001_example_prompt', 'DE_to_EN_002_example_prompt', 'DE_to_EN_004_example_prompt', 'DE_to_EN_008_example_prompt']# 'DE_to_EN_016_example_prompt', 'DE_to_EN_032_example_prompt', 'DE_to_EN_064_example_prompt']
icl_EN_to_DE_candidate_columns = []#['EN_to_DE_000_example_prompt', 'EN_to_DE_001_example_prompt', 'EN_to_DE_002_example_prompt', 'EN_to_DE_004_example_prompt', 'EN_to_DE_008_example_prompt'] #'EN_to_DE_016_example_prompt', 'EN_to_DE_032_example_prompt', 'EN_to_DE_064_example_prompt']
finetuning_candidate_columns = ['DE_to_EN_finetuning', 'EN_to_DE_finetuning']

# Concatenate all candidate columns
candidate_columns = icl_DE_to_EN_candidate_columns + icl_EN_to_DE_candidate_columns + finetuning_candidate_columns

# Loop through each set of candidate translations and calculate BLEU score
for col in candidate_columns:
    candidates = merged_df[col].tolist()
    bleu_score = sacrebleu.corpus_bleu(candidates, references).score
    print(f"Corpus-level SacreBLEU score for {col}: {bleu_score:.2f}")

Corpus-level SacreBLEU score for DE_to_EN_finetuning: 6.51
Corpus-level SacreBLEU score for EN_to_DE_finetuning: 13.09


# Create Text File to Inspect Translation Results

In [12]:
# Function to generate the inspection text for a specific entry
def generate_inspection_text(df, entry_index, translation_directions, output_file):
    with open(output_file, 'w', encoding='utf-8') as f:

        # Get the row corresponding to the entry index
        row = df.iloc[entry_index]

        # Write the German and English translations pairs by Sheilagh Ogilvie
        f.write(f"Entry {entry_index + 1}:\n")
        f.write(f"Early Modern Bohemian German (Sheilagh Ogilvie's Transcription): {row['Early Modern Bohemian German']}\n")
        f.write(f"English (Sheilagh Ogilvie): {row['English']}\n\n")

        # Write the candidate translations for each translation direction
        for col in df.columns:
            if any(direction in col for direction in translation_directions):
                f.write(f"{col}: {row[col]}\n")
        
        f.write("\n" + "="*50 + "\n\n")

# Define the translation directions and the entry to inspect
translation_directions = ['DE_to_EN', 'EN_to_DE']
entry_index = 1

# Call the function to generate the text file
output_file = 'translation_inspection_entry.txt'
generate_inspection_text(merged_df, entry_index, translation_directions, output_file)
print(f"Text file created: {output_file}")


Text file created: translation_inspection_entry.txt
