# Mount Google Drive and install dependencies

In [None]:
# Colab-only setup: mount Google Drive so files under /content/drive become reachable.
from google.colab import drive
drive.mount('/content/drive')
# Install the `datasets` package.
!pip install datasets
# Install unsloth from PyPI, then uninstall it and reinstall the "colab-new" extra
# directly from the GitHub repository (--no-cache-dir disables pip's cache).
# NOTE(review): neither datasets nor unsloth is imported in the cells visible here —
# confirm these installs are still needed for this notebook.
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Project folder on the mounted Drive.
# NOTE(review): base_path is assigned again in the "Set Paths and Parameters" cell;
# that later assignment is the one the rest of the notebook sees.
base_path = "/content/drive/MyDrive/historical_mt"

In [None]:
pip install unbabel-comet

# Import libraries

In [None]:
import os
import pandas as pd
import json
from comet import download_model, load_from_checkpoint

: 

# Set Paths and Parameters

In [None]:
# Base path: the project root that contains the `results` directory.
# On Colab the project lives on the mounted Drive, and the default working
# directory (/content) is not inside it, so walking up from os.getcwd() would
# resolve to the filesystem root. Use the Drive folder whenever it exists;
# otherwise the root is taken to be four levels above the working directory.
colab_base_path = "/content/drive/MyDrive/historical_mt"
if os.path.isdir(colab_base_path):
    base_path = colab_base_path
else:
    base_path = os.path.abspath(os.path.join(os.getcwd(), '..', '..', '..', '..'))

# Model parameters
unsloth_model_name = 'unsloth/Phi-3.5-mini-instruct-bnb-4bit'
company_name = 'microsoft'
# Model identifier without the leading "unsloth/" prefix
model_name = unsloth_model_name.split('/')[1]

# Translation directions to evaluate
translation_directions = ['DE_to_EN', 'EN_to_DE']

# Shot counts: zero-shot followed by the powers of two 1, 2, 4, 8
shots_list = [0] + [2**i for i in range(4)]

# Zero-padded three-digit labels for each shot count, plus the fine-tuning run
formatted_shots_list = [f"{shot:03d}" for shot in shots_list] + ['finetuning']

# Directory holding the result files for this model
results_dir = os.path.join(base_path, 'results', company_name, model_name)

# Echo the configuration so the run is self-describing
print('Base path:', base_path)
print('Results path:', results_dir)
print('Translation directions:', translation_directions)
print('Shots list:', shots_list)
print('Formatted shots list:', formatted_shots_list)

# Function to load JSON file

In [82]:
# Function to load a JSON Lines file (one JSON document per line)
def load_file(file_path):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line of the file is parsed independently with
    ``json.loads``; the parsed values are returned in file order.

    Args:
        file_path: Path of the file to read.

    Returns:
        list: One parsed JSON value per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so non-ASCII text does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Skip whitespace-only lines (e.g. a trailing newline at end of file),
        # which json.loads would otherwise reject.
        return [json.loads(line) for line in f if line.strip()]

# Load COMET model

In [None]:
# Download the "Unbabel/wmt22-comet-da" checkpoint and keep the path that
# download_model returns for it
model_path = download_model(
    "Unbabel/wmt22-comet-da"
)

# Build the scoring model from that checkpoint path
model = load_from_checkpoint(
    model_path
)

# Run inference with COMET model

In [None]:
for translation_direction in translation_directions:

    # Choose the column names holding the source text and the reference:
    # 'DE_to_EN' reads German and is scored against English; any other
    # direction is treated as the reverse.
    german, english = "Early Modern Bohemian German", "English"
    source_language, target_language = (
        (german, english) if translation_direction == 'DE_to_EN' else (english, german)
    )

    # Score every shot setting and the fine-tuned run for this direction
    for shot in formatted_shots_list:
        print(translation_direction, shot)

        # Fine-tuning results sit in their own folder; all other settings share
        # the "icl" folder and carry an "_example_prompt" suffix.
        if shot == 'finetuning':
            path_parts = (shot, translation_direction, f'{translation_direction}_{shot}.json')
        else:
            path_parts = ("icl", translation_direction, f'{translation_direction}_{shot}_example_prompt.json')
        file_path = os.path.join(results_dir, *path_parts)
        print(file_path)

        # One DataFrame row per record in the results file
        current_df = pd.DataFrame(load_file(file_path))

        # The machine-translation output is read from the fourth column.
        # NOTE(review): positional lookup — confirm every results file keeps
        # the system output in column index 3.
        mt_column = current_df.columns[3]
        print(mt_column)

        # Build the list of {'src', 'mt', 'ref'} dictionaries passed to model.predict
        current_data = [
            {'src': src_text, 'mt': mt_text, 'ref': ref_text}
            for src_text, mt_text, ref_text in zip(
                current_df[source_language],
                current_df[mt_column],
                current_df[target_language],
            )
        ]

        # Run COMET over all segments of this file
        model_output = model.predict(current_data, batch_size=8, gpus=1)

        # Report the system score for this direction / setting
        print(f'System score for {translation_direction}_{shot}: {model_output.system_score}')