# Required imports

In [1]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt

from helper_functions import * 
from opus_eng_fra_features import *

# Plot settings

In [2]:
# Single font size shared by tick labels and the base font of every figure.
FONT_SIZE = 12

# (rc group, settings) pairs applied in order through plt.rc.
for rc_group, rc_settings in (
    ('xtick', {'labelsize': FONT_SIZE}),
    ('ytick', {'labelsize': FONT_SIZE}),
    ('font', {'size': FONT_SIZE}),
    ('axes', {'labelsize': "medium"}),
):
    plt.rc(rc_group, **rc_settings)

# Colour assigned to each metric name when plotting.
metrics_palette = dict([
    ('comet22', "lightsteelblue"),
    ('comet22-qe', "royalblue"),
    ('chrf', "khaki"),
    ('sacrebleu', "goldenrod"),
])

# Colour for every "<scaler>, <predictor>" combination.
# Each scaler owns one colour family; the three shades follow the
# predictor order (lin, rf, xgb).
scalers_palette = {
    f"{scaler}, {predictor}": shade
    for scaler, shades in (
        ('none', ("#11324D", "#6B7AA1", "#A6DCEF")),
        ('standard', ("#3A4D39", "#79AC78", "#D0E7D2")),
        ('min-max', ("#7F669D", "#BA94D1", "#DEBACE")),
    )
    for predictor, shade in zip(('lin', 'rf', 'xgb'), shades)
}

%matplotlib widget

# Load data

In [3]:
# Directory two levels above the current working directory (with trailing slash).
BASE_DIR = os.getcwd() + "/../../"
# Folder under BASE_DIR from which the evaluation CSV is read.
MODEL_EVAL_DIR = f"{BASE_DIR}model_eval/"

In [4]:
# File name of the dataset, split at its "-"-separated fields for readability.
fid_file = (
    "fid-finetune_data"
    "-dataset_opus_eng_fra"
    "-timeInterval_10000"
    "-timeIntervalType_sentence"
    "-finetuneType_base"
    ".csv"
)
fid = pd.read_csv(f"{MODEL_EVAL_DIR}{fid_file}")

## View FID

### Add deltas

In [5]:
# For every (test set, metric) pair, add a "delta-target_*" column holding
# the target score minus the current score.
for test_set in OPUS_TEST_SETS:
    for metric in METRICS:
        key = f"{test_set}_{metric}"
        target_scores = fid[f"target_{key}"]
        curr_scores = fid[f"curr_{key}"]
        fid[f"delta-target_{key}"] = target_scores - curr_scores

### Look for NaN

In [None]:
# Columns describing the fine-tuning steps and the amount of data involved.
finetune_overview_cols = [
    'curr_finetune',
    'prev_finetune',
    'finetune_delta',
    'amount_new_data',
    'amount_old_data',
    'amount_finetune_data',
]
fid[finetune_overview_cols]

In [None]:
# Count missing values per column in one vectorized pass, print one line per
# column, and keep the names of the columns that contain any NaN.
print("Feature name")
print("-"*50)
nan_counts = fid.isna().sum()
for col, nan_count in nan_counts.items():
    print(f"{col}\t\t{nan_count}")
nan_cols = [col for col, nan_count in nan_counts.items() if nan_count > 0]

# Feature correlation

In [None]:
# Correlate the basic features with the prediction targets.
features = [*BASIC_FEATURES, *TARGETS]

make_corr(features, fid, "Feature correlation")

# Eval FIPs

In [None]:
# Short names of the predictors to evaluate.
predictors = ["lin", "rf", "xgb"]
print("predictors:", predictors)

In [10]:
# Two independent, initially empty accumulators handed to eval_FIPs_offline
# below; res_dict is later turned into the results DataFrame.
res_dict, feature_imp_dict = {}, {}

## Features dict

In [None]:
# System-performance features whose name contains '-qe' (used by the
# "*-kiwi" feature sets below).
qe_sys_perf_features = [feat for feat in SYS_PERF_FEATURES if '-qe' in feat]

# Sentence-overlap and embedding features for the new data first, then for
# the fine-tune data.
no_ngram_features = [
    feat
    for data_split in ("new_data", "finetune_data")
    for feature_group in (SENT_OVERLAP_FEATURES, EMBEDDING_FEATURES)
    for feat in feature_group[data_split]
]

# Named feature sets to evaluate; insertion order is kept as-is.
features_dict = {
    "All": BASIC_FEATURES + SYS_PERF_FEATURES + CONTENT_AWARE_FEATURES,
    "Basic": BASIC_FEATURES,
    "ContAware": CONTENT_AWARE_FEATURES,
    "MTQual": SYS_PERF_FEATURES,
    "Basic-MTQual": BASIC_FEATURES + SYS_PERF_FEATURES,
    "Basic-ContAware": BASIC_FEATURES + CONTENT_AWARE_FEATURES,
    "ContAware-no-ngrams": no_ngram_features,
    "ContAware-MTQual": CONTENT_AWARE_FEATURES + SYS_PERF_FEATURES,
    "All-kiwi": BASIC_FEATURES + CONTENT_AWARE_FEATURES + qe_sys_perf_features,
    "Basic-kiwi": BASIC_FEATURES + qe_sys_perf_features,
    "ContAware-kiwi": CONTENT_AWARE_FEATURES + qe_sys_perf_features,
}
print("features_dict:")
print(json.dumps(features_dict, indent=4))

In [None]:
# Metrics to evaluate. Only 'comet22-qe' is enabled; the full set would be
# ["comet22", "chrf", "sacrebleu", "comet22-qe"].
# NOTE(review): the result tables further down also filter on 'comet22',
# 'chrf' and 'sacrebleu' -- presumably those come out empty unless the
# metrics are enabled here; confirm.
metrics_to_eval = ['comet22-qe']

# Arguments that are identical for every eval_FIPs_offline call.
shared_eval_kwargs = dict(
    features_dict=features_dict,
    predictors=predictors,
    res_dict=res_dict,
    feature_imp_dict=feature_imp_dict,
    dataset=fid,
    dataset_name='opus',
    fip_type='specific',
    fid_type='normal',
    create_plots=False,
)

for metric in metrics_to_eval:
    for test_set in OPUS_TEST_SETS:
        # One run per (metric, test set) delta column.
        target = f'delta-target_{test_set}_{metric}'
        eval_FIPs_offline(target=target, **shared_eval_kwargs)

## Results

In [13]:
# Build one row per res_dict entry and derive the two reporting columns:
#   PCC: 'test-PCC' scaled by 100, rounded to 2 decimals
#   MAE: 'test-mae', rounded to 4 decimals
#
# Each entry carries text fields (e.g. the `metric` column filtered on below)
# next to numeric scores, so the transposed frame is expected to be object
# dtype. DataFrame.round leaves non-numeric columns untouched, so the scores
# are coerced with pd.to_numeric first; otherwise the rounding has no effect.
results = (
    pd.DataFrame(res_dict)
    .transpose()
    .assign(
        PCC=lambda df: pd.to_numeric(df['test-PCC']) * 100,
        MAE=lambda df: pd.to_numeric(df['test-mae']),
    )
    .round({'PCC': 2, 'MAE': 4})
)

### COMET22

In [None]:
# Rows of `results` whose metric is 'comet22'.
comet22_results = results[results['metric'] == 'comet22']

get_results_table(
    results=comet22_results,
    fid_type='normal',
    average=False,
    single=False,    # whether to return a single table or a table for each predictor
    to_latex=True,   # whether to print the table in latex
)

### chrF

In [None]:
# Rows of `results` whose metric is 'chrf'.
chrf_results = results[results['metric'] == 'chrf']

get_results_table(
    results=chrf_results,
    fid_type='normal',
    average=False,
    single=False,    # whether to return a single table or a table for each predictor
    to_latex=True,   # whether to print the table in latex
)

### sacreBLEU

In [None]:
# Rows of `results` whose metric is 'sacrebleu'.
sacrebleu_results = results[results['metric'] == 'sacrebleu']

get_results_table(
    results=sacrebleu_results,
    fid_type='normal',
    average=False,
    single=False,    # whether to return a single table or a table for each predictor
    to_latex=True,   # whether to print the table in latex
)

### comet22-qe

In [None]:
# Rows of `results` whose metric is 'comet22-qe'.
comet22_qe_results = results[results['metric'] == 'comet22-qe']

get_results_table(
    results=comet22_qe_results,
    fid_type='normal',
    average=False,
    single=False,    # whether to return a single table or a table for each predictor
    to_latex=True,   # whether to print the table in latex
)