In [47]:
import pandas as pd
from fun import *
from common import generate_mapper
from groq import Groq
from dotenv import load_dotenv
import os
import json

# Data

In [18]:
# Load the translations table from a path relative to the notebook and
# preview its first rows.
df = pd.read_csv("../data/translations.csv")
df.head()

Unnamed: 0,lp,src,mt,ref,score,raw,annotators,domain,year,l1,l2
0,en-zh,Police said in a statement at the time that th...,警方在当时的一份声明中表示，他们决定以“沉重的心情”结束对 Jacsun 的搜索，但指出他们...,警方在当时的一份声明中表示，他们在决定结束对杰森的搜寻时，是怀着“沉重的心情”的，并指出他们...,0.238989,80.5,4,news,2020,en,zh
1,en-zh,"Trump will likely veto the resolution, the six...",特 朗 普 可 能 会 否 决 该 决 议 ， 这 是 他 第 六 次 以 总 统 身 份 ...,特朗普很可能行使总统否决权，这将是他成为总统以来第6次否决国会立法。他早在三月已动用否决权，...,0.487842,81.166667,6,news,2020,en,zh
2,en-zh,The man arrived at work about 9.30am after fai...,该名男子未能回复同事的几条短信和电话，于上午9.30左右上班。,该男子大约在上午 9:30 到达工作单位，在此之前，他未能回复同事发来的几条信息和打来的电话。,0.46985,80.5,4,news,2020,en,zh
3,en-zh,Are hopes for a nuclear-free world realistic?,对一个无核世界的希望是现实的吗 ？,实现无核化世界的希望是否现实？,0.626218,81.0,4,news,2020,en,zh
4,en-zh,The California attorney general's office in Ma...,"3月,加利福尼亚总检察长办公室在经过近一年的调查后拒绝提出州刑事指控,当时,总检察长Xavi...",3 月，加州总检察长办公室在完成了近一年的案件调查后，拒绝对两名警察提起加州刑事起诉。总检察...,-0.207602,59.0,4,news,2020,en,zh


In [19]:
# Read the stored prompt template and echo it so it can be inspected inline.
with open("../data/prompts/PROMPT4.txt") as prompt_file:
    prompt_template = prompt_file.read()

print(prompt_template)

Please evaluate the quality of the translation from [ORIGINAL LANGUAGE] to [TRANSLATED LANGUAGE] based on accuracy, fluency, and coherence:
Provide a score from 0 to 100 to indicate the overall quality of the translation.



In [32]:
# Two-letter language codes (as found in the `l1` / `l2` columns) mapped to
# the English language names that are inserted into the prompt.
language_mapper = dict(
    en="English",
    fi="Finnish",
    de="German",
    hi="Hindi",
    xh="Xhosa",
    zh="Chinese",
    cs="Czech",
    fr="French",
    bn="Bengali",
    zu="Zulu",
)

In [33]:
# Swap language codes for language names.
# `replace` is used instead of `map` because `map` turns every value without
# a dictionary entry into NaN: running this cell a second time (when the
# columns already hold names) would wipe both columns. `replace` leaves
# unmapped values untouched, which makes the cell safe to re-run.
for column in ("l1", "l2"):
    df[column] = df[column].replace(language_mapper)

In [55]:
def get_conversation_groq(
    src_language: str, trg_language: str, src_sentence: str, trg_sentence: str
) -> list:
    """Build the chat messages asking the model to score one translation.

    Args:
        src_language: Name of the source language, inserted into the system prompt.
        trg_language: Name of the target language, inserted into the system prompt.
        src_sentence: The original sentence.
        trg_sentence: The translated sentence to be scored.

    Returns:
        A list of two message dicts (keys ``"role"`` and ``"content"``): the
        system instruction followed by the user message holding both sentences.
    """
    system_prompt = (
        f"Please evaluate the quality of the translation from {src_language} "
        f"to {trg_language} based on accuracy, fluency, and coherence: \n "
        "Provide a score from 0 to 100 to indicate the overall quality of the "
        "translation. Provide only the score, nothing else."
    )
    # The sentences are wrapped in double quotes verbatim; quotes that occur
    # inside a sentence are not escaped.
    user_prompt = (
        f'Original sentence: "{src_sentence}" \n Translated sentence: "{trg_sentence}"'
    )
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

In [75]:
# One chat conversation per row, built from the language names, the source
# sentence and the machine translation.
df["message"] = [
    get_conversation_groq(source_lang, target_lang, source_text, machine_translation)
    for source_lang, target_lang, source_text, machine_translation in zip(
        df["l1"], df["l2"], df["src"], df["mt"]
    )
]

In [57]:
# Inspect the conversation generated for the first row.
df["message"].iloc[0]

[{'role': 'system',
  'content': 'Please evaluate the quality of the translation from English to Chinese based on accuracy, fluency, and coherence: \n Provide a score from 0 to 100 to indicate the overall quality of the translation. Provide only the score, nothing else.'},
 {'role': 'user',
  'content': 'Original sentence: "Police said in a statement at the time that they made the decision to end the search for Jacsun with a "heavy heart," but noted they couldn\'t pinpoint a location in the landfill "to a point that would make continuing the search reasonable."" \n Translated sentence: "警方在当时的一份声明中表示，他们决定以“沉重的心情”结束对 Jacsun 的搜索，但指出他们无法确定垃圾填埋场的位置，“以至于继续搜索是合理的”。"'}]

# API calls

In [20]:
# Load variables from a .env file into the process environment
# (presumably this is where the Groq API key comes from — confirm).
load_dotenv()

True

In [42]:
# Create the Groq API client with default arguments; no credentials are
# passed explicitly here.
client = Groq()

In [76]:
# Plain Python list of conversations; list position == row position in `df`.
input_data = df["message"].tolist()

In [62]:
# Smoke-test a single request before running the batch.
# An explicit row index is used: no loop variable `i` exists at this point
# when the notebook is executed top to bottom on a fresh kernel.
chat_completion = client.chat.completions.create(
    messages=input_data[0],
    model="llama3-70b-8192",
    temperature=0.5,
    max_tokens=1024,
    top_p=1,
    stop=None,
    stream=False,
)

In [77]:
# Show the first conversation that will be sent to the API.
input_data[0]

[{'role': 'system',
  'content': 'Please evaluate the quality of the translation from English to Chinese based on accuracy, fluency, and coherence: \n Provide a score from 0 to 100 to indicate the overall quality of the translation. Provide only the score, nothing else.'},
 {'role': 'user',
  'content': 'Original sentence: "Police said in a statement at the time that they made the decision to end the search for Jacsun with a "heavy heart," but noted they couldn\'t pinpoint a location in the landfill "to a point that would make continuing the search reasonable."" \n Translated sentence: "警方在当时的一份声明中表示，他们决定以“沉重的心情”结束对 Jacsun 的搜索，但指出他们无法确定垃圾填埋场的位置，“以至于继续搜索是合理的”。"'}]

In [104]:
last_idx = 1500
max_rows = 2401
outfile = f"../data/llama_3/{last_idx}_starting_row.json"

# Score rows [last_idx, max_rows) and store them as a JSON list.
# Results are collected in memory and serialised with `json.dump`, so the file
# is always valid JSON. Writing "[", "," and "]" by hand left a trailing comma
# whenever the loop stopped before index `max_rows - 1`.
results = []
try:
    for i in range(last_idx, min(max_rows, len(input_data))):
        sample = input_data[i]
        try:
            chat_completion = client.chat.completions.create(
                messages=sample,
                model="llama3-70b-8192",
                temperature=0.5,
                max_tokens=1024,
                top_p=1,
                stop=None,
                stream=False,
            )
            answer = chat_completion.choices[0].message.content
        except Exception as error:
            # Row `i` produced no result, so the next run has to start at `i`
            # itself (starting at `i + 1` would silently skip this row).
            print(f"request for row {i} failed: {error!r}")
            print("next start by:")
            print(i)
            break

        results.append(
            {
                "sentence": sample[-1]["content"],
                "score": answer,
                "id": i,
            }
        )
finally:
    # Persist whatever was collected, even if the run was interrupted.
    with open(outfile, "w") as output_file:
        json.dump(results, output_file)

In [139]:
# Parse the results file written by the scoring run.
with open(outfile) as results_file:
    content = json.loads(results_file.read())

In [101]:
# Display the records parsed from the results file.
content

[{'sentence': 'Original sentence: "It\'s not a big meeting, it\'s not a Royal Ascot - it\'s a mundane Friday night with 20,000 people." \n Translated sentence: "这不是一个大型的会议，它不是皇家 Ascot -它是一个普通的星期五晚上，有20，000人。"',
  'score': '85',
  'id': 250},
 {'sentence': 'Original sentence: "Still there has been a surge in support -- more than 75 House members in about three days -- of launching such an inquiry as part of the fallout from Trump\'s July 25 call with Ukrainian President Volodymyr Zelensky where he discussed former Vice President Joe Biden. There is no evidence of wrongdoing by Biden." \n Translated sentence: "尽管如此，在特朗普7月25日与乌克兰总统弗拉基米尔*泽伦斯基（Volodymyr Zelensky）的电话会议中，他讨论了前副总统乔*拜登（Joe Biden）的后果，仍然有一个激增的支持-在大约三天内超过75众议院成员。 没有证据表明拜登有不法行为。"',
  'score': '92',
  'id': 251},
 {'sentence': 'Original sentence: "Twarock said the new blueprints also provide "a new perspective on viral evolution, suggesting novel routes in which larger and more complex viruses may have evolved from simple ones at ev

In [None]:
# `last_idx` / `max_rows` are kept for parity with the batch run; this cell
# does not use them.
last_idx = 1500
max_rows = 2401
outfile = "../data/llama_3/missing_starting_row.json"

# Re-score individual rows by id.
# Results are serialised with `json.dump` so the file is valid JSON; the
# hand-written separators always left a trailing comma before "]" here.
results = []
try:
    for i in [10, 16, 24]:
        sample = input_data[i]
        try:
            chat_completion = client.chat.completions.create(
                messages=sample,
                model="llama3-70b-8192",
                temperature=0.5,
                max_tokens=1024,
                top_p=1,
                stop=None,
                stream=False,
            )
            answer = chat_completion.choices[0].message.content
        except Exception as error:
            # Report the id that failed: the ids are not contiguous, so an
            # "i + 1" hint would point at an unrelated row.
            print(f"request for row {i} failed: {error!r}")
            print("next start by:")
            print(i)
            break

        results.append(
            {
                "sentence": sample[-1]["content"],
                "score": answer,
                "id": i,
            }
        )
finally:
    # Persist whatever was collected, even if the run was interrupted.
    with open(outfile, "w") as output_file:
        json.dump(results, output_file)

In [114]:
def recover_all_files(path: str) -> pd.DataFrame:
    """Merge every JSON result file in ``path`` into one DataFrame.

    Each ``*.json`` file is expected to hold a list of records that include an
    ``"id"`` key. Entries that are not regular ``.json`` files (sub-directories
    such as ``.ipynb_checkpoints``, OS metadata files, ...) are skipped instead
    of aborting the load.

    Args:
        path: Directory containing the result files.

    Returns:
        All records concatenated, sorted by ``id``, with a fresh 0..n-1 index.
    """
    records = []

    # Sorted listing keeps the load order independent of the file system.
    for filename in sorted(os.listdir(path)):
        file_path = os.path.join(path, filename)
        if not (filename.endswith(".json") and os.path.isfile(file_path)):
            continue
        with open(file_path) as infile:
            records.extend(json.load(infile))

    return pd.DataFrame(records).sort_values(by="id").reset_index(drop=True)

In [125]:
# Gather every result file under ../data/llama_3 into a single DataFrame.
df_llama = recover_all_files("../data/llama_3")

In [126]:
# Display the combined results.
df_llama

Unnamed: 0,sentence,score,id
0,"Original sentence: ""Police said in a statement...",92,0
1,"Original sentence: ""Trump will likely veto the...",95,1
2,"Original sentence: ""The man arrived at work ab...",85,2
3,"Original sentence: ""Are hopes for a nuclear-fr...",95,3
4,"Original sentence: ""The California attorney ge...",92,4
...,...,...,...
2395,"Original sentence: ""Wayalela amagosa ukuba aye...",85,2395
2396,"Original sentence: ""Ulimo lokuphila lulula, ol...",95,2396
2397,"Original sentence: ""UMlawuli wezoTshutshiso lo...",95,2397
2398,"Original sentence: ""Ukuphumla phezu kweentaba ...",95,2398


In [127]:
# Distinct raw answers returned by the model (still strings at this point).
df_llama["score"].unique()

array(['92', '95', '85', '65', '90', '75', '60', '80', '96', '40', '20',
       '45', '98', '70', '35', '78', '87', '68', '72', '0', '62', '99',
       '30', '34', '10', '82', '97'], dtype=object)

In [132]:
# Attach the model scores to `df` by record id rather than by df_llama's
# positional index. Each record's `id` is the position of its row in
# `input_data` (and therefore in `df`), so a missing or duplicated id in the
# result files must not shift the scores of the rows that follow it.
scores_by_id = (
    df_llama.drop_duplicates(subset="id", keep="last").set_index("id")["score"]
)
# The API returns the score as text; convert to numbers here (answers that are
# not numeric become NaN) so later plots and statistics get numeric data.
df["llama3_score"] = pd.to_numeric(
    scores_by_id.reindex(range(len(df))), errors="coerce"
).to_numpy()

In [134]:
# Human raw score next to the LLaMA-3 score, per language pair.
df[["lp", "raw", "llama3_score"]]

Unnamed: 0,lp,raw,llama3_score
0,en-zh,80.500000,92
1,en-zh,81.166667,95
2,en-zh,80.500000,85
3,en-zh,81.000000,95
4,en-zh,59.000000,92
...,...,...,...
2395,xh-zu,100.000000,85
2396,xh-zu,90.000000,95
2397,xh-zu,100.000000,95
2398,xh-zu,50.000000,95


In [129]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [135]:
# Scatter of human raw scores and LLaMA-3 scores over the row index.
fig = make_subplots(rows=1, cols=1)

fig.add_trace(
    go.Scatter(x=df.index, y=df.raw, mode="markers", name="raw_score"), row=1, col=1
)

fig.add_trace(
    go.Scatter(
        x=df.index,
        # Scores may still be stored as strings; plot them as numbers.
        y=pd.to_numeric(df.llama3_score, errors="coerce"),
        mode="markers",
        # This trace shows the LLaMA-3 scores (it was labelled "gemma_score").
        name="llama3_score",
    ),
    row=1,
    col=1,
)

fig.update_layout(
    title="Human raw score vs. LLaMA-3 score",
    xaxis_title="row index",
    yaxis_title="score",
)

fig.show()

In [142]:
import numpy as np

In [144]:
# Rows where the model score is within 5 points of the human raw score.
(df["llama3_score"].astype(float) - df["raw"]).abs() < 5

0       False
1       False
2        True
3       False
4       False
        ...  
2395    False
2396    False
2397    False
2398    False
2399    False
Length: 2400, dtype: bool

In [136]:
def compute_correlation(
    df: pd.DataFrame,
    ground_truth: str = "raw",
    predicted: str = "gemma_score",
    method: str = "pearson",
    decimals: int = 3,
) -> dict:
    """Correlate predicted scores with ground-truth scores.

    Both columns are converted to numbers first; values that cannot be parsed
    (e.g. a model answer that is not a bare number) become NaN and are ignored
    by the correlation instead of raising.

    Args:
        df: Frame holding an ``lp`` (language pair) column plus both score columns.
        ground_truth: Name of the reference score column.
        predicted: Name of the predicted score column.
        method: Correlation method passed to ``Series.corr``.
        decimals: Number of decimals the coefficients are rounded to.

    Returns:
        Dict with the key ``"overall"`` followed by one key per language pair
        (in order of first appearance), each mapped to the rounded coefficient.
    """
    truth = pd.to_numeric(df[ground_truth], errors="coerce")
    preds = pd.to_numeric(df[predicted], errors="coerce")

    correlations = {"overall": round(truth.corr(preds, method=method), decimals)}

    for language_pair in df["lp"].unique():
        in_pair = df["lp"] == language_pair
        correlations[language_pair] = round(
            truth[in_pair].corr(preds[in_pair], method=method), decimals
        )

    return correlations

In [138]:
# Print one coefficient per line: overall first, then each language pair.
for coefficient in compute_correlation(df, "raw", "llama3_score").values():
    print(coefficient)

0.375
0.474
0.623
0.67
0.494
0.36
0.436
0.552
0.316
