# Translate Benchmarking

In [1]:
import requests, uuid, json
import datasets
import pandas as pd
from typing import Optional, List, Dict
from sacrebleu import CHRF, BLEU
from langchain.output_parsers import StructuredOutputParser


In [2]:
from snowflake.snowpark.session import Session
from snowflake.snowpark.version import VERSION
from snowflake.cortex import Complete, Translate

from snowflake.ml.utils.connection_params import SnowflakeLoginOptions

In [3]:
#snowflake connection
session = Session.builder.configs(SnowflakeLoginOptions(connection_name='connections.translate_demos')).create()

print("----------------------------------------")
snowflake_environment = session.sql('select current_warehouse(), current_database(), current_schema(), current_version()').collect()
snowpark_version = VERSION
print('Warehouse                   : {}'.format(snowflake_environment[0][0]))
print('Database                    : {}'.format(snowflake_environment[0][1]))
print('Schema                      : {}'.format(snowflake_environment[0][2]))
print('Snowflake version           : {}'.format(snowflake_environment[0][3]))
print('Snowpark for Python version : {}.{}.{}'.format(snowpark_version[0],snowpark_version[1],snowpark_version[2]))
print("----------------------------------------")

SnowflakeLoginOptions() is in private preview since 0.2.0. Do not use it in production. 


----------------------------------------
Warehouse                   : CONTAINER_HOL_WH
Database                    : None
Schema                      : None
Snowflake version           : 8.37.1
Snowpark for Python version : 1.22.1
----------------------------------------


# Plan


1. Compare Translate function to Azure AI Translator
2. Compare Complete function to Azure AI Translator using BLEU score
    - use standard language (French and Spanish)
    - identify best performing models
3. Prompt engineering on the model to get better results
    - compare results with and without prompt eng using BLEU score
4. Compare the best performing model from previous steps to Azure AI Translator using dialect texts (Quebecois for French, US Spanish for Spanish)
    - Use BLEU
    - Other metrics?
5. If previous step is not satisfactory, use fine-tuning
    - find data sets or generate fake data. We can use 


## Updated plan
1. Start with standard Spanish:
    1. Generate translations using **Cortex Translate**
        1. run a loop on each sentence
        2. run on a larger body of text
    2. Generate translations using **Cortex Complete**
        1. Run a loop of all available models on larger body of text
        2. parse the output
    3. Generate translations using **Azure Translator**
    4. Compare evaluations using BLEU and CHRF scores
    5. Compare using manual evaluation
2. Repeat with US Spanish sample texts
3. Repeat with Standard French
4. Repeat with Quebecois French dialect sample texts

# Constants

In [4]:
LANG_PAIRS = 'es-en'
lang_from, lang_to = LANG_PAIRS.split('-')

In [5]:
LOCAL = True  # load data from local file

## Azure Translator setup

In [6]:
key = "<Azure Translate API Key>"
endpoint = "https://api.cognitive.microsofttranslator.com"

# location, also known as region.
# required if you're using a multi-service or regional (not global) resource. It can be found in the Azure portal on the Keys and Endpoint page.
location = "westus2"

path = '/translate'
constructed_url = endpoint + path

headers = {
    'Ocp-Apim-Subscription-Key': key,
    # location required if you're using a multi-service or regional (not global) resource.
    'Ocp-Apim-Subscription-Region': location,
    'Content-type': 'application/json',
    'X-ClientTraceId': str(uuid.uuid4())
}

# Functions

In [6]:
def compute_metric(metric, df_ref, sys, lang_pairs=LANG_PAIRS):
    """
    calculates BLEU, CHRF, or COMET score
    """
    results = {}
    
    lang_from, lang_to = lang_pairs.split('-')
    if isinstance(df_ref, pd.DataFrame):
        source = list(df_ref[lang_from].tolist())
        refs = list(df_ref[lang_to].tolist())
    elif isinstance(df_ref, list):
        source = df_ref[0]
        refs = df_ref[1]

    if metric == 'bleu':
        bleu = BLEU(trg_lang=lang_to)
        b = bleu.corpus_score(sys, refs)
        results = round(b.score, 3)
    elif metric == 'chrf':
        results = round(CHRF().corpus_score(sys, refs).score, 2)
    elif metric == 'comet':
        global metric_comet
        if metric_comet is None:
            metric_comet = Comet()
        comet_data = [{"src": src, "mt": sys, "ref": ref} for src, sys, ref in zip(source, sys, refs)]
        comet_out = metric_comet.score(comet_data)
        results = round(comet_out.system_score, 4) * 100
    else:
        raise ValueError(f"{metric} metric is not defined")

    return results

In [7]:
def print_valuation(ref, sys, service=None):
    metrics = ['bleu', 'chrf']
    if service is not None:
        print(f'scores for service {service} is:')
    for metric in metrics:
        print(f'{metric}: {compute_metric(metric=metric, df_ref=ref, sys=sys)}')

In [29]:
def translate_prompt_orig(text_to_translate, model="reka-flash", source_language="es", target_language="en"):
    """
    A translation prompt we created
    """
    lang = {'es': 'Spanish', 'en': 'English', 'fr': 'French', 'de': 'German'}
    system_prompt = """As an accomplished translator with a track record
of recognizing dialects, your role is to provide precise translation.
Leveraging your deep understanding of Spanish language and possible presence of English words, your
translation should be clear and accurate."""

    prompt = f""""
Please translate the following text from {lang[source_language]} to {lang[target_language]}. You should be aiming for a concise translation.
Now, take a moment to think and review this text, if you do a good job I will tip you $50: {text_to_translate}
"""


    body = json.dumps(
        {
            "system": system_prompt,
            "messages": [
                {"role": "system", 
                 "content": system_prompt},
                {
                    "role": "user",
                    "content": prompt,
                },
                {
                    "role": "assistant",
                    "content": "Here is my translation of this text:\n<Response>",
                },
            ],
        }
    )

    response = Complete(model, body)

    return response

In [30]:
def translate_prompt_anth(text_to_translate, model="reka-flash", source_language="es", target_language="en"):
    """
    translation prompt suggested by Anthropic
    """
    lang = {'es': 'Spanish', 'en': 'English', 'fr': 'French', 'de': 'German'}
    system_prompt = """You are a highly skilled translator with expertise in many languages. 
    Your task is to identify the language of the text I provide and accurately translate it into the specified target language while preserving the meaning, tone, and nuance of the original text. 
    Please maintain proper grammar, spelling, and punctuation in the translated version."""

    prompt = {
                "type": "text",
                "text": f"{text_to_translate} --> {lang[target_language]}"
                }


    body = json.dumps(
        {
            "system": system_prompt,
            "messages": [
                {
                    "role": "user",
                    "content": prompt,
                },
            ],
        }
    )
    response = Complete(model, body)

    return response

In [19]:
def translate_llm_simple(text_to_translate, model="reka-flash", source_language="es", target_language="en"):
    """
    translation without any prompt eng
    """
    lang = {'es': 'Spanish', 'en': 'English', 'fr': 'French', 'de': 'German'}
    body = f'Translate from {lang[source_language]} to {lang[target_language]}: <Text>{text_to_translate}</Text>.'
    
    return Complete(model, body)

In [11]:
def az_translate(text_to_translate, lang_from="es", lang_to="en"):
    """
    Azure AI Translate
    """
    params = {
    'api-version': '3.0',
    'from': lang_from,
    'to': lang_to
    }
    body = []
    
    if isinstance(text_to_translate, pd.DataFrame):
        for idx, row in text_to_translate.iterrows():
            body.append({'text': row[lang_from]})
    elif isinstance(text_to_translate, str):
        body.append({'text': text_to_translate})
        
    request = requests.post(constructed_url, params=params, headers=headers, json=body)
    response = request.json()

    results = [item['translations'][0]['text'] for item in response]
    return results

In [12]:
def parse_cortexcomplete_response(llm_output):
    """
    parse the text returned from Cortex Complete to get the translation
    """
    return output_parser.parse(output)

In [13]:
def run_and_compare(text_to_traslate, lang_pairs=None):
    if lang_pairs is None:
        lang_pairs = LANG_PAIRS
        lang_from, lang_to = LANG_PAIRS.split('-')
        
    print_valuation

# Get data

## Gender accuracy dataset

In [15]:
if not LOCAL:
    # Huggingface https://huggingface.co/datasets/gsarti/mt_geneval/viewer/context_en_es/test
    gender_acc = datasets.load_dataset('gsarti/mt_geneval', 'context_en_es')
    gender_acc_df_train = pd.DataFrame(gender_acc['train'])
    gender_acc_df_test = pd.DataFrame(gender_acc['test'])

## Thermostatic/parallel_corpus_europarl_english_spanish

In [16]:
force_remote = False
file_name = 'es_en_corpus_train.tsv'
if LOCAL and not force_remote:
    paral_es_en_corpus_df_train = pd.read_csv(file_name, sep='\t', index_col=False)
else:
    # https://huggingface.co/datasets/Thermostatic/parallel_corpus_europarl_english_spanish
    paral_es_en_corpus = datasets.load_dataset('Thermostatic/parallel_corpus_europarl_english_spanish')
    paral_es_en_corpus_df_train = pd.DataFrame(paral_es_en_corpus['train'])

    paral_es_en_corpus_df_train.to_csv(file_name, sep='\t', index=False)

In [17]:
print(paral_es_en_corpus_df_train.shape)
display(paral_es_en_corpus_df_train.head())

(1965734, 2)


Unnamed: 0,en,es
0,Resumption of the session,Reanudación del período de sesiones
1,I declare resumed the session of the European ...,Declaro reanudado el período de sesiones del P...
2,"Although, as you will have seen, the dreaded '...","Como todos han podido comprobar, el gran ""efec..."
3,You have requested a debate on this subject in...,Sus Señorías han solicitado un debate sobre el...
4,"In the meantime, I should like to observe a mi...","A la espera de que se produzca, de acuerdo con..."


## sample data

In [18]:
sample_size = 10
sample = paral_es_en_corpus_df_train.head(sample_size)

In [19]:
sentense_length = 20
single_text_es = '. '.join(paral_es_en_corpus_df_train.head(sentense_length)['es'].tolist())
single_text_en = '. '.join(paral_es_en_corpus_df_train.head(sentense_length)['en'].tolist())

In [20]:
# count number of words
print(len(single_text_es.split(' ')))

443


# Snowflake Cortex translate

Let's try this in two different ways: one sentence at a time, and one big chunk of text.

## Short sentences

In [21]:
%%time
sf_sys = []
for idx, row in sample.iterrows():
    resp = Translate(row[lang_from], from_language=lang_from, to_language=lang_to)
    sf_sys.append(resp)

CPU times: user 190 ms, sys: 49.9 ms, total: 240 ms
Wall time: 5.71 s


In [22]:
print_valuation(ref=sample, sys=sf_sys)

bleu: 0.258
chrf: 5.16


## One long body of text

In [23]:
%%time
resp = Translate(single_text_es, from_language=lang_from, to_language=lang_to)

CPU times: user 10.6 ms, sys: 2.93 ms, total: 13.5 ms
Wall time: 3.07 s


In [24]:
# needs better chunking
df_ref = [single_text_es, single_text_en]
print_valuation(ref=df_ref, sys=resp)

bleu: 0.0
chrf: 100.0


# Using Cortex Complete

## Without any prompt engineering

In [25]:
sample

Unnamed: 0,en,es
0,Resumption of the session,Reanudación del período de sesiones
1,I declare resumed the session of the European ...,Declaro reanudado el período de sesiones del P...
2,"Although, as you will have seen, the dreaded '...","Como todos han podido comprobar, el gran ""efec..."
3,You have requested a debate on this subject in...,Sus Señorías han solicitado un debate sobre el...
4,"In the meantime, I should like to observe a mi...","A la espera de que se produzca, de acuerdo con..."
5,"Please rise, then, for this minute' s silence.",Invito a todos a que nos pongamos de pie para ...
6,(The House rose and observed a minute' s silence),"(El Parlamento, de pie, guarda un minuto de si..."
7,"Madam President, on a point of order.","Señora Presidenta, una cuestión de procedimiento."
8,You will be aware from the press and televisio...,Sabrá usted por la prensa y la televisión que ...
9,One of the people assassinated very recently i...,Una de las personas que recientemente han ases...


In [26]:
translate_llm_simple(sample.loc[1, lang_from])

' In English, the Spanish text <Text>Declaro reanudado el período de sesiones del Parlamento Europeo, interrumpido el viernes 17 de diciembre pasado, y reitero a Sus Señorías mi deseo de que hayan tenido unas buenas vacaciones.</Text> translates to:\n\n"I declare resumed the session period of the European Parliament, interrupted on Friday, December 17th, and once again express to Your Lordships my wish that you have had a good holiday."\n\nExplanation:\n- "Declaro reanudado el período de sesiones del Parlamento Europeo" = "I declare resumed the session period of the European Parliament"\n- "interrumpido el viernes 17 de diciembre pasado" = "interrupted on Friday, December 17th"\n- "y reitero a Sus Señorías mi deseo de que hayan tenido unas buenas vacaciones" = "and once again express to Your Lordships my wish that you have had a good holiday"'

In [27]:
translate_llm_simple(text_to_translate=sample.loc[1, lang_from], model="mistral-large").strip()

'I declare the period of sessions of the European Parliament, interrupted last Friday, December 17, resumed, and I repeat to Your Lordships my wish that you have had a good vacation.'

## Prompt I wrote originally

In [28]:
# setup Cortex Complete function
model_list = [
    "snowflake-arctic",
    "mistral-large",
    "reka-flash",
    "reka-core",
    "mixtral-8x7b",
    "llama3-8b",
    "llama3-70b",
    "mistral-7b"
]

In [60]:
cc_resp = translate_prompt_orig(sample.loc[0, lang_from])
print(cc_resp)

 Here is my translation of the given text from Spanish to English, aiming for a concise translation:

```
Reanudación del período de sesiones -> Resumption of the session period
```

Explanation:
- "Reanudación" in Spanish means "resumption" in English.
- "Del período de sesiones" in Spanish means "of the session period" in English.

So, the complete translation is: "Resumption of the session period"


In [88]:
cc_resp.split('```')[1].split(' -> ')[1]

'Resumption of the session period\n'

## With a prompt taken from Anthropic

In [72]:
print(translate_prompt_anth(sample.loc[0, lang_from]))

 In English, "Reanudaci\u00f3n del per\u00edodo de sesiones" translates to "Resumption of the session period". 

Explanation:
- "Reanudaci\u00f3n" means "resumption" or "continuation" in English.
- "del per\u00edodo de sesiones" means "of the session period" in English. 

So, combining these words, we get "Resumption of the session period" as the accurate translation while preserving the meaning, tone, and nuance of the original text.




# Azure AI Translator

## short sentences

In [90]:
az_sys = az_translate(sample)

print_valuation(ref=sample, sys=az_sys)

bleu: 0.257
chrf: 5.25


## One long text

In [89]:
df_ref = [single_text_es, single_text_en]

az_sys = az_translate(single_text_es)

print_valuation(ref=df_ref, sys=az_sys)

bleu: 0.124
chrf: 0.24


# Pass any text and compare results

In [21]:
USE_CASE_1 = """
Hola estoy llamando porque tengo interés en uno de tus autos que vi en el internet. ¿Veo que tienes muchas trocas de venta y me gustaría saber si tienes mas sales? Prefiero tener un truck de marca Americano como GMC, Chevy or Ford. Ahorita tengo un Honda Accord del año 2019 y mi pago mensual es $500. Ya casi está paid off pero me falta solo $5,000 en el loan. Se que una troca me costaria mas cada mes pero prefiero que mi pago no sea más de $750 al mes. No lo quiero financiar por más de 72 months. ¿Estoy planeando hacer un trade-in y estoy curioso en cuanto me van a dar para mi Honda Accord? Hice mi research y veo que mi carro está valorado a aproximadamente $22,500. ¿Crees que me puedes dar eso para ir towards una troca? Puedo traer mi Honda Accord hoy al dealership para que you can check it out. La verdad es que está en buenas condiciones. Ahora les voy a decir más detalles del tipo de troca que quiero. 
Quiero una troca del año pero si no se puede hacer eso, yo aceptaría una troca que esté más nuevecita. Prefiero que sea black or blue y que tenga llantas grandes con un mínimo de 19 inches. Quiero que el material interior sea del color black. No me importa si tiene un sunroof pero sí necesita tener un truck bed de 6.5 pies. También quiero que sea four-wheel drive y que los asientos tengan heat y aire acondicionado. Y lo último que les voy a pedir es que la troca sea más económica cuando se trata del gas mileage. Se que usualmente las trocas burn up gas mileage pero ojala me puedan entroncar una troca que me puede ahorrar gas.
Muchas gracias por ayudarme."""

In [22]:
USE_CASE_2 = """Buenos dias. Estoy interesada en el white Honda Odyssey 2022 con tan interior que tienen en venta. La verdad es que tengo 4 hijos y necesito mucho espacio porque siempre ando para arriba y abajo por todos lados. En este momento estoy manejando un Mazda 5 pero es del año 2015 y estamos apretados en el minivan. Necesito mucho más espacio en el trunk para el soccer equipment y el mandado. My Mazda 5 ya está pagado en full y esta en okay condición. Tiene algunos problemas y los brakes no funcionan muy bien. Pero no quiero gastar dinero en arreglarlo y prefiero poner ese dinero hacia un van más nueva. Se que lo estás vendiendo por $34,999 pero yo solo puedo pagar $32,000 total out the door. Tengo interés en un warranty pero lo máximo que puedo pagar para agregarlo es $500. También me gustaría hacer un trade-in and I need at least $10,000 de eso para comprar el Honda Odyssey. ¿Y por casualidad ustedes podrían darme unos weathermats? Con mis hijos se ponen bien sucio los pisos del van y necesito algo que me ayude a mantenerlos limpios. ¿Necesito saber qué safety features tiene este ven? La seguridad del van es muy importante para mi familia. Puedo venir al dealership mañana para hablar más sobre todos los detalles. Talk soon.
"""

In [23]:
use_cases = {'use_case_1': USE_CASE_1, 'use_case_2': USE_CASE_2}
compare_results = dict()
for key in use_cases:
    our_solution = translate_llm_simple(text_to_translate=use_cases[key], model="mistral-large")
    azure_soluion = az_translate(text_to_translate=use_cases[key])
    compare_results[key] = {'A': our_solution, 'B': azure_soluion}



In [24]:

print(json.dumps(compare_results, sort_keys=True, ensure_ascii=False, indent=4, separators=(',', ': ')))


{
    "use_case_1": {
        "A": " Hello, I am calling because I am interested in one of your cars that I saw on the internet. I see that you have many trucks for sale and I would like to know if you have more sales? I prefer to have an American brand truck like GMC, Chevy or Ford. Right now I have a Honda Accord from the year 2019 and my monthly payment is $500. It is almost paid off but I still owe $5,000 on the loan. I know that a truck will cost me more each month but I prefer that my payment not be more than $750 a month. I don't want to finance it for more than 72 months. I am planning to do a trade-in and I am curious how much you will give me for my Honda Accord? I did my research and I see that my car is valued at approximately $22,500. Do you think you can give me that to go towards a truck? I can bring my Honda Accord today to the dealership for you to check it out. The truth is that it is in good condition. Now I am going to tell you more details about the type of truck I

In [25]:
print(json.dumps(use_cases, sort_keys=True, ensure_ascii=False, indent=4, separators=(',', ': ')))


{
    "use_case_1": "\nHola estoy llamando porque tengo interés en uno de tus autos que vi en el internet. ¿Veo que tienes muchas trocas de venta y me gustaría saber si tienes mas sales? Prefiero tener un truck de marca Americano como GMC, Chevy or Ford. Ahorita tengo un Honda Accord del año 2019 y mi pago mensual es $500. Ya casi está paid off pero me falta solo $5,000 en el loan. Se que una troca me costaria mas cada mes pero prefiero que mi pago no sea más de $750 al mes. No lo quiero financiar por más de 72 months. ¿Estoy planeando hacer un trade-in y estoy curioso en cuanto me van a dar para mi Honda Accord? Hice mi research y veo que mi carro está valorado a aproximadamente $22,500. ¿Crees que me puedes dar eso para ir towards una troca? Puedo traer mi Honda Accord hoy al dealership para que you can check it out. La verdad es que está en buenas condiciones. Ahora les voy a decir más detalles del tipo de troca que quiero. \nQuiero una troca del año pero si no se puede hacer eso, y

## Evaluation from ChatGPT:

### Use case 1

#### Snowflake:

- Score: **85**

- Evaluation:

    - Accuracy: The translation is mostly accurate but has minor inaccuracies. For example, "¿Veo que tienes muchas trocas de venta y me gustaría saber si tienes más sales?" is translated as "I see that you have many trucks for sale and I would like to know if you have more sales?" which is slightly off in meaning.
    - Clarity: Clear and easy to understand.
    - Naturalness: It reads naturally but has a few awkward phrasings like "to go towards a truck."

#### Azure:

- Score: **70**

- Evaluation:

    - Accuracy: The translation has more inaccuracies. For instance, "Am I planning to trade-in and am curious about how much I will get for my Honda Accord?" is incorrect. The original text is "¿Estoy planeando hacer un trade-in y estoy curioso en cuanto me van a dar para mi Honda Accord?"
    - Clarity: The translation is clear, but some parts are confusing, such as the awkward phrasing in "Am I planning to trade-in."
    - Naturalness: Less natural than the first translation, with some awkward phrasing and grammatical issues.

Both translations are good, but the first one is more accurate and reads more naturally than the second.

### Use case 2

#### Snowflake

- Score: 90

- Evaluation:

    - Accuracy: The translation is accurate, with minor inaccuracies. For example, "tan interior" is correctly translated but the phrasing "I am always going up and down everywhere" could be improved.
    - Clarity: The translation is clear and easy to understand.
    - Naturalness: It reads naturally with only a few awkward phrases.

#### Azure

- Score: 70

- Evaluation:

    - Accuracy: There are more inaccuracies here, such as "so interior" instead of "tan interior," and "ven" instead of "van."
    - Clarity: The translation is mostly clear but has some confusing parts due to inaccuracies.
    - Naturalness: The translation is less natural, with more awkward phrasing and grammatical issues compared to the first translation.

Both translations are good, but the first one is more accurate and reads more naturally than the second.

# Human judge

So for the first use case... I have put in `**bolded and red**` where I feel like each model did better as far as the translation goes. I put in **bold** what is being compared. And did **bold** and ~~strikethrough~~ text to indicate it was a miss on what was written. So, as you can see here, `**A is the clear winner.**`

- "use_case_1":
  
    - "A": " Hello, I'm calling because I'm interested in one of your cars that I saw on the internet. I see that you have many trucks for sale and I would like to know if you have more sales? I prefer to have an American brand truck like GMC, Chevy or Ford. Right now I have a Honda Accord from the year 2019 and my monthly payment is \\$500. It's almost paid off `**but I still owe \\$5,000 on the loan.**` I know a truck would cost me more each month but I prefer that my payment not be more than \\$750 a month. I don't want to finance it for more than 72 months. `**I'm planning on doing a trade-in and I'm curious how much you'll give me for my Honda Accord?**` I did my research and I see that my car is valued at approximately \\$22,500. Do you think you can give me that to go towards a truck? I can bring my Honda Accord today to the dealership for you to check it out. The truth is that it's in good conditions. Now I'm going to tell you more details about the type of truck I want.\n**I want a truck from this year but if that can't be done**, I would accept a truck that is newer. I prefer it to be black or blue and that it has large wheels with a minimum of 19 inches. `**I want the interior material to be black. I don't mind if it has a sunroof but it does need to have a truck bed of 6.5 feet.**` I also want it to be four-wheel drive and that the seats have heat and air conditioning. And the last thing I'm going to ask is that the truck be more economical when it comes to gas mileage.` **I know that usually trucks burn up gas mileage but hopefully you can hook me up with a truck that can save me gas.**`\nThank you very much for helping me.",
    
    - "B": "\nHi, I'm calling because I'm interested in one of your cars that I saw on the internet. I see that you have a lot of trucks for sale and I would like to know if you have more sales? I prefer to have an American brand truck like GMC, Chevy or Ford. Right now I have a 2019 Honda Accord and my monthly payment is \\$500. It's almost paid off ~~but I'm only \\$5,000 short on the loan~~. I know a truck would cost me more each month but I prefer my payment to be no more than \\$750 a month. I don't want to finance it for more than 72 months. ~~Am I~~ **planning to trade-in and am curious about how much I will get for my Honda Accord?** I did my research and I see that my car is valued at approximately \\$22,500. Do you think you can give me that to go towards a truck? I can bring my Honda Accord to the dealership today so you can check it out. The truth is that it is in good condition. Now I'm going to tell you more details about the type of truck I want. \n`**I want a truck of the year but if you can't do that,**` `I would accept a truck that is newer. I prefer it to be black or blue and have large rims with a minimum of 19 inches. **I want the inner material to be black. I don't mind if you have a sunroof but you do need to have a 6.5 foot truck bed**. I also want it to be four-wheel drive and for the seats to have heat and air conditioning. And the last thing I'm going to ask you is that the truck be more economical when it comes to gas mileage. ~~I know that usually burn up gas mileage trucks~~ **but hopefully they can connect me with a truck that can save me gas.\nThank you very much for helping me."**
  },

And for use case 2.... `**A is also the winner.** `

- "use_case_2":

    - "A": " Good morning. `**I am interested in the white Honda Odyssey 2022 with tan interior that you have for sale**`. The truth is that I have 4 children and I need a lot of space `**because I am always going up and down everywhere.**` At this moment I am driving a Mazda 5 but it is from the year 2015 `**and we are cramped in the minivan**`. I need a lot more space in the trunk for the soccer equipment `**and the groceries.**` My Mazda 5 is already paid in full `**and is in okay condition.**` It has some problems and the brakes don't work very well. But I don't want to spend money on fixing it and I prefer to put that money towards a newer van. I know you are selling it for \\$34,999 but I can only pay \\$32,000 total out the door. `**I am interested in a warranty but the maximum I can pay to add it is $500**`. `**I would also like to do a trade-in and I need at least $10,000 from that to buy the Honda Odyssey**`. `**And by chance could you give me some weather mats?** `With my children, the floors of the van get very dirty and I need something to help me keep them clean. Do I need to know what safety features this van has? The safety of the van is very important to my family. I can come to the dealership tomorrow to talk more about all the details. Talk soon.",
    
    - "B": "Good morning. **I'm interested in the white Honda Odyssey 2022**  ~~with so interior~~ **they have for sale**. The truth is that I have 4 children and I need a lot of space **because I am always up and down everywhere.** Right now I'm driving a Mazda 5 but it's from 2015 **and we're squeezed into the minivan**. I need a lot more trunk space for soccer equipment **and errands.** My Mazda 5 is already paid for in full ~~and is in good condition~~. It has some problems and the brakes don't work very well. But I don't want to spend money on fixing it and I'd rather put that money toward a newer van. I know you're selling it for \\$34,999 but I can only afford $\$$32,000 total out the door. **I have an interest in a warranty but the most I can afford** ~~to add it is~~ **\\$500**. **I'd also like to trade-in and I need at least \\$10,000 of that to buy the Honda Odyssey.** **And by chance** ~~you could give me~~ **some weathermats?** With my kids the floors of the van get really dirty and I need something to help me keep them clean. Do I need to know what safety features this ~~ven~~ has? The safety of the van is very important to my family. I can come to the dealership tomorrow to talk more about all the details. Talk soon.\n"

# Compare Translate to Azure

In [27]:
compare_results_translate = dict()
for key in use_cases:
    our_solution = Translate(use_cases[key], from_language=lang_from, to_language=lang_to)
    azure_soluion = az_translate(text_to_translate=use_cases[key])
    compare_results_translate[key] = {'Snowflake_translate': our_solution, 'Azure': azure_soluion}

In [29]:
compare_results_translate

{'use_case_1': {'Snowflake_translate': "I am trying to ask for a colored car, but I want to get the heat. I'm looking at the last 19th of your cars that I saw on the internet or I can call them because they are in the middle of the sun. I see you have many sales and I would like to know if you have more sales? I prefer to have an American brand truck as GMC, Chevy or Ford. I don't think I have a Honda Accord from 2019 and my monthly payment is $500. It's almost paid off for gas but I just need to do some research",
  'Azure': ["\nHi, I'm calling because I'm interested in one of your cars that I saw on the internet. I see that you have a lot of trucks for sale and I would like to know if you have more sales? I prefer to have an American brand truck like GMC, Chevy or Ford. Right now I have a 2019 Honda Accord and my monthly payment is $500. It's almost paid off but I'm only $5,000 short on the loan. I know a truck would cost me more each month but I prefer my payment to be no more than 

# Llama3-70B performance

In [28]:
compare_results_llama = dict()
for key in use_cases:
    our_solution = translate_llm_simple(text_to_translate=use_cases[key], model="llama3-70b")
    azure_soluion = az_translate(text_to_translate=use_cases[key])
    compare_results_llama[key] = {'Snowflake_llama': our_solution, 'Azure': azure_soluion}

## ChatGPT grading

### Example 1


#### Translation 1:

**Score: 95**

**Evaluation:**
- **Accuracy:** The translation is very accurate, capturing the nuances and details of the original text well. 
- **Clarity:** The translation is clear and easy to understand.
- **Naturalness:** It reads very naturally, with only minor adjustments needed to improve the flow.

#### Translation 2:

**Score: 70**

**Evaluation:**
- **Accuracy:** The translation has some inaccuracies and awkward phrasing. For example, "Am I planning to trade-in and am curious" is not correctly translated.
- **Clarity:** The translation is mostly clear, but certain phrases can be confusing.
- **Naturalness:** The translation is less natural, with more awkward phrasing and grammatical issues compared to the first translation.

Both translations are good, but the first one is more accurate and reads more naturally than the second.

### Example 2


#### Translation 1:

**Score: 95**

**Evaluation:**
- **Accuracy:** The translation is very accurate, capturing the nuances and details of the original text well.
- **Clarity:** The translation is clear and easy to understand.
- **Naturalness:** It reads very naturally, with only minor adjustments needed to improve the flow.

#### Translation 2:

**Score: 65**

**Evaluation:**
- **Accuracy:** There are several inaccuracies, such as "with so interior" instead of "with tan interior," and "ven" instead of "van."
- **Clarity:** The translation is mostly clear, but certain phrases can be confusing due to inaccuracies.
- **Naturalness:** The translation is less natural, with more awkward phrasing and grammatical issues compared to the first translation.

Both translations are good, but the first one is more accurate and reads more naturally than the second.

# Deepgram Comparison

In [None]:
from deepgram import DeepgramClient, AnalyzeOptions

DEEPGRAM_API_KEY = "<Your Deepgram API Key>"

TEXT =  {
    "buffer": "Enter your text here."
}

def deepgram_translate():
    try:
        deepgram = DeepgramClient(DEEPGRAM_API_KEY)

        options = AnalyzeOptions(
            language="en",
            summarize="v2", 
            topics=True, 
            intents=True, 
            sentiment=True, 
        )

        response = deepgram.read.analyze.v("1").analyze_text(TEXT, options)
        print(response.to_json(indent=4))

    except Exception as e:
        print(f"Exception: {e}")


In [None]:
response_to_use_case_1 = {
  "metadata": {
    "request_id": "72e0bcde-e253-4a37-bcea-4d2eeadd0f07",
    "created": "2024-06-27T21:57:14.470Z",
    "language": "en",
    "summary_info": {
      "model_uuid": "67875a7f-c9c4-48a0-aa55-5bdb8a91c34a",
      "input_tokens": 372,
      "output_tokens": 61
    },
    "sentiment_info": {
      "model_uuid": "80ab3179-d113-4254-bd6b-4a2f96498695",
      "input_tokens": 630,
      "output_tokens": 632
    },
    "topics_info": {
      "model_uuid": "80ab3179-d113-4254-bd6b-4a2f96498695",
      "input_tokens": 630,
      "output_tokens": 36
    },
    "intents_info": {
      "model_uuid": "80ab3179-d113-4254-bd6b-4a2f96498695",
      "input_tokens": 630,
      "output_tokens": 24
    }
  },
  "results": {
    "summary": {
      "text": "The transcript appears to be a recording of a phone call between a customer and a representative. The customer is asking about a car dealership's stock, and the representative responds with information about a car dealership's stock and a price. The conversation appears to be unrelated to the actual dealership's business."
    },
    "topics": {
      "segments": [
        {
          "text": "Prefiero tener un truck de marca Americano como GMC, Chevy or Ford.",
          "start_word": 31,
          "end_word": 42,
          "topics": [
            {
              "topic": "Car sales",
              "confidence_score": 0.3508841
            }
          ]
        },
        {
          "text": "Ahorita tengo un Honda Accord del a<unk>o 2019 y mi pago mensual es $500.",
          "start_word": 43,
          "end_word": 56,
          "topics": [
            {
              "topic": "Honda accord",
              "confidence_score": 0.22236145
            }
          ]
        },
        {
          "text": "Quiero una troca del a<unk>o pero si no se puede hacer eso, yo aceptar<unk>a una troca que esté más nuevecita.",
          "start_word": 178,
          "end_word": 197,
          "topics": [
            {
              "topic": "Troca",
              "confidence_score": 0.8471961
            }
          ]
        },
        {
          "text": "Quiero que el material interior sea del color black.",
          "start_word": 215,
          "end_word": 223,
          "topics": [
            {
              "topic": "Color",
              "confidence_score": 0.5676131
            }
          ]
        },
        {
          "text": "No me importa si tiene un sunroof pero s<unk> necesita tener un truck bed de 6.5 pies.",
          "start_word": 224,
          "end_word": 240,
          "topics": [
            {
              "topic": "Truck bed",
              "confidence_score": 0.27445966
            }
          ]
        },
        {
          "text": "Se que usualmente las trocas burn up gas mileage pero ojala me puedan entroncar una troca que me puede ahorrar gas.",
          "start_word": 277,
          "end_word": 297,
          "topics": [
            {
              "topic": "Gas mileage",
              "confidence_score": 0.6795571
            }
          ]
        }
      ]
    },
    "intents": {
      "segments": [
        {
          "text": "<unk>Veo que tienes muchas trocas de venta y me gustar<unk>a saber si tienes mas sales?",
          "start_word": 16,
          "end_word": 30,
          "intents": [
            {
              "intent": "Inform about sales",
              "confidence_score": 0.008552421
            }
          ]
        },
        {
          "text": "La verdad es que está en buenas condiciones. Ahora les voy a decir más detalles del tipo de troca que quiero.",
          "start_word": 158,
          "end_word": 178,
          "intents": [
            {
              "intent": "Educate on troca",
              "confidence_score": 0.030202974
            }
          ]
        }
      ]
    },
    "sentiments": {
      "segments": [
        {
          "text": "La verdad es que está en buenas condiciones.",
          "start_word": 157,
          "end_word": 164,
          "sentiment": "positive",
          "sentiment_score": 0.47509765625
        },
        {
          "text": "Ahora les voy a decir más detalles del tipo de troca que quiero. Quiero una troca del a<unk>o pero si no se puede hacer eso, yo aceptar<unk>a una troca que esté más nuevecita. Prefiero que sea black or blue y que tenga llantas grandes con un m<unk>nimo de 19 inches. Quiero que el material interior sea del color black. No me importa si tiene un sunroof pero s<unk> necesita tener un truck bed de 6.5 pies. También quiero que sea four-wheel drive y que los asientos tengan heat y aire acondicionado. Y lo <unk>ltimo que les voy a pedir es que la troca sea más económica cuando se trata del gas mileage. Se que usualmente las trocas burn up gas mileage pero ojala me puedan entroncar una troca que me puede ahorrar gas.",
          "start_word": 165,
          "end_word": 304,
          "sentiment": "neutral",
          "sentiment_score": -0.161376953125
        },
        {
          "text": "Muchas gracias por ayudarme.",
          "start_word": 298,
          "end_word": 301,
          "sentiment": "positive",
          "sentiment_score": 0.75244140625
        }
      ],
      "average": {
        "sentiment": "neutral",
        "sentiment_score": -0.06284612341772151
      }
    }
  }
}

# Glossary preservation

In [15]:
sample_text_english = """The CTO emphasized the importance of DevOps in modern IT. 
Continuous Integration (translated as 'Kontinuierliche Integration') and Agile methodologies are crucial for faster releases and maintaining high-quality software. 
The goal is to improve the CI/CD pipeline efficiency."""

sample_text_input = """The CTO emphasized the importance of DevOps in modern IT. 
Kontinuierliche Integration and Agile methodologies are crucial for faster releases and maintaining high-quality software. 
The goal is to improve the CI/CD pipeline efficiency.""" 
 
desired_german_output = """Der CTO betonte die Bedeutung von DevOps im modernen IT. 
Kontinuierliche Integration und Agile-Methoden sind entscheidend für schnellere Releases und die Aufrechterhaltung qualitativ hochwertiger Software. 
Das Ziel ist es, die Effizienz der CI/CD-Pipeline zu verbessern.
"""

## sample_text_english

In [33]:
sfllm_translate_1 = translate_llm_simple(text_to_translate=sample_text_english, 
                                         model="mistral-large", 
                                         source_language='en', 
                                         target_language='de')
sfllm_translate_1

" Der CTO betonte die Bedeutung von DevOps in der modernen IT.\nKontinuierliche Integration ('Continuous Integration' übersetzt) und agile Methodologien sind entscheidend für schnellere Veröffentlichungen und die Aufrechterhaltung einer hohen Softwarequalität.\nDas Ziel ist es, die Effizienz der CI/CD-Pipeline zu verbessern."

## sample_text_input with mistral-large

In [34]:
sfllm_translate_2 = translate_llm_simple(text_to_translate=sample_text_input, 
                                         model="mistral-large", 
                                         source_language='en', 
                                         target_language='de')
sfllm_translate_2

' Der CTO betonte die Bedeutung von DevOps in der modernen IT.\nKontinuierliche Integration und Agile Methodiken sind entscheidend für schnellere Veröffentlichungen und die Aufrechterhaltung hochwertiger Software.\nDas Ziel ist es, die Effizienz der CI/CD-Pipeline zu verbessern.'

## running through all major models with simple prompt

In [25]:
model_list = [
'llama2-70b-chat',
'llama3.1-8b',
'llama3.1-70b',
'llama3.1-405b',
'llama3.2-3b',
'mistral-large2',
'mixtral-8x7b',
'reka-flash']
# sample_text_input with all major models in Cortex AI
for model in model_list:
    translation = translate_llm_simple(text_to_translate=sample_text_input, 
                                         model=model, 
                                         source_language='en', 
                                         target_language='de')
    print(f" **translation using model -- {model} --:\n")
    print(translation, "\n")

 **translation using model -- llama2-70b-chat --:

 Der CTO betonte die Bedeutung von DevOps in der modernen IT. Die kontinuierliche Integration und agile Methodologien sind entscheidend für schnellere Veröffentlichungen und die Wartung von hochwertigem Software. Das Ziel ist es, die Effizienz der CI/CD-Pipeline zu verbessern.

Here's a word-for-word translation:

Der Chief Technology Officer (CTO) betonte die Bedeutung von DevOps in der modernen Informationstechnologie. Die kontinuierliche Integration und agile Methodologien sind entscheidend für schnellere Veröffentlichungen und die Wartung von hochwertigem Software. Das Ziel ist es, die Effizienz der kontinuierlichen Integration und kontinuierlichen Bereitstellung (CI/CD) zu verbessern.

Note: "kontinuierliche Integration" is a common term in German, but it is not a direct translation of "Continuous Integration" (CI). "Kontinuierliche Bereitstellung" (CB) is a more common term for "Continuous Deployment" (CD). 

 **translation using

In [32]:
## run through custom prompts
translation = translate_prompt_orig(text_to_translate=sample_text_input, 
                                     model='mistral-large2', 
                                     source_language='en', 
                                     target_language='de')
print(f" **translation using model -- mistral-large2 --:\n")
print(translation, "\n")

translation = translate_prompt_anth(text_to_translate=sample_text_input, 
                                     model='mistral-large2', 
                                     source_language='en', 
                                     target_language='de')
print(f" **translation using model -- mistral-large2 --:\n")
print(translation, "\n")

 **translation using model -- mistral-large2 --:

 Der CTO betonte die Bedeutung von DevOps in der modernen IT. Kontinuierliche Integration und agile Methoden sind entscheidend für schnellere Releases und die Aufrechterhaltung hoher Softwarequalität. Das Ziel ist es, die Effizienz der CI/CD-Pipeline zu verbessern. 

 **translation using model -- mistral-large2 --:

 The CTO emphasized the importance of DevOps in modern IT.
Kontinuierliche Integration and Agile methodologies are crucial for faster releases and maintaining high-quality software.
The goal is to improve the CI/CD pipeline efficiency.

--> German

Der CTO betonte die Bedeutung von DevOps in der modernen IT.
Kontinuierliche Integration und agile Methoden sind entscheidend für schnellere Releases und die Aufrechterhaltung hoher Softwarequalität.
Das Ziel ist es, die Effizienz der CI/CD-Pipeline zu verbessern. 

