In [17]:
import os, sys
import pandas as pd
from groq import Groq
from dotenv import load_dotenv
from prompt import SYSTEM_PROMPT
load_dotenv()
sys.path.append("..")

In [2]:
from typing import Any, Dict, List
from datasets import load_dataset

def load_sts_data() -> List[Dict[str, Any]]:
    sts = load_dataset("mteb/stsbenchmark-sts")
    sts_test_df = pd.DataFrame(sts['test'])

    final_data = []
    for i, row in sts_test_df.iterrows():
        # if i > 10:
        #     break
        final_data.append(
            {
                "id": row['sid'],
                "sentence": row['sentence2'],
            }
        )

    return final_data

In [3]:
sts = load_sts_data()

In [4]:
sentences = []
for i in sts[:10]:
    sentence = i['sentence']
    sentences.append(sentence)
print(sentences)

['A girl is brushing her hair.', 'A group of boys are playing soccer on the beach.', "A woman measures another woman's ankle.", 'A man is slicing a cucumber.', 'A man is playing a keyboard.', 'A woman is cutting tofu.', 'A man is riding a bicycle.', 'A man is playing the guitar.', 'A lady is playing the guitar.', 'A man is playing a trumpet.']


In [18]:
client = Groq(api_key=os.environ.get("GROQ_API_KEY"),)

def translate_sentences(client, system_prompt, data):
    translated_data = []
    
    for sentence in data:
        # Prepare the request for chat completion
        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": sentence}
            ]
        )
        
        # Extract the translation result
        translation = response.choices[0].message.content
        translated_data.append(translation)
    
    return translated_data

# Call the function and translate the sentences
translated_sentences = translate_sentences(client, SYSTEM_PROMPT, sentences)

# Print the translated sentences
for original, translated in zip(sentences, translated_sentences):
    print(f"Original: {original}")
    print(f"Translated: {translated}")
    print("---")

Original: A girl is brushing her hair.
Translated: Here is the translation:

{ {"sentence" : "Djevojčica čuva kosu." } }
---
Original: A group of boys are playing soccer on the beach.
Translated: Here is the translation of the sentence:

{ "sentence" : "Grupa dječaka igraju fudbal na plannerskoj" }

Breakdown:
Grupa - group
dječaka - boys (genitive form, because "grupa" is a plural form and "dječaka" is the possessive form of the noun "dete" (children), indicating who is playing soccer)
igraju - are playing (third person plural form of the verb "igrati" (to play))
fudbal - soccer
na - on
plannerskoj - beach ( possessive form of the noun "planinarska obala" (beach), indicating where the boys are playing)
---
Original: A woman measures another woman's ankle.
Translated: Here is the translation:

{"sentence" : "Žena mjeri drugoj ženi gležnjak."}
---
Original: A man is slicing a cucumber.
Translated: Here is the translation of the sentence from English to Serbian:

[{"sentence" : "Mužićrež