## Libraries

In [None]:
# Install dependencies
%pip install ollama
%pip install pandas

In [None]:
# Import libraries
import pandas as pd
import ollama
import json
import os

## Parameters

In [None]:
# AI model used for the translation
# Plenty of other models are available here: https://ollama.com/search
# Pick a model whose size fits your computer
ollama_model = "gemma3:4b" 

# Target language: inserted verbatim into the prompts and used as the name
# of the translation column
lang = "french"

# Number of words processed between two saves of the output CSV
# (each word is still sent to the model in its own request)
batch = 5

# Frequency list (input CSV path); this name is later rebound to the loaded table
freq = "raw_data/frequency_list_from_frequencylist-com.csv"

# Output CSV path
output = "deck.csv" 

## Prompts

In [None]:
# Prompts for the AI
# PROMPT1 placeholders, in order: english word, word type, inflections.
PROMPT1 = "Translate this english word into " + lang + ": {} ({}). Just for information to help you understand the word, here are its variations : {}. Please only give one word, don't explain. If there are several translations, separate them with a comma"
# PROMPT2 placeholders, in order: english word, word type, translation.
# The final instruction uses `lang` (it used to be hard-coded to "french"), so
# the prompt stays consistent when another target language is selected.
PROMPT2 = "Give a sentence using this word : {} ({}). Here is the " + lang + " translation of this word : {}. Please only give one sentence, don't explain. Don't give the " + lang + " translation of the sentence"
    

## Computing
**Note: the code can be stopped at any moment; the data is saved after each batch. Running the code again will resume from where it stopped the previous time.**

In [None]:
# We load the frequency list.
# `freq` starts out as the path of the CSV and is rebound to the loaded table,
# so the file is only read while `freq` is still a path. This keeps the cell
# re-runnable: handing the already-loaded DataFrame back to read_csv raises.
if isinstance(freq, (str, os.PathLike)):
    freq = pd.read_csv(freq)

# We add 2 empty columns to the table: one named after the target language
# (the translation) and one for the example sentence.
freq[lang] = None
freq["sentence"] = None

In [None]:
def translate(row):
    """Ask the model for the translation of the word held in ``row``.

    PROMPT1 is filled with the row's "word", "type" and "inflections" values
    and sent to ``ollama_model`` as a single user message. The text of the
    reply is returned with leading/trailing whitespace removed.
    """
    prompt = PROMPT1.format(row["word"], row["type"], row["inflections"])
    messages = [{"role": "user", "content": prompt}]
    reply = ollama.chat(model=ollama_model, messages=messages)
    return reply["message"]["content"].strip()

def give_example(row):
    """Ask the model for one example sentence using the word held in ``row``.

    PROMPT2 is filled with the row's "word" and "type" values plus the
    translation stored in the ``lang`` column, then sent to ``ollama_model``
    as a single user message. The text of the reply is returned with
    leading/trailing whitespace removed.
    """
    prompt = PROMPT2.format(row["word"], row["type"], row[lang])
    messages = [{"role": "user", "content": prompt}]
    reply = ollama.chat(model=ollama_model, messages=messages)
    return reply["message"]["content"].strip()



In [None]:
# We create the output CSV file on the first run only, so that a later run
# resumes from the saved progress instead of overwriting it.
if not os.path.exists(output):
    freq.to_csv(output, index=False)

# We load the output CSV.
# The two generated columns are forced to the object dtype: while they are
# still empty pandas would infer float64 (all NaN), and writing the model's
# text into a float column is a deprecated implicit upcast in recent pandas
# versions (scheduled to become an error).
data = pd.read_csv(output, dtype={lang: object, "sentence": object})

data.head()

In [None]:
# The file may already be partially filled: select the rows that still lack
# a translation or an example sentence.
m = data.sentence.isnull() | data[lang].isnull()
missing_idx = data[m].index

print(f"Number of remaining rows to fill : {m.sum()}")

# Main loop: walk over the *missing* rows only, `batch` rows at a time.
# Iterating over len(missing_idx) rather than len(data) matters when resuming:
# otherwise the loop keeps running on empty batches (and rewriting the CSV)
# once every missing row has been handled.
for batch_start in range(0, len(missing_idx), batch):

    batch_idx = missing_idx[batch_start:batch_start + batch]

    # Translation first: give_example reads the translation from the table.
    trad = data.loc[batch_idx].apply(translate, axis=1)
    data.loc[batch_idx, lang] = trad

    sentence = data.loc[batch_idx].apply(give_example, axis=1)
    data.loc[batch_idx, "sentence"] = sentence

    display(data.loc[batch_idx, [lang, "word", "sentence"]])

    # Save after every batch so that an interrupted run loses at most one batch.
    data.to_csv(output, index=False)

