# This notebook creates translations and example sentences from the input German text and then generates Anki cards

In [None]:
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain, SequentialChain

from anki.german_deck import GermanDeck
from anki.templates import (
    extract_template,
    translate_template,
    words_sentences_template,
    other_forms_template,
    sentence_translate_template,
)

In [None]:
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file located by find_dotenv().
# Presumably this supplies the OpenAI credentials used by ChatOpenAI - confirm.
# The return value is bound to `_` because it is not needed afterwards.
_ = load_dotenv(find_dotenv())

In [None]:
# Chat model shared by every LLMChain step built later in the notebook.
# Swap the assignment below for the commented one to use the larger model.
llm_model = "gpt-4o-mini"
# llm_model = "gpt-4o"
llm = ChatOpenAI(
    model=llm_model,
    temperature=0.01,  # low sampling temperature
)

# Read the input file and build the prompt text: one entry per line,
# de-duplicated and sorted.
# The encoding is set explicitly because the input is German text (umlauts,
# eszett); relying on the platform default can mis-decode it.
with open("data/input.txt", "r", encoding="utf-8") as file:
    unique_lines = {line.strip() for line in file}

# Blank lines (e.g. from a trailing newline) carry no vocabulary, so keep them
# out of the prompt.
unique_lines.discard("")
text_input = "\n".join(sorted(unique_lines))

## LLMChain

In [None]:
# Maps each chain step's output key to the prompt template that produces it.
# Insertion order matters: the chains below are created, and handed to
# SequentialChain, in exactly this order.
output_variables_templates = {
    "german_words": extract_template,
    "english_words": translate_template,
    "german_sentences": words_sentences_template,
    "english_sentences": sentence_translate_template,
    "other_forms": other_forms_template,
}

# One LLMChain per template, all sharing the same model; the pipeline takes a
# single "input_text" value and exposes every step's output.
overall_chain = SequentialChain(
    chains=[
        LLMChain(llm=llm, prompt=prompt, output_key=output_variable)
        for output_variable, prompt in output_variables_templates.items()
    ],
    input_variables=["input_text"],
    output_variables=list(output_variables_templates),
    verbose=True,
)

# Calling a chain object directly is deprecated in LangChain; `invoke` with an
# explicit input mapping is the supported entry point and also makes it clear
# which input variable the text is bound to.
chain_output = overall_chain.invoke({"input_text": text_input})

In [None]:
# Split every chain output into a list of trimmed entries; the outputs are
# treated as ";"-separated lists.
# A trailing ";" is removed before splitting, because it would otherwise add an
# empty entry and throw that column out of alignment with the others.
preprocessed = {
    key: [
        entry.strip()
        for entry in chain_output[key].strip().rstrip(";").split(";")
    ]
    for key in output_variables_templates
}

# Keep only the columns that line up one-to-one with the extracted German words.
expected_len = len(preprocessed["german_words"])
filtered = {
    key: entries
    for key, entries in preprocessed.items()
    if len(entries) == expected_len
}

# Report dropped columns now: later cells index them by name and would
# otherwise fail with a KeyError that does not explain the cause.
dropped = [key for key in preprocessed if key not in filtered]
if dropped:
    print(
        f"Dropping outputs that do not have {expected_len} entries: {dropped}"
    )

# Transpose the aligned columns into one dict per vocabulary entry.
preprocessed_list = [
    dict(zip(filtered, row)) for row in zip(*filtered.values())
]

In [None]:
# Turn the "label: text" entries of these columns into {label: text} mappings.
# Only the FIRST colon separates label from text, so text that itself contains
# a colon is kept whole. An entry without any colon maps its text to "".
for column in ["german_sentences", "other_forms"]:
    entries = preprocessed[column]
    if isinstance(entries, dict):
        # Already parsed by a previous run of this cell; parsing the keys again
        # would wipe the stored text.
        continue
    preprocessed[column] = {
        label.strip(): text.strip()
        for label, _sep, text in (entry.partition(":") for entry in entries)
    }

# For the English sentences only the text after the label is kept. An entry
# without a colon is kept unchanged instead of raising IndexError.
# NOTE: re-running this cell strips sentences that contain a colon once more.
preprocessed["english_sentences"] = [
    (text if sep else label).strip()
    for label, sep, text in (
        entry.partition(":") for entry in preprocessed["english_sentences"]
    )
]

In [None]:
# Strip the leading "label:" from the sentence-like fields of every entry.
# Only the first colon is treated as the separator, so text that contains a
# colon is not truncated; a value without any colon is kept as is instead of
# raising IndexError.
# NOTE: re-running this cell strips values that contain a colon once more.
for item in preprocessed_list:
    for column in ["german_sentences", "other_forms", "english_sentences"]:
        label, sep, text = item[column].partition(":")
        item[column] = (text if sep else label).strip()

In [None]:
# Sanity check: print the number of entries held under each output key.
for name, entries in preprocessed.items():
    print(name, len(entries))

In [20]:
# Export the prepared entries as notes of an Anki deck and save the deck.

deck_id = 2059400110  # Example deck ID
model_id = 1607392319  # Example model ID
FILE_PATH = "../../data/german_vocabulary"

german_deck = GermanDeck(deck_id, model_id, FILE_PATH)
# german_deck.load_deck()

for item in preprocessed_list:
    note_fields = {
        "german_word": item["german_words"],
        "translation": item["english_words"],
        "german_sentence": item["german_sentences"],
        "english_sentence": item["english_sentences"],
        # NOTE(review): a single space is passed here rather than
        # item["other_forms"] - confirm this is intentional.
        "other_forms": " ",
    }
    german_deck.add_note(**note_fields)

german_deck.save_deck()