In [2]:
import pandas as pd
from pathlib import Path
import requests
from config import column_names


# # Load language codes
# json_file_path = 'language_codes.json'

# with open(json_file_path, 'r', encoding='utf-8') as file:
#     language_codes = json.load(file)

language_codes = {
    "en": "English",
    "de": "German",
    "fr": "French",
    "es": "Spanish",
    "it": "Italian",
    "pt": "Portuguese",
    "jn": "Japanese",
}

def build_markdown(language):
    base_url = "https://raw.githubusercontent.com/moritzvitt/moritzProjekt/main/prompts/"
    general_url = f"{base_url}_general_prompt.md"
    examples_url = f"{base_url}{language}_examples.md"
    
    # Load general markdown
    response = requests.get(general_url)
    general_content = response.text
    
    # Load examples markdown
    response = requests.get(examples_url)
    examples_content = response.text
    
    # Reconstruct the markdown content
    final_content = general_content + "\n\n" + examples_content
    
    return final_content
    # replace 'target_language' and 'native_language' with the actual language


for language_key in language_codes:
    language_full_name = language_codes.get(language_key, "Unknown Language")
    markdown = build_markdown(language_key)
    markdown = markdown.replace("target_language", language_full_name)

    output_file = f"../complete_prompts/{language_key}_complete.md"
    
    # Check if language-specific dataframe exists
    df_path = Path(f'../test_dataframes/{language_key}_items/items.csv')
    if df_path.exists():
        # Load dataframe from CSV
        df = pd.read_csv(df_path, delimiter='\t', encoding='utf-8')
        df.columns = column_names
        df = df[[
            "Word",
            "Context",
            "Context machine translation",
            "Context human translation",
        ]]
        
        with open(output_file, "w", encoding='utf-8') as file:
            file.write(markdown + '\n\nThis is the table with the word sentence pairs:\n\n' +
                       df.to_csv(sep='\t', encoding='utf-8', index=False))
        
        print(f"Processed {language_full_name} with dataframe and saved to {output_file}")
    else:
        # Save only the markdown content if no dataframe exists
        with open(output_file, "w", encoding='utf-8') as file:
            file.write(markdown)
        
        print(f"Processed {language_full_name} without dataframe (not found) and saved markdown to {output_file}")

Processed Afar without dataframe (not found) and saved markdown to ../complete_prompts/aa_complete.md
Processed Abkhazian without dataframe (not found) and saved markdown to ../complete_prompts/ab_complete.md
Processed Avestan without dataframe (not found) and saved markdown to ../complete_prompts/ae_complete.md
Processed Afrikaans without dataframe (not found) and saved markdown to ../complete_prompts/af_complete.md
Processed Akan without dataframe (not found) and saved markdown to ../complete_prompts/ak_complete.md
Processed Amharic without dataframe (not found) and saved markdown to ../complete_prompts/am_complete.md
Processed Aragonese without dataframe (not found) and saved markdown to ../complete_prompts/an_complete.md
Processed Arabic (U.A.E.) without dataframe (not found) and saved markdown to ../complete_prompts/ar-ae_complete.md
Processed Arabic (Bahrain) without dataframe (not found) and saved markdown to ../complete_prompts/ar-bh_complete.md
Processed Arabic (Algeria) witho

KeyboardInterrupt: 

In [1]:
import genanki
from config import fields_config
import os
import time


def generate_anki_deck(df: pd.DataFrame) -> genanki.Package:
    """Generates an Anki deck from a DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame containing card data.

    Returns:
        genanki.Package: The generated Anki package.
    """
    
    with open('templates/anki_card.html', 'r', encoding='utf-8') as content_file:
        content = content_file.read()

    # Splitting HTML content
    html_sections = content.split('<!-- html -->')

    # Assigning sections to qfmt, afmt, and css
    qfmt_html = html_sections[1]
    afmt_html = html_sections[2]

    with open('static/css/anki_card.css', 'r', encoding='utf-8') as content_file:
        css_code = content_file.read()

    # Ensure all columns are strings
    df = df.astype(str)

    # Define the Anki model
    model_id = 1607392319
    model = genanki.Model(
        model_id,
        'Language Learning with Netflix Model',
        fields = fields_config["fields"],
        templates=[
            {
                'name': 'Card 1',
                'qfmt': qfmt_html,
                'afmt': afmt_html,
            },
        ],
        css=css_code
    )

    # Create an Anki deck
    deck_id = model_id + 1  # Ensure deck_id is different from model_id
    deck = genanki.Deck(deck_id, 'lln_anki_deck')

    # Add cards to the deck
    for index, row in df.iterrows():
        my_note = genanki.Note(
            model=model,
            fields=[row['ID'], row['cloze'], row['hint'], row['definition'], row['notes'], row['image'], row['audio']],
        )
        deck.add_note(my_note)

    apkg_package = genanki.Package(deck)
    return apkg_package

@log_io
def export_df(df: pd.DataFrame, package: genanki.Package, native_language: str, output_file_path: str, encoding: str = 'utf-8') -> Tuple[str, str]:
    """Exports an Anki package and a cleaned DataFrame to CSV.

    Args:
        df (pd.DataFrame): The DataFrame to export.
        package (genanki.Package): The Anki package to save.
        native_language (str): The native language of the data.
        output_file_path (str): The path to save the files.
        encoding (str, optional): The encoding for the CSV file. Defaults to 'utf-8'.

    Returns:
        Tuple[str, str]: A tuple containing the paths to the exported Anki package and CSV file.
    """
    current_time = time.strftime("%Y%m%d%H%M%S", time.localtime())
    package_path = os.path.join(output_file_path, f'{native_language}_LLN_{current_time}.apkg')
    package.write_to_file(package_path)

    csv_file_path = os.path.join(output_file_path, f'{native_language}_LLN_{current_time}.csv')
    df.to_csv(csv_file_path, index=False, sep='\t', encoding=encoding)

    return package_path, csv_file_path


NameError: name 'pd' is not defined