# Flashcard Builder: Danish/English

For use with Anki. 

Data Sources:
* *Fluent Forever* by Gabriel Wyner - Appendix 5: "Your First 625 Words"
* Lonely Planet *Fast Talk Danish* Phrase Book
* *Complete Danish* by Bente Elsworth

All translations double-checked on [ordnet.dk](https://ny.ordnet.dk/).

### Initial Setup

In [1]:
import pandas as pd
import json
from pathlib import Path

In [2]:
# The vocabulary list lives in "Desktop/Danish Notebooks" under the current
# user's home directory; read it into a DataFrame.
csv_path = Path.home().joinpath(
    "Desktop", "Danish Notebooks", "danish_english_flashcards.csv"
)

df = pd.read_csv(csv_path)

In [3]:
# Report which columns were loaded, then preview the first rows.
print("Columns:", df.columns.tolist())
df.head()

Columns: ['English', 'Danish']


Unnamed: 0,English,Danish
0,about,om
1,accidental,tilfældig
2,an actor,en skuespiller
3,adjective,adjektiv
4,an adult,en voksen


In [4]:
# Clean the raw word list: drop missing/blank entries, trim whitespace,
# and remove exact duplicate (English, Danish) pairs.
TEXT_COLS = ["English", "Danish"]

# Drop missing cells BEFORE casting to str: astype(str) would turn a missing
# value into the literal text "nan", which the blank filter below cannot catch.
cards = df.dropna(subset=TEXT_COLS).copy()

for col in TEXT_COLS:
    cards[col] = cards[col].astype(str).str.strip()

# Drop empty rows (cells that were blank or whitespace-only)
cards = cards[(cards["English"] != "") & (cards["Danish"] != "")]

# Remove exact duplicates
df = cards.drop_duplicates(subset=TEXT_COLS).reset_index(drop=True)

print("Cards:", len(df))
df.head(10)

Cards: 647


Unnamed: 0,English,Danish
0,about,om
1,accidental,tilfældig
2,an actor,en skuespiller
3,adjective,adjektiv
4,an adult,en voksen
5,afternoon,eftermiddag
6,age,alder
7,air,luft
8,an airplane,et fly
9,an airport,en lufthavn


### Flashcard Creation

In [5]:
# Export Anki CSV (Front = Danish, Back = English)

# Write next to the source CSV instead of a hard-coded absolute user path,
# so the notebook also runs under another account or on another machine.
out_dir = csv_path.parent
anki_path = out_dir / "anki_import_basic.csv"

anki_df = pd.DataFrame({
    "Front": df["Danish"],   # what you see first
    "Back":  df["English"],  # reveal answer
})

# NOTE(review): to_csv also writes a "Front,Back" header line; confirm that
# Anki's importer does not turn that line into a card.
# Encoding is stated explicitly because the Danish side contains æ/ø/å.
anki_df.to_csv(anki_path, index=False, encoding="utf-8")
print("Wrote:", anki_path)
anki_df.head(10)

Wrote: /Users/karyncumming/Desktop/Danish Notebooks/anki_import_basic.csv


Unnamed: 0,Front,Back
0,om,about
1,tilfældig,accidental
2,en skuespiller,an actor
3,adjektiv,adjective
4,en voksen,an adult
5,eftermiddag,afternoon
6,alder,age
7,luft,air
8,et fly,an airplane
9,en lufthavn,an airport


In [6]:
# Optional: also dump the cards as JSON so they can be reused outside Anki

json_path = out_dir / "danish_cards.json"
records = [
    {"danish": row.Danish, "english": row.English}
    for row in df.itertuples(index=False)
]

with json_path.open("w", encoding="utf-8") as f:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes
    json.dump(records, f, ensure_ascii=False, indent=2)

print("Wrote:", json_path)
print("Example:", records[0])

Wrote: /Users/karyncumming/Desktop/Danish Notebooks/danish_cards.json
Example: {'danish': 'om', 'english': 'about'}


In [7]:
# Quick check: look for strange rows in either column -- very long text, or
# NaN-ish strings such as "nan" left behind when a missing value is cast to str.

MAX_TEXT_LEN = 60                   # anything longer is unlikely to be a single card
NANISH = {"nan", "none", "<na>"}    # string forms of missing values

suspect = pd.Series(False, index=df.index)
for col in ("Danish", "English"):
    text = df[col].astype(str)
    suspect |= text.str.len().gt(MAX_TEXT_LEN) | text.str.lower().isin(NANISH)

df[suspect].head(10)

Unnamed: 0,English,Danish


### Adding Topic Tags

I'm using a lightweight, rule-based tagging system based on English keyword patterns to assign beginner-level semantic topics (e.g., food, travel, time).

This approach is fast, transparent, and easier to debug than adding tags by hand. Unmatched items are explicitly labeled `unknown` for later manual review or LLM-based classification.

In [8]:
import re

In [11]:

# Ordered (topic, regex) pairs. Rules are tried top to bottom and the first
# pattern found in the lower-cased English text decides the topic, so the
# order matters when an entry could match several rules.
TOPIC_RULES = [
    (
        "greetings",
        r"\b(hello|hi|goodbye|bye|thanks|thank you|please|sorry|yes|no)\b",
    ),
    (
        "numbers",
        r"\b(one|two|three|four|five|six|seven|eight|nine|ten|eighteen|twenty|hundred|thousand)\b",
    ),
    (
        "time",
        r"\b(today|tomorrow|yesterday|morning|evening|night|week|month|year|hour|minute|second)\b",
    ),
    (
        "food",
        r"\b(bread|water|milk|coffee|tea|beer|wine|cheese|egg|meat|fish|apple|banana|rice|potato|salt|sugar)\b",
    ),
    (
        "family",
        r"\b(mother|father|mom|dad|sister|brother|child|son|daughter|family)\b",
    ),
    (
        "travel",
        r"\b(train|bus|ticket|airport|hotel|passport|city|street|left|right|map)\b",
    ),
    (
        "home",
        r"\b(house|home|room|kitchen|bathroom|bed|door|window|chair|table)\b",
    ),
    (
        "color",
        r"\b(red|green|yellow|orange|black|white|blue)\b",
    ),
    # Crude catch-all for infinitives ("to eat", "to go"); kept last so the
    # more specific rules above win first.
    # NOTE(review): this also matches "to" elsewhere in a phrase (e.g. "next to").
    (
        "verbs",
        r"\b(to)\b",
    ),
]


def assign_topic(english: str) -> str:
    """Return the topic of the first rule matching ``english``.

    The input is converted to ``str``, stripped and lower-cased before the
    patterns in ``TOPIC_RULES`` are searched in order.

    Returns:
        The matching topic name, or ``"unknown"`` when no rule matches.
    """
    text = str(english).strip().lower()
    hits = (topic for topic, pattern in TOPIC_RULES if re.search(pattern, text))
    return next(hits, "unknown")

# Tag every card on a fresh copy of the frame, then show how many cards
# each topic received.
df = df.assign(Topic=df["English"].map(assign_topic))

df["Topic"].value_counts().head(20)

Topic
unknown      464
verbs         99
numbers       16
food          15
family        12
travel        10
home          10
time          10
color          8
greetings      3
Name: count, dtype: int64

In [12]:
# Reproducible sample (at most 30 rows) of the cards that no rule matched
unknown_cards = df.loc[df["Topic"] == "unknown", ["Danish", "English", "Topic"]]
sample_size = min(30, len(unknown_cards))
unknown_cards.sample(sample_size, random_state=1)

Unnamed: 0,Danish,English,Topic
289,et job,a job,unknown
304,en lampe,a lamp,unknown
595,op,up,unknown
318,lys,light (vs dark),unknown
86,et kort,a card,unknown
334,et magasin,a magazine,unknown
76,en bolle,a bun,unknown
333,frokost,lunch,unknown
235,(en) gud,God,unknown
508,et smil,a smile,unknown


In [None]:
# OPTIONAL MANUAL OVERRIDES (IF NEEDED)
#
# Map an exact English entry (compared case-insensitively) to the topic it
# should get. While the dict is empty this cell changes nothing, so it is
# safe to leave in a Run All.
OVERRIDES = {
    # "English phrase": "topic"
    # "bread": "food",
    # "train ticket": "travel",
}

overrides_lc = {phrase.lower(): topic for phrase, topic in OVERRIDES.items()}
english_lc = df["English"].str.lower()
has_override = english_lc.isin(list(overrides_lc))

# Only rows with an override are assigned; every other Topic is left as-is.
df.loc[has_override, "Topic"] = english_lc[has_override].map(overrides_lc)

In [13]:
# Export to Anki (Front = Danish, Back = English, plus a Tags column)

# Write next to the source CSV instead of a hard-coded absolute user path,
# so the notebook also runs under another account or on another machine.
out_dir = csv_path.parent
anki_tagged_path = out_dir / "anki_import_tagged.csv"

# Anki tags should be space-separated: two fixed tags plus the card's topic
anki_tagged = pd.DataFrame({
    "Front": df["Danish"],
    "Back":  df["English"],
    "Tags":  "danish beginner " + df["Topic"].astype(str),
})

# NOTE(review): to_csv also writes a "Front,Back,Tags" header line; confirm
# that Anki's importer does not turn that line into a card.
# Encoding is stated explicitly because the Danish side contains æ/ø/å.
anki_tagged.to_csv(anki_tagged_path, index=False, encoding="utf-8")
print("Wrote:", anki_tagged_path)
anki_tagged.head(10)

Wrote: /Users/karyncumming/Desktop/Danish Notebooks/anki_import_tagged.csv


Unnamed: 0,Front,Back,Tags
0,om,about,danish beginner unknown
1,tilfældig,accidental,danish beginner unknown
2,en skuespiller,an actor,danish beginner unknown
3,adjektiv,adjective,danish beginner unknown
4,en voksen,an adult,danish beginner unknown
5,eftermiddag,afternoon,danish beginner unknown
6,alder,age,danish beginner unknown
7,luft,air,danish beginner unknown
8,et fly,an airplane,danish beginner unknown
9,en lufthavn,an airport,danish beginner travel
