In [2]:
from google import genai
from keys import gemini_API_key
import json
import re

# Shared Gemini API client, authenticated with the key imported from the local `keys` module.
client = genai.Client(api_key=gemini_API_key)
# Model name that generate_text() passes along with every request.
model = "gemini-2.0-flash"


In [3]:
def generate_text(prompt):
    """Send ``prompt`` to the configured model and return the reply text.

    Relies on the module-level ``client`` and ``model`` globals and returns
    the ``text`` attribute of the response object unchanged.
    """
    reply = client.models.generate_content(model=model, contents=prompt)
    return reply.text


In [4]:
def remove_markdown(text):
    """Strip Markdown code-fence markers from a model reply.

    Every triple-backtick fence is removed, together with a directly
    attached ``json`` language tag in any letter case, so that the remaining
    text can be handed to ``json.loads``. All other characters, including
    surrounding whitespace, are left untouched.

    Args:
        text: Raw reply string.

    Returns:
        The reply with the fence markers removed.
    """
    # The optional tag is matched case-insensitively: a reply fenced with an
    # upper-case "JSON" tag would otherwise leave a stray "JSON" token behind
    # and make the subsequent JSON parsing fail.
    fence_pattern = r'```(?:json)?'
    return re.sub(fence_pattern, '', text, flags=re.IGNORECASE)

In [5]:
def extract_important_words(text, nr_of_words):
    """Ask the model for the most important Dutch words in ``text``.

    Args:
        text: Dutch source text to analyse.
        nr_of_words: How many words the model is asked to return.

    Returns:
        The words parsed from the model's JSON reply (expected to be a list
        of strings; the content is whatever the model produced).

    Raises:
        json.JSONDecodeError: If the cleaned reply is not valid JSON.
        KeyError: If the reply is a JSON object without a 'words' key.
    """
    prompt = f"Extract the {nr_of_words} most important Dutch words from the following Dutch text. Give it as an JSON array named 'words' Do not add the json markdown formatting, just plain text. Text:\n {text}"
    reply = remove_markdown(generate_text(prompt))
    parsed = json.loads(reply)
    # "A JSON array named 'words'" can be answered either as an object
    # {"words": [...]} or as a bare [...] array; accept both shapes instead
    # of failing with a TypeError on the bare array.
    if isinstance(parsed, list):
        return parsed
    return parsed['words']



In [None]:
# Demo: ask for the five most important words of a sample Dutch text.
text = "Na ruim 70 jaar hebben Curaçao en Sint-Maarten een nieuw betaalmiddel. De Antilliaanse gulden, sinds 1952 de munt van de twee eilanden, maakt plaats voor de Caribische gulden. Volgens directeur-secretaris Leila Matroos van de Centrale Bank van Curaçao en Sint-Maarten (CBCS) wordt de oude munt vervangen door 'iets van ons'. 'Met trots presenteer ik u de Caribische gulden, onze nieuwe munteenheid. Het is een moment om stil te staan, bij wat we samen hebben bereikt.'"
x = extract_important_words(text=text, nr_of_words=5)

In [6]:
def extract_morphemes(words):
    """Have the model split each Dutch word in ``words`` into morphemes.

    The cleaned reply is printed before it is parsed, and the value stored
    under the reply's 'words' key is returned. The prompt asks for one
    object per word with the keys 'word', 'free' and 'bound', where 'bound'
    holds 'prefixes', 'suffixes' and 'other'.
    """
    prompt = f"Identify the morphemes in the following Dutch words and structure the result as a JSON object. The JSON should contain a 'words' list, where each word is represented as an object with three keys: 'word' (containing the word), 'free' (for free morphemes) and 'bound' (for bound morphemes). The 'bound' morphemes should be further categorized into 'prefixes', 'suffixes', and 'other'. DO NOT include markdown JSON formatting syntax like triple backticks in your answer. Only return the JSON structure as plain text. Words:\n{words}"
    raw_reply = generate_text(prompt)
    cleaned_reply = remove_markdown(raw_reply)
    # Echo the cleaned reply so the raw model output is visible in the cell.
    print(cleaned_reply)
    parsed = json.loads(cleaned_reply)
    return parsed['words']


In [41]:
# Demo: run the morpheme extraction on a fixed list of Dutch words.
words = ["vuurwerkverbod", "meerderheid", "kamerleden", "belangrijker", "ondersteuning"]
morphemes = extract_morphemes(words=words)
# Show the parsed list (extract_morphemes itself already printed the raw reply).
print(morphemes)


{
  "words": [
    {
      "word": "vuurwerkverbod",
      "free": ["vuur", "werk", "bod"],
      "bound": {
        "prefixes": [],
        "suffixes": [],
        "other": []
      }
    },
    {
      "word": "meerderheid",
      "free": ["meer", "heid"],
      "bound": {
        "prefixes": [],
        "suffixes": [],
        "other": ["der"]
      }
    },
    {
      "word": "kamerleden",
      "free": ["kamer", "leden"],
      "bound": {
        "prefixes": [],
        "suffixes": [],
        "other": []
      }
    },
    {
      "word": "belangrijker",
      "free": ["belang", "rijk"],
      "bound": {
        "prefixes": [],
        "suffixes": ["er"],
        "other": []
      }
    },
    {
      "word": "ondersteuning",
      "free": ["steun"],
      "bound": {
        "prefixes": ["onder"],
        "suffixes": ["ing"],
        "other": []
      }
    }
  ]
}

[{'word': 'vuurwerkverbod', 'free': ['vuur', 'werk', 'bod'], 'bound': {'prefixes': [], 'suffixes': [], 'other': [

In [47]:
# Inspect the second entry of the `morphemes` list produced by an earlier cell.
x = morphemes[1]
print(x)

{'word': 'meerderheid', 'free': ['meer', 'heid'], 'bound': {'prefixes': [], 'suffixes': [], 'other': ['der']}}


In [7]:
def exercise_identify(dict_word):
    """
    Build an "identify the morphemes" exercise for one analysed word.

    dict_word must be a dict shaped like:
    {
        "word": "vuurwerkverboden",
        "free": ["vuur", "werk", "verbod"],
        "bound": {
            "prefixes": [],
            "suffixes": ["en"],
            "other": []
        }
    }

    Returns a (question, answer) tuple of strings.
    Raises KeyError when any of the keys shown above is missing.
    """
    target = dict_word['word']
    free_morphemes = dict_word['free']
    bound_morphemes = dict_word['bound']
    # Look up every sub-key so an incomplete 'bound' entry fails here with a
    # KeyError; the values themselves are not used separately.
    for category in ('prefixes', 'suffixes', 'other'):
        bound_morphemes[category]

    question = f"Identify the free and bound morphemes in the following word: {target}."
    answer = f"Free morphemes: {free_morphemes}. Bound morphemes: {bound_morphemes}"

    # Returned as a tuple for now; a JSON string may suit a frontend better.
    return question, answer

# x = exercise_identify(x)
# print(f"Question: {x[0]}")
# print(f"Answer: {x[1]}")



In [8]:
def _blank_out_word(sentence, target):
    """Return ``sentence`` with occurrences of ``target`` replaced by '_____'."""
    if not target:
        # An empty pattern would match between every character.
        return sentence
    # The word comes from the model, so treat it as literal text, never as a
    # regular expression.
    literal = re.escape(target)
    # Prefer whole-word, case-insensitive hits: a capitalised sentence-initial
    # form is still blanked and the word is not cut out of a longer word.
    blanked, hits = re.subn(rf'(?<!\w){literal}(?!\w)', '_____', sentence, flags=re.IGNORECASE)
    if hits:
        return blanked
    # No whole-word hit: fall back to a plain literal substring replacement.
    return re.sub(literal, '_____', sentence)


def exercise_fill_in_the_blank(dict_word):
    """
    Generates a fill in the blanks exercise for the given word.

    Expects a dict object with at least the following key:
    {
        "word": "vuurwerkverboden",
        ...
    }

    The model is asked for a sentence that uses a modified form of the word;
    that form is then replaced by '_____' in the sentence.

    Returns a (exercise_text, answer_text) tuple of strings, where
    answer_text is the modified word reported by the model.
    Raises json.JSONDecodeError if the reply is not valid JSON and KeyError
    if 'word' is missing from dict_word or 'sentence'/'word' from the reply.
    """
    word = dict_word['word']
    prompt = f"Generate a Dutch sentence containing the Dutch word '{word}', but in a changed form. For example, change the tense, make it plural, make it dimminiative, or anything else (but don't add new words to the word). Make sure the sentence is grammatically correct. The sentence should be a complete sentence and not just a fragment. Return your answer formatted as JSON with two keys: sentence (containing the full sentence including the word) and word (containing the modified word)  Do not include any markdown formatting like triple backticks in your answer. Just return the plain text of the sentence."
    output_json = generate_text(prompt)
    output = json.loads(remove_markdown(output_json))
    sentence = output['sentence']
    modified_word = output['word']
    sentence_blanked = _blank_out_word(sentence, modified_word)
    exercise_text = f"Fill in the blank in the following sentence: \n ({word}) {sentence_blanked}"
    answer_text = modified_word
    # I think the return should probably be a JSON string to communicate with the frontend but for now its a tuple
    return exercise_text, answer_text
    
    

In [None]:
# Demo: generate a fill-in-the-blank exercise for a single word.
# The input is deliberately not named `dict`: that would shadow the builtin
# for every cell run afterwards in the same kernel.
sample_word = {'word': "vervuilen"}
y = exercise_fill_in_the_blank(dict_word=sample_word)
print(f"Question: {y[0]}")
print(f"Answer: {y[1]}")



Question: Fill in the blank in the following sentence: 
 (vervuilen) De _____ rivier stroomde langzaam verder.
Answer: vervuilde


## FULL PIPELINE

In [9]:
# End-to-end run: important words -> morphemes -> one exercise of each kind.
text = "Hallo kinderen! Vandaag gaan we een spannende reis maken naar de wonderlijke wereld van bijen. Bijen zijn hele kleine, maar superbelangrijke beestjes voor onze natuur en zelfs voor ons eten!Wat zijn bijen?Bijen zijn insecten die heel goed zijn in bestuiven. Dat betekent dat ze stuifmeel van de ene bloem naar de andere brengen. Zo helpen ze planten om vruchten te maken, zoals appels en kersen. Er zijn heel veel verschillende soorten bijen, maar de meeste wonen samen in een bijenkorf.Hoe leven bijen?In een bijenkorf woont een grote bijenfamilie. Er is een koninginbij, werkbijen, en mannetjesbijen. De koningin is de enige die eitjes legt. De werkbijen doen bijna al het werk: ze verzamelen nectar, maken honing, poetsen de bijenkorf, en zorgen voor de babybijtjes. De mannetjesbijen helpen de koningin met het krijgen van nieuwe bijtjes."
important_words = extract_important_words(text=text, nr_of_words=5)
print(f"Important words: {important_words}")
morphemes = extract_morphemes(words=important_words)
print("Exercises:")
# The identify exercise uses the first analysed word, the fill-in-the-blank
# exercise the second one.
x = exercise_identify(morphemes[0])
y = exercise_fill_in_the_blank(morphemes[1])
# Each exercise is a (question, answer) pair.
print(f"Question: {x[0]}")
print(f"Answer: {x[1]}")
print(f"Question: {y[0]}")
print(f"Answer: {y[1]}")


Important words: ['bijen', 'bloem', 'nectar', 'honing', 'bijenkorf']
{
  "words": [
    {
      "word": "bijen",
      "free": ["bij"],
      "bound": {
        "prefixes": [],
        "suffixes": ["-en"],
        "other": []
      }
    },
    {
      "word": "bloem",
      "free": ["bloem"],
      "bound": {
        "prefixes": [],
        "suffixes": [],
        "other": []
      }
    },
    {
      "word": "nectar",
      "free": ["nectar"],
      "bound": {
        "prefixes": [],
        "suffixes": [],
        "other": []
      }
    },
    {
      "word": "honing",
      "free": ["honing"],
      "bound": {
        "prefixes": [],
        "suffixes": [],
        "other": []
      }
    },
    {
      "word": "bijenkorf",
      "free": ["bij", "korf"],
      "bound": {
        "prefixes": [],
        "suffixes": ["-en"],
        "other": []
      }
    }
  ]
}

Exercises:
Question: Identify the free and bound morphemes in the following word: bijen.
Answer: Free morphemes: ['bij