In [None]:
# install the required packages
pip install lxml pillow

Defaulting to user installation because normal site-packages is not writeable
Collecting pillow
  Downloading pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl.metadata (9.0 kB)
Downloading pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl (4.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m6.6 MB/s[0m  [33m0:00:00[0m eta [36m0:00:01[0m
[?25hInstalling collected packages: pillow
Successfully installed pillow-11.3.0
Note: you may need to restart the kernel to use updated packages.


In [None]:

from lxml import etree  # noqa
from PIL import Image

# Add species information to the XMP metadata of the digital image copy.
#
# Steps: read the XMP packet from the JPEG (or start an empty one), make sure
# rdf:Description/dwct:scientificName/rdf:Bag exists, append every scientific
# name that is not registered yet, and save a copy with the updated packet.
NSMAP = {
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "dwct": "http://rs.tdwg.org/dwc/terms/",
}
# Names to register. Duplicates (within this list or already in the image)
# are skipped further down.
additional_keywords = [
    "Senna artemisioides subsp. x petiolaris",
    "Dodonaea viscosa subsp. angustissima",
    "Senna artemisioides subsp. x petiolaris",
    "Casuarina pauper",
    "Myoporum platycarpum"
]
with Image.open("./photos/pp01_19740800_seq1.jpg") as img:
    root = None
    # Get the current data from the image
    for segment, content in img.applist:
        if segment != "APP1":
            continue
        # Split on the FIRST NUL only: the bytes before it identify the payload,
        # everything after it is the payload. partition() also copes with an
        # APP1 segment that contains no NUL at all (marker is then the whole
        # content and simply does not match).
        marker, _, xmp_tags = content.partition(b"\x00")
        if marker == b"http://ns.adobe.com/xap/1.0/":
            # Drop trailing NUL padding so the XML parser does not choke on it.
            root = etree.fromstring(xmp_tags.rstrip(b"\x00"))
            break

    if root is None:
        # Setup the root values for xmp
        root_base = (
            b'<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="3.1.2-113">\n'
            b'<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n'
            b"</rdf:RDF>\n</x:xmpmeta>"
        )
        root = etree.fromstring(root_base)

    # find the description element, which is the XMP data,
    # this is where custom information is stored
    description_element = root.find(".//rdf:Description", namespaces=NSMAP)

    if description_element is None:
        # find() only looks at descendants, so handle a packet whose root
        # element is rdf:RDF itself.
        if root.tag == f'{{{NSMAP["rdf"]}}}RDF':
            parent_element = root
        else:
            parent_element = root.find(".//rdf:RDF", namespaces=NSMAP)
        if parent_element is None:
            raise ValueError("XMP packet does not contain an rdf:RDF element")
        # SubElement already attaches the new element to its parent.
        description_element = etree.SubElement(
            parent_element, f'{{{NSMAP["rdf"]}}}Description', nsmap=NSMAP
        )

    # Check if there is a scientificName element, if not add to the Description.
    # A Bag element with list items
    scientific_element = description_element.find(".//dwct:scientificName", namespaces=NSMAP)
    # Compare with None explicitly: the truth value of an lxml element reflects
    # whether it has children, not whether find() located it.
    if scientific_element is None:
        scientific_element = etree.SubElement(
            description_element, f'{{{NSMAP["dwct"]}}}scientificName', nsmap=NSMAP
        )

    # Look for the Bag below the scientificName element we just resolved, and
    # create it when missing (also covers an existing scientificName without a Bag).
    item_element = scientific_element.find("rdf:Bag", namespaces=NSMAP)
    if item_element is None:
        item_element = etree.SubElement(
            scientific_element, f'{{{NSMAP["rdf"]}}}Bag', nsmap=NSMAP
        )

    # ensure unique values in the scientificNames
    # Compare against the exact text of the existing list items; a substring
    # test on the serialized XML would treat "Casuarina pauper" as present when
    # only a longer name containing it is, and would miss XML-escaped text.
    registered_values = {
        (li_element.text or "").strip()
        for li_element in item_element.findall("rdf:li", namespaces=NSMAP)
    }
    for item in additional_keywords:
        if item in registered_values:
            continue

        registered_values.add(item)
        # Appended at the end of the Bag, so new names keep the list order.
        li_element = etree.SubElement(item_element, f'{{{NSMAP["rdf"]}}}li', nsmap=NSMAP)
        li_element.text = item

    # Save the image
    # NOTE(review): saving writes a newly encoded copy; confirm whether JPEG
    # quality and other metadata (e.g. EXIF) must be passed on explicitly.
    img.save("./photos/updated_image.jpg", xmp=etree.tostring(root))
    
    # The updated XMP metadata in the saved image can be viewed with an (online) metadata viewer.