# Aussage- und Argumentannotationseditor

Editor zur automatischen Transkription der Audiodateien und Erstellung der RDF-/AIF-Annotationen.


### Import Bibliotheken

In [6]:
#!pip install ipywidgets
#!pip install spacy
#!python -m spacy download de_core_news_sm
#!pip install rdflib

import datetime
import html
import json
import os
from collections import OrderedDict

import ipywidgets as widgets
import requests
import spacy
from IPython.display import display, clear_output, FileLink
from ipywidgets import FileUpload

# 1. Transkription

### Hilfsfunktionen

In [8]:
# MP3-Dateien aus dem Ordner "uploads" einlesen
def list_mp3_files(folder="./uploads"):
    """Return the names of the MP3 files located directly in ``folder``.

    Args:
        folder: Directory to scan. Defaults to ``./uploads``.

    Returns:
        Alphabetically sorted list of file names (not full paths) whose
        extension is ``.mp3`` in any letter case. Directories are skipped.
        An empty list is returned when ``folder`` does not exist.
    """
    # A missing folder is treated like an empty one instead of raising
    # FileNotFoundError while the notebook cell is being rendered.
    if not os.path.isdir(folder):
        return []
    return sorted(
        name
        for name in os.listdir(folder)
        if name.lower().endswith(".mp3")
        and os.path.isfile(os.path.join(folder, name))
    )

# MP3-Upload-Funktion 
def upload_mp3(path):
    """Send the file at ``path`` to the backend's ``/upload_audio/`` endpoint.

    The file is posted as the multipart field ``file`` with the MIME type
    ``audio/mpeg`` and its base name as the file name.

    Args:
        path: Path of the audio file to upload.

    Returns:
        The ``requests`` response object of the POST request.
    """
    filename = os.path.basename(path)
    with open(path, "rb") as audio:
        response = requests.post(
            "http://backend:8000/upload_audio/",
            files={"file": (filename, audio, "audio/mpeg")},
        )
    return response

# Audiodatei vom Backend abrufen
def fetch_audiofiles():
    """Fetch the audio file listing from the backend's ``/audiofiles`` endpoint.

    Returns:
        The decoded JSON body when the backend answers with status 200,
        otherwise an empty list.
    """
    reply = requests.get("http://backend:8000/audiofiles")
    if reply.status_code != 200:
        return []
    return reply.json()

# Transkriptionsfunktion 
def transcribe_from_db(file_id):
    """Ask the backend to transcribe the stored audio file ``file_id``.

    Args:
        file_id: Identifier interpolated into the ``/transcribe/{file_id}`` URL.

    Returns:
        The ``requests`` response object of the POST request.
    """
    endpoint = f"http://backend:8000/transcribe/{file_id}"
    return requests.post(endpoint)

# Transkriptionen vom Backend abrufen
def fetch_transcripts():
    """Fetch the transcript listing from the backend's ``/transcripts`` endpoint.

    Returns:
        The decoded JSON body when the backend answers with status 200,
        otherwise an empty list.
    """
    reply = requests.get("http://backend:8000/transcripts")
    if reply.status_code != 200:
        return []
    return reply.json()



## Audiodatei hochladen 

- **Legen Sie eine `.mp3`-Datei im Ordner 'uploads' ab, wählen Sie sie im Dropdown aus und klicken Sie auf „Hochladen“.**
- Nach dem erfolgreichen Upload wird eine Datei-ID zurückgegeben.

In [3]:
### Upload an audio file ###

# --- UI components ---
# The file list is read once, when this cell runs; re-run the cell after
# adding files to ./uploads.
mp3_files = list_mp3_files()
file_selector = widgets.Dropdown(
    options=["MP3-Datei auswählen"] + mp3_files,
    description='Datei:',
    value="MP3-Datei auswählen"
)

upload_button = widgets.Button(
    description="Hochladen",
    button_style='primary',
    icon='upload',
    layout=widgets.Layout(width='20%')
)

# Cell-specific name: the click handler looks this global up at click time,
# so sharing the name `output_area` with other cells would redirect the
# upload messages to whichever cell bound the name last.
upload_output_area = widgets.Output()

# NOTE(review): meant to hold the ID of the uploaded file, but nothing in
# this cell assigns it - confirm the upload response schema before wiring it.
uploaded_file_id = None

# --- Upload button event ---
def on_upload_clicked(b):
    """Upload the file chosen in ``file_selector`` and report the result.

    Args:
        b: The clicked button (unused; required by ``Button.on_click``).
    """
    selected_file = file_selector.value
    upload_output_area.clear_output()

    # The placeholder entry means nothing has been chosen yet.
    if selected_file == "MP3-Datei auswählen":
        with upload_output_area:
            print("Bitte wählen Sie eine MP3-Datei aus!")
        return

    with upload_output_area:
        print(f"Ausgewählte Datei wird hochgeladen: {selected_file}")
        print("Bitte warten Sie...")

    path = os.path.join("./uploads", selected_file)
    try:
        response = upload_mp3(path)
    except (OSError, requests.RequestException) as exc:
        # Missing/unreadable file or unreachable backend: show the reason in
        # the cell instead of losing the traceback inside the widget callback.
        upload_output_area.clear_output()
        with upload_output_area:
            print("Upload fehlgeschlagen:", exc)
        return

    upload_output_area.clear_output()
    with upload_output_area:
        # Do not announce success for 4xx/5xx answers.
        print("Upload abgeschlossen." if response.ok else "Upload fehlgeschlagen.")
        print("Status:", response.status_code)
        print("Audio:", response.text)

upload_button.on_click(on_upload_clicked)

# --- Display ---
display(widgets.VBox([
    file_selector,
    upload_button,
    upload_output_area
]))


VBox(children=(Dropdown(description='Datei:', options=('MP3-Datei auswählen', 'Interview-Scheffler.mp3'), valu…

## Transkription starten
- Wählen Sie eine Audiodatei aus.
- Das Transkript wird automatisch in der Datenbank gespeichert.

In [4]:
### Transcription ###

audiofiles = fetch_audiofiles()

# --- UI components ---
# Each entry is (label, value); the value is the audio file ID, the
# placeholder carries None.
audio_dropdown_options = [('Wählen Sie eine Audiodatei', None)] + [
    (f"{a['file']} (ID: {a['id']})", a['id']) for a in audiofiles
]
audio_selector = widgets.Dropdown(
    options=audio_dropdown_options,
    description='Audiofiles:',
    disabled=False,
    value=None
)

transcribe_button = widgets.Button(
    description="Transkription starten",
    button_style='success',
    icon='play',
    disabled=True,  # enabled by on_audio_selected once a file is chosen
    layout=widgets.Layout(width='20%')
)

# Cell-specific name: the click handler resolves this global at click time,
# so it must not be shared with the output widgets of other cells.
transcribe_output_area = widgets.Output()

# --- Selecting a file enables the button ---
def on_audio_selected(change):
    """Enable the transcribe button only while an audio file is selected."""
    transcribe_button.disabled = change['new'] is None

audio_selector.observe(on_audio_selected, names='value')

# --- Transcribe button event ---
def on_transcribe_clicked(b):
    """Start the transcription of the selected audio file and report the result.

    Args:
        b: The clicked button (unused; required by ``Button.on_click``).
    """
    selected_id = audio_selector.value
    transcribe_output_area.clear_output()

    if selected_id is None:
        with transcribe_output_area:
            print("Bitte wählen Sie eine Audiodatei aus.")
        return

    with transcribe_output_area:
        print(f"Transkription wird gestartet für Datei-ID: {selected_id}")
        print("Dieser Vorgang kann mehrere Minuten dauern. Bitte warten Sie ...")

    # Long-running backend call; the button stays disabled meanwhile so the
    # same transcription is not queued twice.
    transcribe_button.disabled = True
    try:
        response = transcribe_from_db(selected_id)
    except requests.RequestException as exc:
        transcribe_output_area.clear_output()
        with transcribe_output_area:
            print("Transkription fehlgeschlagen:", exc)
        return
    finally:
        transcribe_button.disabled = audio_selector.value is None

    transcribe_output_area.clear_output()
    with transcribe_output_area:
        # Do not announce success for 4xx/5xx answers.
        if response.ok:
            print("Transkription ist abgeschlossen.")
        else:
            print("Transkription fehlgeschlagen.")
        print("Status:", response.status_code)
        # Only the beginning of the body is shown to keep the output short.
        print("Response:", response.text[:100], "...")

transcribe_button.on_click(on_transcribe_clicked)

# --- Display ---
display(widgets.VBox([
    audio_selector,
    transcribe_button,
    transcribe_output_area
]))


VBox(children=(Dropdown(description='Audiofiles:', options=(('Wählen Sie eine Audiodatei', None), ('Interview-…

## Existierende Transkripte anzeigen und auswählen
- Eine Liste aller gespeicherten Transkripte wird angezeigt.
- Wählen Sie ein Transkript aus.

In [9]:
transcripts = fetch_transcripts()

# --- UI components ---
# Each entry is (label, value); the value is the transcript ID, the
# placeholder carries None.
transcript_dropdown_options = [('Wählen Sie ein Transkript', None)] + [
    (f"{t['file']} (ID: {t['id']})", t['id']) for t in transcripts
]
transcript_selector = widgets.Dropdown(
    options=transcript_dropdown_options,
    description='Transkript:',
    disabled=False,
    value=None
)

text_area = widgets.Textarea(
    value='',
    placeholder='Hier wird das Transkript angezeigt',
    description='Text:',
    layout=widgets.Layout(width='80%', height='300px'),
    disabled=True
)

rdf_button = widgets.Button(
    description='Diesen Text annotieren',
    button_style='success',
    icon='arrow-right',
    layout=widgets.Layout(width='20%')
)

# Cell-specific name: the handlers below resolve this global at call time,
# so it must not be shared with the status labels of the annotation cells.
transcript_status_label = widgets.Label(value='')

# spaCy pipeline used for sentence splitting; loaded on the first click and
# reused afterwards because loading the model is slow.
sentence_splitter = None


# --- Dropdown selection event ---
def on_select_change(change):
    """Show the text of the newly selected transcript in ``text_area``."""
    selected_id = change['new']

    if selected_id is None:
        text_area.value = ''
        text_area.disabled = True
        transcript_status_label.value = ''
        return

    selected = next((t for t in transcripts if t['id'] == selected_id), None)
    if selected:
        text_area.value = selected['text']
        # NOTE(review): this makes the text editable, but the annotate button
        # splits selected['text'], not text_area.value - edits made here are
        # ignored. Confirm whether that is intended.
        text_area.disabled = False
        transcript_status_label.value = ''

transcript_selector.observe(on_select_change, names='value')

# --- Button: hand the transcript over to annotation ---
def on_rdf_button_clicked(b):
    """Split the selected transcript into sentences and publish it for annotation.

    Sets the module-level ``global_sentences``, ``global_transcript_id`` and
    ``global_filename`` that the annotation cells read.

    Args:
        b: The clicked button (unused; required by ``Button.on_click``).
    """
    global global_sentences
    global global_transcript_id
    global global_filename
    global sentence_splitter

    selected_id = transcript_selector.value
    selected = next((t for t in transcripts if t['id'] == selected_id), None)

    if not selected:
        # Reset all three values together so the annotation cells never see
        # an empty sentence list paired with a stale transcript ID.
        global_sentences = []
        global_transcript_id = None
        global_filename = None
        transcript_status_label.value = 'Kein Transkript ausgewählt.'
        return

    # Split into sentences and store them
    if sentence_splitter is None:
        sentence_splitter = spacy.load("de_core_news_sm")
    doc = sentence_splitter(selected['text'])

    # File name without directory and extension; used for export file names.
    global_filename = os.path.splitext(os.path.basename(selected['file']))[0]
    global_sentences = [sent.text.strip() for sent in doc.sents]
    global_transcript_id = selected_id

    # Tell the user that the text has been split into sentences
    transcript_status_label.value = f'ID:{global_transcript_id}, File: {global_filename} - {len(global_sentences)} Sätze wurden ausgewählt.'

rdf_button.on_click(on_rdf_button_clicked)

# --- Display the whole UI ---
display(widgets.VBox([
    transcript_selector,
    text_area,
    rdf_button,
    transcript_status_label
]))


VBox(children=(Dropdown(description='Transkript:', options=(('Wählen Sie ein Transkript', None), ('Interview-S…

# 2. RDF-Annotation

### Funktionen

In [25]:
def fetch_existing_annotations(transcript_id):
    """Load the RDF annotations already stored for a transcript.

    Args:
        transcript_id: Identifier interpolated into the
            ``/rdf_annotation/{transcript_id}`` URL.

    Returns:
        Dict mapping each annotation's ``rdf_id`` to the annotation itself.
        An empty dict is returned (after printing a message) when the backend
        answers with an error status or any exception occurs.
    """
    try:
        reply = requests.get(f"http://backend:8000/rdf_annotation/{transcript_id}")
        if not reply.ok:
            print(f"Fehler: Status {reply.status_code}")
            return {}
        by_rdf_id = {}
        for annotation in reply.json():
            by_rdf_id[annotation["rdf_id"]] = annotation
        return by_rdf_id
    except Exception as e:
        print(f"Ausnahme: {e}")
        return {}

def to_camel_case(text: str) -> str:
    """Join the whitespace-separated words of ``text`` into lower camel case.

    The first word is lower-cased completely; every following word gets an
    upper-case first letter and a lower-case remainder. Text without any
    words yields an empty string.
    """
    parts = text.split()
    if len(parts) == 0:
        return ''
    head, *tail = parts
    pieces = [head.lower()]
    for part in tail:
        pieces.append(part.capitalize())
    return ''.join(pieces)
    
def format_dict_for_display(data: dict) -> str:
    """Render the known annotation fields of ``data`` as indented JSON.

    Only the fields listed below are included, always in this order; fields
    missing from ``data`` are skipped and any other keys are dropped.
    Non-ASCII characters are written as-is rather than escaped.
    """
    display_fields = (
        "rdf_id",
        "transcript_id",
        "sentence",
        "subject",
        "predicate",
        "object_",
        "rdf_timestamp",
    )
    selected = {}
    for field in display_fields:
        if field in data:
            selected[field] = data[field]
    return json.dumps(selected, indent=2, ensure_ascii=False)

# Annotation-UI erzeugen
def create_annotation_ui(sentence, sentence_id):
    """Build the subject/predicate/object annotation form for one sentence.

    Reads the module-level ``transcript_id`` and ``existing_annotations``
    when called; a stored annotation whose key is
    ``"{transcript_id}_{sentence_id}"`` is used to pre-fill the fields.

    Args:
        sentence: Sentence text shown above the form and sent to the backend.
        sentence_id: Number of the sentence within the transcript.

    Returns:
        A ``widgets.VBox`` with the sentence, the three text fields, the save
        button, a status line and an output area for the saved annotation.
    """
    # Freeze the transcript this form belongs to: the save handler runs much
    # later, and the module-level `transcript_id` may have been rebound by
    # then.
    form_transcript_id = transcript_id

    # The sentence is plain text; escape it so characters such as "<" or "&"
    # are displayed instead of being interpreted as markup.
    satz_label = widgets.HTML(f"<b>Satz {sentence_id}:</b> {html.escape(sentence)}")

    subject = widgets.Text(description="Subjekt")
    predicate = widgets.Text(description="Prädikat")
    object_ = widgets.Text(description="Objekt")

    # Pre-fill the fields from an existing annotation, if there is one
    rdf_id = f"{form_transcript_id}_{sentence_id}"
    ann = existing_annotations.get(rdf_id)
    if ann is not None:
        # `or ""` guards against None values, which a Text widget rejects.
        subject.value = ann.get("subject") or ""
        predicate.value = ann.get("predicate") or ""
        object_.value = ann.get("object_") or ""

    save_btn = widgets.Button(description="💾 Speichern", button_style='success')
    status_label = widgets.HTML(value="")
    output_area = widgets.Output()

    def on_save_clicked(b):
        """Validate the three fields and post the annotation to the backend."""
        # Whitespace-only input would be stored as an empty string after
        # camel-casing, so it counts as missing.
        if not (subject.value.strip() and predicate.value.strip() and object_.value.strip()):
            output_area.clear_output()
            with output_area:
                print("Bitte geben Sie Annotation ein!")
            return

        status_label.value = "Speichern..."
        payload = {
            "sentence_id": sentence_id,
            "sentence": sentence,
            "subject": to_camel_case(subject.value),
            "predicate": to_camel_case(predicate.value),
            "object_": to_camel_case(object_.value),
        }

        try:
            r = requests.post(f"http://backend:8000/annotate_rdf/{form_transcript_id}", json=payload)
            if r.ok:
                status_label.value = "<span style='color:green;'> Annotation wurde gespeichert.</span>"
                output_area.clear_output()
                with output_area:
                    response_data = json.loads(r.text)
                    print(format_dict_for_display(response_data))
            else:
                status_label.value = f"<span style='color:red;'> Fehler: {r.status_code} {html.escape(r.text)}</span>"
                # An Output widget has no `value`; clear_output() removes the
                # stale annotation shown by an earlier successful save.
                output_area.clear_output()
        except Exception as e:
            # Exception texts often contain "<...>" object reprs, which would
            # vanish when rendered as HTML without escaping.
            status_label.value = f"<span style='color:red;'> Ausnahme: {html.escape(str(e))}</span>"
            output_area.clear_output()

    save_btn.on_click(on_save_clicked)

    return widgets.VBox([
        satz_label,
        widgets.HBox([subject, predicate, object_]),
        save_btn,
        status_label,
        output_area
    ])

## RDF-Annotation erzeugen

In [26]:
# Transcript chosen via "Diesen Text annotieren"; create_annotation_ui reads
# `transcript_id` and `existing_annotations` from module scope.
transcript_id = global_transcript_id
sentences = global_sentences

existing_annotations = fetch_existing_annotations(transcript_id)

# Build one annotation form per sentence (sentence IDs start at 1)
annotation_blocks = [
    create_annotation_ui(sent, number)
    for number, sent in enumerate(sentences, start=1)
]

spacing = widgets.Label(value='')

rdf_export_button = widgets.Button(
    description='Speichern & Export RDF/XML',
    button_style='warning',
    icon='download',
    layout=widgets.Layout(width='25%')
)

# Cell-specific names: the export handler resolves its globals at click time,
# so it must not share `status_label` / `transcript_id` with other cells that
# rebind them.
rdf_status_label = widgets.Label(value='')

rdf_output = widgets.Output()

# Values frozen for the export, so the file always matches the forms above.
rdf_export_transcript_id = transcript_id
rdf_export_basename = global_filename

# --- Export button event ---
def on_rdf_export_clicked(b):
    """Fetch the RDF/XML export from the backend, show it and save it under ``out/``.

    Args:
        b: The clicked button (unused; required by ``Button.on_click``).
    """
    rdf_status_label.value = "RDF-Export läuft..."
    try:
        # 1. Fetch the RDF from the API
        r = requests.get(f"http://backend:8000/export_rdf/{rdf_export_transcript_id}")
        if not r.ok:
            # Check the status before decoding: an error body need not be
            # JSON, and decoding it first would hide the status code.
            rdf_status_label.value = f"❌ Fehler beim Exportieren: Status {r.status_code}"
            return

        rdf_text = r.json()["rdf_xml"]
        rdf_output.clear_output()
        with rdf_output:
            print(rdf_text)

        # 2. Save as a file (the target folder is created on first use)
        os.makedirs("out", exist_ok=True)
        now = datetime.datetime.now().strftime("%Y%m%d_%H%M")
        filename = f"rdf_{rdf_export_basename}_{rdf_export_transcript_id}_{now}.xml"
        with open(os.path.join("out", filename), "w", encoding="utf-8") as f:
            f.write(rdf_text)

        # Report success only once the file has actually been written.
        rdf_status_label.value = "RDF/XML wurde erfolgreich generiert. Die RDF/XML-Datei wird im Ordner 'out' abgelegt."

    except Exception as e:
        rdf_status_label.value = f"❌ Ausnahme: {e}"

rdf_export_button.on_click(on_rdf_export_clicked)

# Display
display(widgets.VBox([
    *annotation_blocks,
    spacing,
    rdf_export_button,
    rdf_status_label,
    rdf_output
]))


VBox(children=(VBox(children=(HTML(value='<b>Satz 1:</b> Interview 16.'), HBox(children=(Text(value='ich', des…

# 3. AIF-Annotation

### Import

## AIF-Annotation erzeugen

In [21]:
# Copy the transcript chosen via "Diesen Text annotieren" into the names that
# create_aif_ui reads from module scope (`transcript_id`, `sentences`).
# Both globals exist only after that button has been clicked once.
transcript_id = global_transcript_id
sentences = global_sentences 

def create_aif_ui(sentence, sentence_id):
    """Build the argument (AIF) annotation form for one sentence.

    Reads the module-level ``transcript_id`` and ``sentences`` when called.

    Args:
        sentence: Sentence text shown above the form.
        sentence_id: Number of the sentence within the transcript; it is
            left out of the "supports" choices so a sentence cannot support
            itself.

    Returns:
        A ``widgets.VBox`` with the sentence, the type and "supports"
        dropdowns, the save button, a status line and an output area.
    """
    # Freeze the transcript this form belongs to: the save handler runs much
    # later, and the module-level `transcript_id` may have been rebound by
    # then.
    form_transcript_id = transcript_id

    # The sentence is plain text; escape it so characters such as "<" or "&"
    # are displayed instead of being interpreted as markup.
    satz_label = widgets.HTML(f"<b>Satz {sentence_id}:</b> {html.escape(sentence)}")

    # Options are (label, value): German labels are shown, English values
    # are what `.value` returns and what is sent to the backend.
    type_dropdown = widgets.Dropdown(
        options=[("", ""), ("Prämisse", "Premise"), ("Schlussfolgerung", "Conclusion")],
        description="Typ",
        value="",
    )

    supports_dropdown = widgets.Dropdown(
        options=[("", "")] + [(f"Satz {i + 1}", i + 1) for i in range(len(sentences)) if i + 1 != sentence_id],
        description="supports",
        value=""
    )

    output_area = widgets.Output()

    # Switch the "supports" dropdown depending on the selected role
    def on_type_change(change):
        """Disable and reset "supports" while the sentence is a conclusion."""
        # change["new"] is the option VALUE ("Premise"/"Conclusion"), not the
        # German label shown in the dropdown.
        is_conclusion = change["new"] == "Conclusion"
        if is_conclusion:
            supports_dropdown.value = ""
        supports_dropdown.disabled = is_conclusion

    type_dropdown.observe(on_type_change, names="value")

    save_btn = widgets.Button(description="💾 Speichern", button_style='info')
    status_label = widgets.HTML(value="")

    def on_save_clicked(b):
        """Validate the selection and post the annotation to the backend."""
        selected_type = type_dropdown.value
        if selected_type == "":
            output_area.clear_output()
            with output_area:
                print("Bitte wählen Sie einen Typ!")
            return

        # A premise must name the sentence it supports.
        if selected_type == "Premise" and supports_dropdown.value == "":
            output_area.clear_output()
            with output_area:
                print("Bitte wählen Sie einen Satz!")
            return

        status_label.value = "Speichern..."
        payload = {
            "sentence_id": sentence_id,
            "type": selected_type,
            "supports": supports_dropdown.value if selected_type == "Premise" else None
        }
        try:
            r = requests.post(f"http://backend:8000/annotate_aif/{form_transcript_id}", json=payload)
            if r.ok:
                status_label.value = "<span style='color:green;'> Annotation wurde gespeichert.</span>"
                output_area.clear_output()
                with output_area:
                    print(r.text)
            else:
                status_label.value = f"<span style='color:red;'> Fehler: {r.status_code} - {html.escape(r.text)}</span>"
        except Exception as e:
            # Exception texts often contain "<...>" object reprs, which would
            # vanish when rendered as HTML without escaping.
            status_label.value = f"<span style='color:red;'> Ausnahme: {html.escape(str(e))}</span>"

    save_btn.on_click(on_save_clicked)

    return widgets.VBox([
        satz_label,
        widgets.HBox([type_dropdown, supports_dropdown]),
        save_btn,
        status_label,
        output_area

    ])

# Build one AIF form per sentence (sentence IDs start at 1)
aif_annotation_blocks = [
    create_aif_ui(sent, number)
    for number, sent in enumerate(sentences, start=1)
]

spacing = widgets.Label(value='')

aif_export_button = widgets.Button(
    description='Speichern & Export AIF/XML',
    button_style='warning',
    icon='download',
    layout=widgets.Layout(width='25%')
)

# Cell-specific names: the export handler resolves its globals at click time,
# so it must not share `status_label` / `transcript_id` with other cells that
# rebind them.
aif_status_label = widgets.Label(value='')

aif_output = widgets.Output()

# Values frozen for the export, so the file always matches the forms above.
aif_export_transcript_id = transcript_id
aif_export_basename = global_filename

# --- Export button event ---
def on_aif_export_clicked(b):
    """Fetch the AIF/XML export from the backend, show it and save it under ``out/``.

    Args:
        b: The clicked button (unused; required by ``Button.on_click``).
    """
    aif_status_label.value = "AIF-Export läuft..."
    try:
        # 1. Fetch the AIF from the API
        r = requests.get(f"http://backend:8000/export_aif/{aif_export_transcript_id}")
        if not r.ok:
            # Check the status before decoding: an error body need not be
            # JSON, and decoding it first would hide the status code.
            aif_status_label.value = f"❌ Fehler beim Exportieren: Status {r.status_code} - {r.text}"
            return

        aif_text = r.json()["aif_xml"]
        aif_output.clear_output()
        with aif_output:
            print(aif_text)

        # 2. Save as a file (the target folder is created on first use).
        # The "aif_" prefix keeps this file apart from an RDF export of the
        # same transcript written within the same minute.
        os.makedirs("out", exist_ok=True)
        now = datetime.datetime.now().strftime("%Y%m%d_%H%M")
        filename = f"aif_{aif_export_basename}_{aif_export_transcript_id}_{now}.xml"
        with open(os.path.join("out", filename), "w", encoding="utf-8") as f:
            f.write(aif_text)

        # Report success only once the file has actually been written.
        aif_status_label.value = "AIF/XML wurde erfolgreich generiert. Die AIF/XML-Datei wird im Ordner 'out' abgelegt."

    except Exception as e:
        aif_status_label.value = f"❌ Ausnahme: {e}"

aif_export_button.on_click(on_aif_export_clicked)

# Display
display(widgets.VBox([
    *aif_annotation_blocks,
    spacing,
    aif_export_button,
    aif_status_label,
    aif_output
]))


VBox(children=(VBox(children=(HTML(value='<b>Satz 1:</b> Interview 16.'), HBox(children=(Dropdown(description=…