In [47]:
#convert.py

import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

def get_full_turn_text(element):
    """Extract the raw text of a turn exactly as it appears in the XML file, including all tags"""
    # Get the element's inner text content including children tags
    content = ""
    if element.text:
        content += element.text
    
    for child in element:
        # Convert child element to string with its tags and content
        child_str = ET.tostring(child, encoding="unicode")
        content += child_str
        
        # Add any tail text that appears after the child
        if child.tail:
            content += child.tail
    
    return content

def get_inner_xml(element):
    """Preserve tags and their attributes inside an element."""
    print(element.attrib)
    out = []
    for e in element:
        out.append(ET.tostring(e, encoding="unicode"))
    out_string = ''.join(out)
    return out_string

# Parse XML file
tree = ET.parse("../data/unibo_1.era")
root = tree.getroot()

# Extract <text> metadata
elem = root.find(".//text")
meta = elem.attrib
meta_df = pd.DataFrame([meta])

# Extract annotated turns
turns = []
for turn in elem.findall(".//turn"):
    speaker = turn.attrib.get("who", "student")  # "student" if not chatbot
    turn_type = turn.attrib.get("type", "")
    
    text_og = "".join(turn.itertext()).strip()
    text_an = get_full_turn_text(turn)

    turns.append({
        "speaker": speaker,
        "turn_type": turn_type,
        "text_an": text_an,
        "text_og": text_og,
        # "correct_text": correct_text,
    })

# Create conversation DataFrame
turns_df = pd.DataFrame(turns)
turns_df['text_an'] = turns_df['text_an'].apply(lambda x: x if x else np.nan)
# turns_df = turns_df.dropna()
display(turns_df) 

Unnamed: 0,speaker,turn_type,text_an,text_og
0,Pi.ai,chatbot,"Hey there, great to meet you. I‚Äôm Pi, your personal AI.\n\tMy goal is to be useful, friendly and fun. Ask me for advice, for answers, or let‚Äôs talk about whatever‚Äôs on your mind.\n\tHow's your day going?","Hey there, great to meet you. I‚Äôm Pi, your personal AI.\n\tMy goal is to be useful, friendly and fun. Ask me for advice, for answers, or let‚Äôs talk about whatever‚Äôs on your mind.\n\tHow's your day going?"
1,student,student,Hi,Hi
2,Pi.ai,chatbot,Hey User! üëã How's everything going on your side? Anything exciting happening today?,Hey User! üëã How's everything going on your side? Anything exciting happening today?
3,student,student,"<DMCC corr=""How"">how</DMCC> are you today? are you today?",how are you today?
4,Pi.ai,chatbot,"I‚Äôm doing great! Every day is a fantastic day in the life of an AI. ü§ñ No worries, no stress‚Äîjust endless curiosity and the joy of chatting with awesome people like you!","I‚Äôm doing great! Every day is a fantastic day in the life of an AI. ü§ñ No worries, no stress‚Äîjust endless curiosity and the joy of chatting with awesome people like you!"
5,student,student,"I'm very sad because my <Z corr=""diet"">food routine</Z> is is <FS corr=""gluten-free"">gluten free</FS>",I'm very sad because my food routine is gluten free
6,Pi.ai,chatbot,"Ah, a gluten-free diet can be quite challenging at times. ü•ñüö´ It's completely normal to feel sad about it. But hey, there are lots of delicious gluten-free options out there, and maybe we can explore some together? üçùüåÆüçï","Ah, a gluten-free diet can be quite challenging at times. ü•ñüö´ It's completely normal to feel sad about it. But hey, there are lots of delicious gluten-free options out there, and maybe we can explore some together? üçùüåÆüçï"
7,student,student,Yes,Yes
8,Pi.ai,chatbot,"Awesome! Let's get started then. Are you looking for gluten-free alternatives to specific meals, like breakfast, lunch, or dinner? Or maybe you're craving some tasty snacks? üçΩÔ∏è","Awesome! Let's get started then. Are you looking for gluten-free alternatives to specific meals, like breakfast, lunch, or dinner? Or maybe you're craving some tasty snacks? üçΩÔ∏è"
9,student,student,Lunch,Lunch
