In [38]:
import pandas as pd
from xml.dom import minidom
import xml.etree.ElementTree as ET
import lxml

In [39]:
# Story to process; the name is also the folder that holds the story's files.
storyname = 'uigarasaa-nu-zunbun'

# Word list of the story: leftover 'Unnamed' index columns are dropped and
# empty cells become the literal string 'N/A'.
# NOTE(review): this reads '<storyname>/words.csv' while later cells write under
# 'stories/<storyname>/' -- confirm the intended working directory.
story = (
    pd.read_csv(f'{storyname}/words.csv')
    .pipe(lambda frame: frame.drop(columns=[name for name in frame if 'Unnamed' in name]))
    .fillna('N/A')
)

# Dictionary the story words are looked up in (same 'N/A' convention).
d = pd.read_csv('../dictionary/dictionary.csv').fillna('N/A')

# Dictionary columns that get copied onto each story word
cols = ['japanese', 'root', 'parse', 'cognate', 'notes']

In [40]:
# For every story word, collect the dictionary values of each column in `cols`.
# A word's cell becomes the list of values from ALL dictionary rows whose
# 'meera' matches (several entries = homophones, resolved by hand later on);
# words missing from the dictionary get the empty string ''.
for c in cols:

    # Group the dictionary once per column instead of running two full
    # `d.query(...)` scans for every story row. Within each group the values
    # keep their dictionary order, so the resulting lists are unchanged.
    entries_by_word = d.groupby('meera', sort=False)[c].apply(list).to_dict()

    story[c] = story['meera'].apply(lambda m: entries_by_word.get(m, ''))

In [41]:
# Homophones: rows whose dictionary lookup returned more than one entry.
# Read each of them in the story and decide which meaning is intended.
story.loc[story['japanese'].str.len().gt(1)]

Unnamed: 0,meera,word break,sentence break,japanese,root,parse,cognate,notes
2,ぬ,1.0,,"[〜の, 〜ので]","[N/A, N/A]","[N/A, N/A]","[1, 0]","[N/A, N/A]"
6,ぬ,1.0,,"[〜の, 〜ので]","[N/A, N/A]","[N/A, N/A]","[1, 0]","[N/A, N/A]"
27,ぬ,1.0,,"[〜の, 〜ので]","[N/A, N/A]","[N/A, N/A]","[1, 0]","[N/A, N/A]"
29,ぬ,1.0,,"[〜の, 〜ので]","[N/A, N/A]","[N/A, N/A]","[1, 0]","[N/A, N/A]"
36,ぬ,1.0,,"[〜の, 〜ので]","[N/A, N/A]","[N/A, N/A]","[1, 0]","[N/A, N/A]"
40,ぬ,1.0,,"[〜の, 〜ので]","[N/A, N/A]","[N/A, N/A]","[1, 0]","[N/A, N/A]"
42,うり,,,"[居, それ]","[うる, N/A]","[〜い, N/A]","[1, 1]","[N/A, N/A]"
44,うり,,,"[居, それ]","[うる, N/A]","[〜い, N/A]","[1, 1]","[N/A, N/A]"
46,ふぉい,,,"[食べた, 食べ]","[ふぉー, ふぉー]","[〜い, 〜い]","[1, 1]","[食う, 食う]"
60,ふぉい,,,"[食べた, 食べ]","[ふぉー, ふぉー]","[〜い, 〜い]","[1, 1]","[食う, 食う]"


In [42]:
# Hand-picked meaning for each ambiguous row, keyed by the row's index label in
# `story`; each value must be one of the candidates found in the row's
# 'japanese' list.
meanings = {2: '〜の', 
            6: '〜の', 
            27: '〜の', 
            29: '〜の', 
            36: '〜の', 
            40: '〜の', 
            42: '居',
            44: 'それ',
            46: '食べ', 
            60: '食べ', 
            151: '〜ので',
            152: '食べ', 
            166: '〜の',
            170: '食べた'}

for idx, meaning in meanings.items():

    candidates = story.loc[idx, 'japanese']

    # A plain string means the row was already resolved (e.g. this cell is being
    # re-run). Indexing into it again would pick single characters out of every
    # column, so leave the row alone -- but complain if it disagrees with the
    # requested meaning.
    if isinstance(candidates, str):
        if candidates != meaning:
            raise ValueError(
                f'Row {idx}: expected a list of candidates containing {meaning!r}, '
                f'found {candidates!r}'
            )
        continue

    if meaning not in candidates:
        raise ValueError(
            f'Row {idx}: {meaning!r} is not among the dictionary candidates {candidates!r}'
        )

    # Position of the chosen dictionary entry; the lists in all columns of a
    # row are aligned because they come from the same dictionary rows.
    meaning_idx = candidates.index(meaning)

    # Replace with intended meaning
    for c in cols:
        story.loc[idx, c] = story.loc[idx, c][meaning_idx]

In [43]:
# Collapse every looked-up cell to a plain string: lists are concatenated,
# already-resolved scalars are kept as they are. Elements are passed through
# `str` so that a numerically parsed column (e.g. 'cognate' holding 1/0) does
# not make `''.join` raise TypeError; string data is left unchanged, and
# re-running the cell is harmless.
for c in cols:
    story[c] = story[c].apply(
        lambda x: ''.join(map(str, x)) if isinstance(x, (list, tuple)) else str(x)
    )

In [44]:
# Save the annotated word list next to the story's other files (index column omitted).
story.to_csv(f'stories/{storyname}/words_new.csv', index=False)

In [51]:
# Build the gloss markup: one <div class="tooltip"> per glossed word, holding
# the word itself plus a hidden <span class="tooltiptext"> with its Japanese
# gloss and any extra dictionary information. Words without a gloss are
# emitted as plain text between the tooltips.

# Build root
root = ET.Element('div')
root.set('style', 'font-weight:bold')

# Extra tooltip lines as explicit (story column, label) pairs. Zipping
# `cols[1:]` against the three labels paired 'cognate' with the notes label
# and silently dropped the 'notes' column.
tooltip_fields = [('root', '語根'), ('parse', '形態素'), ('notes', '注')]

# For each word in the story
for idx, row in story.iterrows():

    # If no data, no tooltip: emit the word as plain text after whatever was
    # written last. The text is appended so that an existing tail (the
    # word-break space, or a preceding unglossed word) is not overwritten.
    # Word/sentence-break flags of unglossed rows are not applied.
    if row['japanese'] == '':

        if len(root):
            last_node = root[-1]
            last_node.tail = (last_node.tail or '') + row['meera']
        else:
            # Nothing emitted yet: the text belongs directly inside the root
            root.text = (root.text or '') + row['meera']

        continue

    # Add tooltip div
    ttip = ET.Element('div')
    ttip.set('class', 'tooltip')

    # If it has a Japanese cognate (flag may arrive as 1, 1.0 or '1')
    if str(row['cognate']) in ('1', '1.0'):

        ttip.set('style', 'color:darkorange')

    # If it is a suffix; this deliberately overrides the cognate colour
    if row['japanese'][0] == '〜':

        ttip.set('style', 'color:green')

    # Visible text of the tooltip is the story word itself
    ttip.text = row['meera']

    # Append subelement tooltip text span
    ttiptext = ET.SubElement(ttip, 'span')
    ttiptext.set('class', 'tooltiptext')

    # Add span elements for each piece of ttiptext, starting with the gloss
    gloss = ET.SubElement(ttiptext, 'span')
    gloss.set('class', 'gloss')
    gloss.text = row['japanese']

    # Each extra line is preceded by a <br> placed at the end of the previous
    # span, which keeps the generated structure the same as before.
    last_span = gloss

    for info, tag in tooltip_fields:

        if row[info] != 'N/A':

            # Add <br>
            ET.SubElement(last_span, 'br')

            info_e = ET.SubElement(ttiptext, 'span')
            info_e.text = tag + '：' + row[info]
            last_span = info_e

    # Add space if word break == 1 and period + <br> if sentence break == 1
    # (the period wins when both flags are set)
    ends_sentence = row['sentence break'] == 1

    if row['word break'] == 1: ttip.tail = ' '
    if ends_sentence: ttip.tail = '。'

    root.append(ttip)

    if ends_sentence:

        root.append(ET.Element('br'))
        root.append(ET.Element('br'))

tree = ET.ElementTree(root)

xmlstr = minidom.parseString(ET.tostring(root)).toprettyxml(indent = '\t')

# Write explicitly as UTF-8: the text is Japanese and the platform default
# encoding is not guaranteed to be able to represent it.
with open('/'.join(['stories', storyname, 'gloss.xml']), 'w', encoding = 'utf-8') as f:
    f.write(xmlstr)