# Convert KFN_lus.json and KFN_annotations into Web data

1) first delete "*.json" in /web/kolu folder

2) run all functions

3) then copy all files into your web server, especially kolu, frame, and lu folder

In [13]:
import json
import json
import glob
import xml.etree.ElementTree as ET
from lxml import etree

In [14]:
fndir = '../resource/'
web_lu_dir = '../resource/web/kolu/'
web_frame_index_dir = '../resource/web/frame/'
web_FN17_index_dir = '../resource/web/FN17_frame_index/'

In [15]:
def load_data():
    with open(fndir+'KFN_lus.json','r') as f:
        kfn_lus = json.load(f)
    with open(fndir+'KFN_annotations.json','r') as f:
        kfn_annos = json.load(f)
    return kfn_lus, kfn_annos
kfn_lus, kfn_annos = load_data()
print('lus:', len(kfn_lus))

lus: 5663


TODO: gen patterns

In [16]:
def gen_lu_files():
    n_of_lu_file = 0 
    for lu in kfn_lus:
        filename = str(lu['lu_id'])+'.json'
        d = {}
        d['ko_pos'] = lu['pos']
        d['ko_lu'] = lu['lu']
        d['frameID'] = lu['fid']
        d['frameName'] = lu['frameName']
        d['en_lus'] = lu['en_lu']
        d['lu_id'] = lu['lu_id']
        patterns = []
        koannotation_ids = lu['ko_annotation_id']
        annotation = []
        for aid in koannotation_ids:
            for anno in kfn_annos:
                text = anno['text']['ko_text']
                ko_annos = anno['frameAnnotation']['ko_annotations']
                for ko_anno in ko_annos:
                    if aid == ko_anno['ko_annotation_id']:
                        pat = {}
                        pat['valenceText'] = 'annotation id: '+str(aid)
                        exam = ko_anno
                        exam['text'] = text
                        examples = []
                        examples.append(exam)
                        pat['examples'] = examples
                        patterns.append(pat)

        d['patterns'] = patterns
        with open(web_lu_dir+filename,'w') as f:
            json.dump(d,f,indent=4,ensure_ascii=False)
            n_of_lu_file +=1
    print(n_of_lu_file, 'of lu files are generated')
gen_lu_files()

5663 of lu files are generated


In [17]:
def gen_lu_index():
    files = glob.glob('../resource/web/kolu/*.json')
    lu_files = []
    for i in files:
        if 'index.json' in i:
            pass
        else:
            lu_files.append(i)
    indices = []
    for i in lu_files:
        with open(i, 'r') as f:
            data = json.load(f)
        kolu = data['ko_lu']
        indices.append({'lu':kolu, 'pos':data['ko_pos'], 'id':data['lu_id']})
        
    indices = sorted(indices,key=lambda x: x['lu'])
    with open(web_lu_dir+'index.json', 'w') as f:
        json.dump(indices,f,indent=4,ensure_ascii=False)
        print('index.json is written')
    print(len(indices), 'is indexed')

gen_lu_index()

index.json is written
5663 is indexed


In [21]:
def generate_frame_kolu_pair():
    with open('../resource/FN17_frame_id.json','r') as f:
        pairs = json.load(f)
    for pair in pairs:
        pair['ko_lu'] = []
    for pair in pairs:
        for i in kfn_lus:
            is_publish = i['publish']['is_publish']
            if is_publish == True:
                lu = i['lu']
                fid = i['fid']
                if fid == pair['id']:
                    kolus = pair['ko_lu']
                    kolus.append(lu)
                    kolus = list(set(kolus))
                    pair['ko_lu'] = kolus
    with open('../resource/KFN_frame_lu_pair.json','w') as f:
        json.dump(pairs, f, ensure_ascii=False, indent=4)
        print('../resource/KFN_frame_lu_pair.json is written')
    n_of_pair = 0
    for pair in pairs:
        if pair['ko_lu']:
            n_of_pair += 1
        else:
            pass
    print(n_of_pair, 'frames are assigned to Korean LU')
generate_frame_kolu_pair()

../resource/KFN_frame_lu_pair.json is written
687 frames are assigned to Korean LU


In [10]:
def get_lu_id(lu):
    with open('../resource/KFN_lus.json','r') as f:
        d = json.load(f)
    for i in d:
        if lu == i['lu']:
            lu_id = i['lu_id']
            break
    return lu_id

def gen_frame_index_file():
    with open('../resource/KFN_frame_lu_pair.json','r') as f:
        pairs = json.load(f)
    cDate = '10/09/2018 09:00:00 KST Web'
    cBy = 'hahm'
    status = 'Created'
    
    n = 0
    for frame in pairs:
        filename = web_FN17_index_dir+frame['frame']+'.xml'
        tree = etree.parse(filename)
        note = tree.getroot()
        if len(frame['ko_lu']) > 0:
            n += 1
        for j in frame['ko_lu']:
            lu_id = get_lu_id(j)
            lu_id = 'ko.'+str(lu_id)
            lu = j

            lexUnit = etree.Element('lexUnit')
            lexUnit.attrib['name'] = lu
            lexUnit.attrib['ID'] = lu_id
            lexUnit.attrib['cDate'] = cDate
            lexUnit.attrib['cBy'] = cBy
            lexUnit.attrib['status'] = status
            note.append(lexUnit)
        
        target_file = web_frame_index_dir+frame['frame']+'.xml'
        tree.write(target_file, xml_declaration=True,encoding='utf-8',method='xml', pretty_print=True)
    print(n, 'frame index files are revised to add Korean LU')   
gen_frame_index_file()

687 frame index files are revised to add Korean LU


In [12]:
def gen_lu_xml():
    n = 1
    for lu in kfn_lus:
        name = lu['lu']
        frame = lu['frameName']
        lu_id = lu['lu_id']

        filename = '../resource/web/lu/luko.'+str(lu_id)+'.xml'
        i = "http://www.w3.org/2001/XMLSchema-instance"
        root = etree.Element("lexUnit", nsmap={'xsi':i})
        root.attrib['name'] = name
        root.attrib['ID'] = str(lu_id)
        root.attrib['frame'] = frame
        root.attrib[etree.QName(i,'schemaLocation')] = "../schema/lexUnit.xsd"
        root.attrib['xmlns'] = "http://framenet.icsi.berkeley.edu"
        root.addprevious(lxml.etree.PI('xml-stylesheet','type="text/xsl" href="lexUnit.xsl"'))
        doc = etree.ElementTree(root)            
        
        doc.write(filename,pretty_print=True,xml_declaration=True,encoding='utf-8', method='xml')
        n += 1        
    print(n, 'LU XML files are saved to /web/lu/')
gen_lu_xml()

5664 LU XML files are saved to /web/lu/
