In [1]:
import sys
sys.path.append('../')
from setting import config_read

In [2]:
from owlready2 import *
import pandas as pd
from elasticsearch import Elasticsearch
from konlpy.tag import Okt





In [3]:
# Read config file
# The '../' argument matches the directory appended to sys.path in the first cell.
config = config_read('../')

# Load owl file
# `data_path` is kept as a notebook global because change_prefix() also reads
# it to recognise entities that belong to this ontology.
data_path = config['owl']['path']
onto = get_ontology(data_path).load()

# Okt tagger instance (konlpy.tag.Okt), created once and shared by tokenize_query().
okt = Okt()

In [4]:
# Elasticsearch
# Both settings come from the 'elasticsearch' section of the config:
# `server_ip` is handed to the client constructor, and `index_name` is the
# index that the later cells delete and write documents to.
server_ip = config['elasticsearch']['ip']
index_name = config['elasticsearch']['name']
es = Elasticsearch(server_ip)

In [5]:
def change_prefix(s):
    """Return a ``prefix:name`` style identifier for an ontology entity.

    The argument is converted with ``str()``. If that text contains the
    ontology path minus its last three characters (``data_path[:-3]``), every
    occurrence of that base is replaced by the literal ``'skmo:'``. Otherwise
    the text is split at its last ``'.'`` and the two pieces are joined with
    ``':'``.

    Args:
        s: Any object; only its ``str()`` form is used.

    Returns:
        str: The rewritten identifier.
    """
    text = str(s)
    # presumably drops a trailing "owl" extension so the base ends in '.' -- TODO confirm
    local_base = data_path[:-3]

    if local_base in text:
        return text.replace(local_base, 'skmo:')

    # When the text has no '.', both ends are the whole string ("text:text").
    pieces = text.rsplit('.', 1)
    return pieces[0] + ':' + pieces[-1]

In [6]:
def tokenize_query(input):
    """Tokenize a string with the shared Okt tagger and re-join the kept tokens.

    The text is tagged with ``okt.pos(..., norm=True)``; tokens whose tag is
    ``"Josa"`` or ``"Punctuation"`` are dropped and the remaining surface
    forms are joined with single spaces.

    Args:
        input: Text to tokenize.

    Returns:
        str: Space-separated tokens that survived the tag filter.
    """
    skipped_tags = ("Josa", "Punctuation")
    kept_terms = []
    for tagged in okt.pos(input, norm=True):
        # Each item is indexed as (surface form, tag).
        if tagged[1] in skipped_tags:
            continue
        kept_terms.append(tagged[0])
    return ' '.join(kept_terms)

In [7]:
def tokenize(label_list):
    """Run ``tokenize_query`` over every label and return the results as a list.

    Args:
        label_list: Iterable of label strings.

    Returns:
        list: One tokenized string per input label, in the same order.
    """
    return list(map(tokenize_query, label_list))

In [8]:
# Collect one (URI, type tag, tokenized labels) tuple per T-box entity, in the
# order: classes ("T_c"), object properties ("T_op"), data properties ("T_dp").
# Each getter is only called when its turn comes, so entities are enumerated
# sequentially, group by group.
index_list = [
    (change_prefix(entity), type_tag, tokenize(entity.label))
    for type_tag, list_entities in (
        ("T_c", onto.classes),
        ("T_op", onto.object_properties),
        ("T_dp", onto.data_properties),
    )
    for entity in list_entities()
]

In [9]:
# Collect one (URI, "T_i", tokenized labels, parent URIs) tuple per individual.
# Individuals whose first `is_a` entry is Thing itself are left out.
instance_list = []
for individual in onto.individuals():
    first_parent = individual.is_a[0]
    if first_parent == Thing:
        continue
    parent_uris = [change_prefix(parent) for parent in individual.is_a]
    instance_list.append(
        (change_prefix(individual), "T_i", tokenize(individual.label), parent_uris)
    )

In [10]:
# Start from a clean index: if the target index already exists, drop it.
# Per the elasticsearch-py API, `ignore` lists HTTP status codes that should
# not raise, so the message below is printed even if the delete call itself
# answered 400/404.
# NOTE(review): newer elasticsearch-py releases favour
# `es.options(ignore_status=...)` over the per-call `ignore` kwarg -- confirm
# against the installed client version.
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name,ignore=[400, 404])
    print('Index has been deleted successfully')

In [11]:
# Index a document for owl:Thing. It carries no 'Annotation Values' field,
# unlike the documents written by the following cells.
# Disabled variant kept for reference: it attached Korean interrogatives
# ("when", "who", "what", and a colloquial "what") as annotation values.
# thing_list = ['언제', '누구', '무엇', '뭐']
# doc = {'URI':'owl:Thing', 'Type':'T_c', 'Annotation Values':thing_list}
doc = {'URI':'owl:Thing', 'Type':'T_c'}
# Last expression of the cell, so the API response is shown as the cell output.
es.index(index=index_name, body=doc)

{'_index': 'skmo',
 '_id': 'rr46k4sBJyIE9Xr096nt',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 0,
 '_primary_term': 1}

In [12]:
# Write one document per T-box entry; each row is (URI, type tag, labels).
for row in index_list:
    doc = dict(zip(('URI', 'Type', 'Annotation Values'), row))
    es.index(index=index_name, body=doc)

In [13]:
# Write one document per individual; each row is
# (URI, type tag, labels, parent-class URIs).
for row in instance_list:
    doc = dict(zip(('URI', 'Type', 'Annotation Values', 'Tbox'), row))
    es.index(index=index_name, body=doc)