In [1]:
import sys
sys.path.append('../')
from setting import config_read

In [2]:
import itertools

from elasticsearch import Elasticsearch
import pandas as pd
from path import *
from graph import ConceptualGraphGenerator
from mapping import ResourceMapper





In [3]:
# Read the configuration (config_read is pointed at the parent directory).
config = config_read('../')

# Elasticsearch: server address and index name come from the config;
# `es` and `index_name` are used by get_tbox().
server_ip, index_name = config['elasticsearch']['ip'], config['elasticsearch']['name']
es = Elasticsearch(server_ip)

# Unit paths are loaded from CSV and turned into the graph G that the
# shortest-path search runs on.
unit_path = pd.read_csv('../unit_path.csv')
G = generate_graph(unit_path)

# Resource mapper first, then the conceptual-graph generator, both built from the config.
rm, cgg = ResourceMapper(config), ConceptualGraphGenerator(config)

In [19]:
# Sample question (Korean): "What is the agency of the music group that sang 'Bang Bang Bang'?"
user_input = '뱅뱅뱅 부른 음악그룹 소속사는?'
# Alternative sample question: "What is the genre of the songs composed by Brave Brothers?"
# user_input = '용감한 형제가 작곡한 노래의 장르는?'
# Map the question to resource combinations, then build the conceptual graphs from them.
# `conceptual_graph` is iterated below as a collection of graphs, each a collection of
# (domain, predicate, range) arcs.
resource_combinations = rm.process(user_input)
conceptual_graph = cgg.generate_conceptual_graph(resource_combinations)

In [20]:
def get_tbox(resource):
    """Return the Tbox entry stored in Elasticsearch for a resource URI.

    Runs an exact ``term`` query on the ``URI.keyword`` field of the index
    ``index_name`` through the notebook-level client ``es``.

    Args:
        resource: URI of the resource to look up.

    Returns:
        The ``Tbox`` field of the first hit's ``_source``. Callers iterate over
        the result, so it is presumably a list of class URIs (TODO confirm
        against the index contents). An empty list is returned when no document
        matches, so callers simply get no candidates instead of an IndexError.
    """
    search_query = {"query": {"term": {"URI.keyword": resource}}}
    result = es.search(index=index_name, body=search_query)
    hits = result['hits']['hits']
    if not hits:
        # Unknown resource: nothing to restrict the search space to.
        return []
    return hits[0]['_source']['Tbox']

In [26]:
# ca2sp maps each conceptual arc to the list of (score, path) candidates found for it.
ca2sp = {}

for cg in conceptual_graph:
    for ca in cg:
        # The same arc can occur in several conceptual graphs; resolve it once.
        if ca in ca2sp:
            continue

        found = []
        ca2sp[ca] = found
        domain, predicate, range_ = ca

        # Restrict the search space to the Tbox level: an endpoint that is not
        # a node of G is replaced by whatever get_tbox() returns for it.
        sources = [domain] if G.has_node(domain) else get_tbox(domain)
        targets = [range_] if G.has_node(range_) else get_tbox(range_)

        # Search a shortest path for every (source, predicate, target) combination.
        for src, edge, dst in itertools.product(sources, [predicate], targets):
            # 'Any P' is handed to find_shortest_path as None.
            if edge == 'Any P':
                edge = None

            print(f'conceptual arc : {ca}')
            print(f'Tbox : {src, edge, dst}')
            print('-'*80)

            forward = find_shortest_path(G, src, dst, edge, weight=True)
            backward = find_shortest_path(G, dst, src, edge, weight=True)

            # Keep the direction with the lower score (forward wins ties) and
            # order the original arc endpoints the same way.
            forward_wins = forward[0] <= backward[0]
            score, path = forward if forward_wins else backward
            endpoints = (ca[0], ca[-1]) if forward_wins else (ca[-1], ca[0])

            if len(path) == 0:
                continue

            # Path start differs from the arc endpoint (the endpoint is an
            # instance): label the node as 'Tbox(instance)'.
            head = path[0]
            if head[0] != endpoints[0]:
                path[0] = (head[0] + '(' + endpoints[0] + ')', head[1], head[2])

            # Same labelling for the end of the path.
            tail = path[-1]
            if tail[-1] != endpoints[1]:
                path[-1] = (tail[0], tail[1], tail[2] + '(' + endpoints[1] + ')')

            found.append((score, path))

conceptual arc : ('skmo:뱅뱅뱅', 'skmo:isSungBy', 'mo:MusicGroup')
Tbox : ('mo:Track', 'skmo:isSungBy', 'mo:MusicGroup')
--------------------------------------------------------------------------------
conceptual arc : ('mo:MusicGroup', 'schema:affiliation', 'owl:Thing')
Tbox : ('mo:MusicGroup', 'schema:affiliation', 'owl:Thing')
--------------------------------------------------------------------------------
conceptual arc : ('skmo:뱅뱅뱅_inst', 'skmo:isSungBy', 'mo:MusicGroup')
Tbox : ('skmo:Instrumental', 'skmo:isSungBy', 'mo:MusicGroup')
--------------------------------------------------------------------------------
conceptual arc : ('skmo:뱅뱅뱅_황치열', 'skmo:isSungBy', 'mo:MusicGroup')
Tbox : ('mo:Track', 'skmo:isSungBy', 'mo:MusicGroup')
--------------------------------------------------------------------------------


In [27]:
# Report the shortest path(s) found for every conceptual arc.
for arc, candidates in ca2sp.items():
    if candidates:
        print('-'*100)
        print(f'{arc}의 shortest path : ')
        # One blank line, then one triple per line, for each candidate path.
        for _score, path in candidates:
            print()
            for triple in path:
                print(triple)
        continue

    # No candidate at all for this arc.
    print(f'{arc}의 shortest path : ')
    print(': 일치하는 shortest path가 없습니다.\n')
print('-'*100)

----------------------------------------------------------------------------------------------------
('skmo:뱅뱅뱅', 'skmo:isSungBy', 'mo:MusicGroup')의 shortest path : 

('mo:Track(skmo:뱅뱅뱅)', 'skmo:isSungBy', 'mo:MusicGroup')
----------------------------------------------------------------------------------------------------
('mo:MusicGroup', 'schema:affiliation', 'owl:Thing')의 shortest path : 

('mo:MusicGroup', 'schema:affiliation', 'foaf:Organization')
('foaf:Organization', 'rdfs:subClassOf', 'owl:Thing')
----------------------------------------------------------------------------------------------------
('skmo:뱅뱅뱅_inst', 'skmo:isSungBy', 'mo:MusicGroup')의 shortest path : 

('skmo:Instrumental(skmo:뱅뱅뱅_inst)', 'skmo:isSungBy', 'mo:MusicGroup')
----------------------------------------------------------------------------------------------------
('skmo:뱅뱅뱅_황치열', 'skmo:isSungBy', 'mo:MusicGroup')의 shortest path : 

('mo:Track(skmo:뱅뱅뱅_황치열)', 'skmo:isSungBy', 'mo:MusicGroup')
-------------

In [23]:
# Build the query-graph candidates: for each conceptual graph, every
# combination that picks one (score, path) candidate per arc becomes one
# query graph whose score is the sum of the arc scores and whose triples are
# the concatenation of the chosen paths.
query_graph = []
for cg in conceptual_graph:
    per_arc_candidates = [ca2sp[ca] for ca in cg]
    for combo in itertools.product(*per_arc_candidates):
        total_score = sum(arc_score for arc_score, _path in combo)
        triples = list(itertools.chain.from_iterable(path for _arc_score, path in combo))
        query_graph.append((total_score, triples))

# List every candidate, one per line, followed by the total count.
for _total, candidate in query_graph:
    print(candidate)

print(f'\n: 총 {len(query_graph)}개의 query graph candidates generated')

[('mo:Track(skmo:뱅뱅뱅)', 'skmo:isSungBy', 'mo:MusicGroup'), ('mo:MusicGroup', 'schema:affiliation', 'foaf:Organization'), ('foaf:Organization', 'rdfs:subClassOf', 'owl:Thing')]
[('skmo:Instrumental(skmo:뱅뱅뱅_inst)', 'skmo:isSungBy', 'mo:MusicGroup'), ('mo:MusicGroup', 'schema:affiliation', 'foaf:Organization'), ('foaf:Organization', 'rdfs:subClassOf', 'owl:Thing')]
[('mo:Track(skmo:뱅뱅뱅_황치열)', 'skmo:isSungBy', 'mo:MusicGroup'), ('mo:MusicGroup', 'schema:affiliation', 'foaf:Organization'), ('foaf:Organization', 'rdfs:subClassOf', 'owl:Thing')]

: 총 3개의 query graph candidates generated


In [24]:
# Show the triples of every candidate query graph, one candidate per line,
# then the number of candidates.
for _candidate_score, candidate in query_graph:
    print(candidate)

print(f'\n: 총 {len(query_graph)}개의 query graph candidates generated')

[('mo:Track(skmo:뱅뱅뱅)', 'skmo:isSungBy', 'mo:MusicGroup'), ('mo:MusicGroup', 'schema:affiliation', 'foaf:Organization'), ('foaf:Organization', 'rdfs:subClassOf', 'owl:Thing')]
[('skmo:Instrumental(skmo:뱅뱅뱅_inst)', 'skmo:isSungBy', 'mo:MusicGroup'), ('mo:MusicGroup', 'schema:affiliation', 'foaf:Organization'), ('foaf:Organization', 'rdfs:subClassOf', 'owl:Thing')]
[('mo:Track(skmo:뱅뱅뱅_황치열)', 'skmo:isSungBy', 'mo:MusicGroup'), ('mo:MusicGroup', 'schema:affiliation', 'foaf:Organization'), ('foaf:Organization', 'rdfs:subClassOf', 'owl:Thing')]

: 총 3개의 query graph candidates generated


In [25]:
# Pick the best candidate: lowest total score, ties broken by the fewest triples.
# min() returns the same element as sorted(...)[0] (the first minimal one) without
# sorting the whole list. `default` yields an empty query graph instead of raising
# when no candidate was generated (e.g. an arc had no shortest path).
final_query_graph = min(
    query_graph,
    key=lambda x: (x[0], len(x[1])),
    default=(None, []),
)[1]
final_query_graph

[('mo:Track(skmo:뱅뱅뱅)', 'skmo:isSungBy', 'mo:MusicGroup'),
 ('mo:MusicGroup', 'schema:affiliation', 'foaf:Organization'),
 ('foaf:Organization', 'rdfs:subClassOf', 'owl:Thing')]