In [1]:
import json
import prettytable
from neo4j import GraphDatabase
from dotenv import load_dotenv
# from gen_query import handle_gen_query
import util
from get_entities import handle_get_entities
import constants

load_dotenv()




False

In [2]:
# Connect neo4j DB.
driver = GraphDatabase.driver('bolt://localhost:7687',
                              auth=('neo4j', 'password'))


In [146]:
def get_concepts(text):
    """
  It takes a string of text and returns a dictionary of concepts and their aliases

  :param text: The text to be analyzed
  :return: A dictionary of concepts and their aliases.
  """
    with open('data/intent_alias_data.json', encoding="utf8") as f:
        dictionary = json.load(f)

    out = {}
    for concept in dictionary:
        for alias in sorted(dictionary[concept], key=len, reverse=1):
            if alias in text:
                out[concept] = alias
                break
    return out

In [147]:
def recheck_returned_labels(matched_labels, returned_labels, attr):
    ##
    # Remove the label which has a condition from returned_label.
    #
    # Argument:
    #     matched_labels: The labels which were matched.
    #     returned_labels: The labels which were returned.
    #     attr: The attribute that you want to check for.
    # Return:
    #     returned_labels.
    # #

    for match_label in matched_labels:
        if attr in match_label:
            returned_labels.remove(match_label)

    return returned_labels

In [148]:
def condition_common(evidences, matched_labels, returned_labels, conditions):
    ##
    # Condition common: real estate, position, direction, floor....
    #
    # Return:
    #       returned_labels.
    #       conditions.
    # #

    attrs = [constants.LABEL_REAL_ESTATE_TYPE, constants.LABEL_REAL_ESTATE_SUB_TYPE, \
              constants.LABEL_POSITION, constants.LABEL_DIRECTION, \
              constants.LABEL_FRONT_LENGTH, constants.LABEL_ROAD_WIDTH, \
              constants.LABEL_FLOOR, constants.LABEL_BED_ROOM, constants.LABEL_LIVING_ROOM, constants.LABEL_BATH_ROOM, \
              constants.LABEL_SURROUNDING, constants.LABEL_PROJECT_NAME, \
              constants.LABEL_LEGAL, constants.LABEL_TRANSACTION]

    key2dbcol = {
        'tang': constants.LABEL_FLOOR,
        'ban cong': constants.LABEL_FLOOR_BAN_CONG,
        'gac': constants.LABEL_FLOOR_GAC,
        'ham': constants.LABEL_FLOOR_HAM,
        'lung': constants.LABEL_FLOOR_LUNG,
        'san thuong': constants.LABEL_FLOOR_SAN_THUONG,
        'tret': constants.LABEL_FLOOR_TRET
    }

    for attr in attrs:
        if attr in evidences:
            if attr == constants.LABEL_FLOOR:
                for val in evidences[attr]:
                    target_k = key2dbcol[val['type']]
                    target_v = val['value']

                    returned_labels = recheck_returned_labels(
                        matched_labels, returned_labels, target_k)

                    conditions.append(f"{target_k}.individual = '{target_v}'")
            else:
                returned_labels = recheck_returned_labels(
                    matched_labels, returned_labels, attr)

                if attr == constants.LABEL_REAL_ESTATE_TYPE:
                    conditions.append(
                        f"{attr}.individual IN {evidences[attr]}")
                else:
                    conditions.append(
                        f"{attr}.individual = '{evidences[attr][0]}'")

    return [returned_labels, conditions]


In [149]:
def condition_location(evidences, matched_labels, returned_labels, conditions):
    ##
    # Condition for city, district, ward, street.
    #
    # Return:
    #       returned_labels.
    #       conditions.
    # #

    loc_attrs = [
        constants.LABEL_DISTRICT, constants.LABEL_CITY, constants.LABEL_WARD,
        constants.LABEL_STREET
    ]

    for attr in loc_attrs:
        if attr in evidences:
            returned_labels = recheck_returned_labels(matched_labels,
                                                      returned_labels, attr)

            conditions.append(f"{attr}.individual = '{evidences[attr][0]}'")

    return [returned_labels, conditions]


In [150]:
def condition_price_n_area(is_price, evidences, matched_labels,
                           returned_labels, conditions):
    ##
    # Condition for price and area.
    #
    # Return:
    #       returned_labels.
    #       conditions.
    # #

    OFFSET_CONST = 0.1

    condition_item = is_price and constants.LABEL_PRICE or constants.LABEL_AREA

    if condition_item in evidences:
        for ele in evidences[condition_item][:1]:
            low, high = ele

            if high is None:
                high = low + low * OFFSET_CONST
                low = low - low * OFFSET_CONST

            returned_labels = recheck_returned_labels(matched_labels,
                                                      returned_labels,
                                                      condition_item)

            conditions.append(
                f"('{low}' <= {condition_item}.individual OR {condition_item}.individual <= '{high}')"
            )

    return [returned_labels, conditions]


In [151]:
def condition_usage(evidences, matched_labels, returned_labels, conditions):
    ##
    # Condition for usage.
    #
    # Return:
    #       returned_labels.
    #       conditions.
    # #

    if constants.LABEL_USAGE in evidences:
        returned_labels = recheck_returned_labels(matched_labels,
                                                  returned_labels,
                                                  constants.LABEL_USAGE)

        conditions.append("({})".format(" OR ".join([
            f"{constants.LABEL_USAGE}.individual LIKE '%, {x},%' + 'OR {constants.LABEL_USAGE}.individual LIKE '{x}, %' OR {constants.LABEL_USAGE}.individual LIKE '%, {x}'"
            for x in evidences[constants.LABEL_USAGE]
        ])))

    return [returned_labels, conditions]


In [152]:
def handle_gen_query(concepts_keys, evidences, match_relation):
    """
    It takes a list of concepts, a list of evidences, and a list of relations, and returns a query that
    matches the concepts, and filters the results based on the evidences

    :param concepts_keys: ['location', 'price', 'area', 'usage']
    :param evidences: {'location': ['Hà Nội'], 'price': ['1 tỷ'], 'area': ['100 m2']}
    :param match_relation:
    :return: The query returns the following:
    """
    out = """
    MATCH {}
    WHERE {}
    RETURN {}
    LIMIT 50
        """
    conditions = []
    matched_labels = []  # Format: (alias:Node_name)
    returned_labels = []  # Format: (alias.individual)

    # Define matched labels.
    if not concepts_keys:
        matched_labels = ['n']
    else:
        for concept_key in concepts_keys:
            matched_labels.append('(' + concept_key + ':' +
                                  concept_key.title() + ')')

    # Define returned labels.
    returned_labels = matched_labels.copy()

    # ----- Define condition labels. -----
    # Condition common.
    [returned_labels,
     conditions] = condition_common(evidences, matched_labels, returned_labels,
                                    conditions)

    # Condition for location.
    [returned_labels,
     conditions] = condition_location(evidences, matched_labels,
                                      returned_labels, conditions)

    # Condition for price.
    [returned_labels,
     conditions] = condition_price_n_area(True, evidences, matched_labels,
                                          returned_labels, conditions)

    # Condition for area.
    [returned_labels,
     conditions] = condition_price_n_area(False, evidences, matched_labels,
                                          returned_labels, conditions)

    # Condition for usage.
    [returned_labels,
     conditions] = condition_usage(evidences, matched_labels, returned_labels,
                                   conditions)

    # Adjust returned labels
    adjusted_return_labels = []

    for return_label in returned_labels:
        if (constants.LABEL_REAL_ESTATE_TYPE).title() in return_label:
            return_label = return_label.replace(
                (constants.LABEL_REAL_ESTATE_TYPE).title(),
                (constants.LABEL_HOUSE).title())
        elif (constants.LABEL_REAL_ESTATE_SUB_TYPE).title() in return_label:
            return_label = return_label.replace(
                (constants.LABEL_REAL_ESTATE_SUB_TYPE).title(),
                (constants.LABEL_HOUSE).title())

        return_label = f"collect(distinct {return_label.split('(')[1].split(':')[0]}.individual) AS {(return_label.split(':')[1].split(')')[0]).title()}"

        adjusted_return_labels.append(return_label)

    return out.format(
        ',\n'.join(match_relation),
        ' \nAND '.join([f"{x}" for x in conditions]),
        ',\n '.join(adjusted_return_labels),
    )


In [153]:
def gen_query_ontology(text):
    table = prettytable.PrettyTable(["Step", "Result"])
    table.max_width = 80
    table.add_row(["Input", text])
    print(text)

    concepts = get_concepts(text)
    print(concepts)

    match_relation = util.get_match_relation(concepts.keys())

    evidences = handle_get_entities(text)

    if any(constants.LABEL_HOUSE_NUMBER in e for e in evidences.keys()):
        evidences[constants.LABEL_REAL_ESTATE_TYPE] = evidences[
            constants.LABEL_HOUSE_NUMBER]
        del evidences[constants.LABEL_HOUSE_NUMBER]

    if any(constants.LABEL_REAL_ESTATE_SUB_TYPE in e
           for e in evidences.keys()):
        evidences[constants.LABEL_REAL_ESTATE_TYPE] = evidences[
            constants.LABEL_REAL_ESTATE_SUB_TYPE]
        del evidences[constants.LABEL_REAL_ESTATE_SUB_TYPE]
    print(evidences)

    table.add_rows([["Match alias", concepts], ["Find individuals",
                                                evidences]])

    targets = list(set(concepts.keys()).difference(set(evidences.keys())))
    print(targets)
    table.add_row(["Target concepts", targets])

    query = handle_gen_query(concepts.keys(), evidences, match_relation)
    print(query)

    table.add_row(["Query", query])
    print(table)
    return query

In [154]:
def run_query(query):
    with driver.session(database="htdb") as session:
        results = session.run(query)

        table_results = prettytable.PrettyTable(results.keys())
        table_results.max_width = 60
        for r in results:
            table_results.add_row(r.values())

        print(table_results)
        # return table_results

In [162]:
# question = 
# question = 
# question = 
# question = 
# question = 
# cqlNodeQuery = gen_query_ontology(question)
# run_query(cqlNodeQuery)

In [163]:
with open('data/question_dict.json', 'r', encoding="utf-8") as fp:
    question_dict = json.load(fp)
    fp.close()

REAL_ESTATE_TYPE = question_dict["real_estate_type"]
PRICE = question_dict["price"]
AREA = question_dict["area"]
DISTRICT = question_dict["district"]
BED_ROOM = question_dict["bed_room"]
FLOOR = question_dict["floor"]
LEGAL = question_dict["legal"]
POSITION = question_dict["position"]
LOCATION = question_dict["location"]
POTENTIAL = question_dict["potential"]

In [164]:
for question in POSITION:
    cqlNodeQuery = gen_query_ontology(question)
    run_query(cqlNodeQuery)
    print(
        "----------------##########################################----------------"
    )


biệt thự mặt tiền diện tích 567m2, thành phố Đà Lạt, giá bao nhiêu
{'real_estate_type': 'biệt thự', 'price': 'giá bao nhiêu', 'area': 'diện tích', 'city': 'thành phố', 'position': 'mặt tiền'}
{'area': [(567.0, None)], 'city': ['da lat'], 'real_estate_type': ['biet thu']}
['price', 'position']

    MATCH (real_estate_type:House)-->(price:Price),
(real_estate_type:House)-->(area:Area),
(real_estate_type:House)-->(city:City),
(real_estate_type:House)-->(position:Position)
    WHERE real_estate_type.individual IN ['biet thu'] 
AND city.individual = 'da lat' 
AND ('510.3' <= area.individual OR area.individual <= '623.7')
    RETURN collect(distinct price.individual) AS Price,
 collect(distinct position.individual) AS Position
    LIMIT 50
        
+------------------+----------------------------------------------------------------------------------+
|       Step       |                                      Result                                      |
+------------------+-------------------