In [14]:
# Phrase vocabularies, one list per entity type.
DISTRICT_ENTITIES = [f"quận {number}" for number in range(1, 11)]
WARD_ENTITIES = [f"phường {number}" for number in range(1, 11)]
STREET_ENTITIES = [
    "Lê Lai",
    "Lê Lợi",
    "Lê Duẩn",
]
HOUSE_ENTITIES = [f"cấp {grade}" for grade in range(1, 5)]
HOUSE_QUERY_ENTITIES = ["nhà"]


In [2]:
class BaseValue:
    """Typed slot that stores one value and only accepts instances of ``self.type``."""

    def __init__(self):
        self.type = str
        self._value = None

    def __repr__(self):
        return "{}: {}".format(self._value, self.type.__name__)

    @classmethod
    def class_name(cls):
        """Return the name of the class this is called on."""
        return cls.__name__

    @property
    def is_filled(self):
        """True once a non-None value has been stored."""
        return self._value is not None

    @property
    def value(self):
        """The stored value, or None when nothing has been assigned yet."""
        return self._value

    @value.setter
    def value(self, _value):
        # Assigning None is a no-op, so a previously stored value is kept.
        if _value is None:
            return
        if not isinstance(_value, self.type):
            raise ValueError(
                f"Value of {self.__class__.__name__} must be instance of {self.type}")
        self._value = _value


In [3]:
class BaseString(BaseValue):
    """BaseValue slot whose accepted type is ``str``."""

    def __init__(self):
        super().__init__()
        self.type = str


In [4]:
class BaseEntity:
    """Base class for entities; ``attributes`` lists their public member names."""

    def __repr__(self):
        summary = {"class": self.__class__.__name__}
        summary.update(
            (attr_name, self.get_attribute_type(attr_name))
            for attr_name in self.attributes
        )
        return str(summary)

    @classmethod
    def class_name(cls):
        """Return the name of the class this is called on."""
        return cls.__name__

    @property
    def attributes(self):
        """Names from ``__dir__()`` that are public and not part of this base API."""
        reserved = ("attributes", "get_attribute_type", "class_name",
                    "has_attribute", "add_attribute")
        names = []
        for candidate in self.__dir__():
            # Skip dunder-style and underscore-prefixed names.
            if "__" in candidate or candidate[0] == "_":
                continue
            if candidate in reserved:
                continue
            names.append(candidate)
        return names

    def get_attribute_type(self, attr_name):
        """Return the object stored under ``attr_name``, or None if it is not listed."""
        if attr_name not in self.attributes:
            return None
        return self.__getattribute__(attr_name)

    def add_attribute(self, item):
        """Store ``item`` in the first slot whose type name equals the item's class name.

        Returns True when a slot accepted the item, False when none matched.
        """
        wanted = None
        for slot_name in self.attributes:
            slot = self.get_attribute_type(slot_name)
            wanted = item.class_name()
            if slot.type.__name__ == wanted:
                slot.value = item
                return True
        return False

    # Alternative (disabled): look the slot up directly by the item's class name.
    # def add_attribute(self, item):
    #     attr_value = self.get_attribute_type(item.class_name())
    #     attr_value.value = item


In [5]:
class District(BaseEntity):
    """District entity exposing a string-typed ``name`` slot."""

    def __init__(self, name=None):
        super().__init__()
        self._name = BaseString()
        # Route the initial value through the property setter below.
        self.name = name

    @property
    def name(self):
        """The BaseString slot holding the district name (not the raw string)."""
        return self._name

    @name.setter
    def name(self, _name):
        # Writes go into the existing slot, so the slot's own checks apply.
        self._name.value = _name


In [6]:
class DistrictType(BaseValue):
    """BaseValue slot whose accepted type is ``District``."""

    def __init__(self):
        super().__init__()
        self.type = District


In [7]:
class Ward(BaseEntity):
    """Ward entity with a string ``name`` slot and a District-typed parent slot.

    Args:
        name: Ward name; ``None`` leaves the name slot unfilled.
        belongDistrict: The ``District`` this ward belongs to; ``None`` leaves
            the slot unfilled. A value of any other type raises ``ValueError``
            from the slot's type check.
    """

    def __init__(self, name=None, belongDistrict=None):
        super(Ward, self).__init__()
        self._name = BaseString()
        self._belongDistrict = DistrictType()
        self._name.value = name
        # The slot's payload lives in ``.value``; writing there fills the slot
        # and runs its type check.
        self._belongDistrict.value = belongDistrict

    @property
    def name(self):
        """The BaseString slot holding the ward name."""
        return self._name

    @name.setter
    def name(self, _name):
        self._name.value = _name

    @property
    def belongDistrict(self):
        """The DistrictType slot holding the parent district."""
        return self._belongDistrict

    @belongDistrict.setter
    def belongDistrict(self, _belongDistrict):
        if isinstance(_belongDistrict, DistrictType):
            # A ready-made slot replaces the current one.
            self._belongDistrict = _belongDistrict
        else:
            # Anything else is stored inside the existing slot, mirroring the
            # ``name`` setter, so the attribute always remains a typed slot.
            self._belongDistrict.value = _belongDistrict


In [8]:
class WardType(BaseValue):
    """BaseValue slot whose accepted type is ``Ward``."""

    def __init__(self):
        super().__init__()
        self.type = Ward


In [9]:
class Street(BaseEntity):
    """Street entity with a string ``name`` slot and a Ward-typed parent slot.

    Args:
        name: Street name; ``None`` leaves the name slot unfilled.
        belongWard: The ``Ward`` this street belongs to; ``None`` leaves the
            slot unfilled. A value of any other type raises ``ValueError``
            from the slot's type check.
    """

    def __init__(self, name=None, belongWard=None):
        super(Street, self).__init__()
        self._name = BaseString()
        self._belongWard = WardType()
        self._name.value = name
        # The slot's payload lives in ``.value``; writing there fills the slot
        # and runs its type check.
        self._belongWard.value = belongWard

    @property
    def name(self):
        """The BaseString slot holding the street name."""
        return self._name

    @name.setter
    def name(self, _name):
        self._name.value = _name

    @property
    def belongWard(self):
        """The WardType slot holding the parent ward."""
        return self._belongWard

    @belongWard.setter
    def belongWard(self, _belongWard):
        if isinstance(_belongWard, WardType):
            # A ready-made slot replaces the current one.
            self._belongWard = _belongWard
        else:
            # Anything else is stored inside the existing slot, mirroring the
            # ``name`` setter, so the attribute always remains a typed slot.
            self._belongWard.value = _belongWard


In [10]:
class House(BaseEntity):
    """House entity exposing a string-typed ``name`` slot.

    Derives from BaseEntity like District, Ward and Street, so it shares their
    ``__repr__``, ``class_name`` and ``add_attribute`` behaviour.

    Args:
        name: House type name; ``None`` leaves the name slot unfilled.
    """

    def __init__(self, name=None):
        super(House, self).__init__()
        self._name = BaseString()
        self._name.value = name

    @property
    def name(self):
        """The BaseString slot holding the house name."""
        return self._name

    @name.setter
    def name(self, _name):
        self._name.value = _name


In [11]:
class NER:
    """Phrase-lookup entity extractor over the module-level entity lists."""

    @staticmethod
    def extract(input_text):
        """Find known district, ward, street and house phrases in ``input_text``.

        Args:
            input_text: Sentence to scan (a ``str``).

        Returns:
            A ``(label, entities)`` tuple. ``entities`` holds one entity object
            per phrase found, grouped in the order District, Ward, Street,
            House. ``label`` is the name of the last of those types for which
            no phrase at all was found, or ``""`` when every type was found.
        """
        # Imported here so the method does not depend on an import cell
        # having been run first.
        import re

        def mentions(phrase):
            # Whole-phrase match: the phrase must not be glued to another word
            # character, so "quận 1" is not reported inside "quận 10".
            pattern = r"(?<!\w)" + re.escape(phrase) + r"(?!\w)"
            return re.search(pattern, input_text) is not None

        vocabularies = (
            ("District", DISTRICT_ENTITIES, District),
            ("Ward", WARD_ENTITIES, Ward),
            ("Street", STREET_ENTITIES, Street),
            ("House", HOUSE_ENTITIES, House),
        )

        label = ""
        output = []
        for type_label, phrases, entity_cls in vocabularies:
            found = [entity_cls(name=phrase) for phrase in phrases if mentions(phrase)]
            if found:
                output.extend(found)
            else:
                # No phrase of this type occurs in the text; a later type with
                # no match overrides an earlier one.
                label = type_label

        return label, output


In [15]:
# Sample sentence; later cells also read ``text``.
text = "Mua nhà ở phường 1 quận 1"
# NER.extract returns a (label, entities) pair; unpack it into the two names.
[matchLabel, whereLabel] = NER.extract(text)
print(matchLabel)
print(whereLabel)


House
[{'class': 'District', 'name': quận 1: str}, {'class': 'Ward', 'name': phường 1: str, 'belongDistrict': None: District}]


In [14]:
# class RelationExtraction:
#     @staticmethod
#     def mapping(input_entities: [BaseEntity]):
#         for src_entity in input_entities:
#             for dst_entity in input_entities:
#                 success = src_entity.add_attribute(dst_entity)
#                 print(f"ORIGINAL ENTITY: {src_entity}")
#                 if success:
#                     print(f"MAPPED ENTITY: {src_entity}")
#                 print("++++++++++++++++++++++++++++++++++++++++")


In [15]:
# RelationExtraction.mapping(entities)


In [113]:
import json
import os

import pandas as pd
from neo4j import GraphDatabase

In [16]:
# Connection settings are read from the environment when set; the fallbacks
# are the values this notebook was originally written with.
driver = GraphDatabase.driver(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "password"),
    ),
)

In [75]:
def run_query(query, parameters=None, database="htdb"):
    """Run a Cypher query and return all of its records as a DataFrame.

    Args:
        query: Cypher statement to execute.
        parameters: Optional dict of query parameters handed to the driver, so
            values do not have to be concatenated into the query text.
        database: Name of the database the session is opened on.

    Returns:
        pandas.DataFrame with one row per record and the result keys as
        column names.
    """
    with driver.session(database=database) as session:
        result = session.run(query, parameters)
        # Read every record while the session is still open.
        rows = [record.values() for record in result]
        return pd.DataFrame(rows, columns=result.keys())


In [22]:
# cqlNodeQuery = "match (n:" + matchLabel + "), (n1:Ward), (n2:District) where n1.name = '" + \
#     whereLabel[1].name.value + "' and n2.name = '" + \
#     whereLabel[0].name.value+"' return n"

# run_query(cqlNodeQuery)

In [None]:
# cqlNodeQuery = "match (n:House) where n.ward_id = '1' and n.district_id = '1' return n"
# cqlNodeQuery = "match (n:House), (n1:Ward), (n2:District) where n1.name = 'phường 1' and n2.name = 'quận 1' return n"

In [99]:
# get_labels = "MATCH (n) RETURN distinct labels(n)"
# One row per node: its labels, its property keys, and the matching property values.
get_labels_query = (
    "match(n) return labels(n) as labels, keys(n) as properties, "
    "reduce(accumulator = [], key IN keys(n) | accumulator + n[key]) as property_values"
)
result_data = run_query(get_labels_query)
print(result_data)
# print("-----")
# print(result_data.labels)
# print("-----")
# print(result_data.properties)
# print("-----")
# print(result_data.property_values)
# print("-----")
# print(result_data.values)

              labels        properties  property_values
0     [House_number]  [individual, ID]      [319, 2831]
1     [House_number]  [individual, ID]      [300, 2852]
2     [House_number]  [individual, ID]      [155, 2857]
3     [House_number]  [individual, ID]  [g1 901a, 2876]
4     [House_number]  [individual, ID]       [63, 2884]
...              ...               ...              ...
5149  [House_number]  [individual, ID]      [359, 2781]
5150  [House_number]  [individual, ID]     [835c, 2784]
5151  [House_number]  [individual, ID]      [152, 2795]
5152  [House_number]  [individual, ID]   [752/28, 2801]
5153  [House_number]  [individual, ID]      [608, 2810]

[5154 rows x 3 columns]


In [102]:
# List the column names of the query result, one per line.
for key_name in result_data.columns:
    print(key_name)

labels
properties
property_values


In [104]:
# Second column name of the result frame.
print(result_data.columns[1])

properties


In [109]:
# for result_data_values in result_data:
#   print(result_data_values['properties'])

In [111]:
# Load the house table and show its "alias" column.
HOUSE_DATA = pd.read_csv("data/house_data.csv")
print(HOUSE_DATA.loc[:, "alias"])

0             shophouse
1              chung cu
2                  lien
3             officetel
4         toa vinaconex
             ...       
154                ntcb
155    penthouse duplex
156    lau dai biet phu
157            penhouse
158           villas dt
Name: alias, Length: 159, dtype: object


In [147]:
# Load the alias mapping; the ``with`` block closes the file on exit, so no
# explicit close() call is needed.
with open('data/intent_alias_data.json', 'r', encoding="utf-8") as fp:
    alias_file = json.load(fp)

In [164]:
# Show the full mapping loaded from the alias JSON file.
print(alias_file)

{'Price': ['giá', 'giá mấy tiền', 'giá bao nhiêu', 'giá bao nhiêu tiền', 'giá bn tiền', 'giá khoảng bao nhiêu', 'giá khoảng bao nhiêu tiền', 'giá khoảng bn', 'giá khoảng bn tiền', 'giá thế nào', 'giá như thế nào', 'giá ra sao', 'tầm giá mấy', 'tầm giá mấy tiền', 'tầm giá bao nhiêu', 'tầm giá bao nhiêu tiền', 'tầm giá bn tiền', 'tầm giá bn', 'tầm giá khoảng bao nhiêu', 'tầm giá khoảng bao nhiêu tiền', 'tầm giá khoảng bn', 'tầm giá khoảng bn tiền', 'tầm giá thế nào', 'tầm giá tn', 'tầm giá như thế nào', 'tầm giá ntn', 'tầm giá sao', 'tầm giá ra sao', 'mức giá mấy', 'mức giá mấy tiền', 'mức giá bao nhiêu', 'mức giá bao nhiêu tiền', 'mức giá bn tiền', 'mức giá bn', 'mức giá khoảng bao nhiêu', 'mức giá khoảng bao nhiêu tiền', 'mức giá khoảng bn', 'mức giá khoảng bn tiền', 'mức giá thế nào', 'mức giá tn', 'mức giá như thế nào', 'mức giá ntn', 'mức giá ra sao', 'khoảng giá mấy', 'khoảng giá mấy tiền', 'khoảng giá bao nhiêu', 'khoảng giá bao nhiêu tiền', 'khoảng giá bn tiền', 'khoảng giá bn', 

In [100]:
# Distinct label sets across all nodes; each result row holds one list of labels.
get_list_labels_query = "MATCH (n) RETURN distinct labels(n)"

list_labels = run_query(get_list_labels_query)
print(list_labels)

        labels(n)
0  [House_number]
1         [House]
2          [City]
3      [District]
4          [Ward]
5        [Street]


In [None]:
"------------------------------"

In [183]:
# Group nodes by label set and collect the ``alias`` property of each group.
get_alias_list_query = "match (n) return labels(n) as label, collect(n.alias) as alias"

alias_list_data = run_query(get_alias_list_query)

# Print the first label of every group, flagging the House group with "y".
for label_item in alias_list_data["label"]:
    print(label_item[0])
    if label_item[0] == "House":
        print("y")

# Each ``label`` cell is a list of label names, so the House rows are selected
# by comparing against the list ["House"]; comparing the column with the
# string "[House]" never matches a list.
a = alias_list_data.loc[
    alias_list_data["label"].map(lambda labels: list(labels) == ["House"]).astype(bool)
]

House_number
House
y
City
District
Ward
Street


In [156]:
# Group nodes by label set and collect the ``individual`` property of each group.
get_individual_list_query = "match (n) return labels(n) as label, collect(n.individual) as individual"

individual_list_data = run_query(get_individual_list_query)

print(individual_list_data)

            label                                         individual
0  [House_number]  [319, 300, 155, g1 901a, 63, 297, 268, 217, 13...
1         [House]  [shophouse, chung cu, lien, officetel, toa vin...
2          [City]  [bien hoa, hcm, vinh phuc, binh duong, hn, da ...
3      [District]  [ha dong, 3, 10, phu nhuan, long thanh, binh t...
4          [Ward]  [6, 8, loc an, binh hung hoa a, tan dinh, hiep...
5        [Street]  [bien hoa vung tau, ngo thi nham, tran quoc to...


In [187]:
# print(list_labels[str(list_labels.keys()[0])])
label = ""
output = []

# For every label in the graph, gather the aliases collected for that label
# and print the label once for each alias that occurs in ``text``.
for label_item in list_labels.iloc[:, 0]:
    label_name = label_item[0]

    # Build the alias list in this cell from the alias_list_data rows carrying
    # this label, so the cell does not rely on a leftover kernel variable.
    alias_list = []
    for row_labels, row_aliases in zip(alias_list_data["label"], alias_list_data["alias"]):
        if row_labels[0] == label_name:
            print(row_labels[0])
            alias_list.extend(row_aliases)

    for alias_text in alias_list:
        # Skip non-string aliases: ``in`` on a str needs a str operand.
        if isinstance(alias_text, str) and alias_text in text:
            print(label_name)



House_number
House
City
District
Ward
Street


In [125]:
# Show the sentence currently held in ``text``.
print(text)

Mua nhà ở phường 1 quận 1


In [143]:
# for ent in DISTRICT_ENTITIES:
        # print(text)
        # print(type(ent), ent)
        # if ent in text:
            # print(ent)
        #     print(label_name)
        #     print(ent)
        #     output.append(District(name=ent))
        # else:
        #     label = "District"

quận 1
