In [1]:
import re
import numpy as np
import pandas as pd
from pandas import DataFrame

pd.set_option('display.max_columns', None)  # Show every column when displaying a frame (no folding)

print("******Start prepare dataset******")
# Load the shop table. The original note says the shops selected are those still open
# (complete attributes) with at least 5 reviews -- presumably this CSV is already that
# filtered set, since no filtering happens in this cell. TODO confirm.
df_shop = pd.read_csv("newData/Shop_data_10.csv", encoding="utf-8", low_memory=False)

******Start prepare dataset******


In [4]:
# Build the shop attribute vectors: one fixed-width row of 0/1 values per shop.
#
# Column layout (every field is written most-significant bit first):
#   18 bits  avg_price (truncated to int) as an unsigned binary number
#   15 bits  big_category, one bit per category (see big_category_dict)
#    6 bits  star rating, one bit per level (see star_dict)
#    7 bits  score1 * 10 as an unsigned binary number
#    7 bits  score2 * 10
#    7 bits  score3 * 10
#    1 bit   bookable flag
# Total: 61 columns.

# Lookup tables are defined once instead of being rebuilt for every row.
big_category_dict = {"美食":1, "丽人":2, "休闲娱乐":4, "购物":8, "亲子":16, "景点":32, "运动健身":64, "酒店":128, "教育培训":256, "结婚":512, "生活服务":1024, "宠物":2048, "医疗健康":4096, "家装":8192, "爱车":16384}
star_dict = {5.0:1, 4.5:2, 4.0:4, 3.5:8, 3.0:16, 2.0:32}


def _to_bits(value, width):
    """Return ``int(value)`` as a list of ``width`` floats (0.0 / 1.0), MSB first.

    Raises:
        ValueError: if the integer is negative or needs more than ``width`` bits.
            Without this check an over-long bit string would shift every later
            field of the row and silently drop the bits that fall past the
            last column.
    """
    number = int(value)
    if number < 0 or number >= (1 << width):
        raise ValueError(
            "value {!r} does not fit in an unsigned {}-bit field".format(value, width)
        )
    return [float(bit) for bit in format(number, '0{}b'.format(width))]


def _encode_shop(row):
    """Encode one shop row (a Series from ``iterrows``) into its 61 attribute values.

    Raises:
        KeyError: if ``big_category`` or ``star`` is not a key of the lookup tables.
        ValueError: if a numeric field is NaN or does not fit its bit width.
    """
    score1, score2, score3 = row["score1"], row["score2"], row["score3"]
    # When all three scores are below 5.0 they are doubled -- presumably to bring a
    # 5-point scale onto a 10-point one; confirm against the data source.
    if score1 < 5.0 and score2 < 5.0 and score3 < 5.0:
        score1, score2, score3 = 2 * score1, 2 * score2, 2 * score3

    bits = []
    bits += _to_bits(row['avg_price'], 18)
    bits += _to_bits(big_category_dict[row['big_category']], 15)
    bits += _to_bits(star_dict[row['star']], 6)
    for score in (score1, score2, score3):
        # Keep one decimal place, then store the score in tenths.
        bits += _to_bits(int(round(score, 1) * 10), 7)
    bits.append(1.0 if row['bookable'] else 0.0)
    return bits


poi_num = df_shop.shape[0]
attribute_embedding_matrix = np.zeros((poi_num, 61), dtype=np.float32)
# Rows are filled by position, not by index label, so the result stays correct
# even if df_shop does not carry a default 0..n-1 index.
for row_pos, (shop_index, shop_row) in enumerate(df_shop.iterrows()):
    attribute_embedding_matrix[row_pos] = _encode_shop(shop_row)

# Persist the attribute matrix
np.save('generateData/Attribute_Embedding', attribute_embedding_matrix)

In [12]:
# Swap the two columns of the ontology graph file; each line should read smaller-on-the-left, larger-on-the-right ("a is b")
# with open('newData/treeModelOfCategory.tsv', "r") as f1:
#     for line in f1.readlines():
#         messageList = line.strip().split("\t")
#         with open("newData/CategoryTree.tsv", "a") as f:
#             f.write(messageList[1] + "\t" + messageList[0] + "\n")

In [23]:
# Build the shop-type vectors: train a Poincare embedding on the category tree
from gensim.models.poincare import PoincareModel, PoincareRelations
from gensim.test.utils import datapath  # not used below any more; import kept in case other cells rely on the name

# Relative path, consistent with the "newData/..." paths used by the other cells.
# The previous absolute "C:/Users/lenovo/..." path only existed on one machine and was
# passed through datapath(), which is a helper for locating gensim's bundled test data.
file_path = 'newData/CategoryTree.tsv'
relations = PoincareRelations(file_path, encoding="gbk")
category_model = PoincareModel(train_data=relations, size=16, alpha=0.03, negative=3)
category_model.train(epochs=50, batch_size=16, print_every=3)
# Save the trained model
category_embedding_path = "generateData/Category_Hierarchy_Model"
category_model.save(category_embedding_path)

In [None]:
# 构造空间图，训练
from math import cos, sin, asin, sqrt, pow
import networkx as nx
def rad(d):
    """Convert an angle from degrees to radians.

    Args:
        d: angle in degrees (any value supporting ``*`` and ``/`` with floats).

    Returns:
        The angle in radians, ``d * pi / 180``.
    """
    # Local import: this cell only pulls selected names out of ``math``.
    import math

    # Full-precision pi instead of the truncated literal 3.1415926.
    return d * math.pi / 180.0


def get_distance(lat1, lng1, lat2, lng2):
    """Great-circle distance between two points, using the haversine formula.

    Args:
        lat1, lng1: latitude and longitude of the first point, in degrees.
        lat2, lng2: latitude and longitude of the second point, in degrees.

    Returns:
        The distance in the unit of the Earth radius used here (6378.137, i.e.
        kilometres). NaN inputs yield NaN.
    """
    # Local import: the surrounding cell only does ``from math import ...``, so the
    # bare name ``math`` used by the original body was undefined (NameError).
    import math

    EARTH_RADIUS = 6378.137
    rad_lat1 = math.radians(lat1)
    rad_lat2 = math.radians(lat2)
    half_dlat = (rad_lat1 - rad_lat2) / 2
    half_dlng = (math.radians(lng1) - math.radians(lng2)) / 2
    haversine = (
        math.sin(half_dlat) ** 2
        + math.cos(rad_lat1) * math.cos(rad_lat2) * math.sin(half_dlng) ** 2
    )
    # Rounding can push the term a hair above 1, which is outside asin's domain.
    # Argument order matters: min(nan, 1.0) keeps NaN, min(1.0, nan) would not.
    haversine = min(haversine, 1.0)
    return 2 * math.asin(math.sqrt(haversine)) * EARTH_RADIUS


def construct_graph(df_shop):
    """Build the edge list linking every pair of distinct shops that lie close together.

    Two shops are linked when ``get_distance(...) * 1000 < 100`` and their ids
    differ. Every ordered pair is tested, so a qualifying pair appears in both
    directions.

    Args:
        df_shop: DataFrame with 'shopId', 'longitude' and 'latitude' columns.

    Returns:
        Array of shape (2, E): row 0 holds the first id of each edge, row 1 the
        second. Ids are ``shopId + 1`` (the original comment says "pad 0").
        With no edges the shape is (2, 0).
    """
    # Pull the columns out once; iterating a DataFrame with nested iterrows()
    # rebuilds a Series for every row on every pass.
    poi_ids = (df_shop['shopId'] + 1).tolist()
    longitudes = df_shop['longitude'].tolist()
    latitudes = df_shop['latitude'].tolist()

    struct_list = []
    for poi_id_1, longitude_1, latitude_1 in zip(poi_ids, longitudes, latitudes):
        for poi_id_2, longitude_2, latitude_2 in zip(poi_ids, longitudes, latitudes):
            if poi_id_1 == poi_id_2:
                continue
            d = get_distance(latitude_1, longitude_1, latitude_2, longitude_2) * 1000
            if d < 100:
                struct_list.append([poi_id_1, poi_id_2])

    # reshape(-1, 2) is a no-op for a non-empty list and gives an empty result
    # the shape (2, 0) instead of (0,).
    struct_matrix = np.array(struct_list).reshape(-1, 2).T
    return struct_matrix

In [None]:
# Build the spatial edge list for all shops and save it with np.save.
# NOTE(review): the file name says "200m" but construct_graph links shops whose
# distance value is below 100 (d < 100) -- confirm which threshold is intended.
struct_matrix = construct_graph(df_shop)
np.save("newData/Spatual_Graph_200m", struct_matrix)