# POI数据点整理

In [3]:
import json

In [None]:
# Load the raw POI records. The encoding is given explicitly so that
# non-ASCII text in the file (e.g. Chinese POI names) is decoded the same
# way on every platform instead of depending on the locale default.
with open("data/poi_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [None]:
def generate_main_tag(poi: dict):
    """Extract the main keyword pair from a POI's original tags.

    The tag keys are examined in a fixed priority order and the first one
    that is present decides the result.

    Args:
        poi: A POI record; ``poi['tags']`` is read, and ``poi['id']`` is
            read only when no known key is present.

    Returns:
        A 2-tuple ``(first_tag, second_tag)``. Normally this is the
        matched key and its value. For ``railway`` the second element is
        normalised to ``'subway'`` or ``'train'`` when the tags indicate
        one of those. When no known key is present the result is
        ``('error', poi['id'])``.
    """
    tag: dict = poi['tags']
    present = tag.keys()

    # Highest priority first; the position of 'railway' matters.
    priority = (
        'amenity',
        'shop',
        'tourism',
        'leisure',
        'healthcare',
        'public_transport',
        'railway',
        'aeroway',
        'sport',
        'education',
        'office',
    )

    for key in priority:
        if key not in present:
            continue
        if key != 'railway':
            return key, tag[key]

        # Railway entries are collapsed to a coarse kind where possible:
        # a dedicated 'subway'/'train' key wins, then a railway value that
        # contains 'subway', otherwise the raw railway value is kept.
        if 'subway' in present:
            return 'railway', 'subway'
        if 'train' in present:
            return 'railway', 'train'
        if 'subway' in tag['railway']:
            return 'railway', 'subway'
        return 'railway', tag['railway']

    # None of the known keys is present.
    return 'error', poi['id']

In [None]:
# Attach 'first_tag' / 'second_tag' to every POI in `data`, in place.
# A POI whose classification raises is counted in `error_count`, reported,
# and marked ('error', 'processing_failed') so the loop keeps going.
error_count = 0
for i, poi in enumerate(data):
    try:
        result = generate_main_tag(poi)
        if result is not None:
            first_tag, second_tag = result
            poi['first_tag'] = first_tag
            poi['second_tag'] = second_tag
        else:
            # Defensive fallback for a classifier that returns nothing.
            print(f"警告: POI {i} 返回None, ID: {poi.get('id', 'unknown')}")
            poi['first_tag'] = 'unknown'
            poi['second_tag'] = 'unknown'
    except Exception as e:
        error_count += 1
        print(f"错误: POI {i} 处理失败: {e}")
        print(f"  标签内容: {poi.get('tags', {})}")
        poi['first_tag'] = 'error'
        poi['second_tag'] = 'processing_failed'

# Summary line: number of POIs processed and number of failures.
print(f"\n处理完成! 总计 {len(data)} 个POI, {error_count} 个错误")

In [None]:
# Persist the tagged POI list. ensure_ascii=False writes non-ASCII
# characters verbatim, so the file encoding is pinned to UTF-8 rather than
# left to the platform default, which may be unable to encode them.
with open('data/poi_data_more_precise.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent = 4)

## 对总体数据处理

In [None]:
# Load the combined house/POI dataset. The encoding is explicit so that
# non-ASCII text decodes identically regardless of the platform locale.
with open("data/house_poi_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Narrow `data` to the value stored under the top-level 'houses' key.
data = data['houses']

In [None]:
# Attach 'first_tag' / 'second_tag' to every POI nested under every house
# in `data`, in place. `i` is a running count of POIs seen across all
# houses; failures are counted in `error_count` and the POI is marked
# ('error', 'processing_failed') so processing continues.
error_count = 0
i = 0
for house in data:
    pois = house['pois']
    for poi in pois:
        i += 1
        try:
            result = generate_main_tag(poi)
            if result is not None:
                first_tag, second_tag = result
                poi['first_tag'] = first_tag
                poi['second_tag'] = second_tag
            else:
                # Defensive fallback for a classifier that returns nothing.
                print(f"警告: POI {i} 返回None, ID: {poi.get('id', 'unknown')}")
                poi['first_tag'] = 'unknown'
                poi['second_tag'] = 'unknown'
        except Exception as e:
            error_count += 1
            print(f"错误: POI {i} 处理失败: {e}")
            print(f"  标签内容: {poi.get('tags', {})}")
            poi['first_tag'] = 'error'
            poi['second_tag'] = 'processing_failed'

# Summary line: number of POIs processed and number of failures.
print(f"\n处理完成! 总计 {i} 个POI, {error_count} 个错误")

In [None]:
# Persist the house list with its tagged POIs. ensure_ascii=False writes
# non-ASCII characters verbatim, so the file encoding is pinned to UTF-8
# rather than left to the platform default.
with open('data/house_poi_data_more_precise.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent = 4)

## POI-房屋数据整理

In [None]:
# 以poi数据为中心，统计与poi相互关联的房屋数据

In [None]:
# Load the tagged house list and the tagged POI list. Both files are read
# with an explicit UTF-8 encoding so that non-ASCII text decodes the same
# way on every platform.
with open('data/house_poi_data_more_precise.json', 'r', encoding='utf-8') as f:
    houses = json.load(f)
with open("data/poi_data_more_precise.json", "r", encoding="utf-8") as f:
    pois = json.load(f)

In [None]:
# Invert the house -> POI relation: for every POI, collect the houses that
# list it, and store that list on the POI as 'nearby_houses'.
poi_dict = {poi['id']: [] for poi in pois}
for house in houses:
    # House record without its own 'pois' list, so the POI entries do not
    # embed the full POI data again.
    house_info = {key: value for key, value in house.items() if key != 'pois'}
    for poi in house['pois']:
        poi_dict[poi['id']].append(house_info)
for poi in pois:
    # Index by poi['id'] directly instead of binding a global named `id`,
    # which would shadow the builtin for the rest of the session.
    poi['nearby_houses'] = poi_dict[poi['id']]

In [None]:
# Compute, for every POI, the number of nearby houses and the mean of
# their 'price_per_meter' values.
for poi in pois:
    nearby_houses = poi['nearby_houses']
    total_houses_count = len(nearby_houses)
    # Prices are converted with float() before summing.
    total_value = sum(float(house['price_per_meter']) for house in nearby_houses)
    poi['total_houses_count'] = total_houses_count
    # A POI with no linked houses would otherwise divide by zero. It gets
    # 0.0 so the field stays numeric; check 'total_houses_count' to tell
    # "no houses" apart from a real average.
    poi['average_nearby_house_price'] = (
        total_value / total_houses_count if total_houses_count else 0.0
    )

In [None]:
# Persist the POIs together with their nearby houses and price statistics.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file
# encoding is pinned to UTF-8 rather than left to the platform default.
with open("data/poi_house_data_more_precise.json", "w", encoding="utf-8") as f:
    json.dump(pois, f, ensure_ascii=False, indent = 4)

In [None]:
# Sort the in-memory POI list in place, highest average nearby house price
# first.
# NOTE(review): the cell that writes poi_house_data_more_precise.json
# appears before this one, so when cells run top to bottom the saved file
# is not in this sorted order - confirm whether that is intended.
pois.sort(key = lambda x: x['average_nearby_house_price'], reverse=True)

In [14]:
# Reload the saved POI/house file with an explicit UTF-8 encoding so that
# non-ASCII text decodes the same way on every platform.
with open("./data/poi_house_data_more_precise.json", "r", encoding="utf-8") as f:
    data = json.load(f)
# Bare expression: the notebook displays the first record as cell output.
data[0]

{'id': 'node/1765216410',
 'lat': 39.8842806,
 'lon': 116.4833676,
 'tags': {'bus': 'yes',
  'highway': 'bus_stop',
  'name': '窑洼湖桥北',
  'name:zh': '窑洼湖桥北',
  'public_transport': 'stop_position'},
 'first_tag': 'public_transport',
 'second_tag': 'stop_position',
 'nearby_houses': [{'id': 1,
   'price_per_meter': '26641',
   'lon': '116.480977',
   'lat': '39.886406'},
  {'id': 64,
   'price_per_meter': '26492',
   'lon': '116.487808',
   'lat': '39.889853'},
  {'id': 108,
   'price_per_meter': '27525',
   'lon': '116.48072',
   'lat': '39.884041'},
  {'id': 373,
   'price_per_meter': '23664',
   'lon': '116.481662',
   'lat': '39.878749'},
  {'id': 409,
   'price_per_meter': '23928',
   'lon': '116.487808',
   'lat': '39.889853'},
  {'id': 410,
   'price_per_meter': '21799',
   'lon': '116.481662',
   'lat': '39.878749'},
  {'id': 462,
   'price_per_meter': '24583',
   'lon': '116.481662',
   'lat': '39.878749'},
  {'id': 500,
   'price_per_meter': '34957',
   'lon': '116.483653',
   '

In [15]:
# Drop the 'nearby_houses' list from every record and overwrite
# poi_data_more_precise.json with the slimmed-down records.
for poi in data:
    # pop() with a default instead of `del`, so re-running this cell on
    # records that were already stripped does not raise KeyError.
    poi.pop('nearby_houses', None)
# ensure_ascii=False writes non-ASCII characters verbatim, so the file
# encoding is pinned to UTF-8 rather than left to the platform default.
with open("./data/poi_data_more_precise.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent = 4)