In [1]:
import pandas as pd
import osmnx as ox # OSMnx is a Python package to get access to geospatial features from OpenStreetMap. (Boeing, G. 2024)
import geopandas as gpd
import numpy as np

from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import shap

In [3]:
from ohsome import OhsomeClient


In [3]:
# Disabled export of final_gdf to the 2015 CSV that a later cell reads back in.
# NOTE(review): presumably re-enabled when the POI query cell is run with a
# 2015 snapshot date — confirm before relying on data/poi2015.csv.
# final_gdf.to_csv("data/poi2015.csv", index=False)

In [8]:
# Read the London LSOA (2021) boundary layer and display the coordinate
# reference system it was stored in.
lsoa_shapefile = "london_lsoa21_combined.shp"
gdf = gpd.read_file(lsoa_shapefile)
gdf.crs

<Projected CRS: EPSG:27700>
Name: OSGB36 / British National Grid
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: United Kingdom (UK) - offshore to boundary of UKCS within 49°45'N to 61°N and 9°W to 2°E; onshore Great Britain (England, Wales and Scotland). Isle of Man onshore.
- bounds: (-9.01, 49.75, 2.01, 61.01)
Coordinate Operation:
- name: British National Grid
- method: Transverse Mercator
Datum: Ordnance Survey of Great Britain 1936
- Ellipsoid: Airy 1830
- Prime Meridian: Greenwich

In [9]:
# Reproject the boundaries to WGS84 (EPSG:4326), i.e. longitude/latitude.
gdf = gdf.to_crs("EPSG:4326")

In [12]:
# Inspect which attribute columns the LSOA boundary layer provides.
print(gdf.columns)

Index(['lsoa21cd', 'lsoa21nm', 'msoa21cd', 'msoa21nm', 'lad22cd', 'lad22nm',
       'borough', 'geometry'],
      dtype='object')


In [14]:
# Query three categories of OSM point features for one snapshot date, count
# them per LSOA polygon, and export the counts as shapefile + CSV.
# Leaves `final_gdf` (boundaries plus one count column per category) defined
# for the following cells in both the normal and the "no data" branch.

# Initialise the ohsome client.
client = OhsomeClient()

# Snapshot date (single point in time). The output file names are derived from
# its year so that changing the date cannot leave a stale year in the names.
time_param = "2020-06-01"
snapshot_year = time_param[:4]
output_stem = f"london_poi_counts_{snapshot_year}"

# Filter expression per POI category (node features only). The dict keys become
# the names of the count columns.
queries = {
    "shops": "type:node and shop=*",
    "metro": "type:node and railway=station and station=subway",
    "bus_stops": "type:node and highway=bus_stop"
}
poi_types = list(queries)

# Dissolve all LSOA polygons into a single boundary used as the query area.
# `union_all()` replaces the deprecated `unary_union` attribute.
london_geom = gdf.union_all()

# Collect one frame per category and concatenate once after the loop, instead
# of growing a frame with pd.concat on every iteration.
poi_frames = []
for name, filter_query in queries.items():
    print(f"查询 {name} POIs...")

    try:
        # Fetch the matching features with their geometries.
        response = client.elements.geometry.post(
            bpolys=london_geom,
            time=time_param,
            filter=filter_query
        )
        poi_gdf = response.as_dataframe()

        # Nothing returned for this category: skip it.
        if poi_gdf.empty:
            print(f"未找到 {name} POIs")
            continue

        # Without a geometry column the spatial join below cannot work.
        if 'geometry' not in poi_gdf.columns:
            print(f"警告: {name} 结果中没有几何列")
            continue

        # Tag every feature with its category.
        poi_gdf['poi_type'] = name
        poi_frames.append(poi_gdf)

        print(f"已获取 {len(poi_gdf)} 个 {name} POI点")

    except Exception as e:
        # Best effort: report the failure and carry on with the remaining
        # categories. NOTE: a failed category ends up as 0 for every LSOA
        # below, which is indistinguishable from a true zero in the output.
        print(f"查询 {name} 时出错: {str(e)}")
        continue

if poi_frames:
    all_poi_gdf = pd.concat(poi_frames, ignore_index=True)
else:
    all_poi_gdf = gpd.GeoDataFrame()

# Spatial join: assign each POI to the LSOA polygon that contains it.
if not all_poi_gdf.empty:
    print(f"总共获取 {len(all_poi_gdf)} 个POI点")
    print("执行空间连接...")

    # Invalid geometries are dropped (not repaired) before joining.
    if not all_poi_gdf.geometry.is_valid.all():
        print("修复无效几何对象...")
        all_poi_gdf = all_poi_gdf[all_poi_gdf.geometry.is_valid]

    # Left join keeps every LSOA, including those without any POI.
    joined_gdf = gpd.sjoin(gdf, all_poi_gdf, how='left', predicate='contains')

    print("计算每个LSOA的POI数量...")

    # Cross-tabulate: rows are LSOA codes, columns are POI categories.
    counts = pd.crosstab(joined_gdf['lsoa21cd'], joined_gdf['poi_type'])

    # A category that returned nothing has no column yet; add it as zeros.
    for col in poi_types:
        if col not in counts.columns:
            counts[col] = 0

    # Keep only the queried categories, in the order they were declared.
    counts = counts[poi_types].reset_index()
    counts.rename(columns={'lsoa21cd': 'lsoa_id'}, inplace=True)

    # Attach the counts to the LSOA polygons.
    final_gdf = gdf.merge(counts, left_on='lsoa21cd', right_on='lsoa_id', how='left')

    # LSOAs absent from the cross-tab contain no POIs: fill with 0.
    final_gdf[poi_types] = final_gdf[poi_types].fillna(0)

    print("保存结果...")
    final_gdf.to_file(f"{output_stem}.shp")
    final_gdf.to_csv(f"{output_stem}.csv", index=False)

    print("完成!")
    print(final_gdf[['lsoa21cd', 'shops', 'metro', 'bus_stops']].head())
else:
    print("未找到任何POI数据")

    # No POIs at all: export an all-zero result. Build it as a new frame so
    # `gdf` is not mutated and `final_gdf` exists for the cells that follow.
    final_gdf = gdf.assign(**{name: 0 for name in poi_types})

    final_gdf.to_file(f"{output_stem}.shp")
    final_gdf.to_csv(f"{output_stem}.csv", index=False)
    print("已创建包含零值的空结果文件")

  london_geom = gdf.unary_union


查询 shops POIs...
已获取 18297 个 shops POI点
查询 metro POIs...
已获取 250 个 metro POI点
查询 bus_stops POIs...
已获取 19968 个 bus_stops POI点
总共获取 38515 个POI点
执行空间连接...
计算每个LSOA的POI数量...
保存结果...
完成!
    lsoa21cd  shops  metro  bus_stops
0  E01000011    0.0    0.0        1.0
1  E01000046    0.0    0.0        2.0
2  E01000051    0.0    0.0        2.0
3  E01000077    0.0    0.0        4.0
4  E01000083    0.0    0.0        6.0


In [15]:
# Export the per-LSOA POI counts (all columns of final_gdf, without the index)
# as CSV. The file name hard-codes 2020, so it has to be kept in step with the
# snapshot date that was used to build final_gdf.
final_gdf.to_csv("data/poi2020.csv", index=False)

In [6]:
# Load the two POI count tables (2015 and 2019, going by the file names).
poi_15, poi_19 = (
    pd.read_csv(csv_path)
    for csv_path in ("data/poi2015.csv", "data/poi2019.csv")
)

In [7]:
# Tag the three POI count columns with their snapshot year so the counts of
# the two tables stay distinguishable once they are merged.
poi_15.rename(
    columns={
        'shops': 'shop_15',
        'metro': 'metro_15',
        'bus_stops': 'bus_stops_15',
    },
    inplace=True,
)
poi_19.rename(
    columns={
        'shops': 'shop_19',
        'metro': 'metro_19',
        'bus_stops': 'bus_stops_19',
    },
    inplace=True,
)

In [8]:
# Check the column names of the 2015 table after renaming.
print(poi_15.columns)

Index(['LSOA code', 'LSOA11NM', 'imd_score_', 'price_grow', 'high_price',
       'high_imd_i', 'gentri_cat', 'geometry', 'lsoa_id', 'shop_15',
       'metro_15', 'bus_stops_15'],
      dtype='object')


In [9]:
# Left-join the 2019 table onto the 2015 table by LSOA code. Columns that
# exist in both inputs get the default _x (from poi_15) / _y (from poi_19)
# suffixes.
df_poi = pd.merge(poi_15, poi_19, how="left", on="LSOA code")
print(df_poi.columns)

Index(['LSOA code', 'LSOA11NM_x', 'imd_score__x', 'price_grow_x',
       'high_price_x', 'high_imd_i_x', 'gentri_cat_x', 'geometry_x',
       'lsoa_id_x', 'shop_15', 'metro_15', 'bus_stops_15', 'LSOA11NM_y',
       'imd_score__y', 'price_grow_y', 'high_price_y', 'high_imd_i_y',
       'gentri_cat_y', 'geometry_y', 'lsoa_id_y', 'shop_19', 'metro_19',
       'bus_stops_19'],
      dtype='object')


In [10]:
# Change in each POI count between the two snapshots: the *_19 column minus
# the matching *_15 column. Dict order fixes the order of the new columns.
increase_sources = {
    'shop_increase': ('shop_19', 'shop_15'),
    'metro_increase': ('metro_19', 'metro_15'),
    'bus_increase': ('bus_stops_19', 'bus_stops_15'),
}
for increase_col, (later_col, earlier_col) in increase_sources.items():
    df_poi[increase_col] = df_poi[later_col] - df_poi[earlier_col]
print(df_poi.head())

   LSOA code                 LSOA11NM_x  imd_score__x  price_grow_x  \
0  E01000001        City of London 001A     -0.023916     -0.033399   
1  E01000002        City of London 001B      0.177778      0.092194   
2  E01000003        City of London 001C      0.007469      0.107022   
3  E01000005        City of London 001E     -0.101788      0.697026   
4  E01000006  Barking and Dagenham 016A      0.121012      0.261313   

   high_price_x  high_imd_i_x          gentri_cat_x  \
0         False         False        non_gentrified   
1         False          True  imd_improvement_only   
2         False         False        non_gentrified   
3          True         False       high_price_only   
4          True          True        non_gentrified   

                                          geometry_x  lsoa_id_x  shop_15  ...  \
0  POLYGON ((-0.0972886738363828 51.5215770412695...  E01000001      6.0  ...   
1  POLYGON ((-0.0881291513868094 51.5194107187603...  E01000002      6.0  ...   

In [12]:
# Reduce the merged table to the LSOA key and the three change columns.
poi_col = ['LSOA code', 'shop_increase', 'metro_increase', 'bus_increase']
df_poi = df_poi.loc[:, poi_col]
print(df_poi.head())

   LSOA code  shop_increase  metro_increase  bus_increase
0  E01000001            2.0            -1.0           1.0
1  E01000002            0.0             1.0          -6.0
2  E01000003            8.0             0.0           0.0
3  E01000005            6.0             1.0          -1.0
4  E01000006            0.0             0.0           0.0


In [13]:
# Write the per-LSOA change table (LSOA code plus the three *_increase
# columns) to CSV without the DataFrame index.
df_poi.to_csv("data/poi_15_19.csv", index=False)