# In [6]:
from pathlib import Path  # filesystem paths for the input data directory

import geopandas as gpd  # spatial data handling
import numpy as np  # numerical computation
import pandas as pd  # DataFrame and CSV handling
from esda import Moran  # Moran's I spatial autocorrelation statistic
from libpysal import weights  # spatial weights matrices
from spreg import ML_Lag  # spatial lag model estimation


# Directory holding every input file of the SAR analysis. It is defined once
# so that moving the data only requires changing a single line.
DATA_DIR = Path("E:\\study\\CASAterm1\\CASA0013_FSDS\\group_research\\SAR data")

# Load the spatial data (geometries later used for the spatial join, the area
# computation and the contiguity weights).
gdf = gpd.read_file(DATA_DIR / "greater_london.gpkg")

# Load the tabular CSV inputs.
tourism_df = pd.read_csv(DATA_DIR / "londonT&NTlist.csv")
turnover_df = pd.read_csv(DATA_DIR / "turnover_final_merge.csv")
listings_df = pd.read_csv(DATA_DIR / "listings.csv")

# Rename the geography code column to 'MSOA_CODE' in both tables so that they
# share one merge key.
gdf = gdf.rename(columns={'geo_code': 'MSOA_CODE'})
turnover_df = turnover_df.rename(columns={'geo_code': 'MSOA_CODE'})

# Column-name fragments that identify tourism-related industry sectors.
tourism_categories = ['_retail', '_f&b', '_travel', '_art&ent', '_sp_recrea', '_gambling']

# Keep every turnover column whose name contains at least one of the fragments.
tourism_cols = [
    name
    for name in turnover_df.columns
    if not all(fragment not in name for fragment in tourism_categories)
]

# Of those, retain only the columns with a numeric dtype so they can be summed.
tourism_cols_numeric = list(
    filter(
        lambda name: np.issubdtype(turnover_df[name].dtype, np.number),
        tourism_cols,
    )
)

# The row-wise total of the numeric tourism columns is the composite indicator.
turnover_df['tourism_economic_indicator'] = (
    turnover_df.loc[:, tourism_cols_numeric].sum(axis='columns')
)

# Build point geometries from the listings' longitude/latitude columns
# (declared as EPSG:4326) and reproject them to EPSG:27700.
listings_gdf = gpd.GeoDataFrame(
    listings_df,
    geometry=gpd.points_from_xy(listings_df['longitude'], listings_df['latitude']),
    crs="EPSG:4326",
).to_crs(epsg=27700)

# Put the area geometries in the same CRS as the listings before joining.
gdf = gdf.to_crs(epsg=27700)

# Spatial join: attach to every Airbnb listing the attributes of the area it
# lies within. With how="left", listings that match no area are kept with
# missing values.
listings_with_area = gpd.sjoin(listings_gdf, gdf, predicate="within", how="left")

# Count the listings per MSOA_CODE.
airbnb_supply = (
    listings_with_area
    .groupby('MSOA_CODE')
    .size()
    .rename('airbnb_supply')
    .reset_index()
)

#-------------------------
# Merge the data
#-------------------------
# Start from the merge key and the composite indicator. Note that these
# columns come from turnover_df, despite the variable's name.
tourism_df_selected = turnover_df.loc[:, ['MSOA_CODE', 'tourism_economic_indicator']]

# Add the Airbnb listing counts; areas without any listing get a supply of 0.
merged_df = tourism_df_selected.merge(airbnb_supply, on='MSOA_CODE', how='left')
merged_df['airbnb_supply'] = merged_df['airbnb_supply'].fillna(0)

# Add every column of tourism_df.
# NOTE(review): this is a left join, so areas missing from tourism_df keep NaN
# in its columns -- confirm tourism_df covers every MSOA_CODE before modelling.
merged_df = merged_df.merge(tourism_df, on='MSOA_CODE', how='left')

# Attach the attributes to the geometries; the inner join keeps only areas
# present in both tables.
SARdata = gdf.merge(merged_df, on='MSOA_CODE', how='inner')

# Interaction term: Airbnb supply multiplied by the 'hotspot_binary' column.
# NOTE(review): presumably hotspot_binary is a 0/1 traditional-tourism dummy
# supplied by tourism_df -- confirm. The interaction uses the raw listing
# count while the main effect below uses the density.
SARdata['airbnb_interact'] = SARdata['airbnb_supply'].mul(SARdata['hotspot_binary'])

# Area of each region in CRS units (m² when the geometries are in a
# metre-based projected CRS).
SARdata['area_m2'] = SARdata.geometry.area

# Convert the area to hectares (1 ha = 10,000 m²).
SARdata['area_ha'] = SARdata['area_m2'].div(10000.0)

# Airbnb density: number of listings per hectare.
SARdata['airbnb_density_per_ha'] = SARdata['airbnb_supply'].div(SARdata['area_ha'])

# Explanatory variables of the model; add control variables here if needed.
X_vars = ['airbnb_density_per_ha', 'airbnb_interact']

# Design matrix (n, k) and dependent variable as a column vector (n, 1).
X = SARdata.loc[:, X_vars].to_numpy()
y = SARdata['tourism_economic_indicator'].to_numpy().reshape(-1, 1)

# -------------------------------------------------------------------------
# Create a spatial weights matrix W
# Using a Queen contiguity for demonstration.
# Transform W to row-standardized form.
# W is built once and shared by the Moran's I test and the SAR model; the
# original script rebuilt the identical matrix a second time before fitting.
# -------------------------------------------------------------------------
W = weights.contiguity.Queen.from_dataframe(SARdata, use_index=True)
W.transform = 'r'

# -------------------------------------------------------------------------
# Check for spatial autocorrelation using Moran's I
# (Moran expects a 1-D array, hence the flatten of the (n, 1) vector y).
# -------------------------------------------------------------------------
mi = Moran(y.flatten(), W)
print("Moran's I:", mi.I)
print("p-value:", mi.p_sim)

# -------------------------------------------------------------------------
# Fit the spatial lag model with the same weights and print the summary
# report.
# -------------------------------------------------------------------------
sar_model = ML_Lag(y, X, w=W, name_y='tourism_economic_indicator',
                   name_x=X_vars, name_w='W', method='full')

print(sar_model.summary)