In [4]:
import os
# Graphics
import matplotlib.pyplot as plt
import seaborn
from pysal.viz import splot
from splot.esda import plot_moran
import contextily
# Analysis
import geopandas as gpd
import pandas as pd
from pysal.explore import esda
from pysal.lib import weights
from numpy.random import seed

# Global variables
# Directory that holds the analysis dataset. Set the DEMOLISH_DATASET_DIR
# environment variable to run the notebook on another machine; when it is not
# set, the original local folder is used, so existing behaviour is unchanged.
BASE_DIR = os.environ.get(
    'DEMOLISH_DATASET_DIR',
    r"C:\Users\Masat\デスクトップ_Instead\大学研究室用\自分の研究\2020研究(Master)\2020.10to_退蔵&解体シミュレーション\モデル作成用データセット\解体vs残存モデル用\demolish_remain_分析用データセット",
)
# Building shapefile, read with geopandas further down.
BUILDING_SHAPE = os.path.join(BASE_DIR, 'dataset_demolishModel_2010.shp')
# CSV export of the dataset, read with pandas (Shift-JIS encoded) further down.
BUILDING_CSV = os.path.join(BASE_DIR, 'dataset_demolishModel_2010_renamed.csv')
# Column passed to the join-count statistic as the variable of interest.
TARGET_COL = 'LOST'

In [2]:
# Load the building shapefile into a GeoDataFrame and preview the first rows.
df_building_shp = gpd.read_file(
    BUILDING_SHAPE,
)
df_building_shp.head(n=3)

Unnamed: 0,Join_Count,TARGET_FID,TARGET_F_1,LOST,AREA,Floor,AREA_Floor,Year,用途,station_m,...,RC,W,区域区分,区域名,masterplan,ritteki,N03_004,A29_005,傾斜角度_1,geometry
0,1,0,0,1,198.19,1,198.19,1933,住宅,1519.289,...,0,1,市街化区域,,1,0,門司区,第一種住居地域,17.361483,POINT Z (-2462.126 103752.025 0.000)
1,1,1,1,1,426.34,5,2131.7,1977,文教厚生施設（B),188.881,...,1,0,市街化区域,都市機能誘導区域,1,1,門司区,商業地域,0.624959,POINT Z (-5948.437 100462.051 0.000)
2,1,2,2,1,121.02,2,242.04,1985,文教厚生施設（B),359.764,...,0,0,市街化区域,都市機能誘導区域,1,1,八幡東区,商業地域,4.26074,POINT Z (-17887.156 96235.315 0.000)


In [6]:
# Mean of the target column; if LOST is coded 0/1 this is the share of rows
# with LOST == 1 (assumed binary - confirm against the data dictionary).
df_building_shp.loc[:, TARGET_COL].mean()

0.10898941012985769

In [11]:
# Load the CSV version of the dataset (the file is Shift-JIS encoded) and
# preview the first rows.
df_building_csv = pd.read_csv(
    BUILDING_CSV,
    encoding='shift-jis',
)
df_building_csv.head(n=3)

Unnamed: 0.1,Unnamed: 0,TARGET_FID,TARGET_F_1,lost,area,floor,total_floor_area,year,use,station_m,...,ritteki_name,masterplan_dammy,ritteki_dammy,word_name,zoning,slope,id_1kmmesh,houseNum_10000m2,houseNum_km2,old_rate
0,0,0,0,1,198.19,1,198.19,1933,住宅,1519.289,...,,1,0,門司区,第一種住居地域,17.361483,50307727,22,841,0.24137
1,1,1,1,1,426.34,5,2131.7,1977,文教厚生施設（B),188.881,...,都市機能誘導区域,1,1,門司区,商業地域,0.624959,50306784,137,949,0.207081
2,2,2,2,1,121.02,2,242.04,1985,文教厚生施設（B),359.764,...,都市機能誘導区域,1,1,八幡東区,商業地域,4.26074,50306644,16,187,0.19242


In [10]:
# Build the spatial weights W from the GeoDataFrame geometries using
# k-nearest neighbours with k=8.
spatial_weight_matrix = weights.KNN.from_dataframe(
    df_building_shp,
    k=8,
)
# Switch W to the row-standardised ('R') transform.
spatial_weight_matrix.transform = 'R'

In [11]:
# Show the transform currently set on W, switch it to "O" (libpysal's code for
# restoring the original weights), then show it again.
# NOTE(review): this mutates the W object created in the previous cell, so the
# first printed value depends on which cells were run beforehand.
current_transform = spatial_weight_matrix.transform
print(current_transform)
spatial_weight_matrix.transform = "O"
print(spatial_weight_matrix.transform)

R
O


In [12]:
# Seed NumPy's global random state so the permutation-based pseudo p-values
# are reproducible between runs.
seed(1234)

# Join counts are only meaningful for a binary variable: fail loudly on
# missing or non-0/1 values instead of computing a misleading statistic.
assert df_building_shp[TARGET_COL].isin([0, 1]).all(), (
    f'{TARGET_COL} must contain only 0/1 values for a join-count analysis'
)

# The permutation count is written out because the pseudo p-values are
# (number of permuted statistics >= observed + 1) / (permutations + 1).
# 999 is the documented esda default - confirm for the installed version.
join_count_obj = esda.join_counts.Join_Counts(
    df_building_shp[TARGET_COL],
    spatial_weight_matrix,
    permutations=999,
)

In [19]:
# Print the join-count report in four sections: observed counts, means under
# spatial randomness, observed-minus-mean differences, and pseudo p-values.
jc_result = join_count_obj
rule = '-' * 10

# Observed join counts.
print(rule + 'join count statistic')
print(f'join count bb(demolished * demolished): {jc_result.bb}')
print(f'join count ww(remain * remain): {jc_result.ww}')
print(f'join count bw(demolished * remain): {jc_result.bw}')
print(f'the num of all join count: {jc_result.J}')

# Means of the permuted statistics.
print(rule + 'expectation with spatial randomness')
print(f'expectation with spatial randomness of bb(demolished * demolished): {jc_result.mean_bb}')
print(f'expectation with spatial randomness of bw(demolished * remain): {jc_result.mean_bw}')

# Gap between the observed counts and the permutation means.
bb_gap = jc_result.bb - jc_result.mean_bb
bw_gap = jc_result.bw - jc_result.mean_bw
print(rule + 'difference between actual value and expectation with spatial randomness')
print(f'bb diff : {bb_gap}')
print(f'bw diff : {bw_gap}')

# Pseudo p-values, printed in the order bb then bw.
print(rule + 'Statistical inference. check the Empirical p-value')
print(f'{jc_result.p_sim_bb}')
print(f'{jc_result.p_sim_bw}')


----------join count statistic
join count bb(demolished * demolished): 24642.5
join count ww(remain * remain): 677628.0
join count bw(demolished * remain): 132489.5
the num of all join count: 834760.0
----------expectation with spatial randomness
expectation with spatial randomness of bb(demolished * demolished): 9918.169669669669
expectation with spatial randomness of bw(demolished * remain): 162123.67567567568
----------difference between actual value and expectation with spatial randomness
bb diff : 14724.330330330331
bw diff : -29634.17567567568
----------Statistical inference. check the Empirical p-value
0.001
1.0


この経験的 p 値の解釈：

- `join_count_obj.p_sim_bb==0.001`に関して
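  - LOST の値を建物間でランダムに並べ替えた(= Spatial Randomness を仮定した)シミュレーション 999 回(esda の既定の並べ替え回数)のうち、join count bb の値が実測値`join_count_obj.bb`以上になったものは 0 回だった。経験的 p 値は (0 + 1) / (999 + 1) = 0.001 と計算される(分子・分母の +1 は実測値自身を 1 回分として数えるため)。
  - 999 回のシミュレーションで得られた join count bb の値の平均が`join_count_obj.mean_bb`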
- `join_count_obj.p_sim_bw==1.0`に関して
  - 同じ 999 回のシミュレーションのすべてで、join count bw の値が実測値`join_count_obj.bw`以上になった。経験的 p 値は (999 + 1) / (999 + 1) = 1.0 であり、実測値の bw はランダムな配置で得られたどの値よりも小さい(または等しい)ことを意味する。
  - 999 回のシミュレーションで得られた join count bw の値の平均が`join_count_obj.mean_bw`

=>従って、この join count statistic の統計的検定の結果は、positive な Spatial Autocorrelation の存在を示唆している。
なぜなら、解体された建物同士(bb)の結合が期待値よりも有意に多く(`p_sim_bb` = 0.001)、解体された建物と残存した建物(bw)の結合が期待値よりも著しく少ない(`p_sim_bw` = 1.0、つまりシミュレーション値はすべて実測値以上)から。(期待値と実測値の差が偶然では説明しにくいことは、シミュレーションで得られた経験的 p 値によって示されている)
