[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kevin7261/Geographic-Data-Science-with-Python/blob/main/全域空間自相關.ipynb)

# 全域空間自相關

## [1] 上傳資料

In [None]:
# @title <介面> 上傳geojson檔案

from google.colab import files
import os

def upload_geojson():
    """Prompt for a Colab upload and return the first ``.geojson`` file name.

    The surrounding notebook expects Colab to store an upload whose name
    already exists as ``"<stem> (<n>).geojson"``.  When that numeric suffix is
    present, the upload is moved over ``"<stem>.geojson"`` so the newest data
    replaces the old file.

    Returns:
        The file name later cells should load, or ``None`` when none of the
        uploaded names ends with ``.geojson``.
    """
    import re  # local import keeps this cell self-contained

    print("請選擇要上傳的 geojson 檔案。若上傳同名檔案，將會自動覆寫。")
    uploaded = files.upload()
    for uploaded_filename in uploaded:
        if not uploaded_filename.endswith('.geojson'):
            continue

        # Only a purely numeric "(n)" suffix is treated as the automatic
        # de-duplication marker.  A name such as "roads (backup).geojson" is a
        # legitimate file name and must not be renamed onto "roads.geojson".
        renamed = re.fullmatch(r'(.*\S)\s*\(\d+\)\.geojson', uploaded_filename)
        if renamed is None:
            print(f"已上傳檔案：{uploaded_filename}")
            return uploaded_filename

        original_filename = renamed.group(1) + '.geojson'
        if os.path.exists(original_filename):
            print(f"Existing file '{original_filename}' overwritten.")
        # os.replace overwrites an existing destination in one step, so no
        # separate remove() is needed.
        os.replace(uploaded_filename, original_filename)
        print(f"已將 '{uploaded_filename}' 重新命名為 '{original_filename}'")
        return original_filename

    print("沒有上傳有效的 geojson 檔案。")
    return None

# Run the upload prompt once and keep the resulting file name (None when no
# .geojson file was uploaded); the geojson loading cell reads this name.
FILE_NAME_GEOJSON = upload_geojson()
print("實際使用的檔名：", FILE_NAME_GEOJSON)

請選擇要上傳的 geojson 檔案。若上傳同名檔案，將會自動覆寫。


In [None]:
# @title <介面> 上傳xlsx檔案

from google.colab import files
import os

def upload_xlsx():
    """Prompt for a Colab upload and return the first ``.xlsx`` file name.

    The surrounding notebook expects Colab to store an upload whose name
    already exists as ``"<stem> (<n>).xlsx"``.  When that numeric suffix is
    present, the upload is moved over ``"<stem>.xlsx"`` so the newest data
    replaces the old file.

    Returns:
        The file name later cells should load, or ``None`` when none of the
        uploaded names ends with ``.xlsx``.
    """
    import re  # local import keeps this cell self-contained

    print("請選擇要上傳的 xlsx 檔案。若上傳同名檔案，將會自動覆寫。")
    uploaded = files.upload()
    for uploaded_filename in uploaded:
        if not uploaded_filename.endswith('.xlsx'):
            continue

        # Only a purely numeric "(n)" suffix is treated as the automatic
        # de-duplication marker.  A name such as "sales (draft).xlsx" is a
        # legitimate file name and must not be renamed onto "sales.xlsx".
        renamed = re.fullmatch(r'(.*\S)\s*\(\d+\)\.xlsx', uploaded_filename)
        if renamed is None:
            print(f"已上傳檔案：{uploaded_filename}")
            return uploaded_filename

        original_filename = renamed.group(1) + '.xlsx'
        if os.path.exists(original_filename):
            print(f"Existing file '{original_filename}' overwritten.")
        # os.replace overwrites an existing destination in one step, so no
        # separate remove() is needed.
        os.replace(uploaded_filename, original_filename)
        print(f"已將 '{uploaded_filename}' 重新命名為 '{original_filename}'")
        return original_filename

    print("沒有上傳有效的 xlsx 檔案。")
    return None

# Run the upload prompt once and keep the resulting file name (None when no
# .xlsx file was uploaded); the xlsx loading cell reads this name.
FILE_NAME_XLSX = upload_xlsx()
print("實際使用的檔名：", FILE_NAME_XLSX)

## [2] 初始設定

In [None]:
# @title 專案設定

PROJECT_NAME = "15_台南市區_合併位置"

# Worksheet read from the uploaded workbook, and the column that is analysed.
WORKSHEET_NAME = "15_台南市區_合併位置"
ANALYSIS_FIELD_NAME = "count"

# Names of the derived columns (spatial lag / binary indicator) added to `db`.
ANALYSIS_FIELD_NAME_LAG = ANALYSIS_FIELD_NAME + "_lag"
ANALYSIS_FIELD_NAME_BINARY = ANALYSIS_FIELD_NAME + "_binary"

In [None]:
# @title 安裝套件

# Install the third-party packages used by the cells below; -q keeps pip quiet.
# NOTE(review): gdown is installed but not imported in any visible cell.
!pip install -q geopandas gdown
!pip install -q pysal splot contextily
!pip install -q ipywidgets

In [None]:
# @title 下載台北思源黑體

# 下載台北思源黑體，並隱藏輸出
!wget -q -O TaipeiSansTCBeta-Regular.ttf https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download

# 匯入必要的庫
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.font_manager import fontManager

# 新增字體
fontManager.addfont('TaipeiSansTCBeta-Regular.ttf')

# 設定字體
mpl.rc('font', family='Taipei Sans TC Beta')


---

In [None]:
# @title 匯入函式庫

# Graphics
import matplotlib.pyplot as plt
import seaborn
from pysal.viz import splot
from splot.esda import plot_moran
import contextily

# Analysis
import geopandas
import pandas
from pysal.explore import esda
from pysal.lib import weights
from numpy.random import seed

## [3] 資料處理

### 載入資料

In [None]:
# @title 載入xlsx

# Read the configured worksheet from the uploaded workbook, then key the rows
# by their "name" column so they can be joined to the polygons by index.
ref = pandas.read_excel(FILE_NAME_XLSX, sheet_name=WORKSHEET_NAME)
ref = ref.set_index("name")

In [None]:
# @title 載入geojson

# Load the uploaded polygons and index them by their CODEBASE identifier.
lads = geopandas.read_file(FILE_NAME_GEOJSON)
lads = lads.set_index("CODEBASE")

# Declare the coordinates to be EPSG:3826 (TWD97), overriding whatever CRS the
# file itself declares.
# NOTE(review): an earlier inspection note recorded the file CRS as EPSG:4979
# (geodetic 3D); confirm the coordinates really are TWD97 before relying on
# this override.
lads.set_crs(epsg=3826, inplace=True, allow_override=True)

In [None]:
# @title 用index欄位合併xlsx與geojson

# Attach the analysis column from the worksheet to the polygons (index join),
# keeping the CRS of the polygon layer.
db = geopandas.GeoDataFrame(lads.join(ref[[ANALYSIS_FIELD_NAME]]), crs=lads.crs)

# Reproject to EPSG:3857 (Spherical Mercator), the CRS used when drawing the
# basemap tiles, and keep only the columns the analysis needs.
db = db.to_crs(epsg=3857)
db = db[[ANALYSIS_FIELD_NAME, "geometry"]]

# @title 空值填0
# Polygons without a matching worksheet row get 0 instead of NaN.
db[ANALYSIS_FIELD_NAME] = db[ANALYSIS_FIELD_NAME].fillna(0)

### 繪製資料

In [None]:
# @title <介面> 繪製資料地圖

from ipywidgets import Dropdown, FloatSlider, Button, Output, VBox
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import matplotlib
import contextily

# Choices offered by the colormap and classification-scheme dropdowns.
all_colormaps = sorted(matplotlib.colormaps())

available_schemes = [
    "equalinterval",
    "quantiles",
    "fisherjenks",
    "naturalbreaks",
    "headtailbreaks",
    "maxp",
    "boxplot",
]

# Interactive controls.
scheme_widget = Dropdown(
    description="分級方法：",
    options=available_schemes,
    value="quantiles",
    style={'description_width': 'initial'},
)

cmap_widget = Dropdown(
    description="色帶：",
    options=all_colormaps,
    value="viridis",
    style={'description_width': 'initial'},
)

alpha_slider = FloatSlider(
    description='透明度：',
    value=0.75,
    min=0.0,
    max=1.0,
    step=0.05,
    readout_format='.2f',
    style={'description_width': 'initial'},
)

execute_button = Button(
    description='確定',
    button_style='primary',
    icon='paint-brush',
)

output_widget = Output()


def on_button_clicked(b):
    """Redraw the data map using the current widget selections.

    Args:
        b: The clicked button, or ``None`` for the initial programmatic call.
            It is not used.
    """
    scheme_name = scheme_widget.value
    cmap_name = cmap_widget.value
    opacity = alpha_slider.value

    # Polygon outline / transparency settings shared by both layers.
    shared_style = dict(edgecolor="white", linewidth=0.2, alpha=opacity)

    with output_widget:
        clear_output(wait=True)

        print(f"\n使用的分級方法: {scheme_name}")
        print(f"使用的色帶: {cmap_name}")
        print(f"使用的透明度: {opacity}")

        f, ax = plt.subplots(1, figsize=(9, 9))

        values = db[ANALYSIS_FIELD_NAME]

        # Zero-valued polygons are drawn in flat grey and left out of the
        # classification.
        db[values == 0].plot(color="lightgray", ax=ax, **shared_style)

        # Positive values are classified into k=5 classes and coloured.
        db[values > 0].plot(
            column=ANALYSIS_FIELD_NAME,
            cmap=cmap_name,
            scheme=scheme_name,
            k=5,
            legend=True,
            legend_kwds={"loc": "upper left"},
            ax=ax,
            **shared_style,
        )

        contextily.add_basemap(
            ax,
            crs=db.crs,
            source=contextily.providers.CartoDB.PositronNoLabels,
            zoom=13,
        )

        ax.set_axis_off()
        ax.set_title(f"{ANALYSIS_FIELD_NAME}")

        plt.tight_layout()
        plt.show()


# Show the controls, wire up the button, and draw once with the defaults.
display(VBox([
    scheme_widget,
    cmap_widget,
    alpha_slider,
    execute_button,
    output_widget,
]))
execute_button.on_click(on_button_clicked)
on_button_clicked(None)


## [4] 開始分析

In [None]:
# @title <介面> 設定最近鄰居計算數

from ipywidgets import IntText, Button, Output, VBox
from IPython.display import display, clear_output
from libpysal import weights

# Integer input for the number of nearest neighbours (k).
knn_input = IntText(
    value=8,
    description='最近鄰居數：',
    style={'description_width': 'initial'}
)

# Button that (re)builds the weights.
execute_button = Button(
    description='確定',
    button_style='primary',
    icon='sitemap'
)

# Area that receives the printed status messages.
output_widget = Output()

# Show the controls.
display(VBox([
    knn_input,
    execute_button,
    output_widget
]))


def on_button_clicked(b):
    """Build the global KNN spatial weights ``w`` from ``db``.

    Reads k from ``knn_input``, builds a k-nearest-neighbour weights object
    from the GeoDataFrame ``db`` and row-standardises it.  An unusable k
    (below 1, or not smaller than the number of rows in ``db``) is reported
    and leaves any existing ``w`` untouched.

    Args:
        b: The clicked button, or ``None`` for the initial programmatic call.
            It is not used.
    """
    global w

    selected_k = knn_input.value

    with output_widget:
        clear_output(wait=True)
        print(f"\n使用的最近鄰居數：{selected_k}")

        # Every unit needs at least one neighbour and cannot have more
        # neighbours than there are other units; IntText does not enforce this.
        n_units = len(db)
        if not 1 <= selected_k < n_units:
            print(f"⚠️ 最近鄰居數必須介於 1 與 {n_units - 1} 之間，權重矩陣未更新。")
            return

        # k nearest neighbours of every geographic unit in `db`.
        # NOTE(review): the original note says distances are measured between
        # geometric centres (centroids) -- confirm against the libpysal docs.
        w = weights.KNN.from_dataframe(db, k=selected_k)

        # Row-standardise so each unit's neighbour weights sum to 1, which
        # makes the spatial lag an average over the neighbours.
        w.transform = "R"

        print(f"✅ 權重矩陣建立成功，共 {len(w.neighbors)} 筆，每筆 {selected_k} 鄰居")


# Wire up the button and build the weights once with the default k.
execute_button.on_click(on_button_clicked)
on_button_clicked(None)


### Spatial lag

In [None]:
# @title 計算Spatial Lag

# Spatial lag of the analysis field under the current weights `w`, stored as a
# new column of `db`.
db[ANALYSIS_FIELD_NAME_LAG] = weights.lag_spatial(w, db[ANALYSIS_FIELD_NAME])

#db.loc[["A6737-0210-00", "A6733-0731-00"], [ANALYSIS_FIELD_NAME, ANALYSIS_FIELD_NAME_LAG]]

In [None]:
# @title <介面> 繪製原始值與Spatial Lag值

from ipywidgets import Dropdown, FloatSlider, Button, Output, VBox
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import matplotlib
import contextily
from libpysal import weights, examples
from libpysal.weights import spatial_lag

# Choices offered by the colormap and classification-scheme dropdowns.
all_colormaps = sorted(matplotlib.colormaps())

available_schemes = [
    "equalinterval", "quantiles", "fisherjenks",
    "naturalbreaks", "headtailbreaks", "maxp", "boxplot"
]

# Interactive controls.
scheme_widget = Dropdown(
    options=available_schemes,
    value="quantiles",
    description="分級方法：",
    style={'description_width': 'initial'}
)

cmap_widget = Dropdown(
    options=all_colormaps,
    value="viridis",
    description="色帶：",
    style={'description_width': 'initial'}
)

alpha_slider = FloatSlider(
    value=0.75,
    min=0.0,
    max=1.0,
    step=0.05,
    description='透明度：',
    readout_format='.2f',
    style={'description_width': 'initial'}
)

execute_button = Button(
    description='確定',
    button_style='primary',
    icon='paint-brush'
)

output_widget = Output()

# Show the controls.
display(VBox([
    scheme_widget,
    cmap_widget,
    alpha_slider,
    execute_button,
    output_widget
]))


def _draw_choropleth(ax, column, title, scheme, cmap, alpha):
    """Draw a 5-class choropleth of ``db[column]`` with a basemap on ``ax``."""
    db.plot(
        column=column,
        cmap=cmap,
        scheme=scheme,
        k=5,
        edgecolor="white",
        linewidth=0.2,
        alpha=alpha,
        legend=True,
        legend_kwds={"loc": "upper left"},
        ax=ax,
    )
    contextily.add_basemap(
        ax,
        crs=db.crs,
        source=contextily.providers.CartoDB.PositronNoLabels,
        zoom=13
    )
    ax.set_axis_off()
    ax.set_title(title)


def on_button_clicked(b):
    """Plot the analysis field next to its spatial lag.

    Uses the column names from the project settings
    (``ANALYSIS_FIELD_NAME`` / ``ANALYSIS_FIELD_NAME_LAG``) instead of a
    hard-coded ``"count_lag"``, so the cell keeps working when the analysed
    field is renamed.

    Args:
        b: The clicked button, or ``None`` for the initial programmatic call.
            It is not used.
    """
    global w

    selected_scheme = scheme_widget.value
    selected_cmap = cmap_widget.value
    selected_alpha = alpha_slider.value

    with output_widget:
        clear_output(wait=True)

        print(f"\n使用的分級方法: {selected_scheme}")
        print(f"使用的色帶: {selected_cmap}")
        print(f"使用的透明度: {selected_alpha}")

        # Fallback for when the spatial-lag cell has not been run: build
        # row-standardised 8-nearest-neighbour weights and compute the lag.
        if ANALYSIS_FIELD_NAME_LAG not in db.columns:
            w = weights.KNN.from_dataframe(db, k=8)
            w.transform = "R"
            db[ANALYSIS_FIELD_NAME_LAG] = spatial_lag.lag_spatial(
                w, db[ANALYSIS_FIELD_NAME]
            )

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 9))

        # Left panel: original values.
        _draw_choropleth(
            ax1,
            ANALYSIS_FIELD_NAME,
            f"{ANALYSIS_FIELD_NAME}",
            selected_scheme,
            selected_cmap,
            selected_alpha,
        )

        # Right panel: spatial lag of the same field.
        _draw_choropleth(
            ax2,
            ANALYSIS_FIELD_NAME_LAG,
            f"{ANALYSIS_FIELD_NAME} - Spatial Lag (k={w.k})",
            selected_scheme,
            selected_cmap,
            selected_alpha,
        )

        plt.tight_layout()
        plt.show()


# Wire up the button and draw once with the defaults.
execute_button.on_click(on_button_clicked)
on_button_clicked(None)


### Binary case

In [None]:
# @title <介面> 輸入二元分隔值

# HBox was used below without ever being imported (NameError); import every
# widget class this cell needs in one place.
from ipywidgets import HBox, HTML, IntText, Button, Output, VBox
from IPython.display import display, clear_output

# Observed (min, max) of the analysis field, shown next to the input as a hint.
count_range = (db[ANALYSIS_FIELD_NAME].min(), db[ANALYSIS_FIELD_NAME].max())

# Default threshold: midpoint of the observed range, truncated to an integer.
default_binary_value = int((count_range[0] + count_range[1]) / 2)

# Integer input holding the threshold.
binary_value_widget = IntText(
    value=default_binary_value,
    description='分隔值：',
    disabled=False,
    style={'description_width': 'initial'}  # show the full description text
)

# Read-only label showing the value range.
range_html = HTML(value=f"({int(count_range[0])} - {int(count_range[1])})")

# Button that applies the threshold and redraws the map.
execute_button = Button(
    description='確定',
    disabled=False,
    button_style='primary',
    tooltip='點擊以根據分隔值進行二元化並繪圖',
    icon='check'
)

# Area that receives the printed message and the figure.
output_widget = Output()

# Input and range label side by side, with the button underneath.
input_and_range = HBox([binary_value_widget, range_html])
input_area = VBox([input_and_range, execute_button])

display(input_area, output_widget)


def on_button_clicked(b):
    """Binarise the analysis field at the chosen threshold and map the result.

    Stores the threshold in the global ``binary_value`` and writes a 0/1
    column named ``ANALYSIS_FIELD_NAME_BINARY`` to ``db`` (1 where the value
    is strictly greater than the threshold).

    Args:
        b: The clicked button, or ``None`` for the initial programmatic call.
            It is not used.
    """
    global binary_value
    binary_value = binary_value_widget.value

    with output_widget:
        clear_output(wait=True)

        db[ANALYSIS_FIELD_NAME_BINARY] = (
            db[ANALYSIS_FIELD_NAME] > binary_value
        ).astype(int)

        print(f"\n使用的分隔值: {binary_value}")

        f, ax = plt.subplots(1, figsize=(9, 9))
        # The figure size is set by plt.subplots above; a figsize passed
        # together with an explicit `ax` has no effect, so it is omitted here.
        db.plot(
            ax=ax,
            column=ANALYSIS_FIELD_NAME_BINARY,
            categorical=True,
            legend=True,
            edgecolor="0.5",
            linewidth=0.25,
            cmap="Set3",
        )
        ax.set_axis_off()
        ax.set_title(f"{ANALYSIS_FIELD_NAME} Binary (Threshold: {binary_value})")
        plt.axis("equal")
        plt.show()


# Wire up the button and draw once with the default threshold.
execute_button.on_click(on_button_clicked)
on_button_clicked(None)

In [None]:
# Show the current transformation of the weights `w`.
w.transform

In [None]:
w.transform = "O" # Switch `w` to the "O" transformation: no standardisation, the original weight values are kept.

In [None]:
# Show the transformation again to confirm the change.
w.transform

In [None]:
# Fix the NumPy random seed so the permutation-based results are repeatable.
seed(1234)
# Join-count statistics of the binary column under the weights `w`.
# NOTE(review): esda's Join_Counts may itself change `w.transform` -- confirm
# against the esda documentation before reusing `w` afterwards.
jc = esda.join_counts.Join_Counts(db[ANALYSIS_FIELD_NAME_BINARY], w)


In [None]:
jc.bb # labelled "GG" in the original notes; presumably the 1-1 join count -- confirm against the esda docs

In [None]:
jc.ww # labelled "YY" in the original notes; presumably the 0-0 join count -- confirm

In [None]:
jc.bw # labelled "GY" in the original notes; presumably the mixed 1-0 join count -- confirm

In [None]:
# Sum of the three join counts.
jc.bb + jc.ww + jc.bw

In [None]:
jc.mean_bb # "GG"; presumably the mean of the simulated bb counts -- confirm

In [None]:
jc.mean_bw # "GY"; presumably the mean of the simulated bw counts -- confirm

In [None]:
# Pseudo p-value attribute for the bb count.
jc.p_sim_bb

In [None]:
# Pseudo p-value attribute for the bw count.
jc.p_sim_bw

### Moran Plot and Moran’s I

In [None]:
# Centre the variable on its mean (a deviation from the mean, not a standard
# deviation).
db["count_std"] = db[ANALYSIS_FIELD_NAME] - db[ANALYSIS_FIELD_NAME].mean()

# The join-count cells above switch `w` away from row-standardisation.
# Restore it before computing the lag so that the lag is the average over the
# neighbours and the slope of the Moran plot is comparable with Moran's I,
# which is computed with row-standardised weights further below.
w.transform = "R"
db["count_lag_std"] = weights.lag_spatial(
    w, db["count_std"]
)

In [None]:
# Moran plot: centred values on x, their spatial lag on y, with a fitted line.
f, ax = plt.subplots(1, figsize=(6, 6))
seaborn.regplot(
    data=db,
    x="count_std",
    y="count_lag_std",
    ci=None,
    line_kws={"color": "r"},
    ax=ax,
)

# Reference lines through the origin split the plot into four quadrants.
ax.axhline(0, c="k", alpha=0.5)
ax.axvline(0, c="k", alpha=0.5)

ax.set_title("Moran Plot - Count")
plt.show()

In [None]:
w.transform = "R" # Row-standardise the spatial weights `w` before computing Moran's I.
# Global Moran's I of the analysis field under `w`.
moran = esda.moran.Moran(db[ANALYSIS_FIELD_NAME], w)

In [None]:
# The Moran's I statistic.
moran.I

In [None]:
moran.p_sim # Pseudo p-value; the original note reads the result as statistically significant -- check the displayed value.

In [None]:
# splot's Moran plot for the result above.
plot_moran(moran);

### Other global indices

#### Geary’s C

In [None]:
# Geary's C of the analysis field under the same weights `w`.
geary = esda.geary.Geary(db[ANALYSIS_FIELD_NAME], w)

In [None]:
geary.C # The original note reads the value as positive spatial autocorrelation (neighbouring values similar) -- check the displayed value.

In [None]:
# Pseudo p-value attribute of the Geary result.
geary.p_sim

### Getis and Ord’s G

In [None]:
# Original note: whether high or low values form clusters around locations.
db_osgb = db.to_crs(epsg=3826) # Reproject to EPSG:3826 (TWD97); the original note also mentions EPSG:27700 (British National Grid), presumably the source of the "osgb" name.
# Centroid of each geometry and its X/Y coordinates as a two-column table.
pts = db_osgb.centroid
xys = pandas.DataFrame({"X": pts.x, "Y": pts.y})
min_thr = weights.util.min_threshold_distance(xys)
min_thr # Per the original note: the smallest distance d at which every point has at least one neighbour.

In [None]:
# Distance-band weights built from the reprojected layer using that threshold.
w_db = weights.DistanceBand.from_dataframe(db_osgb, min_thr)

In [None]:
# Getis-Ord global G of the analysis field under the distance-band weights.
gao = esda.getisord.G(db[ANALYSIS_FIELD_NAME], w_db)

In [None]:
# Report the G statistic and its pseudo p-value, both to three decimals.
print(f"Getis & Ord G: {gao.G:.3f} | Pseudo P-value: {gao.p_sim:.3f}")