[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kevin7261/Geographic-Data-Science-with-Python/blob/main/Choropleth_Mapping.ipynb)

# Choropleth Mapping

Adapted from the *Choropleth Mapping* chapter of *Geographic Data Science with Python*: https://geographicdata.science/book/notebooks/05_choropleth.html

[台北市里界圖](https://data.taipei/dataset/detail?id=6b17b31d-4e16-495e-95b1-9fd1f47c80d8)

In [None]:
# @title 初始值設定

# Base name of the dataset; read_shape_file() saves the download as f"{PROJECT_NAME}.gpkg".
PROJECT_NAME = "臺北市_村里_綜稅綜合所得總額"
# Google Drive share link to the GeoPackage (a URL, despite the _PATH suffix).
# The Drive file id is later parsed from its "/d/<id>/" segment.
GPKG_FILE_PATH = "https://drive.google.com/file/d/1cmW_VxEyyRZ9xILM_7Blq98r7B40SwqA/view?usp=sharing"

In [None]:
# @title 下載台北思源黑體

# 下載台北思源黑體，並隱藏輸出
!wget -q -O TaipeiSansTCBeta-Regular.ttf https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download

# 匯入必要的庫
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.font_manager import fontManager

# 新增字體
fontManager.addfont('TaipeiSansTCBeta-Regular.ttf')

# 設定字體
mpl.rc('font', family='Taipei Sans TC Beta')


In [None]:
# @title 安裝套件

!pip install -q geopandas gdown pysal

---

In [None]:
# @title read_shape_file

import geopandas as gpd
import gdown
import os

def _drive_file_id(share_url):
    """Return the file id from a Google Drive ".../d/<id>/..." share link.

    Raises:
        ValueError: if ``share_url`` contains no "/d/<id>" segment.
    """
    _, marker, tail = share_url.partition("/d/")
    file_id = tail.split("/")[0]
    if not marker or not file_id:
        raise ValueError(
            f"Cannot find a Google Drive file id in: {share_url!r}"
        )
    return file_id


# Despite its name this is the Drive file id of the GeoPackage
# (name kept so existing references keep working).
SPREADSHEET_ID = _drive_file_id(GPKG_FILE_PATH)


def read_shape_file():
    """Download the project's GeoPackage from Google Drive and load it.

    The file is saved in the working directory as f"{PROJECT_NAME}.gpkg",
    read with GeoPandas, and its first rows are printed.

    Returns:
        The GeoDataFrame read from the downloaded file.

    Raises:
        RuntimeError: if gdown reports a failed download by returning None.
    """
    # Local path the download is written to
    gpkg_path = f"{PROJECT_NAME}.gpkg"

    # Fetch the .gpkg file from Google Drive
    downloaded = gdown.download(
        url=f"https://drive.google.com/uc?id={SPREADSHEET_ID}",
        output=gpkg_path,
        quiet=True
    )
    # Fail here with a clear message instead of letting read_file hit a
    # missing or stale file.
    if downloaded is None:
        raise RuntimeError(
            f"Download of Google Drive file {SPREADSHEET_ID!r} failed; "
            "check that the file is shared publicly."
        )

    # No layer is specified when reading the GeoPackage
    gdf = gpd.read_file(gpkg_path)

    # Show the first rows of the GeoDataFrame
    print("📋 GeoDataFrame 頭部：")
    print(gdf.head())

    return gdf


# Run the download and keep the result as `gdf`
gdf = read_shape_file()

# Principles

In [None]:
# @title 匯入函式庫

import seaborn
import pandas
import geopandas
import pysal
import numpy
import matplotlib.pyplot as plt

## 資料集

1. 臺北市_村里_綜稅綜合所得總額 (gpkg)

# Quantitative data classification

In [None]:
# Load the GeoPackage written by read_shape_file() into the working table `mx`.
mx = geopandas.read_file(f"{PROJECT_NAME}.gpkg")
# Preview the FULL column alongside the 中位數 (median) column.
mx[["FULL", "中位數"]].head()

In [None]:
# Plot a 5-bin histogram of the 中位數 column; keep the axes for the rug plot
ax = seaborn.histplot(mx["中位數"], bins=5)
# Add a red rug (one tick per observation) on the horizontal axis of the same axes
seaborn.rugplot(mx["中位數"], height=0.05, color="red", ax=ax);

In [None]:
# Summary statistics of the 中位數 column.
mx["中位數"].describe()

In [None]:
# Draw a 5-bin histogram on `ax` (the axes created in the histogram cell)
# and keep its three return values for inspection in the next cells.
counts, bins, patches = ax.hist(mx["中位數"], bins=5)

In [None]:
counts  # number of observations in each bin

In [None]:
bins  # break points (bin edges)

In [None]:
import mapclassify

## Equal intervals

In [None]:
# Equal-interval classification of 中位數 with k=5 classes;
# the bare name on the last line displays the fitted classifier.
ei5 = mapclassify.EqualInterval(mx["中位數"], k=5)
ei5

## Quantiles 分位數

In [None]:
# Quantile classification of 中位數 with k=5 classes
q5 = mapclassify.Quantiles(mx["中位數"], k=5)
q5

In [None]:
# Differences between consecutive quantile break points
numpy.diff(q5.bins)

In [None]:
# Build a small random sample with many ties

# Fix the seed so the sample is reproducible
numpy.random.seed(12345)
# Draw 20 integers from 0 (inclusive) to 10 (exclusive)
x = numpy.random.randint(low=0, high=10, size=20)
# Overwrite the first ten entries with the sample minimum
x[:10] = numpy.min(x)
x

In [None]:
# Quantile classification (k=5) of the sample x, whose first ten values are all tied at the minimum.
ties = mapclassify.Quantiles(x, k=5)
ties

In [None]:
# Distinct values present in x.
ux = numpy.unique(x)
ux

## Mean-standard deviation 平均標準差

In [None]:
# Mean-standard deviation classification of 中位數 (no class count is passed).
msd = mapclassify.StdMean(mx["中位數"])
msd

## Maximum breaks

In [None]:
# Maximum-breaks classification of 中位數 with k=5 classes.
mb5 = mapclassify.MaximumBreaks(mx["中位數"], k=5)
mb5

## Boxplot

In [None]:
# Box-plot classification of 中位數 with the library's default settings.
bp = mapclassify.BoxPlot(mx["中位數"])
bp

In [None]:
# Box-plot classification again, this time with an explicit hinge=1.
bp1 = mapclassify.BoxPlot(mx["中位數"], hinge=1)
bp1

## Head-tail breaks

In [None]:
# Head-tail breaks classification of 中位數 (no class count is passed).
ht = mapclassify.HeadTailBreaks(mx["中位數"])
ht

## Jenks-Caspall breaks

In [None]:
# Seed NumPy's global RNG before fitting.
# NOTE(review): confirm JenksCaspall actually draws random numbers.
numpy.random.seed(12345)
# Jenks-Caspall classification of 中位數 with k=5 classes.
jc5 = mapclassify.JenksCaspall(mx["中位數"], k=5)
jc5

## Fisher-Jenks breaks

In [None]:
# Seed NumPy's global RNG before fitting.
# NOTE(review): confirm FisherJenks actually draws random numbers.
numpy.random.seed(12345)
# Fisher-Jenks classification of 中位數 with k=5 classes.
fj5 = mapclassify.FisherJenks(mx["中位數"], k=5)
fj5

## Max-p

In [None]:
# Max-p classification of 中位數 with k=5 classes.
# NOTE(review): no seed is set in this cell — confirm MaxP is repeatable by default.
mp5 = mapclassify.MaxP(mx["中位數"], k=5)
mp5

## Comparing classification schemes

In [None]:
# Compare schemes by ADCM (absolute deviation around class medians); lower is better

# The classifiers under comparison, kept together as a tuple
class5 = (q5, ei5, ht, mb5, msd, fj5, jc5, mp5)
# One ADCM score per classifier
fits = numpy.array([classifier.adcm for classifier in class5])
# Tabulate each score next to its classifier's name
adcms = pandas.DataFrame(
    {
        "ADCM": fits,
        "Classifier": [classifier.name for classifier in class5],
    }
)
# Horizontal bar chart: one bar per classifier
ax = seaborn.barplot(
    data=adcms, x="ADCM", y="Classifier", palette="Pastel1"
)

In [None]:
# Store every scheme's class assignment (`yb`) in its own column of mx
class_columns = {
    "Quantiles": q5,
    "Equal Interval": ei5,
    "Head-Tail Breaks": ht,
    "Maximum Breaks": mb5,
    "Mean-Standard Deviation": msd,
    "Fisher-Jenks": fj5,
    "Jenks Caspall": jc5,
    "MaxP": mp5,
}
for column_name, classifier in class_columns.items():
    mx[column_name] = classifier.yb

In [None]:
# The heatmap has one column per row of mx, hence the very wide figure.
f, ax = plt.subplots(1, figsize=(90, 3))
# Classification columns, in the order they appear as heatmap rows
scheme_columns = [
    "Head-Tail Breaks",
    "Fisher-Jenks",
    "Maximum Breaks",
    "Equal Interval",
    "MaxP",
    "Quantiles",
    "Jenks Caspall",
    "Mean-Standard Deviation",
]
# Index by FULL, order by 中位數, then transpose so that schemes are rows
seaborn.heatmap(
    mx.set_index("FULL").sort_values("中位數")[scheme_columns].T,
    cmap="YlGn",
    cbar=False,
    ax=ax,
)
# The columns are the FULL values of the Taipei village table, not states
# ("State ID" was a leftover label from another dataset).
ax.set_xlabel("村里 (FULL)");

In [None]:
# Build a table of class counts with exactly five rows per classifier.
# NOTE: a classifier that produced more than five classes has its extra
# counts dropped here, so its column will not sum to the number of rows in mx.


class Fixed:
    """Plain holder for a classifier's name and its five-item counts list."""

    def __init__(self, name, counts):
        self.name = name
        self.counts = counts


def _fit_counts(raw_counts, size=5):
    """Return ``raw_counts`` as a list of exactly ``size`` items.

    Trailing extras are dropped; missing entries are filled with NaN.
    """
    fitted = list(raw_counts)[:size]
    fitted.extend([float("nan")] * (size - len(fitted)))
    return fitted


# A comprehension keeps its loop variable local, so the notebook-level
# `counts` from the histogram cell is no longer overwritten.
class5_fixed = [Fixed(c.name, _fit_counts(c.counts)) for c in class5]

# One column per classifier, one row per class
pandas.DataFrame(
    {c.name: c.counts for c in class5_fixed},
    index=["Class-{}".format(i) for i in range(5)],
)

In [None]:
# pandas.DataFrame(
#     {c.name: c.counts for c in class5},
#     index=["Class-{}".format(i) for i in range(5)],
# )

# Color

In [None]:
# Choropleth of the 中位數 column using the "Quantiles" scheme and the YlGn palette.
ax = mx.plot(
    column="中位數",  # Data to plot
    scheme="Quantiles",  # Classification scheme
    cmap="YlGn",  # Color palette
    legend=True,  # Add legend
    legend_kwds={"fmt": "{:.0f}"},  # Remove decimals in legend
)
# Hide the axis frame and ticks
ax.set_axis_off();

## Sequential palettes

In [None]:
# Quantile choropleth of 中位數 with the "Blues" palette and thin black polygon borders.
ax = mx.plot(
    column="中位數",  # Data to plot
    scheme="Quantiles",  # Classification scheme
    cmap="Blues",  # Color palette
    edgecolor="k",  # Borderline color
    linewidth=0.1,  # Borderline width
    legend=True,  # Add legend
    legend_kwds={
        "fmt": "{:.0f}"
    },  # Remove decimals in legend (for legibility)
)
# Hide the axis frame and ticks
ax.set_axis_off();

## Diverging palettes

In [None]:
# Create income-based rank table (Rank 1 is highest).
# FULL is ranked as well, but only the two numeric ranks are used below.
rnk = mx[["FULL", "中位數", "平均數"]].rank(ascending=False)
# Rank change between the 中位數 ranking and the 平均數 ranking
rnk["change"] = rnk["中位數"] - rnk["平均數"]
# Bin the rank change. The outer bins are open-ended so that a shift larger
# than any fixed bound still gets a class instead of becoming NaN.
rnk["class"] = pandas.cut(
    rnk["change"], [-numpy.inf, -50, 0, 50, numpy.inf]
)

In [None]:
# Join the rank table onto the geometries and color each polygon by its
# rank-change class using the RdYlGn palette.
ax = (
    mx[["geometry"]]
    .join(rnk)
    .plot("class", legend=True, cmap="RdYlGn")
)
# Hide the axis frame and ticks
ax.set_axis_off();

## Qualitative palettes

In [None]:
mx["JenksCaspall"] = jc5.yb  # store the Jenks-Caspall class of 中位數 for each row

In [None]:
# Peek at the stored class labels.
mx["JenksCaspall"].head()

In [None]:
# Plot the class column with default settings (no categorical flag, no legend).
ax = mx.plot("JenksCaspall")
ax.set_axis_off();

In [None]:
# Plot the same column with categorical=True and a legend.
ax = mx.plot("JenksCaspall", categorical=True, legend=True)
ax.set_axis_off();

# Advanced topics

## User-defined choropleths

In [None]:
# Classify 平均數 with hand-picked break points 800 / 1000 / 1200.
# NOTE(review): the units of 平均數 are not shown here — confirm the thresholds.
classi = mapclassify.UserDefined(
    mx["平均數"], [800, 1000, 1200]
)
classi

In [None]:
# Draw the user-defined classification on the geometries of mx.
classi.plot(
    mx,  # Use geometries in the geo-table
    legend=True,  # Add a legend
    legend_kwds={
        "loc": "upper right"
    },  # Place legend on top right corner
    axis_on=False,  # Remove axis
    cmap="viridis_r",  # Use reverse Viridis
);

In [None]:
# Bin 平均數 at 800 / 1000 / 1200, with open-ended first and last bins
lbls = pandas.cut(
    mx["平均數"], [-numpy.inf, 800, 1000, 1200, numpy.inf]
)
# Plot the labels directly, without adding them as a column of mx
ax = mx.plot(lbls, cmap="viridis_r", legend=True)
# Remove axis
ax.set_axis_off();

## Pooled classifications

In [None]:
# Columns to classify together. The list is named `years`, but it holds the
# names of four columns of mx, not years.
years = ["中位數", "平均數", "第一分位數", "第三分位數"]
# Pooled quantile classification (k=5) across all listed columns
pooled = mapclassify.Pooled(mx[years], classifier="Quantiles", k=5)

In [None]:
# 2x2 grid of maps, one panel per column listed in `years`
f, axs = plt.subplots(2, 2, figsize=(12, 12))
# Flatten the 2-D grid of axes so it can be paired with the column names
axs = axs.flatten()
# Class breaks shared by every panel, taken from the pooled classifier
shared_bins = pooled.global_classifier.bins
for panel, column in zip(axs, years):
    mx.plot(
        column,  # Column to map
        scheme="UserDefined",  # Use the supplied bins
        classification_kwds={"bins": shared_bins},
        legend=True,  # Add a legend
        ax=panel,  # Draw on this panel
    )
    # Hide the axis and title the panel with the column name
    panel.set_axis_off()
    panel.set_title(column)
# Tighten spacing between panels, then render the figure
plt.tight_layout()
plt.show()

# Conclusion