[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kevin7261/Geographic-Data-Science-with-Python/blob/main/綜稅綜合所得總額資料轉換.ipynb)

# 村里收入資料轉換

https://whgis-nlsc.moi.gov.tw/Opendata/Files.aspx

[綜稅綜合所得總額各縣市鄉鎮村里統計分析表-臺北市](https://data.gov.tw/dataset/17983)

## [1] 讀取原始資料

In [None]:
# @title 初始值設定

# --- Output naming ---------------------------------------------------------
# Base name of the merged GeoPackage written by merge_shp_and_csv().
PROJECT_NAME = "臺北市_村里_綜稅綜合所得總額"

# --- Input: village boundary Shapefile (Taipei City village boundaries, 2022-09-15)
# Base name used for the downloaded ZIP and the intermediate GeoPackage.
SHP_FILE_NAME = "臺北市里界圖_20220915"
# Google Drive share link of the zipped Shapefile; the file ID is parsed from it.
SHP_FILE_PATH = (
    "https://drive.google.com/file/d/1hk976GorrQ71ctAFYUWI8Vt4Kf_8akWp/view?usp=sharing"
)

# --- Input: income statistics CSV ------------------------------------------
# Direct-download link for the table "total consolidated income by county/city,
# township and village - Taipei City".
CSV_PATH = (
    "https://drive.google.com/uc?export=download&id=1hgtGfaRIESHjtbLVR9SNu7isZ9KoAMuz"
)

# --- Output: Google Drive folder -------------------------------------------
# Share link of the destination folder; the folder ID is parsed from it.
GDRIVE_PATH = (
    "https://drive.google.com/drive/folders/1g39YQg2LLE5nf1YVDO4SUhgwa8VUaETF?usp=sharing"
)

In [None]:
# @title 掛載Google雲端硬碟

# Mount the user's Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# @title 安裝套件

!pip install geopandas shapely pyproj fiona -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.6/56.6 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25h

## [2] 檔案處理


In [None]:
# @title read_shape_file

import geopandas as gpd  # 用於讀取與儲存空間資料
import matplotlib.pyplot as plt  # 用於地圖預覽（未用到）
import gdown  # 用於從 Google Drive 下載檔案
import os  # 用於檔案處理
import zipfile  # 用於 ZIP 壓縮

# Extract the Drive file ID from the share link: it is the path segment that
# immediately follows "/d/". Raises IndexError if the link has no "/d/" part.
SPREADSHEET_ID_SHP = SHP_FILE_PATH.split("/d/")[1].partition("/")[0]

def read_shape_file():
    """Download the zipped Shapefile from Google Drive and load it.

    The archive identified by ``SPREADSHEET_ID_SHP`` is saved as
    ``{SHP_FILE_NAME}.zip`` and unpacked into ``shapefiles/``. The first
    ``.shp`` member of *this* archive (by sorted extracted path) is read with
    geopandas and additionally written to ``{SHP_FILE_NAME}.gpkg``.

    Returns:
        The GeoDataFrame read from the Shapefile.

    Raises:
        RuntimeError: If the download produced no file, or the archive does
            not contain a usable ``.shp`` member.
    """
    # Working folder that receives the extracted archive members.
    extract_dir = "shapefiles"
    os.makedirs(extract_dir, exist_ok=True)

    # Temporary name for the downloaded ZIP.
    zip_path = f"{SHP_FILE_NAME}.zip"

    # Download the original ZIP (the compressed Shapefile bundle).
    downloaded = gdown.download(
        url=f"https://drive.google.com/uc?id={SPREADSHEET_ID_SHP}",
        output=zip_path,
        quiet=True,
    )
    # NOTE(review): some gdown versions report failure by returning None
    # instead of raising; fail here rather than on a missing/stale ZIP below.
    if downloaded is None:
        raise RuntimeError(f"❌ 無法下載 ZIP 檔：{zip_path}")

    # Extract member by member (what extractall() does internally) so that we
    # keep the real on-disk path of every file that came from this archive.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        extracted_paths = [
            zip_ref.extract(member, extract_dir)
            for member in zip_ref.namelist()
        ]

    # Only consider files extracted just now: this finds Shapefiles nested in
    # sub-folders and ignores leftovers from earlier runs. Hidden files and
    # the "__MACOSX" folder are skipped because macOS archives store
    # resource-fork stubs there whose names also end in ".shp".
    shp_files = sorted(
        path
        for path in extracted_paths
        if path.lower().endswith(".shp")
        and os.path.isfile(path)
        and "__MACOSX" not in path.split(os.sep)
        and not os.path.basename(path).startswith(".")
    )
    if not shp_files:
        raise RuntimeError("❌ 無法找到 .shp 檔")
    shp_path = shp_files[0]

    # Read the Shapefile and let geopandas handle the attribute encoding.
    gdf = gpd.read_file(shp_path)

    # Also save a GeoPackage copy (avoids Shapefile field-width limits).
    gpkg_path = f"{SHP_FILE_NAME}.gpkg"
    gdf.to_file(gpkg_path, driver="GPKG")

    # Report the result and preview the first rows.
    print(f"✅ 已儲存為 GeoPackage：{gpkg_path}")
    print("📋 前幾筆資料預覽：")
    print(gdf.head())

    return gdf

In [None]:
# @title merge_shp_and_csv

import geopandas as gpd  # 處理空間資料
import pandas as pd  # 資料處理
import os  # 檔案處理
import zipfile  # 壓縮用
from IPython.display import FileLink  # 顯示下載連結

def merge_shp_and_csv():
    """Join the boundary layer with the income CSV and save a GeoPackage.

    The boundary GeoDataFrame comes from ``read_shape_file()``; the statistics
    table is read from ``CSV_PATH``. Rows are matched by a left join on a text
    key: the boundary column ``FULL`` against the CSV columns ``縣市別`` +
    ``村里`` concatenated. The result is written to ``{PROJECT_NAME}.gpkg``.

    Text values are written unchanged. The previous implementation stripped
    every non-ASCII character from string columns, which erased all Chinese
    names (and turned missing values into the literal string "nan").

    Raises:
        KeyError: If ``FULL`` is missing from the boundary layer or ``縣市別`` /
            ``村里`` is missing from the CSV.
    """
    # Load the boundary layer (downloaded, unpacked and read by the helper).
    gdf = read_shape_file()

    # Read the CSV, skipping malformed lines.
    df = pd.read_csv(CSV_PATH, on_bad_lines="skip", engine="python")
    # Remove a byte-order mark first, then trim: the BOM is not whitespace, so
    # trimming before removing it would leave any whitespace that follows it.
    df.columns = df.columns.str.replace("\ufeff", "", regex=False).str.strip()

    # Fail with an explicit message when a join column is absent.
    missing = [col for col in ("縣市別", "村里") if col not in df.columns]
    if "FULL" not in gdf.columns:
        missing.insert(0, "FULL")
    if missing:
        raise KeyError(f"❌ 缺少合併所需欄位：{missing}")

    # Build the join key on both sides (column names are dataset-specific).
    gdf["merge_key"] = gdf["FULL"].astype(str).str.strip()
    df["merge_key"] = (
        df["縣市別"].astype(str).str.strip() + df["村里"].astype(str).str.strip()
    )

    # Left join keeps every boundary feature, matched or not.
    merged = gdf.merge(df, how="left", on="merge_key")

    # Rebuild the GeoDataFrame so the geometry column and CRS are explicit.
    clean_gdf = gpd.GeoDataFrame(merged, geometry="geometry", crs=gdf.crs)

    # Write the merged layer as a GeoPackage.
    gpkg_path = f"{PROJECT_NAME}.gpkg"
    clean_gdf.to_file(gpkg_path, driver="GPKG")

    print(f"✅ 已合併並儲存為 GeoPackage：{gpkg_path}")

# Run the merge (PROJECT_NAME and CSV_PATH must already be defined).
merge_shp_and_csv()

In [None]:
# @title upload_to_drive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import os

# Extract the Drive folder ID from the share link: the text after "/folders/"
# up to (not including) any "?" query string. Raises IndexError if the link
# has no "/folders/" part.
SPREADSHEET_ID_GDRIVE = GDRIVE_PATH.split("/folders/")[1].partition("?")[0]

def upload_to_drive(file_path):
    """Upload a local file to the configured Drive folder, replacing old copies.

    Files in the folder ``SPREADSHEET_ID_GDRIVE`` that carry the same title are
    removed only after the new upload has succeeded, so a failed upload never
    leaves the folder without a copy.

    Args:
        file_path: Path of the file to upload; a relative path is resolved
            against the current working directory.

    Raises:
        FileNotFoundError: If ``file_path`` does not point to an existing
            file. Raised before any authentication or Drive change.
    """
    # Resolve the local path and the title used on Drive.
    source_path = os.path.join(os.getcwd(), file_path)
    filename = os.path.basename(file_path)

    # Validate first: previously a missing file was only detected after the
    # existing Drive copies had already been deleted.
    if not os.path.isfile(source_path):
        raise FileNotFoundError(f"❌ 找不到要上傳的檔案：{source_path}")

    # Authorise with the Colab user's credentials and build the Drive client.
    auth.authenticate_user()
    gauth = GoogleAuth()
    gauth.credentials = GoogleCredentials.get_application_default()
    drive = GoogleDrive(gauth)

    # Backslashes and single quotes must be escaped inside a Drive query
    # string literal, otherwise such a filename breaks the query.
    escaped_name = filename.replace("\\", "\\\\").replace("'", "\\'")

    # Collect same-named files BEFORE uploading so the new file is not listed.
    stale_files = drive.ListFile({
        'q': f"'{SPREADSHEET_ID_GDRIVE}' in parents and trashed = false and title = '{escaped_name}'"
    }).GetList()

    # Create the new file in the target folder and upload its content.
    upload_file = drive.CreateFile({
        'title': filename,
        'parents': [{'id': SPREADSHEET_ID_GDRIVE}]
    })
    upload_file.SetContentFile(source_path)
    upload_file.Upload()

    # The upload succeeded; now it is safe to remove the superseded copies.
    for stale in stale_files:
        stale_title, stale_id = stale['title'], stale['id']
        stale.Delete()
        print(f"🗑️ 已刪除舊檔案：{stale_title} (ID: {stale_id})")

    print(f"✅ 成功上傳：{filename} 到 Google Drive 資料夾（ID: {SPREADSHEET_ID_GDRIVE}）")
    print(f"📁 連結：https://drive.google.com/file/d/{upload_file['id']}/view?usp=sharing")

In [None]:
# Upload the merged GeoPackage (the file written by merge_shp_and_csv()).
upload_to_drive(f"{PROJECT_NAME}.gpkg")