## 同じファイル名

In [1]:
import os

# Root of the directory tree to scan
data_dir = "/work/a06/rmori/spillover/data"

# Walk the tree recursively and keep the full path of every file whose name
# contains "tone-a01" (no extension filter), in os.walk traversal order
matching_paths = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(data_dir)
    for name in names
    if "tone-a01" in name
]

# Print one matching path per line
for found in matching_paths:
    print(found)


/work/a06/rmori/spillover/data/1982/tone-a01.csv
/work/a06/rmori/spillover/data/1988/tone-a01.csv
/work/a06/rmori/spillover/data/1966/tone-a01.csv
/work/a06/rmori/spillover/data/1954/tone-a01.csv
/work/a06/rmori/spillover/data/1975/tone-a01.csv
/work/a06/rmori/spillover/data/1972/tone-a01.csv
/work/a06/rmori/spillover/data/1978/tone-a01.csv
/work/a06/rmori/spillover/data/1959/tone-a01.csv
/work/a06/rmori/spillover/data/1985/tone-a01.csv
/work/a06/rmori/spillover/data/1961/tone-a01.csv
/work/a06/rmori/spillover/data/1965/tone-a01.csv
/work/a06/rmori/spillover/data/1981/tone-a01.csv
/work/a06/rmori/spillover/data/1957/tone-a01.csv
/work/a06/rmori/spillover/data/1976/tone-a01.csv
/work/a06/rmori/spillover/data/1971/tone-a01.csv
/work/a06/rmori/spillover/data/1962/tone-a01.csv
/work/a06/rmori/spillover/data/1968/tone-a01.csv
/work/a06/rmori/spillover/data/1986/tone-a01.csv
/work/a06/rmori/spillover/data/1958/tone-a01.csv
/work/a06/rmori/spillover/data/1960/tone-a01.csv
/work/a06/rmori/spil

## キーワード

In [None]:
# Search terms: a CSV is kept only when its whitespace-stripped text contains
# all three. An empty string is a substring of every text, so leaving a slot
# as "" effectively disables that term.
word1, word2, word3 = "生活保護", "尾道市", ""

In [None]:
import os
import re

# Whitespace-free copy of a text, used so keywords still match when the
# source inserts half-width or full-width spaces between characters
def remove_spaces(text):
    """Return ``text`` with every whitespace character removed.

    Characters matched by the regex class ``\\s`` (spaces, tabs, newlines, ...)
    and the full-width ideographic space U+3000 are all deleted.
    """
    whitespace = re.compile(r'[\s\u3000]')
    return whitespace.sub('', text)

# Root of the directory tree to scan
data_dir = "/work/a06/rmori/spillover/data"


def read_text_with_fallback(path, encodings=("utf-8", "shift_jis")):
    """Return the decoded contents of ``path``, trying each encoding in order.

    Args:
        path: File to read.
        encodings: Encodings to attempt, first to last.

    Returns:
        The file contents decoded with the first encoding that succeeds.

    Raises:
        UnicodeDecodeError: If none of the encodings can decode the file.
        OSError: If the file cannot be opened or read.
        ValueError: If ``encodings`` is empty.
    """
    last_error = None
    for encoding in encodings:
        try:
            with open(path, 'r', encoding=encoding) as f:
                return f.read()
        except UnicodeDecodeError as e:
            # Remember the failure and move on to the next candidate encoding
            last_error = e
    if last_error is None:
        raise ValueError("no encodings given")
    raise last_error


# Paths of the CSV files that contain every search word
matching_paths = []

# Walk the tree recursively and inspect each CSV file
for root, dirs, files in os.walk(data_dir):
    for file in files:
        if not file.endswith(".csv"):
            continue
        full_path = os.path.join(root, file)
        try:
            content = read_text_with_fallback(full_path)
        except (OSError, UnicodeDecodeError) as e:
            # Report unreadable files and keep scanning, whichever encoding
            # attempt failed (previously an OSError on the first open would
            # abort the whole scan)
            print(f"❌ 読込失敗: {full_path} ({e})")
            continue
        # Compare against the whitespace-free text so that words split by
        # half-width/full-width spaces are still found
        stripped = remove_spaces(content)
        if all(word in stripped for word in (word1, word2, word3)):
            matching_paths.append(full_path)

# Print one matching path per line
for path in matching_paths:
    print(path)



# 広島市と合併旧市町村の集計

In [None]:
import os
import pandas as pd
import re

# Keywords to look for in the header cells (column-name candidates);
# extend this list as needed
keywords = [
    "耕地面積",
    "製造業出荷額",
    "製造業事業所",
    "商店数",
]

# Former municipalities that were merged; rows naming any of these are added
# to the 広島市 figure
merged_towns = [
    "戸坂村", "中山村", "井口村", "沼田町", "安佐町", "可部町",
    "祇園町", "安古市町", "佐東町", "高陽町", "瀬野川町", "白木町",
    "熊野跡村", "安芸町", "矢野町", "船越町", "五日市",
]

# Whitespace-stripping helper used for all cell comparisons
def normalize_text(text):
    """Return ``str(text)`` with all whitespace removed, for comparison.

    Half-width spaces, full-width spaces (U+3000), carriage returns, newlines
    and any other character matched by ``\\s`` are deleted. Non-string input
    is converted with ``str()`` first.
    """
    as_string = str(text)
    return re.sub(r'[\s\u3000\r\n]+', '', as_string)

# Collected results per keyword: {keyword: [(year, value), ...]}
results_by_keyword = {kw: [] for kw in keywords}


def _load_table(path):
    """Read ``path`` as a header-less CSV, trying UTF-8 and then Shift_JIS."""
    try:
        return pd.read_csv(path, header=None, encoding='utf-8')
    except UnicodeDecodeError:
        # read_csv has no ``errors`` keyword; undecodable bytes are skipped
        # via ``encoding_errors`` (available in pandas >= 1.3)
        return pd.read_csv(
            path, header=None, encoding='shift_jis', encoding_errors='ignore'
        )


def _iter_rows_containing(df, needles):
    """Yield, in order, the index of each row with a cell containing a needle.

    Cells are compared after ``normalize_text``; NaN cells are skipped. A row
    is yielded at most once.
    """
    n_rows, n_cols = df.shape
    for r in range(n_rows):
        for c in range(n_cols):
            val = df.iat[r, c]
            if pd.isna(val):
                continue
            text = normalize_text(val)
            if any(needle in text for needle in needles):
                yield r
                break


def _find_keyword_column(df, keyword, stop_row):
    """Return the column of the last cell above ``stop_row`` containing ``keyword``.

    Rows ``0 .. stop_row - 1`` are scanned in row-major order and the last hit
    wins. Returns ``None`` when the keyword does not appear.
    """
    keyword_col = None
    for r in range(stop_row):
        for c in range(df.shape[1]):
            val = df.iat[r, c]
            if pd.isna(val):
                continue
            if keyword in normalize_text(val):
                keyword_col = c  # keep the last occurrence
    return keyword_col


for path in sorted(matching_paths):
    # The first 4-digit run in the path is taken as the year
    match = re.search(r"(\d{4})", path)
    if not match:
        continue
    year = int(match.group(1))

    try:
        df = _load_table(path)
    except Exception as e:
        # Covers failures of both the UTF-8 attempt and the Shift_JIS fallback
        print(f"❌ 読込失敗: {path} ({e})")
        continue

    # First row that mentions 広島市
    hiroshima_row = next(_iter_rows_containing(df, ["広島市"]), None)
    if hiroshima_row is None:
        print(f"⚠️ 広島市が見つからない: {path}")
        continue

    # Rows naming a merged municipality do not depend on the keyword, so
    # locate them once per file.
    # NOTE(review): if the 広島市 row itself also names a merged town it is
    # counted twice, as in the original logic — confirm whether that can occur.
    merged_rows = list(_iter_rows_containing(df, merged_towns))

    for keyword in keywords:
        keyword_col = _find_keyword_column(df, keyword, hiroshima_row)
        if keyword_col is None:
            print(f"⚠️ {keyword} が見つからない: {path}")
            continue

        # Start from the 広島市 value (non-numeric cells count as 0)
        hiroshima_val = pd.to_numeric(df.iat[hiroshima_row, keyword_col], errors='coerce')
        if pd.isna(hiroshima_val):
            hiroshima_val = 0

        # Add the values of the merged municipalities; non-numeric cells are skipped
        for r in merged_rows:
            add_val = pd.to_numeric(df.iat[r, keyword_col], errors='coerce')
            if not pd.isna(add_val):
                hiroshima_val += add_val

        results_by_keyword[keyword].append((year, hiroshima_val))

# Output: one year-indexed DataFrame per keyword
for keyword, data in results_by_keyword.items():
    df_result = pd.DataFrame(data, columns=["year", keyword]).set_index("year")
    print(f"\n📊 {keyword}（広島市＋旧市町村）:")
    print(df_result)
