In [2]:
import pandas as pd

# === 讀取資料（假設你已經有 DataFrame）
df = pd.read_csv("EPS元_ROE-1000_0_上市.csv", encoding="utf-8-sig")

# === 移除非財務欄位（保留 '代號', '名稱' 等基本欄位 + 所有含 EPS 的欄位）
base_cols = ["代號", "名稱"]
eps_cols = [col for col in df.columns if "EPS" in col and "平均" not in col]  # 排除「平均 EPS」

df_eps = df[base_cols + eps_cols].copy()

# === 長格式轉換（melt）
df_long = df_eps.melt(id_vars=base_cols, var_name="原始欄位名稱", value_name="value")

# === 拆成 year_quarter 與 financial_metric（例如 EPS）
df_long["year_quarter"] = df_long["原始欄位名稱"].str.extract(r"(\d{2}Q\d)")
df_long["year_quarter"] = "20" + df_long["year_quarter"]  # 補上完整年份
df_long["financial_metric"] = "EPS"  # 根據欄位內容指定指標

# === 最終欄位順序
df_final = df_long[["代號", "名稱", "year_quarter", "financial_metric", "value"]]

# === 輸出確認（可寫入CSV）
print(df_final.head())

     代號  名稱 year_quarter financial_metric value
0  1235  興泰       2022Q1              EPS  0.24
1  1236  宏亞       2022Q1              EPS  3.89
2  1301  台塑       2022Q1              EPS   2.6
3  1304  台聚       2022Q1              EPS   0.5
4  1305  華夏       2022Q1              EPS  0.78


In [3]:
import os

folder_path = 'G:\ML for stock\整合財報\goodinfo_downloads'

# 列出該資料夾下所有檔案與資料夾名稱
for filename in os.listdir(folder_path):
    print(filename)


1.ipynb
EPS元_ROE-1000_0_上市.csv
EPS元_ROE-1000_0_上櫃.csv
EPS元_ROE0_1000_上市.csv
EPS元_ROE0_1000_上櫃.csv
業外損益億_ROE-1000_0_上市.csv
業外損益億_ROE-1000_0_上櫃.csv
業外損益億_ROE0_1000_上市.csv
業外損益億_ROE0_1000_上櫃.csv
營業利益億_ROE-1000_0_上市.csv
營業利益億_ROE-1000_0_上櫃.csv
營業利益億_ROE0_1000_上市.csv
營業利益億_ROE0_1000_上櫃.csv
營業收入億_ROE-1000_0_上市.csv
營業收入億_ROE-1000_0_上櫃.csv
營業收入億_ROE0_1000_上市.csv
營業收入億_ROE0_1000_上櫃.csv
營業毛利億_ROE-1000_0_上市.csv
營業毛利億_ROE-1000_0_上櫃.csv
營業毛利億_ROE0_1000_上市.csv
營業毛利億_ROE0_1000_上櫃.csv


  folder_path = 'G:\ML for stock\整合財報\goodinfo_downloads'


In [5]:
import os
import pandas as pd
import re

folder_path = r'G:\ML for stock\整合財報\goodinfo_downloads'

all_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
all_dfs = []

def convert_season(col):
    match = re.match(r"(\d{2})Q(\d)", col)
    if match:
        year = int(match.group(1))
        quarter = match.group(2)
        year = 2000 + year if year < 50 else 1900 + year
        return f"{year}Q{quarter}"
    return col

for file in all_files:
    file_path = os.path.join(folder_path, file)
    try:
        df = pd.read_csv(file_path)

        # 抓 EPS 欄位
        eps_columns = [col for col in df.columns if re.match(r"^\d{2}Q\d EPS \(元\)$", col)]
        id_vars = [col for col in df.columns if col not in eps_columns]

        melted_df = df.melt(id_vars=id_vars, value_vars=eps_columns,
                            var_name="原始欄位", value_name="EPS")

        melted_df["季別"] = melted_df["原始欄位"].apply(lambda x: convert_season(x[:4]))
        melted_df["來源檔名"] = file  # 增加一欄來記錄來源

        final_df = melted_df.drop(columns=["原始欄位"])
        final_df = final_df[["代號", "名稱", "季別", "EPS", "來源檔名"] + [col for col in id_vars if col not in ["代號", "名稱"]]]

        all_dfs.append(final_df)

    except Exception as e:
        print(f"處理檔案 {file} 時發生錯誤：{e}")

# 合併所有檔案
combined_df = pd.concat(all_dfs, ignore_index=True)

# 儲存成新的 CSV
combined_df.to_csv(os.path.join(folder_path, "EPS_整合後.csv"), index=False, encoding="utf-8-sig")
print("✅ 所有檔案處理完畢，結果已輸出為 EPS_整合後.csv")


處理檔案 EPS_整合後.csv 時發生錯誤：value_name (EPS) cannot match an element in the DataFrame columns.
✅ 所有檔案處理完畢，結果已輸出為 EPS_整合後.csv
