In [1]:
import os
import pandas as pd

# === Folder configuration ===
INPUT_DIR = os.getcwd()  # the monthly CSV files are expected in the current working directory
OUTPUT_DIR = os.path.join(os.getcwd(), "mops_merged")
os.makedirs(OUTPUT_DIR, exist_ok=True)


def read_mops_csvs(input_dir):
    """Read every ``mops_*.csv`` file found directly in ``input_dir``.

    Files are processed in sorted name order. A file that cannot be read is
    reported and skipped, so one bad file does not abort the whole run. When no
    file matches, a message is printed and empty lists are returned; the
    function never terminates the interpreter (calling ``exit()`` inside a
    notebook would shut the kernel down).

    Parameters
    ----------
    input_dir : str
        Directory to scan (not recursive).

    Returns
    -------
    tuple[list[str], list[pandas.DataFrame]]
        The matching file names in sorted order, and the frames that were read
        successfully (every column loaded as text).
    """
    file_names = sorted(
        name
        for name in os.listdir(input_dir)
        if name.startswith("mops_") and name.endswith(".csv")
    )
    if not file_names:
        print("❌ 找不到任何 mops_YYYY_MM.csv 檔案")
        return file_names, []

    print(f"📥 開始讀取 {len(file_names)} 個檔案...")
    frames = []
    for name in file_names:
        try:
            # dtype=str loads every column as text so no value is reinterpreted as a number.
            frames.append(pd.read_csv(os.path.join(input_dir, name), dtype=str))
            print(f"✅ 讀取：{name}")
        except Exception as e:
            # Best effort: report the failure and continue with the remaining files.
            print(f"⚠️ 錯誤讀取 {name}：{e}")
    return file_names, frames


# === Collect and read all mops_YYYY_MM.csv files ===
csv_files, all_data = read_mops_csvs(INPUT_DIR)

if all_data:
    # === Merge and de-duplicate ===
    merged_df = pd.concat(all_data, ignore_index=True)

    # Keep the first row for each (year, month, market, company code) combination.
    deduped_df = merged_df.drop_duplicates(subset=["年度", "月份", "市場別", "公司代號"])

    # === Write the combined file ===
    # utf-8-sig adds a BOM to the written file.
    output_file = os.path.join(OUTPUT_DIR, "all_mops_data.csv")
    deduped_df.to_csv(output_file, index=False, encoding="utf-8-sig")

    print(f"\n🎉 整合完成，共 {len(deduped_df)} 筆資料")
    print(f"📦 輸出檔案位置：{output_file}")
elif csv_files:
    # Files were found but none could be read; pd.concat([]) would raise ValueError here.
    print("❌ 所有檔案皆讀取失敗，未產生輸出檔")


📥 開始讀取 46 個檔案...
✅ 讀取：mops_2017_01.csv
✅ 讀取：mops_2017_02.csv
✅ 讀取：mops_2017_03.csv
✅ 讀取：mops_2017_04.csv
✅ 讀取：mops_2017_05.csv
✅ 讀取：mops_2017_06.csv
✅ 讀取：mops_2017_07.csv
✅ 讀取：mops_2017_08.csv
✅ 讀取：mops_2017_09.csv
✅ 讀取：mops_2017_10.csv
✅ 讀取：mops_2017_11.csv
✅ 讀取：mops_2017_12.csv
✅ 讀取：mops_2018_01.csv
✅ 讀取：mops_2018_02.csv
✅ 讀取：mops_2018_03.csv
✅ 讀取：mops_2018_04.csv
✅ 讀取：mops_2018_05.csv
✅ 讀取：mops_2018_06.csv
✅ 讀取：mops_2018_07.csv
✅ 讀取：mops_2018_08.csv
✅ 讀取：mops_2018_09.csv
✅ 讀取：mops_2018_10.csv
✅ 讀取：mops_2018_11.csv
✅ 讀取：mops_2018_12.csv
✅ 讀取：mops_2019_01.csv
✅ 讀取：mops_2019_02.csv
✅ 讀取：mops_2019_03.csv
✅ 讀取：mops_2019_04.csv
✅ 讀取：mops_2019_05.csv
✅ 讀取：mops_2019_06.csv
✅ 讀取：mops_2019_07.csv
✅ 讀取：mops_2019_08.csv
✅ 讀取：mops_2019_09.csv
✅ 讀取：mops_2019_10.csv
✅ 讀取：mops_2019_11.csv
✅ 讀取：mops_2019_12.csv
✅ 讀取：mops_2020_01.csv
✅ 讀取：mops_2020_02.csv
✅ 讀取：mops_2020_03.csv
✅ 讀取：mops_2020_04.csv
✅ 讀取：mops_2020_05.csv
✅ 讀取：mops_2020_06.csv
✅ 讀取：mops_2020_07.csv
✅ 讀取：mops_2020_08.csv
✅ 讀取：mops_2020_

In [2]:
import os
import pandas as pd

# === Folder configuration ===
INPUT_DIR = os.getcwd()
OUTPUT_DIR = os.path.join(os.getcwd(), "mops_merged")
os.makedirs(OUTPUT_DIR, exist_ok=True)


def read_mops_csvs(input_dir):
    """Read every ``mops_*.csv`` file found directly in ``input_dir``.

    Files are processed in sorted name order. A file that cannot be read is
    reported and skipped. When no file matches, a message is printed and empty
    lists are returned; the function never terminates the interpreter (calling
    ``exit()`` inside a notebook would shut the kernel down).

    Parameters
    ----------
    input_dir : str
        Directory to scan (not recursive).

    Returns
    -------
    tuple[list[str], list[pandas.DataFrame]]
        The matching file names in sorted order, and the frames that were read
        successfully (every column loaded as text).
    """
    file_names = sorted(
        name
        for name in os.listdir(input_dir)
        if name.startswith("mops_") and name.endswith(".csv")
    )
    if not file_names:
        print("❌ 找不到任何 mops_YYYY_MM.csv 檔案")
        return file_names, []

    print(f"📥 開始讀取 {len(file_names)} 個檔案...")
    frames = []
    for name in file_names:
        try:
            # dtype=str loads every column as text so no value is reinterpreted as a number.
            frames.append(pd.read_csv(os.path.join(input_dir, name), dtype=str))
            print(f"✅ 讀取：{name}")
        except Exception as e:
            # Best effort: report the failure and continue with the remaining files.
            print(f"⚠️ 錯誤讀取 {name}：{e}")
    return file_names, frames


def parse_revenue(values):
    """Convert revenue text such as ``"1,234"`` into floats.

    Thousands separators and surrounding whitespace are removed first. Anything
    that still is not a number (empty string, missing value, placeholder text)
    becomes NaN instead of raising, so one odd cell cannot abort the run.

    Parameters
    ----------
    values : pandas.Series
        Revenue column as read from the CSV files.

    Returns
    -------
    pandas.Series
        Float series aligned with ``values``.
    """
    cleaned = values.astype(str).str.replace(",", "", regex=False).str.strip()
    return pd.to_numeric(cleaned, errors="coerce").astype(float)


def idxmax_by_group(frame, group_cols, value_col):
    """Return, per group, the index label of the row with the largest ``value_col``.

    Rows whose value is NaN are dropped before grouping. A group without any
    usable value therefore yields no label at all, rather than a NaN label that
    ``frame.loc`` cannot resolve.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data containing ``group_cols`` and ``value_col``.
    group_cols : str or list of str
        Column(s) that define a group.
    value_col : str
        Numeric column to maximise.

    Returns
    -------
    pandas.Series
        Index labels of ``frame``, indexed by group key.
    """
    usable = frame.dropna(subset=[value_col])
    return usable.groupby(group_cols)[value_col].idxmax()


# === Find and read all mops_YYYY_MM.csv files ===
csv_files, all_data = read_mops_csvs(INPUT_DIR)

if all_data:
    # === Merge everything ===
    merged_df = pd.concat(all_data, ignore_index=True)

    # === Numeric copy of the monthly revenue (NaN is kept, never filled with 0) ===
    merged_df["_營收數值"] = parse_revenue(merged_df["營業收入-當月營收"])

    # === For every company code, pick the row with the largest revenue ===
    # Companies with no parsable revenue at all have no such row and are omitted.
    idx = idxmax_by_group(merged_df, "公司代號", "_營收數值")
    max_df = merged_df.loc[idx].drop(columns=["_營收數值"]).reset_index(drop=True)

    # === Save the result ===
    output_file = os.path.join(OUTPUT_DIR, "all_mops_max.csv")
    max_df.to_csv(output_file, index=False, encoding="utf-8-sig")

    print(f"\n🎉 整合完成，共 {len(max_df)} 筆資料")
    print(f"📦 檔案儲存於：{output_file}")
elif csv_files:
    # Files were found but none could be read; pd.concat([]) would raise ValueError here.
    print("❌ 所有檔案皆讀取失敗，未產生輸出檔")


📥 開始讀取 70 個檔案...
✅ 讀取：mops_2017_01.csv
✅ 讀取：mops_2017_02.csv
✅ 讀取：mops_2017_03.csv
✅ 讀取：mops_2017_04.csv
✅ 讀取：mops_2017_05.csv
✅ 讀取：mops_2017_06.csv
✅ 讀取：mops_2017_07.csv
✅ 讀取：mops_2017_08.csv
✅ 讀取：mops_2017_09.csv
✅ 讀取：mops_2017_10.csv
✅ 讀取：mops_2017_11.csv
✅ 讀取：mops_2017_12.csv
✅ 讀取：mops_2018_01.csv
✅ 讀取：mops_2018_02.csv
✅ 讀取：mops_2018_03.csv
✅ 讀取：mops_2018_04.csv
✅ 讀取：mops_2018_05.csv
✅ 讀取：mops_2018_06.csv
✅ 讀取：mops_2018_07.csv
✅ 讀取：mops_2018_08.csv
✅ 讀取：mops_2018_09.csv
✅ 讀取：mops_2018_10.csv
✅ 讀取：mops_2018_11.csv
✅ 讀取：mops_2018_12.csv
✅ 讀取：mops_2019_01.csv
✅ 讀取：mops_2019_02.csv
✅ 讀取：mops_2019_03.csv
✅ 讀取：mops_2019_04.csv
✅ 讀取：mops_2019_05.csv
✅ 讀取：mops_2019_06.csv
✅ 讀取：mops_2019_07.csv
✅ 讀取：mops_2019_08.csv
✅ 讀取：mops_2019_09.csv
✅ 讀取：mops_2019_10.csv
✅ 讀取：mops_2019_11.csv
✅ 讀取：mops_2019_12.csv
✅ 讀取：mops_2020_01.csv
✅ 讀取：mops_2020_02.csv
✅ 讀取：mops_2020_03.csv
✅ 讀取：mops_2020_04.csv
✅ 讀取：mops_2020_05.csv
✅ 讀取：mops_2020_06.csv
✅ 讀取：mops_2020_07.csv
✅ 讀取：mops_2020_08.csv
✅ 讀取：mops_2020_

In [4]:
import os
import pandas as pd

# === Folder configuration ===
INPUT_DIR = os.getcwd()
OUTPUT_DIR = os.path.join(os.getcwd(), "mops_merged")
os.makedirs(OUTPUT_DIR, exist_ok=True)


def read_mops_csvs(input_dir):
    """Read every ``mops_*.csv`` file found directly in ``input_dir``.

    Files are processed in sorted name order. A file that cannot be read is
    reported and skipped. When no file matches, a message is printed and empty
    lists are returned; the function never terminates the interpreter (calling
    ``exit()`` inside a notebook would shut the kernel down).

    Parameters
    ----------
    input_dir : str
        Directory to scan (not recursive).

    Returns
    -------
    tuple[list[str], list[pandas.DataFrame]]
        The matching file names in sorted order, and the frames that were read
        successfully (every column loaded as text).
    """
    file_names = sorted(
        name
        for name in os.listdir(input_dir)
        if name.startswith("mops_") and name.endswith(".csv")
    )
    if not file_names:
        print("❌ 找不到任何 mops_YYYY_MM.csv 檔案")
        return file_names, []

    print(f"📥 開始讀取 {len(file_names)} 個檔案...")
    frames = []
    for name in file_names:
        try:
            # dtype=str loads every column as text so no value is reinterpreted as a number.
            frames.append(pd.read_csv(os.path.join(input_dir, name), dtype=str))
            print(f"✅ 讀取：{name}")
        except Exception as e:
            # Best effort: report the failure and continue with the remaining files.
            print(f"⚠️ 錯誤讀取 {name}：{e}")
    return file_names, frames


def parse_revenue(values):
    """Convert revenue text such as ``"1,234"`` into floats.

    Thousands separators and surrounding whitespace are removed first. Anything
    that still is not a number (empty string, missing value, placeholder text)
    becomes NaN instead of raising, so one odd cell cannot abort the run.

    Parameters
    ----------
    values : pandas.Series
        Revenue column as read from the CSV files.

    Returns
    -------
    pandas.Series
        Float series aligned with ``values``.
    """
    cleaned = values.astype(str).str.replace(",", "", regex=False).str.strip()
    return pd.to_numeric(cleaned, errors="coerce").astype(float)


def idxmax_by_group(frame, group_cols, value_col):
    """Return, per group, the index label of the row with the largest ``value_col``.

    Rows whose value is NaN are dropped before grouping. A group without any
    usable value therefore yields no label at all, rather than a NaN label that
    ``frame.loc`` cannot resolve.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data containing ``group_cols`` and ``value_col``.
    group_cols : str or list of str
        Column(s) that define a group.
    value_col : str
        Numeric column to maximise.

    Returns
    -------
    pandas.Series
        Index labels of ``frame``, indexed by group key.
    """
    usable = frame.dropna(subset=[value_col])
    return usable.groupby(group_cols)[value_col].idxmax()


# === Find and read all mops_YYYY_MM.csv files ===
csv_files, all_data = read_mops_csvs(INPUT_DIR)

if all_data:
    # === Merge everything ===
    merged_df = pd.concat(all_data, ignore_index=True)

    # === Add a numeric column used only for comparison (the original text column is untouched) ===
    merged_df["_營收數值"] = parse_revenue(merged_df["營業收入-當月營收"])

    # === Group by year + month + company code and keep the largest monthly revenue row ===
    # Groups with no parsable revenue at all have no such row and are omitted.
    idx = idxmax_by_group(merged_df, ["年度", "月份", "公司代號"], "_營收數值")

    max_df = merged_df.loc[idx].drop(columns=["_營收數值"]).reset_index(drop=True)

    # === Save the result ===
    output_file = os.path.join(OUTPUT_DIR, "all_mops_max_per_month.csv")
    max_df.to_csv(output_file, index=False, encoding="utf-8-sig")

    print(f"\n🎉 完成！每月每公司保留最大營收資料，共 {len(max_df)} 筆")
    print(f"📦 檔案儲存於：{output_file}")
elif csv_files:
    # Files were found but none could be read; pd.concat([]) would raise ValueError here.
    print("❌ 所有檔案皆讀取失敗，未產生輸出檔")


📥 開始讀取 99 個檔案...
✅ 讀取：mops_2017_01.csv
✅ 讀取：mops_2017_02.csv
✅ 讀取：mops_2017_03.csv
✅ 讀取：mops_2017_04.csv
✅ 讀取：mops_2017_05.csv
✅ 讀取：mops_2017_06.csv
✅ 讀取：mops_2017_07.csv
✅ 讀取：mops_2017_08.csv
✅ 讀取：mops_2017_09.csv
✅ 讀取：mops_2017_10.csv
✅ 讀取：mops_2017_11.csv
✅ 讀取：mops_2017_12.csv
✅ 讀取：mops_2018_01.csv
✅ 讀取：mops_2018_02.csv
✅ 讀取：mops_2018_03.csv
✅ 讀取：mops_2018_04.csv
✅ 讀取：mops_2018_05.csv
✅ 讀取：mops_2018_06.csv
✅ 讀取：mops_2018_07.csv
✅ 讀取：mops_2018_08.csv
✅ 讀取：mops_2018_09.csv
✅ 讀取：mops_2018_10.csv
✅ 讀取：mops_2018_11.csv
✅ 讀取：mops_2018_12.csv
✅ 讀取：mops_2019_01.csv
✅ 讀取：mops_2019_02.csv
✅ 讀取：mops_2019_03.csv
✅ 讀取：mops_2019_04.csv
✅ 讀取：mops_2019_05.csv
✅ 讀取：mops_2019_06.csv
✅ 讀取：mops_2019_07.csv
✅ 讀取：mops_2019_08.csv
✅ 讀取：mops_2019_09.csv
✅ 讀取：mops_2019_10.csv
✅ 讀取：mops_2019_11.csv
✅ 讀取：mops_2019_12.csv
✅ 讀取：mops_2020_01.csv
✅ 讀取：mops_2020_02.csv
✅ 讀取：mops_2020_03.csv
✅ 讀取：mops_2020_04.csv
✅ 讀取：mops_2020_05.csv
✅ 讀取：mops_2020_06.csv
✅ 讀取：mops_2020_07.csv
✅ 讀取：mops_2020_08.csv
✅ 讀取：mops_2020_