In [17]:
import os
import pandas as pd
from pathlib import Path


In [18]:
# --- 1. Paths ---
# base_dir: folder whose immediate sub-directories are scanned for *.csv files.
# out_file: destination of the single merged CSV written at the end.
# NOTE(review): both are absolute, machine-specific Windows paths; adjust them
# before running this notebook on another machine.
base_dir = Path(r"C:\Downloads\Corn_data_3_region\corn_condition")
out_file = Path(r"C:\Downloads\Corn_data_merged_3_region\corn_condition_all_years_3region.csv")


In [19]:
# --- 2. Region lookup table ---
# States are listed once per climate region and then inverted into the flat
# {state code -> region name} dict that Series.map consumes.
_STATES_BY_REGION = {
    "Temperate Humid": ("IA", "IL", "IN", "MO", "OH", "MI", "MN", "WI", "PA"),  # 9 states
    "Semi-Arid Continental": ("ND", "SD", "NE", "KS", "CO", "TX"),              # 6 states
    "Subtropical Humid": ("KY", "TN", "NC"),                                    # 3 states
}

REGION_MAP = {
    state_code: region_name
    for region_name, state_codes in _STATES_BY_REGION.items()
    for state_code in state_codes
}

In [20]:
# --- 3. Load every CSV and tag each row with its region ---
# Walks base_dir/<sub-folder>/*.csv in sorted order, normalises the header
# names, and inserts a "region" column immediately after "state_alpha".
# The per-file frames are collected in all_dfs for the concat step.
all_dfs = []

for year_dir in sorted(base_dir.iterdir()):
    if not year_dir.is_dir():
        continue  # skip stray files sitting next to the sub-folders

    for csv_file in sorted(year_dir.glob("*.csv")):
        df = pd.read_csv(csv_file)
        # Strip and lower-case header names so the "state_alpha" lookup is
        # insensitive to case and padding in the CSV header.
        df.columns = [c.strip().lower() for c in df.columns]

        if "state_alpha" not in df.columns:
            # Best effort: keep the rows, just without a region tag.
            print(f" Thiếu cột state_alpha trong {csv_file.name}")
            all_dfs.append(df)
            continue

        # Look the region up on a cleaned copy of the codes so that padded or
        # lower-case values (" ia", "Tx") still match the upper-case keys of
        # REGION_MAP. The stored state_alpha column itself is left untouched.
        state_codes = df["state_alpha"].astype(str).str.strip().str.upper()
        region = state_codes.map(REGION_MAP)

        # Report codes that are present but unknown instead of silently
        # leaving their region as NaN.
        unmapped_mask = region.isna() & df["state_alpha"].notna()
        unmapped_codes = sorted(state_codes[unmapped_mask].unique())
        if unmapped_codes:
            print(f" Mã bang không có trong REGION_MAP ({csv_file.name}): {unmapped_codes}")

        # Drop a pre-existing "region" column first: DataFrame.insert refuses
        # duplicate names, and dropping before computing the position keeps
        # "region" directly after "state_alpha" wherever the old one was.
        if "region" in df.columns:
            df = df.drop(columns="region")
        df.insert(df.columns.get_loc("state_alpha") + 1, "region", region)

        all_dfs.append(df)

In [21]:
# --- 4. Concatenate all per-file frames ---
if not all_dfs:
    # pd.concat would fail with a generic "No objects to concatenate";
    # name the folder that turned out to be empty instead.
    raise ValueError(f"Không tìm thấy file CSV nào trong {base_dir}")
merged = pd.concat(all_dfs, ignore_index=True)

# --- 5. Write the combined file ---
# `merged` already contains every input file, so the output is always
# overwritten. Appending when the file exists would duplicate all rows
# (without a header) each time the notebook is re-run.
out_file.parent.mkdir(parents=True, exist_ok=True)
merged.to_csv(out_file, index=False, encoding="utf-8-sig")

print(" Đã gộp xong dữ liệu Corn Condition + Region!")
print(f" File đầu ra: {out_file}")
print(f" Tổng dòng: {len(merged):,} | Cột: {len(merged.columns)}")
print(" Cột:", merged.columns.tolist())
# "region" is absent when no input file had a state_alpha column.
if "region" in merged.columns:
    print(" Các vùng:", merged['region'].dropna().unique())

 Đã gộp xong dữ liệu Corn Condition + Region!
 File đầu ra: C:\Downloads\Corn_data_merged_3_region\corn_condition_all_years_3region.csv
 Tổng dòng: 8,214 | Cột: 11
 Cột: ['year', 'state_name', 'state_alpha', 'region', 'week_ending', 'week', 'excellent', 'good', 'fair', 'poor', 'very poor']
 Các vùng: ['Semi-Arid Continental' 'Temperate Humid' 'Subtropical Humid']
