In [1]:
# ==========================================
# Import Required Libraries
# ==========================================
import os
import glob
import pandas as pd


# ==========================================
# Function: concat_2025_sales_data
# Purpose :
#   - Concatenate all 2025 sales files (.xlsb)
#   - Keep only relevant columns
#   - Standardize column data types
# ==========================================
def concat_2025_sales_data(path: str) -> pd.DataFrame:
    """
    Concatenate and clean 2025 sales data from multiple files.

    Every workbook in ``path`` whose name matches ``sales_raw_2025_*.xlsb``
    is read from its ``TT`` sheet, restricted to nine columns selected by
    position, stacked in sorted file-name order, and cast to compact dtypes.

    Parameters
    ----------
    path : str
        Directory containing sales_raw_2025_*.xlsb files.

    Returns
    -------
    pd.DataFrame
        Cleaned 2025 sales dataset with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no file in ``path`` matches ``sales_raw_2025_*.xlsb``.
    """
    # Step 1: Locate the input workbooks.
    # glob returns matches in arbitrary order; sorting makes the row order of
    # the combined frame reproducible from run to run.
    pattern = os.path.join(path, "sales_raw_2025_*.xlsb")
    files = sorted(glob.glob(pattern))
    if not files:
        # pd.concat would fail with an opaque "No objects to concatenate";
        # report which pattern came up empty instead (same exception type).
        raise ValueError(f"No sales files found matching: {pattern}")

    # Step 2: Read the "TT" sheet of each workbook, keeping only the
    # relevant columns (selected by 0-based position).
    frames = [
        pd.read_excel(
            file,
            sheet_name="TT",
            usecols=[1, 7, 9, 10, 11, 12, 14, 15, 16],
        )
        for file in files
    ]

    # Step 3: Stack all files into one frame with a continuous index.
    df = pd.concat(frames, ignore_index=True)

    # Step 4: Standardize column data types.
    # NOTE(review): the integer casts raise if a column holds missing values
    # or is absent from the sheet — confirm the source files are complete.
    df = df.astype(
        {
            "MÃ KHÁCH": "string",
            "MÃ HÀNG": "string",
            "SLTT": "int16",
            "TGTT-GTT": "int32",
            "TGTT-GV": "int32",
            "TGTT-GSCK": "int32",
            "NĂM": "int16",
            "THÁNG": "int8",
            "TUẦN": "int8",
        }
    )

    return df


# ==========================================
# Execution
# ==========================================
if __name__ == "__main__":
    # The raw workbooks and the combined CSV live in the same directory.
    input_path = r"D:\footwear_retail_chain_project\0. input_data\sales\raw_data"
    output_file = os.path.join(input_path, "sales_raw_2025.csv")

    # Build the combined 2025 dataset from every matching workbook.
    sales_2025 = concat_2025_sales_data(input_path)

    # Write the result as CSV, leaving out the DataFrame index column.
    sales_2025.to_csv(path_or_buf=output_file, index=False)

    # Report where the file was written.
    print(f"✅ 2025 sales data concatenated and saved to: {output_file}")

✅ 2025 sales data concatenated and saved to: D:\footwear_retail_chain_project\0. input_data\sales\raw_data\sales_raw_2025.csv
