# In [1]:
# ==========================================
# Import Required Libraries
# ==========================================
import os
import pandas as pd
import numpy as np


# ==========================================
# Function: clean_assortment_data
# Purpose : 
#   - Read all raw assortment CSV files in a folder
#   - Reshape each dataset from wide to long format (melt)
#   - Standardize data types
#   - Concatenate into a single DataFrame
# ==========================================
def clean_assortment_data(assortment_folder: str) -> pd.DataFrame:
    """
    Clean and reshape assortment data from multiple CSV files.

    Every CSV file located directly inside ``assortment_folder`` is read,
    melted from wide to long format (every column that is not an identifier
    column becomes one ``store_code`` / ``assortment`` row), and the results
    are concatenated into a single DataFrame with standardized dtypes.

    Files are processed in sorted file-name order so that the row order of
    the result is reproducible across runs and file systems. The ``.csv``
    extension is matched case-insensitively.

    Parameters
    ----------
    assortment_folder : str
        Path to the folder containing raw assortment CSV files.

    Returns
    -------
    pd.DataFrame
        A cleaned and concatenated DataFrame with the columns ``season``,
        ``style_color``, ``style_color_rank`` (all ``string``),
        ``order_decision``, ``store_code`` and ``assortment`` (all ``int32``).

    Raises
    ------
    ValueError
        If the folder contains no CSV files. The dtype conversion may also
        raise if a value cannot be cast to the target dtype (for example a
        missing or non-numeric value in an ``int32`` column).
    """
    # Columns that identify a row; all remaining columns are melted
    id_columns = ['season', 'style_color', 'style_color_rank', 'order_decision']

    # os.listdir() returns entries in arbitrary order, so sort the names to
    # keep the output row order deterministic.
    candidate_paths = [
        os.path.join(assortment_folder, file_name)
        for file_name in sorted(os.listdir(assortment_folder))
        if file_name.lower().endswith('.csv')
    ]

    # Keep regular files only (a sub-folder could also be named "*.csv")
    csv_paths = [path for path in candidate_paths if os.path.isfile(path)]

    # Fail early with a clear message instead of pandas' generic
    # "No objects to concatenate" error.
    if not csv_paths:
        raise ValueError(
            f"No CSV files found in assortment folder: {assortment_folder!r}"
        )

    # Load each raw file and reshape (melt) it from wide to long format
    melted_dfs = [
        pd.melt(
            frame=pd.read_csv(path),
            id_vars=id_columns,
            var_name='store_code',
            value_name='assortment',
        )
        for path in csv_paths
    ]

    # Concatenate all melted DataFrames into one
    concat_df = pd.concat(melted_dfs, ignore_index=True)

    # Standardize column data types
    return concat_df.astype({
        'season': 'string',
        'style_color': 'string',
        'style_color_rank': 'string',
        'order_decision': 'int32',
        'store_code': 'int32',
        'assortment': 'int32',
    })


# ==========================================
# Execution
# ==========================================
if __name__ == "__main__":
    # Input and output paths
    input_folder = r"D:\footwear_retail_chain_project\0. input_data\assortment\raw_data"
    output_file = r"D:\footwear_retail_chain_project\0. input_data\assortment\processed_data\assortment_master.csv"

    # Process the raw files first, so nothing is created on disk if cleaning fails
    final_df = clean_assortment_data(input_folder)

    # DataFrame.to_csv does not create missing parent folders, so make sure
    # the destination directory exists before writing.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the combined master file without the DataFrame index
    final_df.to_csv(output_file, index=False)