In [1]:
# Import libraries
import os
import pandas as pd
import numpy as np

In [2]:
def clean_assortment_data(assortment_folder: str) -> pd.DataFrame:
    """Combine every assortment CSV in a folder into one long-format table.

    Each entry of ``assortment_folder`` whose name ends with ``.csv`` is read
    and unpivoted with ``pd.melt``: ``season``, ``style_color``,
    ``style_color_rank`` and ``order_decision`` are kept as identifier
    columns, and every remaining column is turned into rows, with the column
    header stored in ``store_code`` and the cell value in ``assortment``.
    The per-file results are stacked in sorted file-name order, so the row
    order of the result does not depend on the order in which the operating
    system lists the directory.

    Parameters
    ----------
    assortment_folder : str
        Path of the directory to scan. Entries whose name does not end with
        ``.csv`` are ignored.

    Returns
    -------
    pd.DataFrame
        Columns ``season``, ``style_color``, ``style_color_rank`` (cast to
        ``str``) and ``order_decision``, ``store_code``, ``assortment``
        (cast to ``int32``), with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the folder contains no ``.csv`` file, or if a value in one of the
        integer columns cannot be converted to ``int32`` (for example a
        non-numeric store column header or a missing cell).
    """
    id_columns = ['season', 'style_color', 'style_color_rank', 'order_decision']

    # os.listdir gives no ordering guarantee; sort so the output is reproducible
    csv_file_names = sorted(
        file_name
        for file_name in os.listdir(assortment_folder)
        if file_name.endswith('.csv')
    )

    # Fail with an actionable message instead of pd.concat's generic
    # "No objects to concatenate" (same exception type as before)
    if not csv_file_names:
        raise ValueError(
            f"No .csv files found in assortment folder: {assortment_folder!r}"
        )

    # Read each file and unpivot its non-identifier columns into rows
    melted_assortment_dfs = []
    for csv_file_name in csv_file_names:
        raw_assortment_df = pd.read_csv(os.path.join(assortment_folder, csv_file_name))
        melted_assortment_dfs.append(
            pd.melt(
                frame=raw_assortment_df,
                id_vars=id_columns,
                var_name='store_code',
                value_name='assortment'
            )
        )

    # Stack all files, then standardize the data types in a single pass
    concat_assortment_df = pd.concat(melted_assortment_dfs, ignore_index=True)
    concat_assortment_df = concat_assortment_df.astype({
        'season': str,
        'style_color': str,
        'style_color_rank': str,
        'order_decision': 'int32',
        'store_code': 'int32',
        'assortment': 'int32',
    })

    return concat_assortment_df

In [3]:
# Folder that holds the raw assortment CSV files
assortment_folder = 'D:\\retail_planning_project\\0. input_data\\assortment\\raw_data'

# Destination file for the combined, long-format assortment table
processed_assortment_path = 'D:\\retail_planning_project\\0. input_data\\assortment\\processed_data\\assortment.csv'

# Clean every raw file, then write the result without the positional index
concat_assortment_df = clean_assortment_data(assortment_folder=assortment_folder)
concat_assortment_df.to_csv(path_or_buf=processed_assortment_path, index=False)