## Setup

In [None]:
import pandas as pd

In [17]:
# The frame is wide: lift the column display limit so every column is rendered
# (the notebook then shows the table as a scrollable element).
pd.options.display.max_columns = None

In [18]:
# Load the cleaned dataset from the CSV file next to this notebook
df = pd.read_csv(filepath_or_buffer="cleaned_data.csv")

In [None]:
# Sanity check that the load succeeded: display (n_rows, n_columns)
df.shape

(818375, 23)

In [None]:
# Work on an independent copy so the frame loaded into `df` stays untouched
data = df.copy(deep=True)

In [21]:
# Preview the first rows of the working copy
data.head()

Unnamed: 0.1,Unnamed: 0,DimPostingDateId,DimGenderId,DimItemId,ItemNo2,StyleID,Style,Color,Size,Gender,Category,SUB Category,SUB Category2,Quality,Origin Country,WHSSalesPriceDKK,RRSalesPriceDKK,OrderHeaderNumber,OrderLineNumber,Quantity,Amount,Discount,Cost
0,0,20220312,2,347900,F15404308_CLR000021,F15404308,Hoys pants 6528,Black,XL,WOMENSWEAR,Trousers,Trousers,Default,Woven,China,315.0,800.0,103099,20001,1.0,294.63,17.71,152.91
1,1,20220312,2,347900,F15404308_CLR000021,F15404308,Hoys pants 6528,Black,XL,WOMENSWEAR,Trousers,Trousers,Default,Woven,China,315.0,800.0,103106,20002,2.0,589.26,35.34,305.8
2,2,20220312,1,778,M00012003_CLR000508,M00012003,Kronos o-n ss 273,White mel,S,MENSWEAR,Tops,Crew neck,Short sleeve,Jersey,Turkey,90.0,250.0,103035,20002,1.0,78.12,0.0,42.41
3,3,20220312,2,348547,F19123672_CLR000021,F19123672,Majan ss shirt 9942,Black,L,WOMENSWEAR,Shirts,Shirts,Short sleeve,Woven,China,195.0,500.0,IN0001122,60002,2.0,379.45,0.0,147.84
4,4,20220312,2,348547,F19123672_CLR000021,F19123672,Majan ss shirt 9942,Black,L,WOMENSWEAR,Shirts,Shirts,Short sleeve,Woven,China,195.0,500.0,103113,40003,2.0,379.45,0.0,162.08


In [None]:
# Drop the leftover index column from the SQL export ("Unnamed: 0").
# Rebinding `data` instead of mutating in place, together with
# errors="ignore", keeps this cell safe to re-run: a second execution no
# longer raises KeyError once the column is already gone.
data = data.drop(columns=["Unnamed: 0"], errors="ignore")

In [None]:
# Convert the YYYYMMDD date key (e.g. 20220312) into proper datetimes
posting_dates = pd.to_datetime(data["DimPostingDateId"], format="%Y%m%d")
data["DimPostingDateId"] = posting_dates

## Create product info table

In [None]:
# Keep only the product-related attributes, one row per distinct combination.
# NOTE(review): rows are de-duplicated on the full attribute set, so an ItemNo2
# whose attributes vary between records would appear more than once — confirm.
product_columns = [
    'ItemNo2',
    'Color',
    'Gender',
    'Category',
    'SUB Category',
    'SUB Category2',
    'Quality',
    'Origin Country',
]
product_info = data[product_columns].drop_duplicates().reset_index(drop=True)
product_info

Unnamed: 0,ItemNo2,Color,Gender,Category,SUB Category,SUB Category2,Quality,Origin Country
0,F15404308_CLR000021,Black,WOMENSWEAR,Trousers,Trousers,Default,Woven,China
1,M00012003_CLR000508,White mel,MENSWEAR,Tops,Crew neck,Short sleeve,Jersey,Turkey
2,F19123672_CLR000021,Black,WOMENSWEAR,Shirts,Shirts,Short sleeve,Woven,China
3,F19123672_CLR000289,Clear Cream,WOMENSWEAR,Shirts,Shirts,Short sleeve,Woven,China
4,M00012307_CLR000650,Blue Iris Mel,MENSWEAR,Tops,V-neck,Short sleeve,Jersey,Turkey
...,...,...,...,...,...,...,...,...
695,M24400088_190414TCX,FOREST NIGHT,MENSWEAR,Shirts,Shirts,Default,Woven,China
696,F24200090_CLR000021,Black,WOMENSWEAR,Skirts,Maxi,Default,Woven,China
697,M24300006_CLR000589,Black Blizzard,MENSWEAR,Jeans,Jeans,Default,Denim,Turkey
698,F24400186_CLR000021,Black,WOMENSWEAR,Knitwear,Cardigan,Long sleeve,Knit,China


## Resample to weekly totals

In [None]:
# Aggregate to one row per product and week: index by posting date, then sum
# Quantity inside each weekly ('W') bin separately for every ItemNo2.
weekly_quantity = (
    data.set_index("DimPostingDateId")
    .groupby('ItemNo2')
    .resample('W')['Quantity']
    .sum()
)
# Turn the (ItemNo2, DimPostingDateId) index back into ordinary columns
data_id_time_target = weekly_quantity.reset_index()
data_id_time_target

Unnamed: 0,ItemNo2,DimPostingDateId,Quantity
0,F00001015_CLR000021,2022-03-20,66.0
1,F00001015_CLR000021,2022-03-27,92.0
2,F00001015_CLR000021,2022-04-03,90.0
3,F00001015_CLR000021,2022-04-10,70.0
4,F00001015_CLR000021,2022-04-17,78.0
...,...,...,...
56821,S00029_CLR001336,2024-08-11,0.0
56822,S00029_CLR001336,2024-08-18,4.0
56823,S00029_CLR001336,2024-08-25,2.0
56824,S00029_CLR001336,2024-09-01,1.0


## Calculate the naive-forecast MAE

In [30]:
def calc_mae_by_product_id(product_id, df, horizon=8):
    """Return the naive-forecast mean absolute error (MAE) for one product.

    The last ``horizon`` rows of the product are the target. The ``horizon``
    rows immediately before them serve as the naive forecast. Both windows are
    compared position by position on the ``Quantity`` column.

    Parameters
    ----------
    product_id
        Value matched against the ``ItemNo2`` column of ``df``.
    df : pandas.DataFrame
        Must contain ``ItemNo2`` and ``Quantity`` columns. Rows are used in
        their existing order (nothing is sorted here), so each product's rows
        are expected to already be in chronological order.
    horizon : int, default 8
        Number of rows in each of the two windows. The default reproduces the
        original fixed behaviour (last 8 rows vs. the 8 rows before them).

    Returns
    -------
    float or None
        Mean of ``|target - forecast|`` over the window, or ``None`` when the
        product has fewer than ``2 * horizon`` rows.

    Raises
    ------
    ValueError
        If ``horizon`` is smaller than 1.
    """
    # A zero horizon would make ``iloc[-0:]`` select every row, so reject it.
    if horizon < 1:
        raise ValueError("horizon must be a positive integer")

    # Select records by product_id
    product_rows = df.loc[df["ItemNo2"] == product_id]

    # Not enough history to build both windows
    if len(product_rows) < 2 * horizon:
        return None

    quantity = product_rows["Quantity"]

    # Re-index both windows from 0 so the subtraction aligns by position
    # rather than by the original row labels.
    forecast = quantity.iloc[-2 * horizon:-horizon].reset_index(drop=True)
    target = quantity.iloc[-horizon:].reset_index(drop=True)

    # Calc MAE
    return (target - forecast).abs().mean()


In [None]:
# Score every product: run the naive-MAE helper once per ItemNo2 against the
# weekly series and store the result next to the product attributes.
naive_mae_per_product = product_info["ItemNo2"].apply(
    lambda item_no: calc_mae_by_product_id(item_no, data_id_time_target)
)
product_info["naive_mae"] = naive_mae_per_product

In [32]:
# Inspect the first 20 products together with their naive_mae value
product_info.head(20)

Unnamed: 0,ItemNo2,Color,Gender,Category,SUB Category,SUB Category2,Quality,Origin Country,naive_mae
0,F15404308_CLR000021,Black,WOMENSWEAR,Trousers,Trousers,Default,Woven,China,38.625
1,M00012003_CLR000508,White mel,MENSWEAR,Tops,Crew neck,Short sleeve,Jersey,Turkey,26.75
2,F19123672_CLR000021,Black,WOMENSWEAR,Shirts,Shirts,Short sleeve,Woven,China,65.75
3,F19123672_CLR000289,Clear Cream,WOMENSWEAR,Shirts,Shirts,Short sleeve,Woven,China,66.25
4,M00012307_CLR000650,Blue Iris Mel,MENSWEAR,Tops,V-neck,Short sleeve,Jersey,Turkey,1.375
5,F15301618_CLR000289,Clear Cream,WOMENSWEAR,Tops,Tank,Sleeveless,Jersey,China,11.875
6,F00013006_CLR999254,3173 BLUE STRIPE,WOMENSWEAR,Tops,Tank,Long sleeve,Jersey,Turkey,1.5
7,M20300010_161103TCX,Pure Cashmere,MENSWEAR,Tops,Crew neck,Short sleeve,Jersey,Turkey,28.375
8,F00012030_CLR000021,Black,WOMENSWEAR,Tops,V-neck,Short sleeve,Woven,China,4.625
9,M20300010_CLR000337,Grey mel,MENSWEAR,Tops,Crew neck,Short sleeve,Jersey,Turkey,59.875
