In [1]:
%load_ext autoreload
%autoreload 2

import IPython
from pathlib import Path
import os
# Resolve this notebook's path and switch the working directory to the notebook's
# grandparent directory (presumably the repository root — confirm), so that the
# `python -m retail_recommender_system...` call and relative data paths resolve.
#
# extract_module_locals() returns a (module, namespace) pair; only the namespace is
# needed. The result is indexed rather than unpacked into `_` so IPython's `_`
# (last output) is not clobbered, and it is no longer bound to the name `locals`,
# which shadowed the builtin for the rest of the kernel session.
caller_namespace = IPython.extract_module_locals()[1]  # type: ignore
# `__vsc_ipynb_file__` is expected to hold the notebook's file path; it appears to be
# provided by the VS Code Jupyter extension, so this lookup raises KeyError elsewhere.
notebook_name = caller_namespace["__vsc_ipynb_file__"]
os.chdir(Path(notebook_name).parent.parent)

# 1. **Process `H&M Personalized Fashion Recommendations` data**

- From the `base` data, create `intermediate` (processed) equivalents.
- Remove old data, keeping only relations from the interval `{01-01-2019}`–present. \
Explanation: This is due to computational limitations. Likewise, remove users and items that do not appear in the remaining relation set.
- Split the relations into Train/Valid sets using a `{validation_ratio}` validation ratio. Remove from the validation set any users and items that do not occur in the train set. \
Explanation: The model used (Matrix Factorization) does not predict links well for new users/items (the Cold Start Problem). To simplify training and help the model converge, these entities are filtered out.
- Add a `path` column to the items, pointing to each item's image.
- Remap user and item IDs to integer indices (`customer_id_map`, `article_id_map`). \
Explanation: This step makes it easier to retrieve the corresponding tensor representations in the model.

In [10]:
# Dataset identifier and data prefix; both are handed to the processing script
# and to the dataset loader in the cells that follow.
dataset, prefix = "hm", "sep_2020"

In [11]:
# Run the project's processing script as a module in a shell subprocess;
# {dataset} and {prefix} are interpolated from the Python variables set in the previous cell.
!python -m retail_recommender_system.scripts.process --dataset {dataset} --prefix {prefix}

In addition, using fork() with Python in general is a recipe for mysterious
deadlocks and crashes.

The most likely reason you are seeing this error is because you are using the
multiprocessing module on Linux, which uses fork() by default. This will be
fixed in Python 3.14. Until then, you want to use the "spawn" context instead.

See https://docs.pola.rs/user-guide/misc/multiprocessing/ for details.

or by setting POLARS_ALLOW_FORKING_THREAD=1.

  pid, fd = os.forkpty()


2025-01-12 16:07:54,348 - retail_recommender_system.utils - INFO - Setting seed to 0
{'relations': (shape: (532_146, 7)
┌───────────┬───────────┬──────────┬──────────┬──────────┬──────────┬──────────┐
│ t_dat     ┆ customer_ ┆ article_ ┆ price    ┆ sales_ch ┆ customer ┆ article_ │
│ ---       ┆ id        ┆ id       ┆ ---      ┆ annel_id ┆ _id_map  ┆ id_map   │
│ date      ┆ ---       ┆ ---      ┆ f64      ┆ ---      ┆ ---      ┆ ---      │
│           ┆ str       ┆ i64      ┆          ┆ i64      ┆ u32      ┆ u32      │
╞═══════════╪═══════════╪══════════╪══════════╪══════════╪══════════╪══════════╡
│ 2020-09-0 ┆ 0001f8cef ┆ 82067100 ┆ 0.016932 ┆ 1        ┆ 5        ┆ 11616    │
│ 2         ┆ 6b9702d54 ┆ 1        ┆          ┆          ┆          ┆          │
│           ┆ abf66fd89 ┆          ┆          ┆          ┆          ┆          │
│           ┆ eb2…      ┆          ┆          ┆          ┆          ┆          │
│ 2020-09-0 ┆ 0006bfcec ┆ 88235400 ┆ 0.015237 ┆ 2        ┆ 12       ┆ 

# 2. **View results**

In [12]:
from retail_recommender_system.data.loader import load_dataset, DataConfig

# Build the loader configuration from the dataset name and prefix chosen earlier,
# construct the dataset object from it, then call its `load()`.
# NOTE(review): `dataset` is rebound here from the name string to the dataset object,
# so re-running this cell (or the processing cell) without first re-running the
# config cell passes the object where the name string is expected.
data_config = DataConfig(dataset=dataset, prefix=prefix)
dataset = load_dataset(data_config)
dataset.load()

In [13]:
# Pull the individual tables out of the loaded dataset: the "relations" entry
# unpacks into a (train, valid) pair, while "users" and "items" are single entries.
train_relations, valid_relations = dataset.data["relations"]
users, items = dataset.data["users"], dataset.data["items"]

In [14]:
# Display the train split of the relations (first element of dataset.data['relations']).
train_relations

t_dat,customer_id,article_id,price,sales_channel_id,customer_id_map,article_id_map
date,str,i64,f64,i64,u32,u32
2020-09-02,"""0001f8cef6b9702d54abf66fd89eb2…",820671001,0.016932,1,5,11616
2020-09-02,"""0006bfcec82c6c132276c0a5549ae1…",882354001,0.015237,2,12,19023
2020-09-02,"""0006bfcec82c6c132276c0a5549ae1…",817354001,0.025407,2,12,11319
2020-09-02,"""0006bfcec82c6c132276c0a5549ae1…",817354001,0.025407,2,12,11319
2020-09-02,"""000ed4cabda96069b547ae531c66de…",899002003,0.010153,1,26,20957
…,…,…,…,…,…,…
2020-09-16,"""7410e5896ef0fd378bd97c74c7f31f…",873679001,0.020322,1,63379,17810
2020-09-16,"""7410e5896ef0fd378bd97c74c7f31f…",915526001,0.027102,1,63379,22478
2020-09-16,"""7419d2e6af57a6c59447c28206aaff…",835704001,0.025407,2,63402,13056
2020-09-16,"""74234ed50fac1163140de754618c20…",875350003,0.016932,2,63418,18048


In [15]:
# Display the validation split of the relations (second element of dataset.data['relations']).
valid_relations

t_dat,customer_id,article_id,price,sales_channel_id,customer_id_map,article_id_map
date,str,i64,f64,i64,u32,u32
2020-09-16,"""742db48fc7d3bfc7e7a959dffdb32f…",935357001,0.135576,2,63435,23527
2020-09-16,"""74886ae5b37e63e0316a131a3bdff7…",899122001,0.084729,1,63624,20967
2020-09-16,"""7492bfb6b3413ad38759b4b37da975…",896342002,0.033881,1,63643,20679
2020-09-16,"""74a6ae65588e4da1f2cac7e540dd7f…",589017003,0.01761,2,63680,1943
2020-09-16,"""74aad783e43f1ad36f13eae7182b71…",906794001,0.033881,2,63687,21744
…,…,…,…,…,…,…
2020-09-22,"""ffd4cf2217de4a0a3f9f610cdec334…",856440002,0.042356,2,136427,15141
2020-09-22,"""fff2282977442e327b45d8c89afde2…",929511001,0.059305,2,136487,23339
2020-09-22,"""fff2282977442e327b45d8c89afde2…",891322004,0.042356,2,136487,20096
2020-09-22,"""fff4d3a8b1f3b60af93e78c30a7cb4…",833459002,0.006763,1,136495,12839


In [16]:
# Display the users table taken from dataset.data['users'].
users

customer_id,FN,Active,club_member_status,fashion_news_frequency,age,postal_code,customer_id_map
str,f64,f64,str,str,i64,str,u32
"""00000dbacae5abe5e23885899a1fa4…",,,"""ACTIVE""","""NONE""",49,"""52043ee2162cf5aa7ee79974281641…",0
"""000058a12d5b43e67d225668fa1f8d…",,,"""ACTIVE""","""NONE""",24,"""64f17e6a330a85798e4998f62d0930…",1
"""0000757967448a6cb83efb3ea7a3fb…",,,"""ACTIVE""","""NONE""",20,"""fe7b8e2b3fafb89ca90db17ffeeae0…",2
"""000172a9c322560c849754ffbdfdb2…",,,"""ACTIVE""","""NONE""",45,"""4ca377c955c160866d5662b33aa1af…",3
"""0001d44dbe7f6c4b35200abdb052c7…",1.0,1.0,"""ACTIVE""","""Regularly""",44,"""930b19ae7db8abb5a27f4da1021775…",4
…,…,…,…,…,…,…,…
"""fffbdd2f8e59d45c0fb50a14b0ea55…",,,"""ACTIVE""","""NONE""",26,"""0406e31906c25e040240480fb578bf…",136509
"""fffd0248a95c2e49fee876ff93598e…",1.0,1.0,"""ACTIVE""","""Regularly""",20,"""eb7d84eea6e2e679ef924d79252ac6…",136510
"""fffef3b6b73545df065b521e19f64b…",1.0,1.0,"""ACTIVE""","""Regularly""",29,"""47258851e6f73dd2583ef4775814f9…",136511
"""ffffbbf78b6eaac697a8a5dfbfd2bf…",,,"""ACTIVE""","""NONE""",24,"""7aa399f7e669990daba2d92c577b52…",136512


In [17]:
# Display the items table taken from dataset.data['items'].
items

article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,perceived_colour_value_id,perceived_colour_value_name,perceived_colour_master_id,perceived_colour_master_name,department_no,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc,path,article_id_map
i64,i64,str,i64,str,str,i64,str,i64,str,i64,str,i64,str,i64,str,str,str,i64,str,i64,str,i64,str,str,str,u32
108775044,108775,"""Strap top""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",10,"""White""",3,"""Light""",9,"""White""",1676,"""Jersey Basic""","""A""","""Ladieswear""",1,"""Ladieswear""",16,"""Womens Everyday Basics""",1002,"""Jersey Basic""","""Jersey top with narrow shoulde…",""".data/hm/base/images/010/01087…",0
111565001,111565,"""20 den 1p Stockings""",304,"""Underwear Tights""","""Socks & Tights""",1010016,"""Solid""",9,"""Black""",4,"""Dark""",5,"""Black""",3608,"""Tights basic""","""B""","""Lingeries/Tights""",1,"""Ladieswear""",62,"""Womens Nightwear, Socks & Tigh""",1021,"""Socks and Tights""","""Semi shiny nylon stockings wit…",""".data/hm/base/images/011/01115…",1
111586001,111586,"""Shape Up 30 den 1p Tights""",273,"""Leggings/Tights""","""Garment Lower body""",1010016,"""Solid""",9,"""Black""",4,"""Dark""",5,"""Black""",3608,"""Tights basic""","""B""","""Lingeries/Tights""",1,"""Ladieswear""",62,"""Womens Nightwear, Socks & Tigh""",1021,"""Socks and Tights""","""Tights with built-in support t…",""".data/hm/base/images/011/01115…",2
111593001,111593,"""Support 40 den 1p Tights""",304,"""Underwear Tights""","""Socks & Tights""",1010016,"""Solid""",9,"""Black""",4,"""Dark""",5,"""Black""",3608,"""Tights basic""","""B""","""Lingeries/Tights""",1,"""Ladieswear""",62,"""Womens Nightwear, Socks & Tigh""",1021,"""Socks and Tights""","""Semi shiny tights that shape t…",""".data/hm/base/images/011/01115…",3
111609001,111609,"""200 den 1p Tights""",304,"""Underwear Tights""","""Socks & Tights""",1010016,"""Solid""",9,"""Black""",4,"""Dark""",5,"""Black""",3608,"""Tights basic""","""B""","""Lingeries/Tights""",1,"""Ladieswear""",62,"""Womens Nightwear, Socks & Tigh""",1021,"""Socks and Tights""","""Opaque matt tights. 200 denier…",""".data/hm/base/images/011/01116…",4
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
952267001,952267,"""Heavy plain overknee tights 1p""",304,"""Underwear Tights""","""Socks & Tights""",1010013,"""Other pattern""",9,"""Black""",4,"""Dark""",5,"""Black""",3608,"""Tights basic""","""B""","""Lingeries/Tights""",1,"""Ladieswear""",62,"""Womens Nightwear, Socks & Tigh""",1021,"""Socks and Tights""","""Fine-knit tights with an elast…",""".data/hm/base/images/095/09522…",23702
952938001,952938,"""Elton top""",254,"""Top""","""Garment Upper body""",1010001,"""All over pattern""",13,"""Beige""",2,"""Medium Dusty""",1,"""Mole""",1641,"""Jersey""","""A""","""Ladieswear""",1,"""Ladieswear""",18,"""Womens Trend""",1005,"""Jersey Fancy""","""Fitted top in jersey with a ro…",""".data/hm/base/images/095/09529…",23703
953450001,953450,"""5pk regular Placement1""",302,"""Socks""","""Socks & Tights""",1010014,"""Placement print""",9,"""Black""",4,"""Dark""",5,"""Black""",7188,"""Socks Bin""","""F""","""Menswear""",3,"""Menswear""",26,"""Men Underwear""",1021,"""Socks and Tights""","""Socks in a fine-knit cotton bl…",""".data/hm/base/images/095/09534…",23704
953763001,953763,"""SPORT Malaga tank""",253,"""Vest top""","""Garment Upper body""",1010016,"""Solid""",9,"""Black""",4,"""Dark""",5,"""Black""",1919,"""Jersey""","""A""","""Ladieswear""",1,"""Ladieswear""",2,"""H&M+""",1005,"""Jersey Fancy""","""Loose-fitting sports vest top …",""".data/hm/base/images/095/09537…",23705
