# 1a Articles dynamic features


In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules
# before each cell executes, so edits to project code are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import logging
import pandas as pd

# Put the parent directory on the import path so the `src` package imported below can be resolved.
# NOTE(review): assumes the notebook is run from a directory one level below the project root — confirm.
sys.path.append("..")

from src.utils.core_utils import setup_logging

In [3]:
# Configure logging for this notebook at DEBUG level, with records also written to a log file.
# NOTE(review): `remove_existing=True` presumably clears previously attached handlers — confirm in setup_logging.
file_log = "articles_dynamic_features.log"
root_logger = setup_logging(
    log_file=file_log,
    level=logging.DEBUG,
    remove_existing=True,
)

2025-06-03 12:07:19,881 - src.utils.core_utils - INFO - Creating file handler with level: 10
2025-06-03 12:07:19,882 - src.utils.core_utils - DEBUG - Logging setup complete to articles_dynamic_features.log


In [4]:
from src.features_articles import ArticleDynamicFeaturePipeline, ArticleDynamicFeaturePipelineConfig
from src.features_articles import load_optimized_raw_data

  from .autonotebook import tqdm as notebook_tqdm


# Run pipeline


In [5]:
# Build the pipeline configuration via the config class's `create_default()` factory.
articles_dynamic_features_config = ArticleDynamicFeaturePipelineConfig.create_default()

In [6]:
# Instantiate the article dynamic-feature pipeline with the configuration created above.
articles_dynamic_feature_pipeline = ArticleDynamicFeaturePipeline(articles_dynamic_features_config)

In [7]:
# Run the pipeline's setup step (it logs the active config). `setup()` returns the
# pipeline object, so its repr is echoed as this cell's output.
articles_dynamic_feature_pipeline.setup()

2025-06-03 12:07:24,796 - src.features_articles - INFO - Setting up ArticleDynamicFeaturePipeline
2025-06-03 12:07:24,797 - src.features_articles - DEBUG - Config: {
  "config_processor": {
    "encoding_strategy": "ordinal",
    "categorical_features": [],
    "numerical_features": [
      "weekly_sales_count",
      "weekly_avg_price",
      "cumulative_mean_age",
      "cumulative_sales_count"
    ],
    "one_hot_features": [],
    "start_week_num": 52,
    "end_week_num": 104
  },
  "subsample": 0.05,
  "seed": 42
}


<src.features_articles.ArticleDynamicFeaturePipeline at 0x10d04aed0>

In [8]:
# Execute the pipeline and keep the returned result object; its `.data` attribute
# holds the generated feature table inspected in the checks section.
results_articles_dynamic = articles_dynamic_feature_pipeline.run()

2025-06-03 12:07:24,818 - src.features_articles - INFO - Loading data for ArticleDynamicFeaturePipeline
2025-06-03 12:07:24,818 - src.feature_extraction - INFO - Loading optimized raw data from transactions train 0.05 42
2025-06-03 12:07:24,818 - src.feature_extraction - DEBUG - Loading data from ../data/transactions_train_sample_0.05_42.parquet
2025-06-03 12:07:24,947 - src.feature_extraction - INFO - Loading optimized raw data from articles train 0.05 42
2025-06-03 12:07:24,948 - src.feature_extraction - DEBUG - Loading data from ../data/articles_sample_0.05_42.parquet
2025-06-03 12:07:24,988 - src.feature_extraction - INFO - Loading optimized raw data from customers train 0.05 42
2025-06-03 12:07:24,989 - src.feature_extraction - DEBUG - Loading data from ../data/customers_sample_0.05_42.parquet
2025-06-03 12:07:25,004 - src.features_articles - INFO - Processing article dynamic features
2025-06-03 12:07:25,041 - src.features_articles - INFO - Generating a cross join of articles and 

# Checks


In [9]:
from src.utils.data_checks import test_feature_article_dynamic_feature_pipeline, test_feature_article_dynamic_feature

In [10]:
# Summarise the generated feature table: dimensions, column index,
# per-column null counts, then a preview of the first rows.
dynamic_features = results_articles_dynamic.data
print(dynamic_features.shape)
print(dynamic_features.columns)
print(dynamic_features.isnull().sum())
display(dynamic_features.head())

(4101246, 6)
Index(['article_id', 'week_num', 'weekly_sales_count', 'weekly_avg_price',
       'cumulative_mean_age', 'cumulative_sales_count'],
      dtype='object')
article_id                0
week_num                  0
weekly_sales_count        0
weekly_avg_price          0
cumulative_mean_age       0
cumulative_sales_count    0
dtype: int64


Unnamed: 0,article_id,week_num,weekly_sales_count,weekly_avg_price,cumulative_mean_age,cumulative_sales_count
0,108775015,52,0.0,0.033479,32.0,0.0
1,108775015,53,0.0,0.033179,32.0,0.0
2,108775015,54,0.0,0.032718,32.0,0.0
3,108775015,55,0.0,0.032448,32.0,0.0
4,108775015,56,0.0,0.032142,32.0,0.0


In [11]:
# Load the raw tables used to cross-check the pipeline output.
# Every table is read with the same subsample fraction and seed, so they are
# collected once here instead of being repeated on each call.
raw_sample_kwargs = {"subsample": 0.05, "seed": 42}

articles = load_optimized_raw_data(data_type="articles", **raw_sample_kwargs)

# One load per transaction split, in train -> valid -> test order.
transactions_train, transactions_valid, transactions_test = (
    load_optimized_raw_data(data_type="transactions", sample=split_name, **raw_sample_kwargs)
    for split_name in ("train", "valid", "test")
)

customers = load_optimized_raw_data(data_type="customers", **raw_sample_kwargs)

# Stack the three splits into a single frame with a fresh 0..n-1 index.
transactions = pd.concat(
    [transactions_train, transactions_valid, transactions_test],
    axis=0,
    ignore_index=True,
)

2025-06-03 12:07:26,985 - src.feature_extraction - INFO - Loading optimized raw data from articles train 0.05 42
2025-06-03 12:07:26,986 - src.feature_extraction - DEBUG - Loading data from ../data/articles_sample_0.05_42.parquet
2025-06-03 12:07:27,021 - src.feature_extraction - INFO - Loading optimized raw data from transactions train 0.05 42
2025-06-03 12:07:27,022 - src.feature_extraction - DEBUG - Loading data from ../data/transactions_train_sample_0.05_42.parquet
2025-06-03 12:07:27,040 - src.feature_extraction - INFO - Loading optimized raw data from transactions valid 0.05 42
2025-06-03 12:07:27,041 - src.feature_extraction - DEBUG - Loading data from ../data/transactions_valid_sample_0.05_42.parquet
2025-06-03 12:07:27,044 - src.feature_extraction - INFO - Loading optimized raw data from transactions test 0.05 42
2025-06-03 12:07:27,044 - src.feature_extraction - DEBUG - Loading data from ../data/transactions_test_sample_0.05_42.parquet
2025-06-03 12:07:27,046 - src.feature_ex

In [12]:
# Pipeline-level check of the result object against the raw articles table and the config.
# NOTE(review): the exact properties verified live in src.utils.data_checks — not visible here.
pipeline_check_inputs = {
    "results_articles_dynamic": results_articles_dynamic,
    "articles": articles,
    "pipeline_config": articles_dynamic_features_config,
}
test_feature_article_dynamic_feature_pipeline(**pipeline_check_inputs)

Metadata checks passed


In [13]:
# Spot-check feature values for a small sample of articles: the first five
# distinct article ids, in the order they appear in the feature table.
sample_article_ids = results_articles_dynamic.data.article_id.unique()[:5]

test_feature_article_dynamic_feature(
    results_articles_dynamic=results_articles_dynamic,
    transactions=transactions,
    customers=customers,
    pipeline_config=articles_dynamic_features_config,
    article_ids=sample_article_ids,
)

Article ID 108775015
Week number 62
Numbers matched for week 62
Week number 63
Numbers matched for week 63
Week number 66
Numbers matched for week 66
Week number 73
Numbers matched for week 73
--------------------------------------------------------------------------------
Article ID 108775044
Week number 53
Numbers matched for week 53
Week number 55
Numbers matched for week 55
Week number 60
Numbers matched for week 60
Week number 68
Numbers matched for week 68
Week number 82
Numbers matched for week 82
Week number 83
Numbers matched for week 83
Week number 88
Numbers matched for week 88
Week number 91
Numbers matched for week 91
Week number 93
Numbers matched for week 93
--------------------------------------------------------------------------------
Article ID 108775051
--------------------------------------------------------------------------------
Article ID 110065001
Week number 53
Numbers matched for week 53
Week number 54
Numbers matched for week 54
Week number 55
Numbers match