# 1C Customers dynamic features


In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell execution so edits to project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import logging
import pandas as pd

# Add the parent directory to the import path.
# NOTE(review): assumes the notebook is executed from a directory one level below the project root — confirm.
sys.path.append("..")

# Presumably relies on the sys.path tweak above for the `src` package to resolve.
from src.utils.core_utils import setup_logging

# Initialize logging
# Logs at DEBUG level; the log file name is passed via `log_file`.
# NOTE(review): `remove_existing=True` presumably clears earlier handlers or an old log file — confirm in setup_logging.
file_log = "customer_dynamic_feature.log"
root_logger = setup_logging(level=logging.DEBUG, log_file=file_log, remove_existing=True)

In [None]:
from src.feature_customers import CustomerDynamicFeaturePipelineConfig, CustomerDynamicFeaturePipeline

# Run pipeline


In [None]:
# Build the pipeline configuration from the project's defaults (via `create_default()`).
# Later cells read `subsample` and `seed` from this object.
customer_dynamic_feature_config = CustomerDynamicFeaturePipelineConfig.create_default()

In [None]:
# Instantiate the customer dynamic feature pipeline with the configuration created above.
customer_dynamic_feature_pipeline = CustomerDynamicFeaturePipeline(customer_dynamic_feature_config)

In [None]:
# Prepare the pipeline before running it.
# NOTE(review): what `setup()` actually does (loading inputs, building processors, ...) is not visible here — confirm in src.feature_customers.
customer_dynamic_feature_pipeline.setup()

In [None]:
# Run the pipeline and keep its result object; the checks below read its `.data` and `.feature_names` attributes.
results_customer_dynamic_feature = customer_dynamic_feature_pipeline.run()

# Checks


In [None]:
# Quick overview of the pipeline output: schema, dimensions, first rows,
# and the list of feature names reported by the result object.
dynamic_features = results_customer_dynamic_feature.data
dynamic_features.info()
print(dynamic_features.shape)
display(dynamic_features.head())
print(results_customer_dynamic_feature.feature_names)

In [None]:
# Check that the number of rows matches number of customers * number of weeks,
# i.e. the table is expected to hold one row per (customer, week) combination.
features_data = results_customer_dynamic_feature.data
n_rows = features_data.shape[0]
n_customers = features_data.customer_id.nunique()
n_weeks = features_data.week_num.nunique()

# Raw numbers, kept for manual inspection.
print(features_data.shape)
print(n_customers)
print(features_data.week_num.value_counts())

# Make the comparison explicit instead of leaving the multiplication to the reader.
# Deliberately non-fatal: a mismatch is reported but does not abort a Run All.
expected_rows = n_customers * n_weeks
status = "OK" if n_rows == expected_rows else "MISMATCH"
print(f"{status}: {n_rows} rows vs expected {n_customers} customers * {n_weeks} weeks = {expected_rows}")

## Check that the avg embeddings make sense


In [None]:
import numpy as np

from src.feature_customers import CustomerDynamicFeatureProcessor
from src.feature_extraction import load_optimized_raw_data
from src.features_articles import ArticleEmbeddingResult, get_path_to_article_features
from src.utils.data_checks import test_feature_customer_avg_embedding, test_feature_customer_avg_embedding_pipeline

In [None]:
# Reuse the sampling settings the pipeline was configured with so the raw data
# loaded here is requested with the same subsample/seed.
subsample = customer_dynamic_feature_config.subsample
seed = customer_dynamic_feature_config.seed

# Load the three transaction splits in train -> valid -> test order.
transactions_train, transactions_valid, transactions_test = (
    load_optimized_raw_data(data_type="transactions", sample=split_name, subsample=subsample, seed=seed)
    for split_name in ("train", "valid", "test")
)

# NOTE(review): article embeddings are looked up with subsample=1 rather than `subsample`;
# presumably they are always computed on the full article set — confirm.
path_article_embeddings = get_path_to_article_features(feature_type="embedding", subsample=1, seed=seed)
results_article_embeddings = ArticleEmbeddingResult.load(path_to_dir=path_article_embeddings)

In [None]:
# Stack the three splits into a single frame with a fresh 0..n-1 index.
transactions = pd.concat(
    (transactions_train, transactions_valid, transactions_test),
    ignore_index=True,
)

# Drop the per-split names; only the combined frame is used below.
del transactions_train
del transactions_valid
del transactions_test

In [None]:
# Test metadata of pipeline
# Inputs, in order: the pipeline result object, the combined transactions frame, and the pipeline config.
# NOTE(review): presumably signals failure by raising rather than via a return value — confirm in src.utils.data_checks.
test_feature_customer_avg_embedding_pipeline(
    results_customer_dynamic_feature,
    transactions,
    customer_dynamic_feature_config,
)

In [None]:
# Spot-check the average embeddings for the first five distinct customers
# found in the pipeline output, printing a separator line after each check.
sample_customer_ids = results_customer_dynamic_feature.data.customer_id.unique()[:5]
separator = "=" * 80

for customer_id in sample_customer_ids:
    test_feature_customer_avg_embedding(
        results_customer_dynamic_feature,
        transactions,
        results_article_embeddings,
        customer_dynamic_feature_config,
        customer_id,
    )
    print(separator)