In [None]:
# !pip install --upgrade uv
# !uv pip install --all-extras --system --requirement pyproject.toml

In [None]:
import sys, warnings
from pathlib import Path
# Put the parent of the current working directory on the import path.
# NOTE(review): presumably that parent is the repository root holding the
# `recsys` package imported further down — confirm against the project layout.
root_dir = str(Path.cwd().parent)
if sys.path.count(root_dir) == 0:
    print(f"Adding the following directory to the PYTHONPATH: {root_dir}")
    sys.path.append(root_dir)

In [10]:
import torch
import polars as pl
from pprint import pprint
from loguru import logger
from recsys.config import settings
from sentence_transformers import SentenceTransformer
from recsys.features.articles import (
                                    compute_features_articles,
                                    generate_embeddings_for_dataframe,
                                    )
from recsys.features.customers import DatasetSampler, compute_features_customers
from recsys.features.transactions import compute_features_transactions
from recsys.features.interaction import generate_interaction_data
from recsys.raw_data_sources import h_and_m as h_and_m_raw_data
from recsys.features.ranking import compute_ranking_dataset
from recsys.hopsworks_integration import feature_store
warnings.filterwarnings("ignore")

### Parameters

In [None]:
# Pretty-print the active `settings` object as a plain dict.
# NOTE(review): this dumps every setting; if any of them hold raw credentials
# they would be written into the saved notebook output — confirm that secrets
# are masked by the settings class before committing executed outputs.
pprint(dict(settings))

In [None]:
# Display whatever DatasetSampler.get_supported_sizes() returns, as a
# reference for the sampling options available in this project.
DatasetSampler.get_supported_sizes()

### Connecting to Hopsworks

In [None]:
# Ask the project's Hopsworks integration helper for its two handles:
# `project` and `fs` (the feature store), both used/displayed in later cells.
project, fs = feature_store.get_feature_store()

In [None]:
# Show the repr of the `project` handle returned by get_feature_store().
project

In [None]:
# Show the repr of the `fs` handle returned by get_feature_store().
fs

### Analyze H&M Data

In [None]:
# Load the articles table through the H&M raw-data helper and report its
# (rows, columns) shape as the cell output.
articles_df = h_and_m_raw_data.extract_articles_df()
articles_df.shape

In [None]:
# Preview the first three rows of the freshly extracted articles frame.
articles_df.head(3)

In [None]:
# Inspect missing data before feature engineering.
# Assumes `articles_df` is a Polars DataFrame, where null_count() reports the
# number of nulls per column — TODO confirm the frame type.
articles_df.null_count()

### Feature Engineering

In [None]:
# Run the article feature computation and show the resulting shape.
# NOTE(review): `articles_df` is rebound to the transformed frame, so re-running
# this cell passes already-processed data back into compute_features_articles —
# confirm that is safe, or re-run the extraction cell first.
articles_df = compute_features_articles(articles_df)
articles_df.shape

In [None]:
# Preview the first three rows after feature computation.
articles_df.head(3)

In [None]:
# Log the first three values of the "article_description" column, numbered
# from 1, each description on its own line below the item label.
for position, description in enumerate(articles_df["article_description"].head(n=3), start=1):
    logger.info(f"Item {position}:\n{description}")

In [None]:
# Pick the compute device in order of preference: CUDA first, then Apple's
# MPS backend, and finally the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
# Announce which model id is about to be loaded and onto which device,
# before the load starts.
logger.info(f"Loading '{settings.FEATURES_EMBEDDING_MODEL_ID}' embedding model to {device=}")

# Instantiate the SentenceTransformer identified by the settings on the
# selected device.
embed_model = SentenceTransformer(
    model_name_or_path=settings.FEATURES_EMBEDDING_MODEL_ID,
    device=device,
)

In [None]:
# Run generate_embeddings_for_dataframe over the "article_description" column
# with the loaded model, rebinding `articles_df` to the result, then preview
# the description next to the "embeddings" column.
articles_df = generate_embeddings_for_dataframe(
    articles_df,
    "article_description",
    embed_model,
    batch_size=128,  # Reduce batch size if getting OOM errors.
)
articles_df[["article_description", "embeddings"]].head(3)

In [None]:
# Spot-check the first value of the "image_url" column.
articles_df["image_url"][0]