In [None]:
import os
import hopsworks
import polars as pl

In [None]:
# Read the Hopsworks API key from a local file and export it through the
# environment so the key never has to be written into the notebook itself.
# rstrip() removes trailing whitespace such as a final newline.
with open('data/hopsworks-api-key.txt', 'r') as key_file:
    os.environ["HOPSWORKS_API_KEY"] = key_file.read().rstrip()

# NOTE(review): login() is called without arguments, so it presumably picks up
# the key from HOPSWORKS_API_KEY set above — confirm against the Hopsworks docs.
project = hopsworks.login()

## Get data

In [None]:
from data_collection_preprocessing.load_data import pipeline

In [None]:
# Number of hits requested from the data-collection pipeline; 2000 is the max.
NO_OF_HITS = 2000
raw_df = pipeline(no_of_hits=NO_OF_HITS)

## Feature processing

In [None]:
from data_collection_preprocessing.embeddings import embed_text
from data_collection_preprocessing.load_data import item_condition_to_ordinal

In [None]:
# Represent designers_title as an embedding: embed_text is applied to each
# title and the result is stored as a list of Float32 values in a new column.
title_embedding_expr = (
    pl.col('designers_title')
    .map_elements(embed_text, return_dtype=pl.List(pl.Float32))
    .alias('designers_title_embedding')
)
df = raw_df.with_columns(title_embedding_expr)

In [None]:
# Convert condition to an ordinal number (UInt32) in a new column, then drop
# the old condition column in the same step.
# NOTE: this cell consumes the 'condition' column, so it cannot be re-run on
# its own output; re-run the embedding cell first to rebuild df.
condition_ordinal_expr = (
    pl.col('condition')
    .map_elements(item_condition_to_ordinal, return_dtype=pl.UInt32)
    .alias('condition_ordinal')
)
df = df.with_columns(condition_ordinal_expr).drop(['condition'])

In [None]:
# Display the processed frame to check the new designers_title_embedding and
# condition_ordinal columns before saving.
df

## Save data

In [None]:
# Handle to the feature store of the project returned by hopsworks.login().
fs = project.get_feature_store()

In [None]:
# Look up version 2 of the 'grailed_items' feature group, or define it if it
# does not exist yet. Rows are keyed by 'id' and use 'sold_at' as event time.
# TODO: attach an expectation suite for data validation. The previously
# commented-out `expectation_suite=aq_expectation_suite` argument refers to a
# name that is not defined in the visible cells of this notebook.
grailed_items_fg = fs.get_or_create_feature_group(
    name='grailed_items',
    version=2,
    description='Sold Grailed items',
    primary_key=['id'],
    event_time="sold_at",
)

In [None]:
# Write the processed frame into the feature group.
# NOTE(review): presumably re-running this cell re-inserts the same rows —
# confirm how the feature group handles repeated primary keys.
grailed_items_fg.insert(df)