# Experimenting with recommendation approaches

In [1]:
import duckdb
import torch
import torchrec

In [2]:
# Cutoff day (YYYY-MM-DD) interpolated into the DuckDB queries of this notebook.
CUT_TIMESTAMP = "2017-01-01"

In [3]:
# Open the Steam DuckDB file in read-only mode.
duckdb_conn = duckdb.connect(database="../data/steam.duckdb", read_only=True)

In [4]:
# Rolling game features whose review day is on or before the cutoff, fetched via `.pl()`.
games_df = duckdb_conn.sql(
    "SELECT * FROM game_rolling_features "
    f"WHERE DATE(game_review_day) <= '{CUT_TIMESTAMP}'"
).pl()

In [5]:
# Game dimension rows whose pre-release date is on or before the cutoff, fetched via `.pl()`.
dim_games_df = duckdb_conn.sql(
    "SELECT * FROM dim_games "
    f"WHERE DATE(game_prerelease_date) <= '{CUT_TIMESTAMP}'"
).pl()

In [6]:
# Review facts created on or before the cutoff day, fetched via `.pl()`.
reviews_df = duckdb_conn.sql(
    "SELECT * FROM fact_reviews "
    f"WHERE DATE(timestamp_created) <= '{CUT_TIMESTAMP}'"
).pl()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [7]:
# User dimension rows whose first review is on or before the cutoff day, fetched via `.pl()`.
users_df = duckdb_conn.sql(
    "SELECT * FROM dim_users "
    f"WHERE DATE(first_review_timestamp) <= '{CUT_TIMESTAMP}'"
).pl()

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [8]:
# Cumulative entity counts for the cutoff day itself (exact match on review_day, not <=).
entity_count = duckdb_conn.sql(
    "SELECT * FROM mart_entities_cumulative "
    f"WHERE review_day = '{CUT_TIMESTAMP}'"
).pl()

In [9]:
# Close the DuckDB connection; the query results above were already fetched with `.pl()`.
duckdb_conn.close()

In [10]:
# The cumulative-count query filters on one exact day, so it returns no rows when the mart
# has no entry for CUT_TIMESTAMP. `.first()` would then yield None and `int(None)` would
# fail with an unhelpful TypeError, so fail early with an explicit message instead.
if entity_count.is_empty():
    raise ValueError(
        f"mart_entities_cumulative has no row for review_day = '{CUT_TIMESTAMP}'"
    )

# Cumulative totals read from the first row of `entity_count`, converted to plain Python ints.
num_reviews = int(entity_count["cumulative_review_count"].first())
num_users = int(entity_count["cumulative_user_count"].first())
num_games = int(entity_count["cumulative_game_count"].first())

In [13]:
# Embedding width shared by the user and game tables.
EMBEDDING_DIM = 128

# One embedding table per entity type, sized from the cumulative counts at the cutoff day.
ec = torchrec.EmbeddingCollection(
    # torchrec documents `device` as Optional[torch.device]; pass a real device object
    # rather than the bare string "cpu" so the module holds a value of the documented type.
    device=torch.device("cpu"),
    tables=[
        torchrec.EmbeddingConfig(
            name="user_index",
            embedding_dim=EMBEDDING_DIM,
            # NOTE(review): the extra row presumably covers 1-based ids or a reserved
            # padding/unknown index — confirm against how the indices are generated.
            num_embeddings=num_users + 1,
        ),
        torchrec.EmbeddingConfig(
            name="game_index",
            embedding_dim=EMBEDDING_DIM,
            num_embeddings=num_games + 1,
        ),
    ],
)

In [14]:
# Display the module repr to sanity-check the row count and dimension of each table.
ec

EmbeddingCollection(
  (embeddings): ModuleDict(
    (user_index): Embedding(1840558, 128)
    (game_index): Embedding(9099, 128)
  )
)

In [15]:
# Toy batch: three examples, each carrying exactly one id (1, 2, 3) for both features.
# The keys match the table names configured on `ec`.
features = torchrec.KeyedJaggedTensor.from_jt_dict(
    {
        feature_name: torchrec.JaggedTensor(
            values=torch.tensor([1, 2, 3]),
            lengths=torch.tensor([1, 1, 1]),
        )
        for feature_name in ("user_index", "game_index")
    }
)

In [16]:
# Print the flat values tensor held by the KeyedJaggedTensor; the output shows the ids of
# both features concatenated into a single 1-D tensor.
print(features.values())

tensor([1, 2, 3, 1, 2, 3])


In [17]:
# Run the embedding lookup for the toy batch by calling the collection on `features`.
# NOTE(review): per the torchrec docs the result is a dict mapping each feature name to a
# JaggedTensor of embeddings — confirm before indexing into it.
embeddings = ec(features)

torch.Tensor

(torch.Size([3, 128]), torch.Size([3, 128]))