In [20]:
import pandas as pd
import torch
from torch_frame import TensorFrame, stype
from torch_frame.nn import (
    StypeWiseFeatureEncoder,
    EmbeddingEncoder,
    LinearBucketEncoder,
)
from torch_frame.data import Dataset
from torch.nn import LayerNorm

Let's start by creating initial embeddings for our top 150 players using PyTorch Frame (`torch_frame`).

In [None]:
# Read the player feature table from its CSV file into a DataFrame.
players_csv_path = "../data/player_features.csv"
players = pd.read_csv(players_csv_path)

In [21]:
# `channels` is the embedding width: every column of every player row is
# encoded into a vector of this many dimensions.
channels = 128

# The encoders below are randomly initialised, so fix the RNG seed to make the
# resulting embeddings reproducible under Restart Kernel -> Run All.
seed = 42

# 1) Declare the semantic type (stype) of each column we want to encode.
# NOTE(review): `player_id` is encoded as a numerical feature; if it is only an
# identifier it probably should not be fed to the encoder as a magnitude - confirm.
# NOTE(review): `dob` is assumed to already be numeric (e.g. a year or ordinal);
# raw date strings would need `stype.timestamp` instead - confirm.
col_to_stype = {
    "player_id": stype.numerical,
    "current_rank": stype.numerical,
    "dob": stype.numerical,
    "height": stype.numerical,
    "country_num": stype.categorical,
}

# 2) Build a Dataset and materialize it -> computes col_stats and a TensorFrame
ds = Dataset(df=players, col_to_stype=col_to_stype).materialize()
tf_players = ds.tensor_frame
col_stats = ds.col_stats
col_names_dict = tf_players.col_names_dict

# 3) Create the stype-wise encoder with the computed stats.
# Seed immediately before construction so the weight initialisation is the
# same on every run.
torch.manual_seed(seed)
stype_encoder_dict = {
    stype.categorical: EmbeddingEncoder(),
    stype.numerical: LinearBucketEncoder(post_module=LayerNorm(channels)),
}

encoder = StypeWiseFeatureEncoder(
    out_channels=channels,
    col_stats=col_stats,
    col_names_dict=col_names_dict,
    stype_encoder_dict=stype_encoder_dict,
)

# 4) Encode. The second return value holds the column names corresponding to
# the column axis of `x`.
x, _meta = encoder(tf_players)  # x: [batch, num_cols, channels]

# Sanity check: one `channels`-wide vector per (player, encoded column).
assert x.shape == (len(players), len(col_to_stype), channels), x.shape