In [5]:
# Cell 1: Imports and Paths
from pathlib import Path
import pandas as pd
import numpy as np
import networkx as nx
from node2vec import Node2Vec

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# All inputs and outputs of this notebook live under a single data directory.
data_dir = Path("data")

# Input: the champion table.
parquet_path = data_dir.joinpath("lol_champions_data.parquet")

# Outputs: one .npy file per artifact written further down.
out_graph, out_meta, out_names = (
    data_dir.joinpath(filename)
    for filename in (
        "graph_embeddings.npy",
        "meta_embeddings.npy",
        "champion_names.npy",
    )
)

In [8]:
# Read the champion table; the order of `names` fixes the row order of every
# matrix built from it below.
df = pd.read_parquet(parquet_path)
names = df.loc[:, "name"].to_list()

In [9]:
def _related_names(cell):
    """Normalize one `related_champions` cell into a list of champion names.

    Handles the representations this column can plausibly arrive in:
      * a list / tuple / numpy array of names -> returned as a plain list;
      * a single string -> split on commas, with surrounding whitespace,
        brackets and quotes stripped from every piece;
      * None / NaN / any other scalar -> empty list.

    NOTE(review): the string delimiter (comma) is an assumption. `df.nunique()`
    later in this notebook succeeds on this column, which suggests the cells are
    hashable (e.g. strings) rather than lists. Confirm the stored format.
    """
    if isinstance(cell, str):
        # Iterating a string directly would yield single characters, none of
        # which can match a champion name, so no edge would ever be added.
        pieces = (piece.strip().strip("[]'\" ") for piece in cell.split(","))
        return [piece for piece in pieces if piece]
    if pd.api.types.is_list_like(cell):
        # Avoids `cell or []`, which raises on multi-element numpy arrays.
        return list(cell)
    return []


# Undirected champion graph: one node per champion, one edge per "related" link.
G = nx.Graph()
G.add_nodes_from(names)
name_set = set(names)  # O(1) membership test while adding edges

for src, related in zip(df["name"], df["related_champions"]):
    for nbr in _related_names(related):
        # Only link to champions present in the table, so no stray nodes appear.
        if nbr in name_set:
            G.add_edge(src, nbr)

In [10]:
# One seed for both stages (random walks and skip-gram training).
N2V_SEED = 42
# gensim documents that a fully reproducible Word2Vec run needs a single worker
# thread; parallel walk generation is likewise not guaranteed to honor the seed.
# The graph is small, so one worker costs little.
N2V_WORKERS = 1

# Stage 1: precompute transition probabilities and generate the random walks.
node2vec = Node2Vec(
    G,
    dimensions=128,
    walk_length=30,
    num_walks=100,
    workers=N2V_WORKERS,
    seed=N2V_SEED,
)

# Stage 2: train the skip-gram model on the walks. Keyword arguments are
# forwarded to gensim's Word2Vec; the seed is passed explicitly so training is
# seeded too, not only the walk generation.
n2v_model = node2vec.fit(
    window=10,
    min_count=1,
    batch_words=4,
    seed=N2V_SEED,
)

Computing transition probabilities: 100%|██████████| 170/170 [00:00<00:00, 87231.67it/s]
Generating walks (CPU: 1): 100%|██████████| 25/25 [00:00<00:00, 3673.67it/s]
Generating walks (CPU: 3): 100%|██████████| 25/25 [00:00<00:00, 3621.40it/s]
Generating walks (CPU: 2): 100%|██████████| 25/25 [00:00<00:00, 3670.46it/s]
Generating walks (CPU: 4): 100%|██████████| 25/25 [00:00<00:00, 3549.08it/s]


In [11]:
# Graph embeddings: one row per champion, in the same order as `names`.
graph_embs = np.vstack([n2v_model.wv[name] for name in names])  # (N, 128)

# Metadata embeddings: one-hot encoding of the categorical columns, with missing
# values bucketed under an explicit "Unknown" category.
# The dtype is pinned because the get_dummies default differs between pandas
# versions (uint8 before 2.0, bool after); float32 keeps the saved matrix
# numeric regardless of the installed version.
meta_df = pd.get_dummies(
    df[["region", "role", "race"]].fillna("Unknown"),
    dtype=np.float32,
)
meta_embs = meta_df.to_numpy()

In [12]:
# Persist the two embedding matrices and the champion names to .npy files.
np.save(file=out_graph, arr=graph_embs)
np.save(file=out_meta, arr=meta_embs)
np.save(file=out_names, arr=np.array(names))

In [13]:
# Summary of what was just written: matrix shapes and the names file location.
print(
    f"Graph embeddings shape   : {graph_embs.shape}",
    f"Metadata embeddings shape: {meta_embs.shape}",
    f"Champion names saved to   : {out_names}",
    sep="\n",
)

Graph embeddings shape   : (170, 128)
Metadata embeddings shape: (170, 24)
Champion names saved to   : data/champion_names.npy


# Categorical metadata: region, role, race

In [14]:
import pandas as pd
from pathlib import Path

# Load the champion table
data_dir = Path("data")
df = pd.read_parquet(data_dir / "lol_champions_data.parquet")

# Option A: unique counts for all columns
print("Unique counts for every column:")
print(df.nunique())

# Option B: just the categoricals of interest
for col in ["region", "role", "race", "related_champions"]:
    non_null = df[col].dropna()
    # Treat the column as a list-column when its first non-null cell is list-like
    # (list, tuple, numpy array, ...). Checking `empty` first avoids an
    # IndexError on a column that is entirely null.
    holds_sequences = (not non_null.empty) and pd.api.types.is_list_like(non_null.iloc[0])
    if holds_sequences:
        # Sequences are unhashable; convert each to a tuple before counting.
        uniq = non_null.apply(tuple).nunique()
    else:
        uniq = non_null.nunique()
    print(f"{col:20s} → {uniq}")

Unique counts for every column:
name                 170
region                14
role                   6
race                   2
quote                168
related_champions    164
short_bio            168
full_biography       167
full_story           142
url                  170
bio_url              170
story_url            170
dtype: int64
region               → 14
role                 → 6
race                 → 2
related_champions    → 164


In [16]:
import torch
import torch.nn as nn

In [19]:
# Width of the embedding vector for each categorical column
region_dim = 8
role_dim = 5
race_dim = 3

# Sorted vocabulary per column; missing values become the "Unknown" category
region_list, role_list, race_list = (
    sorted(df[column].fillna("Unknown").unique())
    for column in ("region", "role", "race")
)

# Category -> row index in the matching embedding table
region2idx = dict(zip(region_list, range(len(region_list))))
role2idx = dict(zip(role_list, range(len(role_list))))
race2idx = dict(zip(race_list, range(len(race_list))))

In [20]:
# Embedding tables for the three categorical columns, kept on CPU.
# nn.Embedding draws its initial weights at random, and no training step is
# visible in this notebook, so the vectors looked up below are exactly these
# initial draws. Seeding torch first makes them identical on every
# "Restart & Run All".
torch.manual_seed(42)
region_embed = nn.Embedding(len(region_list), region_dim)
role_embed   = nn.Embedding(len(role_list),   role_dim)
race_embed   = nn.Embedding(len(race_list),   race_dim)

In [21]:
# Turn each categorical column into a LongTensor of vocabulary indices,
# one entry per DataFrame row (missing values map to "Unknown").
region_idx, role_idx, race_idx = (
    torch.tensor(
        [lookup[label] for label in df[column].fillna("Unknown")],
        dtype=torch.long,
    )
    for column, lookup in (
        ("region", region2idx),
        ("role", role2idx),
        ("race", race2idx),
    )
)

In [23]:
# Look up one embedding row per champion in each table:
# shapes are (N, region_dim), (N, role_dim), (N, race_dim).
region_embs, role_embs, race_embs = (
    table(indices)
    for table, indices in (
        (region_embed, region_idx),
        (role_embed, role_idx),
        (race_embed, race_idx),
    )
)

# Join the three pieces column-wise, then detach from the autograd graph so the
# result can be converted to a NumPy array.
meta_learned = (
    torch.cat((region_embs, role_embs, race_embs), dim=1)  # (N, total_dim)
    .detach()
    .cpu()
    .numpy()
)

print(f"Learned meta embeddings shape: {meta_learned.shape}")

Learned meta embeddings shape: (170, 16)


In [24]:
# Write the concatenated metadata embedding matrix next to the other artifacts.
# Its width is region_dim + role_dim + race_dim.
np.save(file=data_dir / "meta_learned_embeddings.npy", arr=meta_learned)
print(f"Saved learned meta embeddings shape: {meta_learned.shape}")

Saved learned meta embeddings shape: (170, 16)
