# In [None]:
import numpy as np
import torch
import networkx as nx
from karateclub import Role2Vec

def build_role2vec_embeddings(train_pos_mask, pair_circ_names, pair_mir_names, emb_dim_fallback=16):
    """Fit Role2Vec on the graph formed by the positive training pairs.

    Args:
        train_pos_mask: Iterable of truthy/falsy flags, one per pair; only
            pairs whose flag is truthy contribute an edge.
        pair_circ_names: Per-pair name of one endpoint, indexed in step with
            ``train_pos_mask``.
        pair_mir_names: Per-pair name of the other endpoint, indexed in step
            with ``train_pos_mask``.
        emb_dim_fallback: Embedding width reported when there are no
            positive pairs and therefore nothing to fit.

    Returns:
        A 3-tuple ``(emb_dict, edge_index, emb_dim)``:

        * ``emb_dict`` maps each node name to its row of the fitted
          embedding matrix (empty when there are no positive pairs).
        * ``edge_index`` is a ``torch.long`` tensor of shape ``(2, E)``
          holding the integer-relabelled edges, or ``(2, 0)`` when empty.
        * ``emb_dim`` is the width of the fitted embedding matrix, or
          ``emb_dim_fallback`` when there are no positive pairs.
    """
    # Collect (mir, circ) edges for every pair flagged as positive.
    positive_edges = []
    for idx, flag in enumerate(train_pos_mask):
        if flag:
            positive_edges.append((pair_mir_names[idx], pair_circ_names[idx]))

    if not positive_edges:
        no_edges = torch.empty((2, 0), dtype=torch.long)
        return {}, no_edges, emb_dim_fallback

    graph = nx.Graph()
    graph.add_edges_from(positive_edges)

    # Nodes are relabelled to consecutive integers (in node-insertion order)
    # before being handed to Role2Vec.
    label_of = {}
    for label, node in enumerate(graph.nodes()):
        label_of[node] = label
    int_graph = nx.relabel_nodes(graph, label_of, copy=True)

    model = Role2Vec()
    model.fit(int_graph)
    embedding = model.get_embedding()
    width = embedding.shape[1]

    # Row ``label`` of the embedding matrix is looked up for each node name.
    by_name = {node: embedding[label_of[node]] for node in graph.nodes()}

    edge_rows = [[src, dst] for src, dst in int_graph.edges()]
    edge_array = np.array(edge_rows, dtype=np.int64).T
    if edge_array.size:
        edge_tensor = torch.tensor(edge_array, dtype=torch.long)
    else:
        edge_tensor = torch.empty((2, 0), dtype=torch.long)
    return by_name, edge_tensor, width

def _embedding_matrix(names, emb_dict, emb_dim):
    """Stack the embeddings of *names* into a ``(len(names), emb_dim)`` float32 array.

    Names that are absent from *emb_dict* keep an all-zero row.
    """
    matrix = np.zeros((len(names), emb_dim), dtype=np.float32)
    for row, name in enumerate(names):
        # Rows start out zeroed, so only names with an embedding need a write.
        if name in emb_dict:
            matrix[row] = emb_dict[name]
    return matrix


def build_enhanced_X_from_embeddings(circ_feats, mir_feats, circ_names, mir_names, emb_dict, emb_dim, pair_circ_names, pair_mir_names):
    """Build the per-pair feature matrix from raw features and node embeddings.

    Each output row is the concatenation, in this order, of::

        [mir features | mir embedding | circ features | circ embedding]

    Args:
        circ_feats: 2-D array-like; row ``i`` holds the features of
            ``circ_names[i]``.
        mir_feats: 2-D array-like; row ``i`` holds the features of
            ``mir_names[i]``.
        circ_names: Sequence of names, one per row of ``circ_feats``.
        mir_names: Sequence of names, one per row of ``mir_feats``.
        emb_dict: Mapping from name to an embedding vector of length
            ``emb_dim``. Names without an entry get a zero embedding.
        emb_dim: Width of every embedding vector.
        pair_circ_names: For each pair, the name looked up in ``circ_names``.
        pair_mir_names: For each pair, the name looked up in ``mir_names``.

    Returns:
        A float32 array of shape
        ``(len(pair_circ_names), circ_feats.shape[1] + mir_feats.shape[1] + 2 * emb_dim)``.

    Raises:
        KeyError: If a pair refers to a name that is not in ``circ_names`` /
            ``mir_names``.
    """
    circ_embs = _embedding_matrix(circ_names, emb_dict, emb_dim)
    mir_embs = _embedding_matrix(mir_names, emb_dict, emb_dim)

    circ_idx_map = {n: i for i, n in enumerate(circ_names)}
    mir_idx_map = {n: i for i, n in enumerate(mir_names)}

    n_cols = circ_feats.shape[1] + mir_feats.shape[1] + 2 * emb_dim
    X = np.zeros((len(pair_circ_names), n_cols), dtype=np.float32)

    # Resolve every pair to (circ row, mir row). ``zip`` stops at the shorter
    # of the two name sequences; any remaining rows of X stay zero.
    pair_rows = [
        (circ_idx_map[circ_n], mir_idx_map[mir_n])
        for circ_n, mir_n in zip(pair_circ_names, pair_mir_names)
    ]
    if not pair_rows:
        return X

    row_idx = np.asarray(pair_rows, dtype=np.intp)
    circ_rows = row_idx[:, 0]
    mir_rows = row_idx[:, 1]
    n_pairs = len(pair_rows)

    # Fill X one column block at a time using fancy indexing instead of
    # concatenating a fresh vector for every pair.
    col = 0
    for block, rows in (
        (mir_feats, mir_rows),
        (mir_embs, mir_rows),
        (circ_feats, circ_rows),
        (circ_embs, circ_rows),
    ):
        block = np.asarray(block)
        width = block.shape[1]
        X[:n_pairs, col:col + width] = block[rows]
        col += width
    return X
