In [1]:
import pickle
import sklearn
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch
from asif import ASIF, compute_embedding

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda')

chord_tokenizer = AutoTokenizer.from_pretrained("jammai/chocolm-modernbert-base-transposed")
chord_model = AutoModel.from_pretrained("jammai/chocolm-modernbert-base-transposed")
chord_model.to(device)

text_tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
text_model = AutoModel.from_pretrained("answerdotai/ModernBERT-base")
text_model.to(device)

You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.


ModernBertModel(
  (embeddings): ModernBertEmbeddings(
    (tok_embeddings): Embedding(50368, 768, padding_idx=50283)
    (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (drop): Dropout(p=0.0, inplace=False)
  )
  (layers): ModuleList(
    (0): ModernBertEncoderLayer(
      (attn_norm): Identity()
      (attn): ModernBertAttention(
        (Wqkv): Linear(in_features=768, out_features=2304, bias=False)
        (rotary_emb): ModernBertUnpaddedRotaryEmbedding(dim=64, base=160000.0, scale_base=None)
        (Wo): Linear(in_features=768, out_features=768, bias=False)
        (out_drop): Identity()
      )
      (mlp_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): ModernBertMLP(
        (Wi): Linear(in_features=768, out_features=2304, bias=False)
        (act): GELUActivation()
        (drop): Dropout(p=0.0, inplace=False)
        (Wo): Linear(in_features=1152, out_features=768, bias=False)
      )
    )
    (1-2): 2 x ModernBertEncoderLayer(
     

In [3]:
kmeans_text = pickle.load(open("kmeans_text.pkl", "rb"))
kmeans_chords = pickle.load(open("kmeans_chords.pkl", "rb"))
chords_lyrics = pickle.load(open("chords_lyrics.pkl", "rb"))

chords = [c["chords"] for c in chords_lyrics]
lyrics = [c["lyrics"] for c in chords_lyrics]

In [4]:
def extract_candidate_sets_from_clusters(kmeans, items):
    candidates_text = {cluster_id : set() for cluster_id in range(kmeans.cluster_centers_.shape[0])}
    for idx, i in enumerate(kmeans.labels_):
        candidates_text[i].add(items[idx])
    return {cluster_id : list(candidates_text[cluster_id]) for cluster_id in range(kmeans.cluster_centers_.shape[0])}

In [5]:
text_candidates = extract_candidate_sets_from_clusters(kmeans_text, lyrics)
chords_candidates = extract_candidate_sets_from_clusters(kmeans_chords, chords)

In [6]:
text_candidates[0][0]

'Num faltá moça bonita '

In [7]:
asif = ASIF(
    text_candidates,
    chords_candidates,
    kmeans_text.cluster_centers_,
    kmeans_chords.cluster_centers_,
    "text_embeddings_ls_stacked.pkl",
    "chord_embeddings_ls_stacked.pkl",
    candidate_embeddings1_rc = "relative_coordinates_text_candidate.pkl",
    candidate_embeddings2_rc = "relative_coordinates_chords_candidate.pkl"
)

In [8]:
with torch.no_grad():
    embedding_to_classify =  compute_embedding(text_tokenizer, text_model, text_candidates[0][0])

In [9]:
to_classify = np.array([embedding_to_classify[0].numpy()])
print(to_classify.shape)

(1, 768)


In [11]:
relative = asif.compute_vs_space1(to_classify)

100%|██████████| 1/1 [00:08<00:00,  8.21s/it]


In [16]:
relative.max()

np.float64(0.035355339059327376)

In [24]:
asif.candidate_embeddings1_rc.shape

(300, 3517221)

In [27]:
sim = (1 / (1 + sklearn.metrics.pairwise_distances(relative, asif.candidate_embeddings1_rc, metric="l2")))

In [32]:
sim.argmax()

np.int64(0)