In [None]:
!pip install "transformers>=4.47.1,<5.0.0"
!pip install "flash-attn>=2.6.3,<2.8" --no-build-isolation

In [20]:
from tqdm.auto import tqdm
from safetensors.torch import save_file, load_file
import torch
import os
import torch
from transformers import AutoModel, AutoProcessor, Qwen3VLForConditionalGeneration

# Pick the compute device once, up front: CUDA when torch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"[INFO] Using device: {DEVICE}")

[INFO] Using device: cuda


In [21]:
import pandas as pd

def get_dataset(path="/content/megaGymDataset.csv"):
    """Load the gym-exercise CSV and render every row as one text passage.

    Args:
        path: Location of the CSV file. It must contain the columns
            ``Title``, ``Desc``, ``Type``, ``BodyPart``, ``Equipment`` and
            ``Level``; any other columns are ignored.

    Returns:
        A single-column DataFrame (column ``"data"``) with one formatted
        passage per input row, sharing the index produced by ``read_csv``.
        A CSV with a header but no rows yields an empty frame.

    Raises:
        KeyError: If one or more of the required columns is absent.
    """
    required_columns = ["Title", "Desc", "Type", "BodyPart", "Equipment", "Level"]

    df = pd.read_csv(path)

    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise KeyError(f"{path} is missing required column(s): {missing}")

    # Blank out missing cells and stringify so every field can be concatenated.
    fields = df[required_columns].fillna("").astype(str)

    # Column-wise concatenation instead of a row-wise apply: same text per row,
    # and a frame with zero rows simply produces zero passages.
    passages = (
        "Exercise: " + fields["Title"] + ". \n"
        + "Description: " + fields["Desc"] + ". \n"
        + "Type: " + fields["Type"] + ". \n"
        + "Target Muscle: " + fields["BodyPart"] + ". \n"
        + "Equipment: " + fields["Equipment"] + ". \n"
        + "Level: " + fields["Level"] + "."
    )

    return passages.to_frame(name="data")

In [22]:
# Note: the pinned commit hash below is required to run the model without flash_attention_2.
EMBED_MODEL_PATH = "nvidia/llama-nemotron-embed-vl-1b-v2"
EMBED_COMMIT_HASH = "5b5ca69c35bf6ec1484d2d5ff238626e67a745e2"

# Embedding model: loaded at the pinned revision with SDPA attention in bfloat16.
# trust_remote_code=True executes the model code shipped in the Hub repository.
embed_model = AutoModel.from_pretrained(
    EMBED_MODEL_PATH,
    revision=EMBED_COMMIT_HASH,
    trust_remote_code=True,
    device_map="auto",
    dtype=torch.bfloat16,
    attn_implementation="sdpa",
)
# Switch to evaluation mode; the notebook only runs inference with this model.
embed_model = embed_model.eval()

# Processor settings used for encoding.
# NOTE(review): exact semantics of these attributes come from the model's remote code — confirm there.
embed_model.processor.use_thumbnail = True
embed_model.processor.max_input_tiles = 6
embed_model.processor.p_max_length = 8192


# Generation side: the Qwen3-VL-2B instruct checkpoint and its matching processor.
GENERATION_MODEL_ID = "Qwen/Qwen3-VL-2B-Instruct"

# The processor and the model are loaded independently from the same checkpoint id.
qwen_processor = AutoProcessor.from_pretrained(GENERATION_MODEL_ID)

qwen_model = Qwen3VLForConditionalGeneration.from_pretrained(
    GENERATION_MODEL_ID,
    device_map="auto",
    dtype="auto",
)

# Report what was loaded and where it ended up.
print(f"[INFO] Loaded model: {GENERATION_MODEL_ID}")
print(f"[INFO] Device: {qwen_model.device}")

processing_llama_nemotron_vl.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nvidia/llama-nemotron-embed-vl-1b-v2:
- processing_llama_nemotron_vl.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


[INFO] Loaded model: Qwen/Qwen3-VL-2B-Instruct
[INFO] Device: cuda:0


In [23]:
def get_text_embedding(df, batch_size=8, cache_path="/content/text_embedding.safetensors"):
    """Return document embeddings for ``df["data"]``, using an on-disk cache.

    If ``cache_path`` exists and holds a ``"text_embeddings"`` tensor with one
    row per row of ``df``, that tensor is returned. Otherwise the texts are
    embedded in batches with ``embed_model.encode_documents``, written to
    ``cache_path``, and returned. The cache check compares row counts only; it
    cannot detect a dataset whose contents changed but whose length did not.

    Args:
        df: DataFrame with a ``"data"`` column of passages to embed.
        batch_size: Number of passages sent to the embedding model per call.
        cache_path: Safetensors file used to load/store the embeddings.

    Returns:
        A tensor on ``DEVICE`` whose first dimension matches ``len(df)``.

    Raises:
        ValueError: If ``df`` has no rows or ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    num_rows = len(df)
    if num_rows == 0:
        raise ValueError("Cannot embed an empty dataset.")

    if os.path.exists(cache_path):
        cached = load_file(cache_path).get("text_embeddings")
        # Only trust the cache when it lines up with the current dataset.
        if cached is not None and cached.shape[0] == num_rows:
            return cached.to(DEVICE)
        print(f"[INFO] Ignoring stale embedding cache at {cache_path}; recomputing.")

    embedded_chunks = []
    for start in tqdm(range(0, num_rows, batch_size), desc="Embedding text"):
        texts_to_embed = df["data"].iloc[start:start + batch_size].tolist()

        with torch.inference_mode():
            embedded_chunks.append(embed_model.encode_documents(texts=texts_to_embed))

    text_embeddings = torch.cat(embedded_chunks, dim=0)
    # safetensors refuses non-contiguous tensors, so make contiguity explicit.
    save_file({"text_embeddings": text_embeddings.contiguous()}, cache_path)

    # Same device as the cached branch, so callers see a consistent result.
    return text_embeddings.to(DEVICE)


In [24]:
def _l2_normalize(x: torch.Tensor, eps: float = 1e-12) -> torch.Tensor:
    return x / (x.norm(p=2, dim=-1, keepdim=True) + eps)

In [25]:
def match_query_to_embeddings(query: str,
                              target_embeddings_to_match: torch.Tensor,
                              top_k: int = 100) -> tuple[torch.Tensor, torch.Tensor]:
    """Rank stored embeddings by cosine similarity to a text query.

    Args:
        query: Free-text search query, embedded with ``embed_model.encode_queries``.
        target_embeddings_to_match: Embeddings to search, one row per candidate.
        top_k: Maximum number of matches to return. Values larger than the
            number of candidates are clamped; values below 1 return empty tensors.

    Returns:
        ``(scores, indices)``: the cosine similarities (float32) in descending
        order and the matching row indices into ``target_embeddings_to_match``.
    """
    with torch.inference_mode():
        query_embeddings = embed_model.encode_queries([query])

    # Put both operands on the query's device and score in float32, so the
    # matmul never sees mismatched devices/dtypes and low-precision rounding
    # does not blur the ranking.
    query_unit = _l2_normalize(query_embeddings.to(dtype=torch.float32))
    target_unit = _l2_normalize(
        target_embeddings_to_match.to(device=query_embeddings.device, dtype=torch.float32)
    )

    # Cosine similarity of unit vectors is their dot product.
    cos_sim = query_unit @ target_unit.T

    # Flatten to a 1D array of scores (a single query yields shape [1, num_targets]).
    cos_sim_flat = cos_sim.flatten()

    # topk returns scores sorted in descending order together with their indices,
    # without sorting every candidate first.
    k = max(0, min(top_k, cos_sim_flat.numel()))
    sorted_scores, sorted_indices = torch.topk(cos_sim_flat, k=k)

    return sorted_scores, sorted_indices

In [26]:
def _build_workout_prompt(workout_texts: list[str]) -> str:
    """Render the given workout descriptions into the user prompt for the model."""
    workout_combined = "".join(
        f"\n \n--- Exercise {number} ---\n{workout}"
        for number, workout in enumerate(workout_texts, start=1)
    )

    return f"""You are a helpful workout trainer. Below are the {len(workout_texts)} workouts.
Please provide a brief markdown summary with:
- A short 1-2 sentence overview of each workout
- Key training method highlighted
- Estimated difficulty (Beginner/Intermediate/Advanced)
- Which workout might be best for a good workout session
For example use the following format:

```markdown
# Workout summary

## <workout_name>

[details]

## <workout_name>

[details]

## <workout_name>

[details]
```

Keep the summary concise and well-formatted in markdown. Return in ```markdown``` tags so it can be easily parsed.

<workouts>
{workout_combined}
</workouts>

## Summary:
"""


def generate_workout_summary(
    workout_texts: list[str],
    model: Qwen3VLForConditionalGeneration = qwen_model,
    processor: AutoProcessor = qwen_processor,
    max_new_tokens: int = 512,
    max_workouts: int = 3,
  ) -> str:
    """Ask the generation model for a markdown summary of the top workouts.

    Args:
        workout_texts: Workout descriptions, best match first.
        model: Generation model used to produce the summary.
        processor: Processor providing the chat template and decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        max_workouts: How many leading entries of ``workout_texts`` to include.

    Returns:
        The generated text with surrounding whitespace stripped. Generation
        samples (``do_sample=True``), so repeated calls can differ.

    Raises:
        ValueError: If there is no workout to summarise.
    """
    selected_workouts = list(workout_texts[:max_workouts])
    if not selected_workouts:
        raise ValueError("workout_texts must contain at least one workout description.")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": _build_workout_prompt(selected_workouts)}
            ]
        }
    ]

    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt"
    )
    inputs = inputs.to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )

    # generate() returns prompt + completion; drop the prompt tokens of each sequence.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]

    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]

    return output_text.strip()



In [None]:
# Pass the CSV path here if it is not at the default location,
# e.g. get_dataset('./megaGymDataset.csv')
dataset = get_dataset()
query = "beginner friendly abs workout"
text_embeddings = get_text_embedding(dataset)
result_sorted_scores, result_sorted_indices = match_query_to_embeddings(
    query=query,
    target_embeddings_to_match=text_embeddings,
    top_k=100,
)

# Number of best matches handed to the summariser.
TOP_N_WORKOUTS = 3

# Slice rather than index 0..2, so fewer than three matches cannot raise IndexError.
top_indices = result_sorted_indices[:TOP_N_WORKOUTS].tolist()
workout_texts = [dataset.iloc[idx]["data"] for idx in top_indices]

result = generate_workout_summary(workout_texts)
print(result)

```markdown
# Workout summary

## Reverse Crunch

The reverse crunch targets the lower abdominals and is a beginner-friendly strength exercise performed on the floor or bench. It's ideal for building core stability and is suitable for intermediate lifters.

Key training method: Core strength training  
Estimated difficulty: Intermediate  
Best for: A good workout session when focused on lower abdominal development

## Ab Wheel Roll-Out

This exercise uses a wheel device to target the abdominal muscles, particularly the transverse abdominis. It's a strength-focused movement that enhances core endurance and stability. 

Key training method: Core strength training  
Estimated difficulty: Intermediate  
Best for: A good workout session when targeting midsection strength

## Ab Roller

The ab roller is a versatile tool for building core strength, especially for the lower abdomen. It offers a full range of motion and is great for building endurance. 

Key training method: Core strength train