In [1]:
# import csv

# # Load the CSV file and format the data
# def process_exercise_csv(csv_path: str) -> list[dict]:
#     """
#     Processes the CSV file to create a structured format similar to the exercise format.

#     Parameters:
#         csv_path (str): The file path to the CSV file.

#     Returns:
#         list[dict]: A list of dictionaries with structured data for each exercise.
#     """
#     exercises = []

#     with open(csv_path, 'r') as file:
#         reader = csv.DictReader(file)

#         # Process each row and format the content
#         for row in reader:
#             exercise_data = {
#                 "exercise": {
#                     "name": row.get("Exercise Name", "N/A"),
#                     "equipment": row.get("Equipment", "N/A"),
#                     "type": row.get("Utility", "N/A"),
#                     "movement_type": row.get("Mechanics", "N/A"),
#                     "force_type": row.get("Force", "N/A"),
#                     "difficulty": int(row.get("Difficulty (1-5)", 0))
#                 },
#                 "execution": {
#                     "setup": row.get("Preparation", "N/A"),
#                     "instructions": row.get("Execution", "N/A")
#                 },
#                 "muscles": {
#                     "main_muscle": row.get("Main_muscle", "N/A"),
#                     "target_muscles": row.get("Target_Muscles", "").split(", "),
#                     "stabilizer_muscles": row.get("Stabilizer_Muscles", "").split(", ")
#                 },
#                 "workout_parameters": {
#                     "sets": int(row.get("No_of_Sets", 0)),
#                     "reps": int(row.get("No_of_Reps", 0)),
#                     "bmi_for_exercise": float(row.get("BMI", 0))
#                 },
#                 "goals": {
#                     "target_audience": "Muscle Gain",  # Assuming this is a constant, can be customized
#                     "difficulty_level": "Intermediate" if int(row.get("Difficulty (1-5)", 0)) > 2 else "Beginner",
#                     "body_type": "Ectomorph"  # Example constant, can be adjusted based on context
#                 }
#             }

#             # Append each exercise's structured data
#             exercises.append(exercise_data)

#     return exercises

# # Example usage:
# csv_path = "updated_gym_exercise_dataset_men_musclegain.csv"  # Replace with your actual CSV file path
# structured_exercises = process_exercise_csv(csv_path)

# # Display the first two exercises' structured content as an example
# for exercise in structured_exercises[:2]:
#     print(exercise)
import csv

# Load the CSV file and format the data
def process_exercise_csv(csv_path: str) -> list[dict]:
    """
    Processes the CSV file to create a structured format similar to the exercise format,
    including gender and type_of_plan (Muscle Gain, Weight Gain, Weight Loss).

    Parameters:
        csv_path (str): The file path to the CSV file.

    Returns:
        list[dict]: A list of dictionaries with structured data for each exercise.
    """
    exercises = []

    with open(csv_path, 'r') as file:
        reader = csv.DictReader(file)

        # Process each row and format the content
        for row in reader:
            exercise_data = {
                "exercise": {
                    "name": row.get("Exercise Name", "N/A"),
                    "equipment": row.get("Equipment", "N/A"),
                    "type": row.get("Utility", "N/A"),
                    "movement_type": row.get("Mechanics", "N/A"),
                    "force_type": row.get("Force", "N/A"),
                    "difficulty": int(row.get("Difficulty (1-5)", 0))
                },
                "execution": {
                    "setup": row.get("Preparation", "N/A"),
                    "instructions": row.get("Execution", "N/A")
                },
                "muscles": {
                    "main_muscle": row.get("Main_muscle", "N/A"),
                    "target_muscles": row.get("Target_Muscles", "").split(", "),
                    "stabilizer_muscles": row.get("Stabilizer_Muscles", "").split(", ")
                },
                "workout_parameters": {
                    "sets": int(row.get("No_of_Sets", 0)),
                    "reps": int(row.get("No_of_Reps", 0)),
                    "bmi_for_exercise": float(row.get("BMI", 0))
                },
                "goals": {
                    "target_audience": row.get("type of plan", "N/A"),  # Muscle Gain, Weight Gain, Weight Loss
                    "difficulty_level": "Intermediate" if int(row.get("Difficulty (1-5)", 0)) > 2 else "Beginner",
                    # Example constant, can be adjusted based on context
                },
                "additional_info": {
                    "gender": row.get("gender", "N/A"),
                    "type_of_plan": row.get("type of plan", "N/A")
                }
            }

            # Append each exercise's structured data
            exercises.append(exercise_data)

    return exercises

# Example usage:
csv_path = "GYM_File.csv"  # Replace with your actual CSV file path
structured_exercises = process_exercise_csv(csv_path)

# Display the first two exercises' structured content as an example
for exercise in structured_exercises[:2]:
    print(exercise)



{'exercise': {'name': 'Neck Flexion', 'equipment': 'Cable', 'type': 'Basic or Auxiliary', 'movement_type': 'Isolated', 'force_type': 'Pull', 'difficulty': 2}, 'execution': {'setup': 'Sit on bench facing away from middle pulley. Place neck in harness cable attachment. Place arms on lower thighs for support.', 'instructions': 'Move head away from pulley by bending neck forward until chin touches upper chest. Return head by hyperextending neck and repeat.'}, 'muscles': {'main_muscle': 'Neck', 'target_muscles': ['Sternocleidomastoid,'], 'stabilizer_muscles': ['Rectus Abdominis', 'Obliques', '']}, 'workout_parameters': {'sets': 4, 'reps': 15, 'bmi_for_exercise': 20.28312872}, 'goals': {'target_audience': 'muscle gain', 'difficulty_level': 'Beginner'}, 'additional_info': {'gender': 'female', 'type_of_plan': 'muscle gain'}}
{'exercise': {'name': 'Neck Flexion', 'equipment': 'Lever (plate loaded)', 'type': 'Basic or Auxiliary', 'movement_type': 'Isolated', 'force_type': 'Pull', 'difficulty': 2

In [2]:
from tqdm.auto import tqdm # for progress bars, requires !pip install tqdm 
import json



# Open PDF and get lines/pages
# Note: this only focuses on text, rather than images/figures etc
def open_and_read_pdf(list: str) -> list[dict]:
    """
    Opens a PDF file, reads its text content page by page, and collects statistics.

    Parameters:
        pdf_path (str): The file path to the PDF document to be opened and read.

    Returns:
        list[dict]: A list of dictionaries, each containing the page number
        (adjusted), character count, word count, sentence count, token count, and the extracted text
        for each page.
    """
    json_text = json.dumps(list)
    page_char_count = len(json_text)

    # Calculate page token count
    page_token_count = page_char_count / 4 
    pages_and_texts = []
    for page_number, page in tqdm(enumerate(list)):  # iterate the document pages
        text = page # get plain text encoded as UTF-8
        pages_and_texts.append({"page_number": page_number ,  # adjust page numbers since our PDF starts on page 42
                                "page_char_count": len(text),
                                "page_token_count": len(text) / 4,  # 1 token = ~4 chars, see: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
                                "text": text})
    return pages_and_texts

pages_and_texts = open_and_read_pdf(structured_exercises)


0it [00:00, ?it/s]

In [3]:
import pandas as pd
df = pd.DataFrame(pages_and_texts)
df.head()

Unnamed: 0,page_number,page_char_count,page_token_count,text
0,0,6,1.5,"{'exercise': {'name': 'Neck Flexion', 'equipme..."
1,1,6,1.5,"{'exercise': {'name': 'Neck Flexion', 'equipme..."
2,2,6,1.5,"{'exercise': {'name': 'Lateral Neck Flexion', ..."
3,3,6,1.5,"{'exercise': {'name': 'Neck Flexion', 'equipme..."
4,4,6,1.5,"{'exercise': {'name': 'Lateral Neck Flexion', ..."


In [4]:
%pip install sentence-transformers



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [17]:
from sentence_transformers import SentenceTransformer
device = "cuda" if torch.cuda.is_available() else "cpu"
model=SentenceTransformer(model_name_or_path="all-mpnet-base-v2", 
                                      device=device)
sentences=[
"    Hello world!"
]
embeddings=model.encode(sentences)
embeddings_dist=dict(zip(sentences,embeddings))
for sentence, embedding in embeddings_dist.items():
  print(sentence)
  print(embedding)


    Hello world!
[ 1.91737153e-02  2.87365373e-02 -1.23540815e-02  1.58220939e-02
  7.90899619e-02 -9.76034440e-03  7.49561517e-03  5.52258082e-02
  1.88754424e-02 -2.63798703e-02 -2.68068500e-02 -3.33473235e-02
 -3.00286710e-02  3.89383473e-02  7.69484565e-02 -7.68074542e-02
  6.97796494e-02 -6.93720859e-03 -4.48980965e-02  1.21879680e-02
  1.03991563e-02  8.78110062e-03  1.08342348e-02  5.92033267e-02
  1.70315858e-02 -2.56328769e-02  9.10411682e-03  3.73560283e-03
  3.32366712e-02  5.40160621e-03 -3.08646597e-02  5.92166372e-03
  5.06224595e-02  6.56094775e-02  2.12388136e-06 -5.11630215e-02
  2.44774185e-02 -7.60708330e-03 -7.54034705e-03  3.69985867e-03
 -2.33552675e-03  2.81633567e-02 -1.65304970e-02  9.42942686e-03
  6.09955145e-03 -4.32090759e-02 -5.53827675e-04 -7.96658080e-03
  1.98367257e-02  2.02401243e-02 -4.93253069e-03  1.22366399e-02
 -6.17669933e-02 -2.02074158e-03  4.50151749e-02  3.01424600e-02
  3.64244357e-02  1.64621267e-02  1.17515689e-02 -4.62706275e-02
  5.1322

In [18]:
embeddings_text=pages_and_texts

In [19]:
for item in embeddings_text:
  text_to_embed = json.dumps(item['text'])
  item["embedding"]=model.encode(text_to_embed,convert_to_tensor=True)

In [8]:
# import numpy as np
# import pandas as pd

# # Assuming 'text_chunk_embeddings' is a tensor
# embeddings_text_np = embeddings_text.numpy()  # Convert PyTorch tensor to NumPy array

# # Save embeddings to CSV
# np.savetxt('text_chunk_embeddings.csv', embeddings_text_np, delimiter=',')


In [20]:
embeddings_text[0]

{'page_number': 0,
 'page_char_count': 6,
 'page_token_count': 1.5,
 'text': {'exercise': {'name': 'Neck Flexion',
   'equipment': 'Cable',
   'type': 'Basic or Auxiliary',
   'movement_type': 'Isolated',
   'force_type': 'Pull',
   'difficulty': 2},
  'execution': {'setup': 'Sit on bench facing away from middle pulley. Place neck in harness cable attachment. Place arms on lower thighs for support.',
   'instructions': 'Move head away from pulley by bending neck forward until chin touches upper chest. Return head by hyperextending neck and repeat.'},
  'muscles': {'main_muscle': 'Neck',
   'target_muscles': ['Sternocleidomastoid,'],
   'stabilizer_muscles': ['Rectus Abdominis', 'Obliques', '']},
  'workout_parameters': {'sets': 4,
   'reps': 15,
   'bmi_for_exercise': 20.28312872},
  'goals': {'target_audience': 'muscle gain', 'difficulty_level': 'Beginner'},
  'additional_info': {'gender': 'female', 'type_of_plan': 'muscle gain'}},
 'embedding': tensor([ 3.0419e-02, -6.3200e-03, -1.01

In [21]:
import pandas as pd
embeddings_pd=pd.DataFrame(embeddings_text)
embedding_path="embeddings_text.csv"
embeddings_pd.to_csv(embedding_path,index=False)

In [22]:
import pandas as pd
df=pd.read_csv("embeddings_text.csv")
df.head()

Unnamed: 0,page_number,page_char_count,page_token_count,text,embedding
0,0,6,1.5,"{'exercise': {'name': 'Neck Flexion', 'equipme...","tensor([ 3.0419e-02, -6.3200e-03, -1.0122e-02,..."
1,1,6,1.5,"{'exercise': {'name': 'Neck Flexion', 'equipme...","tensor([ 3.7481e-02, -2.9378e-02, -1.4067e-02,..."
2,2,6,1.5,"{'exercise': {'name': 'Lateral Neck Flexion', ...","tensor([ 3.1795e-02, -2.9549e-02, -1.4883e-02,..."
3,3,6,1.5,"{'exercise': {'name': 'Neck Flexion', 'equipme...","tensor([ 3.6791e-02, -2.9646e-02, -1.4654e-02,..."
4,4,6,1.5,"{'exercise': {'name': 'Lateral Neck Flexion', ...","tensor([ 3.2426e-02, -3.0278e-02, -1.5896e-02,..."


In [51]:

import random

import torch
import numpy as np 
import pandas as pd


# Import texts and embedding df
embedding_text = pd.read_csv("embeddings_text.csv")

# Convert embedding column back to np.array (it got converted to string when it got saved to CSV)
def tensor_to_list(tensor_str):
    # Remove 'tensor(' and ')' from the string
    tensor_str = tensor_str.strip('tensor([])')
    # Convert the remaining string to a list of floats
    return np.fromstring(tensor_str, sep=', ').tolist()

df=pd.DataFrame(embedding_text)
df.head()
# # Apply the conversion
device
embedding_text["embedding"] = embedding_text["embedding"].apply(tensor_to_list)
# Convert texts and embedding df to list of dicts
pages_and_chunks = embedding_text.to_dict(orient="records")

# Convert embeddings to torch tensor and send to device (note: NumPy arrays are float64, torch tensors are float32 by default)
embeddings = torch.tensor(np.array(embedding_text["embedding"].tolist()), dtype=torch.float32).to(device)
embeddings.shape

  return np.fromstring(tensor_str, sep=', ').tolist()


torch.Size([3702, 768])

In [52]:
embeddings

tensor([[ 0.0304, -0.0063, -0.0101,  ...,  0.0268, -0.0872, -0.0558],
        [ 0.0375, -0.0294, -0.0141,  ...,  0.0282, -0.0848, -0.0600],
        [ 0.0318, -0.0295, -0.0149,  ...,  0.0302, -0.0839, -0.0586],
        ...,
        [ 0.0208, -0.0055, -0.0155,  ...,  0.0461, -0.0455, -0.0393],
        [ 0.0251, -0.0095, -0.0186,  ...,  0.0412, -0.0404, -0.0372],
        [ 0.0224, -0.0111, -0.0199,  ...,  0.0429, -0.0370, -0.0389]],
       device='cuda:0')

In [53]:
from sentence_transformers import util,SentenceTransformer
model=SentenceTransformer(model_name_or_path="all-mpnet-base-v2", 
                                      device=device)

In [54]:
# Move both tensors to the same device (GPU in this case)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Ensure the query embedding is on the correct device
query_embedding = query_embedding.to(device)

# Ensure the embeddings are on the correct device
embeddings = embeddings.to(device)

# Now compute the dot products
dot_products = util.dot_score(a=query_embedding, b=embeddings)[0]

# Get the top k products
top_products = torch.topk(dot_products, k=10)


In [None]:
print(type(embeddings))
print(embeddings[:5])  # Print the first 5 items to inspect
embeddings.shape

<class 'torch.Tensor'>
tensor([[ 0.0304, -0.0063, -0.0101,  ...,  0.0268, -0.0872, -0.0558],
        [ 0.0375, -0.0294, -0.0141,  ...,  0.0282, -0.0848, -0.0600],
        [ 0.0318, -0.0295, -0.0149,  ...,  0.0302, -0.0839, -0.0586],
        [ 0.0368, -0.0296, -0.0147,  ...,  0.0293, -0.0847, -0.0602],
        [ 0.0324, -0.0303, -0.0159,  ...,  0.0319, -0.0831, -0.0591]],
       device='cuda:0')


torch.Size([3702, 768])

In [55]:
# Define helper function to print wrapped text 
import textwrap

def print_wrapped(text, wrap_length=80):
    wrapped_text = textwrap.fill(text, wrap_length)
    print(wrapped_text)

In [56]:
print(f"Query: '{query}'\n")
print("Results:")
# Loop through zipped together scores and indicies from torch.topk
for score, idx in zip(top_products[0], top_products[1]):
    print(f"Score: {score:.4f}")
    # Print relevant sentence chunk (since the scores are in descending order, the most relevant chunk will be first)
    print("Text:")
    print_wrapped(pages_and_chunks[idx]["text"])
    # Print the page number too so we can reference the textbook further (and check the results)
    print(f"Page number: {pages_and_chunks[idx]['page_number']}")
    print("\n")

Query: 'neck exercises'

Results:
Score: 0.6202
Text:
{'exercise': {'name': 'Wall Rear Neck Bridge', 'equipment': 'Body Weight',
'type': 'Basic or Auxiliary', 'movement_type': 'Isolated', 'force_type': 'Pull',
'difficulty': 3}, 'execution': {'setup': 'Hold small or folded cushioned mat
behind head. Facing away from wall or column position head and back of shoulders
low on wall or column with mat between. Stand with feet far away from wall or
column so body is angled back. Position hips and back straight and bend knees
just slightly. Place arms to side or hold hands on abdomen.', 'instructions':
'Push head back into mat and roll head upward. Arch spine and straighten knees.
Hyperextend neck so head is facing up. Return to original position by rolling
head down while allowing low back to straighten and knees to bend slightly.
Continue down until back of head and shoulders make contact with mat. Repeat.'},
'muscles': {'main_muscle': 'Neck', 'target_muscles': ['Splenius,'],
'stabilizer_mus

In [57]:
def retrieve_relevant_resources(query: str,
                                embeddings: torch.tensor,
                                model: SentenceTransformer=model,
                                n_resources_to_return: int=5,
                                print_time: bool=True):
    """
    Embeds a query with model and returns top k scores and indices from embeddings.
    """

    # Embed the query
    query_embedding = model.encode(query, 
                                   convert_to_tensor=True) 

    # Get dot product scores on embeddings
    dot_scores = util.dot_score(query_embedding, embeddings)[0]

    
    scores, indices = torch.topk(input=dot_scores, 
                                 k=n_resources_to_return)

    return scores, indices

def print_top_results_and_scores(query: str,
                                 embeddings: torch.tensor,
                                 pages_and_chunks: list[dict]=pages_and_chunks,
                                 n_resources_to_return: int=30):
    """
    Takes a query, retrieves most relevant resources and prints them out in descending order.

    Note: Requires pages_and_chunks to be formatted in a specific way (see above for reference).
    """
    
    scores, indices = retrieve_relevant_resources(query=query,
                                                  embeddings=embeddings,
                                                  n_resources_to_return=n_resources_to_return)
    
    print(f"Query: {query}\n")
    print("Results:")
    # Loop through zipped together scores and indicies
    for score, index in zip(scores, indices):
        print(f"Score: {score:.4f}")
        # Print relevant sentence chunk (since the scores are in descending order, the most relevant chunk will be first)
        print_wrapped(pages_and_chunks[index]["text"])
        # Print the page number too so we can reference the textbook further and check the results
        print(f"Page number: {pages_and_chunks[index]['page_number']}")
        print("\n")

In [None]:
query = "chest exercise"

# Get just the scores and indices of top related results
scores, indices = retrieve_relevant_resources(query=query,
                                              embeddings=embeddings,n_resources_to_return=30)
scores, indices

(tensor([0.6047, 0.6037, 0.6032, 0.6025, 0.5996, 0.5990, 0.5781, 0.5778, 0.5763,
         0.5730, 0.5725, 0.5708, 0.5705, 0.5692, 0.5685, 0.5681, 0.5681, 0.5679,
         0.5673, 0.5660, 0.5659, 0.5659, 0.5654, 0.5652, 0.5649, 0.5644, 0.5643,
         0.5633, 0.5631, 0.5626], device='cuda:0'),
 tensor([ 923, 1540, 3391, 2157,  306, 2774, 1553,  936,  319, 3404, 2787, 2170,
          933,  899,  298,  909,  929, 3383, 1550,  892, 1546, 1516, 1509, 1526,
         1508,  274,  312, 3397,  891,  900], device='cuda:0'))

In [58]:
print_top_results_and_scores(query=query,
                             embeddings=embeddings)

Query: neck exercises

Results:
Score: 0.6202
{'exercise': {'name': 'Wall Rear Neck Bridge', 'equipment': 'Body Weight',
'type': 'Basic or Auxiliary', 'movement_type': 'Isolated', 'force_type': 'Pull',
'difficulty': 3}, 'execution': {'setup': 'Hold small or folded cushioned mat
behind head. Facing away from wall or column position head and back of shoulders
low on wall or column with mat between. Stand with feet far away from wall or
column so body is angled back. Position hips and back straight and bend knees
just slightly. Place arms to side or hold hands on abdomen.', 'instructions':
'Push head back into mat and roll head upward. Arch spine and straighten knees.
Hyperextend neck so head is facing up. Return to original position by rolling
head down while allowing low back to straighten and knees to bend slightly.
Continue down until back of head and shoulders make contact with mat. Repeat.'},
'muscles': {'main_muscle': 'Neck', 'target_muscles': ['Splenius,'],
'stabilizer_muscles': [

In [59]:
# Get GPU available memory
import torch
gpu_memory_bytes = torch.cuda.get_device_properties(0).total_memory
gpu_memory_gb = round(gpu_memory_bytes / (2**30))
print(f"Available GPU memory: {gpu_memory_gb} GB")

Available GPU memory: 15 GB


In [60]:
# Note: the following is Gemma focused, however, there are more and more LLMs of the 2B and 7B size appearing for local use.
if gpu_memory_gb < 5.1:
    print(f"Your available GPU memory is {gpu_memory_gb}GB, you may not have enough memory to run a Gemma LLM locally without quantization.")
elif gpu_memory_gb < 8.1:
    print(f"GPU memory: {gpu_memory_gb} | Recommended model: Gemma 2B in 4-bit precision.")
    use_quantization_config = True 
    model_id = "google/gemma-2b-it"
elif gpu_memory_gb < 19.0:
    print(f"GPU memory: {gpu_memory_gb} | Recommended model: Gemma 2B in float16 or Gemma 7B in 4-bit precision.")
    use_quantization_config = False 
    model_id = "google/gemma-2b-it"
elif gpu_memory_gb > 19.0:
    print(f"GPU memory: {gpu_memory_gb} | Recommend model: Gemma 7B in 4-bit or float16 precision.")
    use_quantization_config = False 
    model_id = "google/gemma-7b-it"

print(f"use_quantization_config set to: {use_quantization_config}")
print(f"model_id set to: {model_id}")

GPU memory: 15 | Recommended model: Gemma 2B in float16 or Gemma 7B in 4-bit precision.
use_quantization_config set to: False
model_id set to: google/gemma-2b-it


In [61]:
%pip install bitsandbytes accelerate
%pip install flash-attn

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [62]:
# os.environ['HF_TOKEN'] = "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"

In [63]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.utils import is_flash_attn_2_available 

# 1. Create quantization config for smaller model loading (optional)
# Requires !pip install bitsandbytes accelerate, see: https://github.com/TimDettmers/bitsandbytes, https://huggingface.co/docs/accelerate/
# For models that require 4-bit quantization (use this if you have low GPU memory available)
from transformers import BitsAndBytesConfig
quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                         bnb_4bit_compute_dtype=torch.float16)

# Bonus: Setup Flash Attention 2 for faster inference, default to "sdpa" or "scaled dot product attention" if it's not available
# Flash Attention 2 requires NVIDIA GPU compute capability of 8.0 or above, see: https://developer.nvidia.com/cuda-gpus
# Requires !pip install flash-attn, see: https://github.com/Dao-AILab/flash-attention 
if (is_flash_attn_2_available()) and (torch.cuda.get_device_capability(0)[0] >= 8):
  attn_implementation = "flash_attention_2"
else:
  attn_implementation = "sdpa"
print(f"[INFO] Using attention implementation: {attn_implementation}")

# 2. Pick a model we'd like to use (this will depend on how much GPU memory you have available)
#model_id = "google/gemma-7b-it"
model_id = model_id # (we already set this above)
print(f"[INFO] Using model_id: {model_id}")

# 3. Instantiate tokenizer (tokenizer turns text into numbers ready for the model) 
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)

# 4. Instantiate the model
llm_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_id, 
                                                 torch_dtype=torch.float16, # datatype to use, we want float16
                                                 quantization_config=quantization_config if use_quantization_config else None,
                                                 low_cpu_mem_usage=False, # use full memory 
                                                 attn_implementation=attn_implementation) # which attention version to use

if not use_quantization_config: # quantization takes care of device setting automatically, so if it's not used, send model to GPU 
    llm_model.to("cuda")

[INFO] Using attention implementation: sdpa
[INFO] Using model_id: google/gemma-2b-it


`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [64]:
llm_model


GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear(in_features=16384, out_features=2048, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
      )
    )
    (norm): GemmaR

In [65]:
def get_model_num_params(model: torch.nn.Module):
    return sum([param.numel() for param in model.parameters()])

get_model_num_params(llm_model)

2506172416

In [66]:
def get_model_mem_size(model: torch.nn.Module):
    """
    Get how much memory a PyTorch model takes up.

    See: https://discuss.pytorch.org/t/gpu-memory-that-model-uses/56822
    """
    # Get model parameters and buffer sizes
    mem_params = sum([param.nelement() * param.element_size() for param in model.parameters()])
    mem_buffers = sum([buf.nelement() * buf.element_size() for buf in model.buffers()])

    # Calculate various model sizes
    model_mem_bytes = mem_params + mem_buffers # in bytes
    model_mem_mb = model_mem_bytes / (1024**2) # in megabytes
    model_mem_gb = model_mem_bytes / (1024**3) # in gigabytes

    return {"model_mem_bytes": model_mem_bytes,
            "model_mem_mb": round(model_mem_mb, 2),
            "model_mem_gb": round(model_mem_gb, 2)}

get_model_mem_size(llm_model)

{'model_mem_bytes': 5012354048, 'model_mem_mb': 4780.15, 'model_mem_gb': 4.67}

In [71]:
input_text = "Create a 7-day beginner gym workout plan focused on weight loss. The plan should include exercises for each day of the week, and it should cover a mix of strength training and cardio. Please specify the exercises, sets, and reps for each workout. For male "
print(f"Input text:\n{input_text}")
scores,indices=retrieve_relevant_resources(query=input_text,embeddings= embeddings,n_resources_to_return=10)
retrieved_passages = ""
for index in indices:
    retrieved_passages += pages_and_chunks[index]["text"] + " "
combined_input = f"Query: {input_text}\nRelevant Information: {retrieved_passages}"
# Create prompt template for instruction-tuned model
dialogue_template = [
    {"role": "user",
     "content":input_text}
]

# Apply the chat template
prompt = tokenizer.apply_chat_template(conversation=dialogue_template,
                                       tokenize=False, # keep as raw text (not tokenized)
                                       add_generation_prompt=True)
print(f"\nPrompt (formatted):\n{prompt}")

Input text:
Create a 7-day beginner gym workout plan focused on weight loss. The plan should include exercises for each day of the week, and it should cover a mix of strength training and cardio. Please specify the exercises, sets, and reps for each workout. For male 

Prompt (formatted):
<bos><start_of_turn>user
Create a 7-day beginner gym workout plan focused on weight loss. The plan should include exercises for each day of the week, and it should cover a mix of strength training and cardio. Please specify the exercises, sets, and reps for each workout. For male<end_of_turn>
<start_of_turn>model



In [72]:
torch.cuda.empty_cache()

In [73]:

# Tokenize the input text (turn it into numbers) and send it to GPU
input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
print(f"Model input (tokenized):\n{input_ids}\n")

# Generate outputs passed on the tokenized input
# See generate docs: https://huggingface.co/docs/transformers/v4.38.2/en/main_classes/text_generation#transformers.GenerationConfig 
outputs = llm_model.generate(**input_ids,
                             max_new_tokens=256) # define the maximum number of new tokens to create
print(f"Model output (tokens):\n{outputs[0]}\n")

Model input (tokenized):
{'input_ids': tensor([[     2,      2,    106,   1645,    108,   4912,    476, 235248, 235324,
         235290,   1311,  53070,  15185,  29639,   1780,  14779,    611,   5171,
           4783, 235265,    714,   1780,   1412,   3707,  21426,    604,   1853,
           1744,    576,    573,   2788, 235269,    578,    665,   1412,   3098,
            476,   7345,    576,   7268,   4770,    578,  27921, 235265,   5651,
          23730,    573,  21426, 235269,   9032, 235269,    578,  63926,    604,
           1853,  29639, 235265,   1699,   9202,    107,    108,    106,   2516,
            108]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}



Model output (tokens):
tensor([     2,      2,    106,   1645,    108,   4912,    476, 235248, 235324,
        235290,   1311,  53070,  15185,  29639,   1780,  14779,    611,   5171,
          4783, 235265,    714,   1780,   1412,   3707,  21426,    604,   1853,
          1744,    576,    573,   2788, 235269,    578,    665,   1412,   3098,
           476,   7345,    576,   7268,   4770,    578,  27921, 235265,   5651,
         23730,    573,  21426, 235269,   9032, 235269,    578,  63926,    604,
          1853,  29639, 235265,   1699,   9202,    107,    108,    106,   2516,
           108,    688,   7366, 235248, 235274, 235292,  47085,    578,   6271,
        101857,    688,    109, 235287,  98118,  12703,  47085,   5471, 235292,
        235248, 235304,   9032,    576, 235248, 235274, 235276, 235290, 235274,
        235284,  63926,    108, 235287,  98118,  12703,  20830,    484, 235292,
        235248, 235304,   9032,    576, 235248, 235274, 235276, 235290, 235274,
        235284,  

In [74]:
# Decode the output tokens to text
outputs_decoded = tokenizer.decode(outputs[0])
print(f"Model output (decoded):\n{outputs_decoded}\n")

Model output (decoded):
<bos><bos><start_of_turn>user
Create a 7-day beginner gym workout plan focused on weight loss. The plan should include exercises for each day of the week, and it should cover a mix of strength training and cardio. Please specify the exercises, sets, and reps for each workout. For male<end_of_turn>
<start_of_turn>model
**Day 1: Chest and Triceps**

* Dumbbell Chest Press: 3 sets of 10-12 reps
* Dumbbell Flyes: 3 sets of 10-12 reps
* Triceps Pushdowns: 3 sets of 10-12 reps

**Day 2: Back and Biceps**

* Barbell Rows: 3 sets of 10-12 reps
* Dumbbell Deadlifts: 3 sets of 10-12 reps
* Hammer Curls: 3 sets of 10-12 reps

**Day 3: Legs and Abs**

* Squats: 3 sets of 10-12 reps
* Deadlifts: 3 sets of 10-12 reps
* Lunges: 3 sets of 10-12 reps
* Plank: 3 sets of 30-60 seconds

**Day 4: Shoulders and Core**

* Overhead Press: 3 sets of 10-12 reps
* Lateral Raises: 3 sets of 10-12 reps
* Front Raises: 3 sets of 10-1

