### TO-DO
- replace pinecone with an open-source vectorstore (e.g. chroma)
- replace OpenAI's embedding model with an open-source model from huggingface
- refine prompt

### Setup

In [1]:
import os
import random
from getpass import getpass

import pinecone
import torch
from IPython.display import Markdown
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [2]:
def _read_secret(name):
    """Return the secret called ``name`` from the environment, prompting for it if unset.

    Keeps credentials out of the notebook source: a non-empty environment
    variable is used as-is, otherwise the user is asked via ``getpass``.
    """
    return os.environ.get(name) or getpass(name)


index_name = "workouts"

# Credentials are never hardcoded; they come from the environment or a prompt.
pinecone.init(
    api_key=_read_secret("PINECONE_API_KEY"),
    environment=_read_secret("PINECONE_ENVIRONMENT"),
)

active_indexes = pinecone.list_indexes()
print(active_indexes)

# The gRPC client is only used to print the index statistics; it gets its own
# name so that `index` refers to a single kind of object throughout the cell.
stats_index = pinecone.GRPCIndex(index_name)
print(stats_index.describe_index_stats())


embedding_model_name = "text-embedding-ada-002"

embedding_model = OpenAIEmbeddings(
    model=embedding_model_name,
    openai_api_key=_read_secret("OPENAI_API_KEY"),
)


# presumably the metadata field that stores each document's text -- confirm against the indexing code
text_field = "text"
index = pinecone.Index(index_name)
vectorstore = Pinecone(index, embedding_model, text_field)

# query = "shoulder workout"
# vectorstore.similarity_search(query, k=3)

['workouts']
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 39}},
 'total_vector_count': 39}


  warn_deprecated(


In [3]:
# quantization docs: https://huggingface.co/docs/transformers/main/quantization#quantization

def init_pipeline(model_id):
    """Build a 4-bit quantized text-generation pipeline for a causal language model.

    Args:
        model_id: Model identifier passed to ``from_pretrained`` for both the
            model and its tokenizer.

    Returns:
        A ``transformers`` "text-generation" pipeline wrapping the loaded model
        and tokenizer.
    """
    # device_map is given to from_pretrained, where the weights are actually
    # placed. pipeline() only applies device_map when it loads the model itself
    # from a string id, so passing it next to an already-instantiated model
    # had no effect.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        attn_implementation="flash_attention_2",
        load_in_4bit=True,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # The tokenizer's EOS id is forwarded as pad_token_id to the pipeline.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        pad_token_id=tokenizer.eos_token_id,
    )

    return pipe

In [4]:
# Pools of values that are sampled to assemble a synthetic user query.
durations = [str(d) for d in range(10, 65, 5)]  # "10", "15", ..., "60"
muscle_groups = ["shoulders", "chest", "back", "abs", "arms", "legs"]
genders = ["male", "female"]
# levels = ["beginner", "intermediate", "advanced"]
intensities = ["easy", "moderate", "intense"]
# Backward-compatible alias: existing code reads the original (misspelled) name.
intesities = intensities
# "dumbbells" was misspelled as "dumbbeels"; the string ends up verbatim in the
# retrieval query and in the LLM prompt, so the typo is fixed here.
equipments = ["gym equipment", "dumbbells only", "no equipment"]

In [5]:
def create_random_user_query(seed):
    """Assemble a pseudo-random workout request string.

    The global ``random`` generator is reseeded with ``seed``, then one value
    is drawn from each module-level pool, in this order: durations,
    muscle_groups, genders, intesities, equipments.

    Args:
        seed: Value handed to ``random.seed``.

    Returns:
        The query string built from the five sampled values.
    """
    random.seed(seed)

    # Draw order matters: it fixes which value each pool yields for a given seed.
    pools = (durations, muscle_groups, genders, intesities, equipments)
    minutes, target, who, effort, gear = [random.choice(pool) for pool in pools]

    # TO-DO: improve input prompt.
    return f"{effort} {minutes}-minute {target} workout for {who} {gear}"

In [6]:
# Pick a four-digit seed and show it, so the sampled query can be regenerated.
seed = random.randint(1000, 9999)
print(f"Random Seed: {seed}")

# Build the query for that seed and display it.
query = create_random_user_query(seed)
print(query)

Random Seed: 8623
easy 50-minute back workout for female dumbbeels only


### Mistral

In [None]:
# Text-generation pipeline for Mistral-7B-Instruct v0.2, built by init_pipeline.
mistral_pipeline = init_pipeline(model_id="mistralai/Mistral-7B-Instruct-v0.2")

In [None]:
# Instruction template for Mistral. It has two str.format placeholders:
#   {user_query}      - the user's workout request
#   {workout_context} - retrieved workouts the answer should be based on
# Text fixes relative to the previous version: "Warp-up" -> "Warm-up",
# "science science know" -> "sports science knowledge", "5 about minutes" ->
# "about 5 minutes", "gym equipments" -> "gym equipment", and the section
# emojis in the output format are restored.
mistral_system_prompt = """
You're the world's best personal trainer.
You always provide your clients with all the information needed to become fitter, stronger and healthier through physical training.
You use your sports science knowledge and expertise, nutrition advice, and other relevant factors to create workout routines suitable to your clients.
If clients tell you they do not have access to gym equipment, you never fail to recommend exercises that do not require any tool or equipment.
For each exercise you always provide the reps, sets and rest intervals in seconds appropriate for each exercise and the client's fitness level.
You start each workout program with about 5 minutes of warm-up exercises to make the body ready for more strenuous activities and make it easier to exercise.
You end each workout routine with about 5 minutes of cool-down exercises to ease the body, lower the chance of injury, promote blood flow, and reduce stress to the heart and the muscles.
The warm-up and cool-down exercises are always different and they are always appropriate for the muscle group the person wants to train.
You never recommend exercises in the main workout routine in the warm-up or cool-down sections.
Remember, when clients tell you they do not have access to gym equipment, all the exercises you recommend, including the warm-up and cool-down exercises, can be performed without any tool.
You always limit yourself to respond with the list of exercises. You never add any additional comment.

I am looking for a {user_query}.

Create the workout based on the following information:
{workout_context}

Output format:
## 🤸 Warm-up:
- <exercise name> (<duration> minutes)
...
- <exercise name> (<duration> minutes)
## 🏋️‍♀️ Workout
- <exercise name> (<reps> reps, <sets> sets, <rest interval> seconds rest)
...
- <exercise name> (<reps> reps, <sets> sets, <rest interval> seconds rest)
## 🧘 Cool-down:
- <exercise name> (<duration> minutes)
...
- <exercise name> (<duration> minutes)
""".strip()

In [None]:
%%time

seed = random.randint(1000, 9999)
query = create_random_user_query(seed)

print(f"Random Seed: {seed}")
print(f"Input Prompt: {query}\n")

num_samples = 3

# retrieve most similar workouts to the input query
similar_workouts = vectorstore.similarity_search(query, k=num_samples)

# (optional) random sample a subset of workouts to promote diversity
similar_workouts = random.sample(similar_workouts, num_samples)

# join together the retrieved workouts in a single string
similar_workouts = "\n\n".join([d.page_content for d in similar_workouts])

messages = [
    {
        "role": "user",
        "content": mistral_system_prompt.format(user_query=query, workout_context=similar_workouts)
    }
]

prompt = mistral_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

outputs = mistral_pipeline(prompt, max_new_tokens=1024, do_sample=True, temperature=0.1, top_p=0.95)
Markdown(outputs[0]["generated_text"].split("[/INST]")[-1])

### Zephyr

In [None]:
# Text-generation pipeline for Zephyr-7B-beta, built by init_pipeline.
zephyr_pipeline = init_pipeline(model_id="HuggingFaceH4/zephyr-7b-beta")

In [None]:
# System-prompt template for Zephyr. It has one str.format placeholder:
#   {workout_context} - retrieved workouts the answer should be based on
# Text fixes relative to the previous version: "Warp-up" -> "Warm-up",
# "science science know" -> "sports science knowledge", "5 about minutes" ->
# "about 5 minutes", "gym equipments" -> "gym equipment", a missing final
# period, and the section emojis in the output format are restored.
zephyr_system_prompt = """
You're the world's best personal trainer and sport scientist.
You always provide your clients with all the information needed to become fitter, stronger and healthier through physical training.
You use your sports science knowledge and expertise, nutrition advice, and other relevant factors to create workout routines suitable to your clients.
If clients tell you they do not have access to gym equipment, you never fail to recommend exercises that do not require any tool or equipment.
For each exercise you always provide the reps, sets and rest intervals in seconds appropriate for each exercise and the client's fitness level.
You start each workout program with about 5 minutes of warm-up exercises to make the body ready for more strenuous activities and make it easier to exercise.
You end each workout routine with about 5 minutes of cool-down exercises to ease the body, lower the chance of injury, promote blood flow, and reduce stress to the heart and the muscles.
The warm-up and cool-down exercises are always different and they are always appropriate for the muscle group the person wants to train.
You never recommend exercises in the main workout routine in the warm-up or cool-down sections.
Remember, when clients tell you they do not have access to gym equipment, all the exercises you recommend, including the warm-up and cool-down exercises, can be performed without any tool.
You always limit yourself to respond with the list of exercises.
Do not add any comment. Stick to the output format.

Create the workout based on the following information:
{workout_context}

Output format:
## 🤸 Warm-up:
- <exercise name> (<duration> minutes)
...
- <exercise name> (<duration> minutes)
## 🏋️‍♀️ Workout
- <exercise name> (<reps> reps, <sets> sets, <rest interval> seconds rest)
...
- <exercise name> (<reps> reps, <sets> sets, <rest interval> seconds rest)
## 🧘 Cool-down:
- <exercise name> (<duration> minutes)
...
- <exercise name> (<duration> minutes)
""".strip()

In [None]:
%%time

seed = random.randint(1000, 9999)
query = create_random_user_query(seed)

print(f"Random Seed: {seed}")
print(f"Input Prompt: {query}\n")

num_samples = 3

# retrieve most similar workouts to the input query
similar_workouts = vectorstore.similarity_search(query, k=num_samples)

# (optional) random sample a subset of workouts to promote diversity
similar_workouts = random.sample(similar_workouts, num_samples)

# join together the retrieved workouts in a single string
similar_workouts = "\n\n".join([d.page_content for d in similar_workouts])

messages = [
    {
        "role": "system",
        "content": zephyr_system_prompt.format(workout_context=similar_workouts),
    },
    {
        "role": "user",
        "content": f"create a {query}"
    }
]

prompt = zephyr_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# print(prompt)

outputs = zephyr_pipeline(prompt, max_new_tokens=1024, do_sample=True, temperature=0.1, top_p=0.95)
Markdown(outputs[0]["generated_text"].split("<|assistant|>")[-1].strip())

### Mixtral

In [7]:
# Text-generation pipeline for Mixtral-8x7B-Instruct v0.1, built by init_pipeline.
# NOTE: the recorded run of this cell ended in a CUDA OutOfMemoryError while
# loading the checkpoint shards on a GPU with 23.65 GiB of total capacity.
mixtral_pipeline = init_pipeline(model_id="mistralai/Mixtral-8x7B-Instruct-v0.1")

config.json:   0%|          | 0.00/720 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/92.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/19 [00:00<?, ?it/s]

model-00001-of-00019.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00002-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00005-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00006-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00007-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00008-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00009-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00010-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00011-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00012-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00013-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00014-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00015-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00016-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00017-of-00019.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00018-of-00019.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00019-of-00019.safetensors:   0%|          | 0.00/4.22G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacty of 23.65 GiB of which 14.06 MiB is free. Process 4030201 has 23.63 GiB memory in use. Of the allocated memory 23.00 GiB is allocated by PyTorch, and 192.62 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF