In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"
!pip uninstall wandb

Found existing installation: wandb 0.19.10
Uninstalling wandb-0.19.10:
  Would remove:
    /usr/local/bin/wandb
    /usr/local/bin/wb
    /usr/local/lib/python3.11/dist-packages/package_readme.md
    /usr/local/lib/python3.11/dist-packages/wandb-0.19.10.dist-info/*
    /usr/local/lib/python3.11/dist-packages/wandb/*
Proceed (Y/n)? Y
Y
  Successfully uninstalled wandb-0.19.10


In [None]:
# Step 1. install necessary packages.

# Clean slate - Uninstall possibly conflicting packages
#!pip uninstall -y torch torchvision numpy xformers bitsandbytes

# Install compatible versions
#!pip install torch==2.5.1 torchvision==0.18.1 numpy==1.26.4 bitsandbytes==0.45.4 xformers==0.0.35.post1

# Clone Axolotl repo (if not already)
#!git clone https://github.com/OpenAccess-AI-Collective/axolotl.git || echo "Already cloned"
#%cd axolotl

# Install Axolotl in editable mode
#!pip install -e .


In [None]:
# Step 2: import and load model
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TrainingArguments, Trainer
import numpy as np
import json, os, re
from datetime import datetime
import gradio as gr

# Hub checkpoint used as the starting point for generation and fine-tuning.
model_id = "Soorya03/Llama-3.2-1B-Instruct-FitnessAssistant"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# The preprocessing below pads with padding="max_length", which raises if the
# tokenizer has no pad token. Fall back to EOS when the checkpoint does not
# define one; an existing pad token is left untouched.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


In [None]:
# Smoke test: send a single question through the text-generation pipeline.
prompt = "I want to lose 1 kg in 1 month, what workout should I do?"

# Sampling settings for this one-off generation.
generation_kwargs = dict(max_new_tokens=200, do_sample=True, temperature=0.7)
output = pipe(prompt, **generation_kwargs)

# The pipeline returns a list of candidates; print the text of the first one.
first_candidate = output[0]
print(first_candidate['generated_text'])


I want to lose 1 kg in 1 month, what workout should I do? 

A) Weightlifting
B) High-Intensity Interval Training (HIIT)
C) Cardio
D) Yoga 

The best answer is A) Weightlifting. 

Weightlifting is a great way to build muscle and increase metabolism, which can help you lose weight. However, it is not a calorie-torching workout like HIIT, so it may not be the most effective way to burn calories. Cardio is too low-intensity for weightlifting, and yoga is too flexibility-focused. 

Best answer explanation: This question requires logical reasoning to choose the appropriate workout. The best answer is A) Weightlifting. 

Note: If you're just starting out with a new workout routine, it's best to start with moderate-intensity workouts like Weightlifting and focus on increasing your intensity over time. Cardio and HIIT may be too intense for beginners. 

High-Intensity Interval Training (HIIT) is a high-intensity workout that involves short bursts of intense exercise followed by periods of


In [None]:
#!pip install datasets==2.14.6 evaluate==0.4.0 --upgrade

import evaluate
from datasets import load_dataset

# ROUGE scorer, later used by compute_metrics.
metric = evaluate.load(path="rouge")

# Workout-plan dataset pulled from the Hugging Face Hub.
fitness_plan = load_dataset(path="CristiD7/Comprehensive_7Day_Workout_Plans_100")



In [None]:
# Two-stage split of fitness_plan["train"]; both stages use the same
# hold-out fraction and seed.
split_kwargs = {"test_size": 0.1, "seed": 42}

# Stage 1: hold out 10% of all rows as the test set.
train_val = fitness_plan["train"].train_test_split(**split_kwargs)
test_dataset_raw = train_val["test"]

# Stage 2: hold out 10% of the remaining rows as the validation set.
val_test = train_val["train"].train_test_split(**split_kwargs)
val_dataset_raw = val_test["test"]
train_dataset_raw = val_test["train"]

# 1. Define the correct preprocess_function FIRST
def preprocess_function(examples, max_length=512):
    """Tokenize Context/Response pairs for causal-LM fine-tuning.

    A causal LM computes its loss by comparing the logits at position ``i``
    with the label at position ``i + 1`` of the *same* sequence, so
    ``labels`` must line up with ``input_ids``. Each example is therefore
    encoded as ``prompt tokens + response tokens (+ EOS)``, and the labels
    are a copy of that sequence with every prompt and padding position set
    to ``-100`` so only the response contributes to the loss.

    Args:
        examples: A mapping with "Context" and "Response" entries. Each is
            either a list of strings (``Dataset.map(..., batched=True)``)
            or a single string (unbatched).
        max_length: Fixed sequence length; longer sequences are truncated
            on the right and shorter ones are padded.

    Returns:
        A dict with "input_ids", "attention_mask" and "labels". Values are
        lists of per-example lists for batched input, or flat lists for a
        single example.
    """
    contexts = examples["Context"]
    responses = examples["Response"]

    # Accept a single (unbatched) example as well as a batch.
    single_example = isinstance(contexts, str)
    if single_example:
        contexts, responses = [contexts], [responses]

    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        # Padding positions are masked out below, so the concrete id only
        # has to be a valid token id.
        pad_id = eos_id if eos_id is not None else 0

    batch = {"input_ids": [], "attention_mask": [], "labels": []}
    for context, response in zip(contexts, responses):
        prompt_ids = tokenizer(context)["input_ids"]
        # No special tokens here: the response continues the prompt.
        response_ids = tokenizer(response, add_special_tokens=False)["input_ids"]
        if eos_id is not None:
            # Teach the model where the answer ends.
            response_ids = response_ids + [eos_id]

        input_ids = (prompt_ids + response_ids)[:max_length]
        # NOTE: a prompt of max_length tokens or more leaves no supervised
        # positions for that example.
        labels = ([-100] * len(prompt_ids) + response_ids)[:max_length]

        pad_len = max_length - len(input_ids)
        batch["attention_mask"].append([1] * len(input_ids) + [0] * pad_len)
        batch["input_ids"].append(input_ids + [pad_id] * pad_len)
        batch["labels"].append(labels + [-100] * pad_len)

    if single_example:
        return {key: rows[0] for key, rows in batch.items()}
    return batch

    preprocess


In [None]:
# Tokenize directly
tokenized_train_dataset = train_dataset_raw.map(preprocess_function, batched=True)
tokenized_val_dataset = val_dataset_raw.map(preprocess_function, batched=True)


In [None]:
# Training configuration: evaluate and checkpoint once per epoch so the best
# checkpoint can be restored when training finishes.
training_args = TrainingArguments(
    output_dir="./fitness-plan-model",
    num_train_epochs=5,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    load_best_model_at_end=True,
    # Explicitly disable logging integrations instead of relying on the
    # deprecated WANDB_DISABLED environment variable.
    report_to="none",
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [None]:
#!pip install rouge_score
#!pip install accelerate
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback
from datasets import load_dataset

import evaluate

# Load the ROUGE metric
metric = evaluate.load("rouge")


def _rouge_f1_percent(score):
    """Return a ROUGE score as an F1 percentage.

    Accepts both a plain float (returned by `evaluate`'s ROUGE) and an
    aggregate object exposing `.mid.fmeasure`.
    """
    mid = getattr(score, "mid", None)
    f1 = mid.fmeasure if mid is not None else score
    return float(f1) * 100


def compute_metrics(eval_pred):
    """
    Calculates ROUGE scores for the model's predictions.

    Args:
        eval_pred: A pair ``(predictions, labels)``. ``predictions`` may be
            raw logits of shape (batch, seq, vocab), token ids of shape
            (batch, seq), or a tuple whose first element is one of those.
            ``labels`` are token ids with ignored positions marked -100.

    Returns:
        A dictionary with the ROUGE F1 scores as percentages plus
        ``gen_len``, the mean number of non-padding predicted tokens, all
        rounded to 4 decimals.
    """
    predictions, labels = eval_pred
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)

    # Raw logits cannot be decoded; reduce them to the most likely token id.
    if predictions.ndim == 3:
        predictions = predictions.argmax(axis=-1)

    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    # For a causal LM the prediction at position i targets the label at
    # position i + 1, so align both before comparing and drop predictions
    # at positions the loss ignores (prompt / padding).
    if predictions.ndim == 2 and predictions.shape == labels.shape:
        predictions = predictions[:, :-1]
        labels = labels[:, 1:]
        predictions = np.where(labels != -100, predictions, pad_id)

    # Replace -100 (used as padding/ignore marker) as it can't be decoded.
    predictions = np.where(predictions != -100, predictions, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = [
        text.strip()
        for text in tokenizer.batch_decode(predictions, skip_special_tokens=True)
    ]
    decoded_labels = [
        text.strip()
        for text in tokenizer.batch_decode(labels, skip_special_tokens=True)
    ]

    scores = metric.compute(
        predictions=decoded_preds, references=decoded_labels, use_stemmer=True
    )
    result = {name: _rouge_f1_percent(score) for name, score in scores.items()}

    prediction_lens = [np.count_nonzero(pred != pad_id) for pred in predictions]
    result["gen_len"] = float(np.mean(prediction_lens))
    return {k: round(v, 4) for k, v in result.items()}

In [None]:
def _logits_to_token_ids(logits, labels):
    """Reduce logits to arg-max token ids before the Trainer stores them.

    Keeps evaluation from accumulating a full vocabulary-sized score vector
    for every token position just to compute text metrics.
    """
    if isinstance(logits, tuple):
        logits = logits[0]
    return logits.argmax(dim=-1)


# Fine-tuning driver; stops early after 2 evaluations without improvement.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    # `tokenizer=` is deprecated for Trainer; `processing_class` replaces it.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    preprocess_logits_for_metrics=_logits_to_token_ids,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)

  trainer = Trainer(model=model, args=training_args,


In [None]:
# Run fine-tuning with the Trainer built in the previous cell; the returned
# training summary is displayed as this cell's output.
trainer.train()

In [None]:
!pip install rouge
from rouge import Rouge

# Assume 'output' from a previous cell holds the generated text
generated_text = output[0]['generated_text']

# Define the reference text
reference_text = "Monday:\n- Russian Twists: 3 sets of 20 reps\n- Pull-ups: 3 sets of 10 reps\n- Squats: 3 sets of 10 reps\nTuesday:\n- Burpees: 3 sets of 6 reps\n- Running: 3 sets of 20 reps\n- Shoulder Press: 3 sets of 20 reps\nWednesday:\n- Rest Day\nThursday:\n- Planks: 3 sets of 6 reps\n- Tricep Dips: 3 sets of 15 reps\n- Shoulder Press: 3 sets of 8 reps\nFriday:\n- Walking: 3 sets of 10 reps\n- Planks: 3 sets of 20 reps\n- Squats: 3 sets of 12 reps\nSaturday:\n- Rest Day\nSunday:\n- Rest Day"  # Replace with actual reference

rouge = Rouge()
scores = rouge.get_scores(generated_text, reference_text)
print(scores)

I am a 21-year-old male with a height of 180 cm and a weight of 55 kg. My primary goal is endurance, and I aim to reach a target weight of 45 kg. I am a beginner at the gym. Can you create a 7-day workout plan for me?

In [None]:
# NOTE(review): `test` is not defined in any visible cell of this notebook, so
# this looks like leftover scratch code — confirm what it should refer to, or
# remove the cell.
str(test)