In [29]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "HuggingFaceTB/SmolLM2-360M-Instruct"
import re

In [30]:
device = "cpu"

In [31]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

In [32]:
def generate_subtasks(task_description, model, tokenizer, num_subtasks):

    # Updated prompt to be more motivational and positive
    prompt = (
        f"You are a motivational task planner. Break down the following task into exactly {num_subtasks} inspiring, clear, and actionable steps. "
        "Each subtask should be practical, specific, and easy to follow. Focus on making each step feel achievable and energizing. "
        "Encourage the person completing the task with positive language. The steps should be ordered logically and aim to make progress feel rewarding. "
        "Avoid any filler, general explanations, or placeholders."
        "be thorough or concise, depending on the setting. The goal is for someone to feel motivated and confident as they follow the steps and accomplish the task. "
        "Each subtask should include words of encouragement and be no longer than {max_length} words.\n\n"
        f"Task: {task_description}\n\n"
        "Subtasks:"
    )

    tokenizer.pad_token = tokenizer.eos_token

    with torch.no_grad():
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

        outputs = model.generate(
            **inputs,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=2,
            num_beams=1,
            early_stopping=True,
            max_new_tokens=256
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    subtasks_section = generated_text.split("Subtasks:", 1)[-1].strip()
    subtasks = []

    valid_subtask_regex = r"^[0-9]+\. .+"  # Match lines that start with a number followed by a dot and a space

    for line in subtasks_section.split("\n"):
        line = line.strip()
        if line and re.match(valid_subtask_regex, line):
            # Remove the numbering (the part before the first dot)
            subtask_text = re.sub(r"^\d+\.\s*", "", line)
            # Limit to max_length words and remove the trailing period
            truncated_subtask = " ".join(subtask_text.split()[:max_length]).rstrip(".")
            subtasks.append(truncated_subtask)

    # Ensure exactly 'num_subtasks' subtasks
    while len(subtasks) < num_subtasks:
        subtasks.append("Placeholder")
    subtasks = subtasks[:num_subtasks]  # If there are too many, truncate to the required number

    return subtasks

In [37]:
subtasks = generate_subtasks("how to cook eggs", model, tokenizer, num_subtasks=7, detail_level='high')

In [38]:
print("\n".join(f"- {subtask}" for subtask in subtasks))

- Gather ingredients: Gather all the necessary ingredients for the recipe. This includes eggs, butter, salt, pepper, milk, eggs whites, egg
- Prepare the eggs: Crack the egg into a bowl and gently pour in the milk. Let it sit for a
- Cook the Eggs: Heat a pan over medium heat and add a small amount of oil. Pour the beaten eggs
- Placeholder
- Placeholder
