In [1]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
# Model identifier handed to `from_pretrained` for both the tokenizer and the model.
checkpoint = "HuggingFaceTB/SmolLM2-360M-Instruct"
import re

In [2]:
device = "cpu" # set to "cuda" for GPU usage, or keep "cpu" for CPU usage

In [3]:
# Fetch the tokenizer and the causal-LM weights for the configured checkpoint,
# then place the model on the configured device.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)
model = model.to(device)

In [4]:
def generate_subtasks(task_description, model, tokenizer, num_subtasks, detail_level, max_new_tokens=256):
    """Ask a causal language model to break a task into numbered subtasks.

    A planning prompt is built from ``task_description``, the model generates a
    continuation, and the numbered lines of that continuation are returned
    with their numbering removed.

    Args:
        task_description: Free-text description of the task to break down.
        model: Object exposing ``device`` and ``generate(**inputs, ...)``.
            Assumed to be a decoder-only (causal) LM whose ``generate`` output
            starts with the prompt tokens.
        tokenizer: Callable tokenizer exposing ``eos_token_id`` and ``decode``.
            It is not modified by this function.
        num_subtasks: Number of subtasks to return. Values <= 0 yield ``[]``
            without running the model.
        detail_level: ``'high'`` (case-insensitive) requests thorough steps;
            any other value is treated as ``'low'``.
        max_new_tokens: Generation budget. If the model spends it before
            finishing the list, the missing entries are padded (see Returns).

    Returns:
        A list of exactly ``num_subtasks`` strings. When the model produced
        fewer numbered lines than requested, the remainder is filled with the
        literal ``"Placeholder."``; surplus lines are dropped.
    """
    # Nothing to generate; also avoids the odd results a negative slice would give.
    if num_subtasks <= 0:
        return []

    # Case-insensitive so that e.g. "High" is not silently downgraded to low.
    detail = "high" if str(detail_level).strip().lower() == "high" else "low"

    prompt = (
        f"You are a task planner. Break down the following task into exactly {num_subtasks} clear and actionable steps. "
        "Each subtask should be practical, specific, and easy to follow. The subtasks should be ordered logically, "
        "and should focus on accomplishing the task in a methodical way. Avoid any filler, general explanations, or placeholders. "
        f"The level of detail should be {detail}, meaning the subtasks should either "
        "be thorough or concise, depending on the setting. The goal is for someone to be able to follow these steps and complete "
        "the task without needing further clarification.\n\n"
        f"Task: {task_description}\n\n"
        "Subtasks:"
    )

    with torch.no_grad():
        # A single prompt needs no padding, so the tokenizer's pad token does
        # not have to be set (or overwritten) here.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)

        # `early_stopping` is intentionally not passed: it is a beam-search
        # option and has no effect with num_beams=1.
        outputs = model.generate(
            **inputs,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=2,
            num_beams=1,
            max_new_tokens=max_new_tokens,
        )

    # Decode only the newly generated tokens. Parsing the echoed prompt would
    # let numbered lines or a "Subtasks:" marker inside `task_description`
    # leak into the result.
    prompt_length = inputs["input_ids"].shape[-1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # A subtask line is "<number>." or "<number>)" followed by whitespace and text.
    numbered_line = re.compile(r"^\d+[.)]\s+(\S.*)$")

    subtasks = []
    for raw_line in completion.splitlines():
        match = numbered_line.match(raw_line.strip())
        if match is None:
            continue  # blank lines and un-numbered chatter are ignored
        subtasks.append(match.group(1).strip())
        if len(subtasks) == num_subtasks:
            break  # surplus items are not needed

    # Keep the "exactly num_subtasks entries" contract when the model fell short.
    subtasks.extend(["Placeholder."] * (num_subtasks - len(subtasks)))

    return subtasks

In [11]:
# Example run: request a five-step, high-detail plan for the sample task.
subtasks = generate_subtasks(
    "plan wedding",
    model,
    tokenizer,
    num_subtasks=5,
    detail_level="high",
)



In [12]:
# Show the generated steps as a 1-based numbered list.
for position, step in enumerate(subtasks, start=1):
    print(f"{position}. {step}")

1. Decide on a date for the wedding.
2. Choose a venue.
3. Plan the ceremony. Create a detailed plan for how the couple will celebrate their union. This includes the order of events, music, speeches, gifts, attire, food, drinks, photography, videography, flowers, decorations, seating, lighting, sound, audio, technology, guests, timing, logistics, budget, timeline, vendors, volunteers, staff, security, parking, transportation, accommodations, insurance, permits, licenses, contracts, witnesses, officiant, photographer, florist, caterer, servers, bartenders, wait staffs, wedding party, bridesmaids, groomsmen, photographers, video editors, designers,
4. Placeholder.
5. Placeholder.
