In [0]:
import requests
import uuid
import yt.wrapper as yt

In [1]:
# Choose a fresh, uniquely named scratch directory for this run:
# under the user's home path when the user node exposes one, otherwise under //tmp/examples.
username = yt.get_user_name()
home_path_attr = f"//sys/users/{username}/@user_info/home_path"
if yt.exists(home_path_attr):
    home = yt.get(home_path_attr)
    working_dir = f"{home}/{uuid.uuid4().hex}"
else:
    working_dir = f"//tmp/examples/{uuid.uuid4().hex}"

# recursive=True creates missing parent nodes as well; without it the call fails
# when the parent (e.g. //tmp/examples) has not been created yet.
yt.create("map_node", working_dir, recursive=True)
print(working_dir)

//tmp/examples/offline-inference_cc03dfb4-2237-4323-a356-14574e6999f6


In [2]:
# Download a plain-text list of first names (one per line) and store it as a
# single-column table that the map operation will read.
NAMES_URL = "https://raw.githubusercontent.com/dominictarr/random-name/refs/heads/master/first-names.txt"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being written as "names".
response = requests.get(NAMES_URL, timeout=30)
response.raise_for_status()

# splitlines() copes with both "\r\n" and "\n" line endings; blank lines
# (including the one produced by a trailing newline) are dropped so that no
# row with an empty name reaches the table.
names = [
    line.strip()
    for line in response.content.decode("utf-8").splitlines()
    if line.strip()
]

yt.create("table", f"{working_dir}/names", ignore_existing=True)
yt.write_table(f"{working_dir}/names", [{"name": name} for name in names])

In [3]:
class StoriesGenerator:
    """Row-wise mapper that turns a name into a short generated story.

    An instance is called once per input row. It reads ``row["name"]``, builds
    the prompt ``"<name> was a little child "`` and yields a single output row
    ``{"story": <generated text>}``.

    The model and tokenizer are loaded on the first call rather than in
    ``__init__``.
    NOTE(review): presumably this keeps the instance handed to ``yt.run_map``
    lightweight and loads the weights inside the job itself -- confirm against
    the YTsaurus Python API documentation.
    """

    def __init__(self):
        # Nothing heavy is created here; see _load_model().
        self.model_loaded = False

    def _load_model(self):
        """Load the model and tokenizer and remember the device they run on.

        Uses CUDA when ``torch.cuda.is_available()`` reports a GPU, CPU otherwise.
        """
        # Imported lazily so that merely constructing the class does not
        # require transformers/torch to be importable.
        from transformers import AutoModelForCausalLM, AutoTokenizer
        import torch

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = AutoModelForCausalLM.from_pretrained("roneneldan/TinyStories-1M").to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
        self.model_loaded = True

    def __call__(self, row):
        """Generate a story for one input row.

        Args:
            row: mapping with a ``"name"`` key.

        Yields:
            A single dict ``{"story": text}`` holding the decoded model output
            (prompt included, special tokens stripped).

        Raises:
            KeyError: if ``row`` has no ``"name"`` key.
        """
        import sys

        if not self.model_loaded:
            self._load_model()

        name = row["name"]
        prompt = f"{name} was a little child "
        # Diagnostics are written to stderr, not stdout.
        print("Prompt {}".format(prompt), file=sys.stderr)

        # Calling the tokenizer (instead of .encode) also returns the attention
        # mask, which is forwarded to generate() together with the input ids.
        encoded = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        output = self.model.generate(
            **encoded,
            max_length=100,
            num_beams=1,
            # Passed explicitly so generate() does not have to fall back to
            # eos_token_id (and log a warning about it) for every row.
            pad_token_id=self.tokenizer.eos_token_id,
        )
        output_text = self.tokenizer.decode(output[0], skip_special_tokens=True)

        yield {"story": output_text}

# Destination table for the generated stories.
# NOTE(review): force=True presumably replaces a node already present at this
# path -- confirm against the yt.create documentation.
tales_table = f"{working_dir}/tales"
yt.create("table", tales_table, force=True)

# Resources requested for each mapper job.
mapper_resources = {
    "gpu_limit": 1,
    "cpu_limit": 4.0,
}

# Operation spec: number of jobs, the pool tree to run in, and the per-job
# resources above. "gpu_h100" is specific to the target cluster.
map_spec = {
    "job_count": 20,
    "pool_trees": ["gpu_h100"],
    "mapper": mapper_resources,
}

# Run StoriesGenerator over every row of the names table and write the
# yielded {"story": ...} rows to the tales table.
yt.run_map(
    StoriesGenerator(),
    f"{working_dir}/names",
    tales_table,
    spec=map_spec,
)