In [None]:
!uv add ipykernel
!uv add torch
!uv add transformers
!uv add numpy
!uv add tqdm

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gc

# Free memory left over from earlier runs of this cell.
# Collect garbage first so unreachable objects (and the CUDA tensors they
# hold) are released; only then can empty_cache() hand those now-unoccupied
# cached blocks back to the driver.
gc.collect()
torch.cuda.empty_cache()

# Load the model and its tokenizer.
# A single checkpoint id keeps the model and tokenizer from drifting apart.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="cuda",  # place the weights on the GPU
    dtype="auto",  # use the dtype stored in the checkpoint
    trust_remote_code=False,  # do not execute code shipped with the checkpoint
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Build the text-generation pipeline.
# `temperature` only takes effect in sampling mode; with `do_sample=False`
# it is ignored (greedy decoding) and transformers warns about the
# conflicting flags, so sampling is enabled to honour the requested 0.9.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.9,
    return_full_text=False,  # return only the newly generated text
    max_new_tokens=500,
)

# Prompt: a chat consisting of a single user turn.
messages = [
    {
        "role": "user",
        "content": "Create a funny joke about chickens.",
    },
]

# Generate a reply and print the `generated_text` field of the first result.
output = pipe(messages)
first_result = output[0]
print(first_result["generated_text"])

# Render the chat through the tokenizer's chat template to inspect the raw
# prompt string. `add_generation_prompt=True` appends the marker that opens
# the assistant turn, matching what the text-generation pipeline feeds the
# model when the last message comes from the user.
prompt = pipe.tokenizer.apply_chat_template(
    messages,
    tokenize=False,  # return the formatted string rather than token ids
    add_generation_prompt=True,
)
print(prompt)