In [2]:
from transformers import pipeline

In [3]:
# 🧠 Text Generation (GPT-2)
generator = pipeline("text-generation", model="gpt2", device="mps")
print(generator("The future of AI is", max_length=30, num_return_sequences=1))

Device set to use mps
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'The future of AI is uncertain, but there are lots of bright ideas coming out of universities that could change the world for the better.\n\nThe'}]


In [None]:
# # 📊 Text Classification (BERT)
# classifier = pipeline("sentiment-analysis")
# print(classifier("I love working with Hugging Face! Fuck!!"))

In [None]:
# # 📰 Text Summarization (BART)
# summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# article = """
# Machine learning is a field of computer science that gives computers the ability to learn without being explicitly programmed. 
# It has become one of the most important technologies in recent years, with applications in many industries.
# """
# print(summarizer(article, max_length=40, min_length=10, do_sample=False))

In [13]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

device = torch.device("mps" if torch.backends.mps.is_built() else "cpu")
print("Using device:", device)

# 1. 加载 tokenizer 和模型（使用本地缓存）
model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

model.to(device)

# 2. 输入文本
prompt = "A yellow man is "
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# 3. 文本生成（设置最大长度、温度等超参）
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=30,
        num_return_sequences=1,
        temperature=0.8,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

# 4. 解码输出
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


Using device: mps
A yellow man is _______

"This is the one who can be trusted," says the man.

The man is a man called
