In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# 1. Choose the compute device: CUDA when torch reports it available, CPU otherwise
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# 2. Load the pretrained tokenizer and language model by checkpoint name
model_name = "gpt2"
print("Downloading/Loading GPT-2... please wait.")

tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model = model.to(device)  # move the weights onto the selected device

# GPT-2 specific padding fix: reuse the end-of-sequence token as the pad token
tokenizer.pad_token = tokenizer.eos_token
print("Model ready!")


Using device: cpu
Downloading/Loading GPT-2... please wait.
Model ready!


In [2]:
def ask_ai(prompt, max_new_tokens=50, temperature=0.8, top_k=50, top_p=0.95, seed=None):
    """Generate a sampled continuation of ``prompt`` and return it as text.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device`` objects,
    so the setup cell must have been run first.

    Args:
        prompt: Text that is tokenized and passed to ``model.generate``.
        max_new_tokens: Forwarded to ``model.generate`` as ``max_new_tokens``.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_k: ``top_k`` sampling value forwarded to ``model.generate``.
        top_p: ``top_p`` sampling value forwarded to ``model.generate``.
        seed: Optional integer. When not ``None``, ``torch.manual_seed(seed)``
            is called right before generation so a sampled answer can be
            reproduced. This reseeds torch's global random number generator.
            The default ``None`` leaves the generator untouched.

    Returns:
        The first sequence returned by ``model.generate``, decoded with
        special tokens skipped. In this notebook's sample run the returned
        text starts with the prompt itself.
    """
    # Tokenize the prompt and move the resulting tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Opt-in reproducibility: generation below samples (do_sample=True)
    if seed is not None:
        torch.manual_seed(seed)

    # Generate tokens
    with torch.no_grad():  # Saves memory by not calculating gradients
        output_tokens = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=top_k,
            top_p=top_p,
            temperature=temperature,
            # The EOS id is passed as the pad id for generation
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the single returned sequence back to text
    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)

In [3]:
# Example query: ask one question, allowing up to 60 newly generated tokens
my_prompt = "How can AI models be fine-tuned for domain-specific tasks?"
result = ask_ai(my_prompt, max_new_tokens=60)

# Show the answer framed by two 30-character dashed rules
print("-" * 30, result, "-" * 30, sep="\n")

------------------------------
How can AI models be fine-tuned for domain-specific tasks? In this paper, we explore the possibility of working with real-world models and try to find out how to improve them, and what you can do to make them better.
------------------------------
