In [1]:


from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import gc

def clean_memory():
    """Clear memory to prevent crashes"""
    torch.cuda.empty_cache()
    gc.collect()

def load_gpt_model(model_name='gpt2', device='cpu'):
    """Safely load GPT-2 model with error handling"""
    try:
        print(f"Loading {model_name} model...")
        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        tokenizer.pad_token = tokenizer.eos_token

        # Load model with device_map for memory optimization
        model = GPT2LMHeadModel.from_pretrained(
            model_name,
            device_map='auto' if device == 'cuda' else None,
            torch_dtype=torch.float16 if device == 'cuda' else torch.float32
        ).to(device)

        return model, tokenizer

    except Exception as e:
        print(f"Error loading model: {str(e)}")
        clean_memory()
        return None, None

def generate_text_gpt(prompt, model, tokenizer, device='cpu', max_length=150):
    """Generate text with safety checks"""
    if not model or not tokenizer:
        return "Model not loaded properly"

    try:
        inputs = tokenizer.encode(prompt, return_tensors='pt').to(device)

        outputs = model.generate(
            inputs,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.92,
            temperature=0.85,
            no_repeat_ngram_size=3,
            pad_token_id=tokenizer.eos_token_id,
            early_stopping=True
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except RuntimeError as e:
        if 'CUDA out of memory' in str(e):
            clean_memory()
            return "Error: Out of GPU memory. Try reducing max_length or using CPU."
        return f"Generation error: {str(e)}"

    except Exception as e:
        return f"Unexpected error: {str(e)}"

def main():

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # Load model
    model, tokenizer = load_gpt_model('gpt2', device)
    if model is None:
        print("Trying smaller distilgpt2 model...")
        model, tokenizer = load_gpt_model('distilgpt2', device)

    if model is None:
        print("Failed to load any model. Exiting.")
        return

    # Example prompts
    prompts = [
        "The future of AI will",
        "Self-driving cars need",
        "Ethical AI requires",
        "Neural networks can",
        "In 2030, artificial intelligence"
    ]

    # Generate sample outputs
    for prompt in prompts:
        print("\n" + "="*80)
        print(f"PROMPT: {prompt}")
        print("\nGenerated Text:")
        print(generate_text_gpt(prompt, model, tokenizer, device))

    # Interactive mode
    while True:
        print("\n" + "="*80)
        user_prompt = input("\nEnter your prompt (or 'quit' to exit): ").strip()
        if user_prompt.lower() in ['quit', 'exit']:
            break

        print("\nGenerated Text:")
        print(generate_text_gpt(user_prompt, model, tokenizer, device))

    clean_memory()

if __name__ == "__main__":
    main()

Using device: cuda
Loading gpt2 model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



PROMPT: The future of AI will

Generated Text:
The future of AI will be a lot more interesting when it comes to a system that is fundamentally different from our current system. We are looking at artificial intelligence to be a part of our future. We need to understand and understand it more deeply than ever before. But it's not going to be for us in the near future."

"We're not going away," he continued. "We're going to have a much better future in which humans understand and interact with and understand more about the world around us. In many ways, it's going to make more sense than it is today to do so.

But it's still early days.
.

PROMPT: Self-driving cars need

Generated Text:
Self-driving cars need to be equipped with a safe steering wheel for steering, braking and braking. They also need to have an internal sensor and actuator that can detect when people are driving and what the driver is doing.

The biggest problem that autonomous driving is having is getting people to do s