# Capstone LLM Application

This notebook builds a custom LLM application as part of the capstone project.
It starts with GPT-2 as the base model and expands from there.

## Setup and Dependencies

In [None]:
# Install required packages if needed (uncomment the line below; %pip installs into the active kernel's environment)
# %pip install torch transformers tiktoken numpy

In [None]:
import torch
import torch.nn.functional as F
from transformers import GPT2LMHeadModel, GPT2TokenizerFast, AutoTokenizer, AutoModelForCausalLM
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Report the installed PyTorch version and whether PyTorch can see a CUDA device.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only query the device name when CUDA is usable; index 0 is the first device.
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

## Load GPT-2 Model

In [None]:
# Load the pretrained GPT-2 model and its matching tokenizer by checkpoint name
model_name = "gpt2"
tokenizer = GPT2TokenizerFast.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Set to evaluation mode (inference only; no training happens in this notebook)
model.eval()

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Seed PyTorch's random number generators so the sampled generations in the
# cells below are reproducible under Restart Kernel -> Run All.
SEED = 42
torch.manual_seed(SEED)

# Summarize what was loaded
print(f"Model loaded: {model_name}")
print(f"Device: {device}")
print(f"Vocabulary size: {tokenizer.vocab_size}")
print(f"Model parameters: {model.num_parameters():,}")

## Text Generation Functions

In [None]:
def generate_text(prompt, max_length=100, temperature=1.0, top_k=50, top_p=0.9, strategy="top_p"):
    """
    Generate text from ``prompt`` using one of several decoding strategies.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt (str): Input text prompt
        max_length (int): Maximum length of the whole sequence in tokens;
            the prompt's tokens count toward this limit
        temperature (float): Sampling temperature (higher = more random);
            not used by the 'greedy' strategy
        top_k (int): Top-k sampling parameter (used by the 'top_k' strategy)
        top_p (float): Top-p (nucleus) sampling parameter (used by the
            'top_p' strategy)
        strategy (str): Decoding strategy ('greedy', 'top_k', 'top_p')

    Returns:
        str: Decoded text of the first generated sequence, with special
        tokens skipped

    Raises:
        ValueError: If ``strategy`` is not one of the supported names
    """
    # Validate first so a typo fails with a clear message instead of an
    # UnboundLocalError after tokenization.
    supported_strategies = ("greedy", "top_k", "top_p")
    if strategy not in supported_strategies:
        raise ValueError(
            f"Unknown strategy {strategy!r}; expected one of: "
            + ", ".join(supported_strategies)
        )

    # Strategy-specific arguments; everything else is shared by all strategies
    if strategy == "greedy":
        decoding_kwargs = {"do_sample": False}
    elif strategy == "top_k":
        decoding_kwargs = {
            "do_sample": True,
            "top_k": top_k,
            "temperature": temperature,
        }
    else:
        # top_k=0 turns off the library's default top-k filter so that only
        # the nucleus (top-p) filter shapes the sampling distribution.
        decoding_kwargs = {
            "do_sample": True,
            "top_p": top_p,
            "top_k": 0,
            "temperature": temperature,
        }

    # Encode the prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    # A single unpadded prompt: every position is a real token. Passing the
    # mask explicitly matters because pad_token_id is set to the EOS id below.
    attention_mask = torch.ones_like(input_ids)

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,
            **decoding_kwargs,
        )

    # Decode and return the generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

## Interactive Text Generation

In [None]:
# Test the model with different prompts and strategies
test_prompts = [
    "Once upon a time in a magical kingdom,",
    "The future of artificial intelligence is",
    "In the year 2030, technology will",
    "The most important lesson I learned was"
]

# Every decoding strategy name that generate_text accepts
strategies = ["greedy", "top_k", "top_p"]

# Print one completion per (prompt, strategy) pair under a banner for each prompt
for prompt in test_prompts[:2]:  # Test first 2 prompts
    print(f"\n{'='*60}")
    print(f"PROMPT: {prompt}")
    print(f"{'='*60}")
    
    for strategy in strategies:
        print(f"\n--- {strategy.upper()} ---")
        # top_k / top_p are left at generate_text's defaults; temperature is
        # passed for every strategy but generate_text does not use it for 'greedy'
        generated = generate_text(
            prompt, 
            max_length=80, 
            temperature=0.8, 
            strategy=strategy
        )
        print(generated)

## Custom Application Development

This section is for building your custom LLM application.

In [None]:
# Your custom application code goes here
# Ideas:
# - Text completion assistant
# - Story generator
# - Code documentation generator
# - Creative writing helper
# - Conversation simulator

def custom_llm_app(user_input):
    """
    Complete ``user_input`` behind a fixed assistant-style context.

    This is a starting point: adapt the context and generation settings
    to your own use case.

    Returns the generated text with the leading prompt characters cut off
    and surrounding whitespace stripped.
    """
    # Build the prompt: fixed context followed by the user's text
    full_prompt = "You are a helpful assistant. " + user_input

    generation_settings = {
        "max_length": 150,
        "temperature": 0.7,
        "strategy": "top_p",
    }
    completion = generate_text(full_prompt, **generation_settings)

    # Drop the first len(full_prompt) characters — assumes the returned text
    # starts with the prompt exactly as it was passed in.
    prompt_chars = len(full_prompt)
    new_text = completion[prompt_chars:]
    return new_text.strip()

# Test your custom application: run one sample question through
# custom_llm_app and show the input next to the returned text.
test_input = "What are the benefits of machine learning?"
result = custom_llm_app(test_input)
print(f"Input: {test_input}")
print(f"Output: {result}")

## Model Analysis and Experimentation

In [None]:
# Analyze model behavior with different parameters
def analyze_generation_parameters(prompt, temperatures=(0.3, 0.7, 1.0, 1.5)):
    """
    Analyze how different temperature values affect generation.

    Prints the prompt, then one 'top_p' completion (max_length=60) for each
    temperature, in the order given.

    Args:
        prompt (str): Prompt passed unchanged to every generation call
        temperatures (iterable of float): Temperatures to try. The default is
            an immutable tuple so it cannot be modified between calls.

    Returns:
        None: Results are printed, not returned
    """
    print(f"Prompt: {prompt}")
    print("\nTemperature Analysis:")

    for temp in temperatures:
        generated = generate_text(
            prompt,
            max_length=60,
            temperature=temp,
            strategy="top_p"
        )
        print(f"\nT={temp}: {generated}")

# Run analysis with the function's default temperature list on a sample prompt
analyze_generation_parameters("The secret to happiness is")

## Next Steps and Extensions

Ideas for extending this notebook:
1. **Fine-tuning**: Fine-tune GPT-2 on custom data
2. **Prompt Engineering**: Experiment with different prompt formats
3. **Multi-model Comparison**: Compare GPT-2 with other models
4. **Interactive UI**: Build a Streamlit or Gradio interface
5. **Retrieval Augmentation**: Add RAG capabilities
6. **Function Calling**: Add tool use capabilities
7. **Memory**: Implement conversation memory
8. **Evaluation**: Add metrics and evaluation functions

In [None]:
# Placeholder for future extensions
# Add your custom development here
# The prints below only emit a readiness message and a sample generate_text
# call; nothing is generated in this cell.

print("Notebook ready for development!")
print("\nQuick usage:")
print('generate_text("Your prompt here", strategy="top_p", temperature=0.8)')