In [None]:
import sys
IN_COLAB = "google.colab" in sys.modules
if IN_COLAB:
    !pip install -q google-genai google-auth python-dotenv
    from google.colab import auth
    auth.authenticate_user()
    try:
        PROJECT_ID = input("Enter your Google Cloud Project ID (press Enter to use default ADC): ").strip()
    except Exception:
        PROJECT_ID = ""
    if PROJECT_ID:
        import os
        os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID

import os
import google.auth
from google import genai
from google.genai import types

# Application Default Credentials also report the project they belong to.
creds, project = google.auth.default()
# An explicitly configured project wins; `or` (instead of a .get default) also
# ignores a variable that is set but empty.
project = os.environ.get("GOOGLE_CLOUD_PROJECT") or project
# Region can be overridden through GOOGLE_CLOUD_LOCATION; the default is unchanged.
location = os.environ.get("GOOGLE_CLOUD_LOCATION") or "us-central1"
client = genai.Client(vertexai=True, project=project, location=location)
print(f"Using project: {project}")

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kgweber-cwru/coding-with-ai-wn26/blob/main/week-2-conversations/concepts.ipynb)

# Week 2: Building Conversations

## Learning Objectives
By the end of this session, you will:
- Understand how conversation history works
- Build multi-turn conversations that maintain context
- Use system prompts effectively to shape behavior
- Manage conversation length and costs
- Handle different roles (system instruction, user, model)

## Setup

In [None]:
import os
from dotenv import load_dotenv
from google import genai
from google.genai import types
import google.auth

# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()
creds, project = google.auth.default()
# Same precedence as the first cell: an explicitly configured, non-empty
# project wins over the one reported by the default credentials.
project = os.environ.get("GOOGLE_CLOUD_PROJECT") or project
# Region can be overridden through GOOGLE_CLOUD_LOCATION; the default is unchanged.
location = os.environ.get("GOOGLE_CLOUD_LOCATION") or "us-central1"
client = genai.Client(vertexai=True, project=project, location=location)

print("✓ Ready to build conversations!")

## Part 1: Understanding Conversation Structure

### The Messages List
Conversations are lists of messages exchanged between the user and the model. 
- **System Instruction**: Sets the behavior/persona (passed separately in configuration)
- **User**: The human input
- **Model**: The AI response

We can track history in a simple list:
```python
messages = [
    {"role": "user", "content": "Hello!"},
    {"role": "model", "content": "Hi! How can I help?"},
    {"role": "user", "content": "Tell me about Python."}
]
```

### A Simple Two-Turn Conversation

In [None]:
# System instruction handles the persona
system_instruction = "You are a helpful teaching assistant."

# Start with first user message
messages = [
    {"role": "user", "content": "What is a variable in programming?"}
]

# Get first response.
# Note: Part.from_text takes `text` as a keyword-only argument in current
# google-genai releases, so it must be passed by name.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=[
        types.Content(role=m["role"], parts=[types.Part.from_text(text=m["content"])])
        for m in messages
    ],
    config=types.GenerateContentConfig(system_instruction=system_instruction)
)

first_answer = response.text
print("Model:", first_answer)
print("\n" + "="*50 + "\n")

# Add model's response to history
messages.append({"role": "model", "content": first_answer})

# Add follow-up question
messages.append({"role": "user", "content": "Can you give me an example in Python?"})

# Get second response - the full history is sent again, which is what lets the
# model answer the follow-up in context.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=[
        types.Content(role=m["role"], parts=[types.Part.from_text(text=m["content"])])
        for m in messages
    ],
    config=types.GenerateContentConfig(system_instruction=system_instruction)
)

print("Model:", response.text)

## Part 2: Building a Conversation Manager

Let's create a helper class to manage conversations:

In [None]:
class Conversation:
    """A simple multi-turn conversation manager.

    The history is kept as a list of ``{"role": ..., "content": ...}`` dicts
    (roles ``"user"`` and ``"model"``) and is replayed in full on every
    request. The system instruction is stored separately and passed through
    the request configuration. Uses the module-level ``client`` and ``types``
    names created in the setup cell.
    """

    def __init__(self, system_message="You are a helpful assistant.", model="gemini-2.5-flash"):
        """Start an empty conversation.

        Args:
            system_message: System instruction sent with every request.
            model: Model name passed to ``generate_content``.
        """
        self.system_message = system_message
        self.messages = []  # History of user/model turns
        self.model = model
        self.total_tokens = 0  # Running sum of reported token usage

    def add_user_message(self, content):
        """Add a user message to the conversation"""
        self.messages.append({"role": "user", "content": content})

    def get_response(self, temperature=0.7, max_tokens=None):
        """Send the current history to the model and record its reply.

        Args:
            temperature: Sampling temperature for this request.
            max_tokens: Optional cap on output tokens (``None`` = no cap set).

        Returns:
            The reply text. If the response carries no text, an empty string
            is returned and nothing is added to the history.
        """
        # Convert internal message format to Content objects.
        # `text` must be passed by keyword: Part.from_text is keyword-only.
        content_list = [
            types.Content(role=m["role"], parts=[types.Part.from_text(text=m["content"])])
            for m in self.messages
        ]

        config = types.GenerateContentConfig(
            system_instruction=self.system_message,
            temperature=temperature,
            max_output_tokens=max_tokens
        )

        response = client.models.generate_content(
            model=self.model,
            contents=content_list,
            config=config
        )

        # response.text can be None; normalise so callers can always treat the
        # result as a string.
        model_message = response.text or ""
        if model_message:
            # Only real replies go into the history - an empty model turn
            # would add no context to later requests.
            self.messages.append({"role": "model", "content": model_message})

        usage = response.usage_metadata
        if usage and usage.total_token_count:
            # The count itself may be missing even when usage metadata is present.
            self.total_tokens += usage.total_token_count

        return model_message

    def chat(self, user_message, temperature=0.7, max_tokens=None):
        """Convenience method: add user message and get response"""
        self.add_user_message(user_message)
        return self.get_response(temperature, max_tokens)

    def display_history(self):
        """Print the system message followed by every recorded turn."""
        print(f"SYSTEM: {self.system_message}")
        print("-" * 50)
        for msg in self.messages:
            role = msg["role"].upper()
            content = msg["content"]
            print(f"{role}: {content}")
            print("-" * 50)

    def get_token_count(self):
        """Get total tokens used"""
        return self.total_tokens

print("✓ Conversation class created!")

### Test the Conversation Manager

In [None]:
# A tutor persona: the system instruction applies to every turn below.
convo = Conversation(
    system_message="You are a friendly data science tutor. Keep answers concise but clear."
)

# Each question builds on the previous answer, so order matters.
tutor_questions = (
    "What's the difference between supervised and unsupervised learning?",
    "Which one would I use for clustering?",
    "Give me an example algorithm for that.",
)
for tutor_question in tutor_questions:
    print(convo.chat(tutor_question))
    print("\n" + "="*50 + "\n")

print(f"Total tokens used: {convo.get_token_count()}")

### View Full Conversation History

In [None]:
# Print the system message followed by every user/model turn recorded so far.
convo.display_history()

## Part 3: System Instruction Strategies

The system instruction is powerful! Let's explore different personas:

In [None]:
# The one question both personas will answer
question = "What is a neural network?"

# Two conversations that differ only in their system instruction
expert = Conversation(
    system_message="You are an expert who gives concise, technical answers. Use precise terminology."
)
teacher = Conversation(
    system_message="You are a patient teacher explaining concepts to complete beginners. Use analogies and simple language."
)

for position, (label, persona) in enumerate([("EXPERT", expert), ("TEACHER", teacher)]):
    if position:
        # Divider goes between the answers only, not after the last one.
        print("\n" + "="*50 + "\n")
    print(f"{label}:")
    print(persona.chat(question))

### Structured Output with System Instructions

In [None]:
# The output format is spelled out once, in the system instruction, so every
# answer in this conversation should follow it.
medical_format_prompt = """You are a medical information assistant. 
    Always structure your responses as:
    1. DEFINITION: Brief definition
    2. KEY POINTS: 3-4 bullet points
    3. NOTE: Important consideration or caution
    """
structured = Conversation(system_message=medical_format_prompt)

first_topic, follow_up_topic = "What is hypertension?", "What about hypotension?"
print(structured.chat(first_topic))
print("\n" + "="*50 + "\n")
print(structured.chat(follow_up_topic))

## Part 4: Managing Context Window

Conversations can get too long! The model has a maximum context window (tokens it can process).

### Strategy 1: Keep Recent Messages Only

In [None]:
class ConversationWithLimit(Conversation):
    """Conversation that sends only the most recent messages to the model.

    Before each request the history is cut down to the newest ``max_history``
    messages. Older turns are discarded, not summarised.
    """

    def __init__(self, system_message="You are a helpful assistant.", 
                 model="gemini-2.5-flash", max_history=6):
        """Create a conversation that keeps at most ``max_history`` messages.

        Args:
            system_message: System instruction sent with every request.
            model: Model name passed on to ``Conversation``.
            max_history: Number of most recent messages to keep.
        """
        super().__init__(system_message, model)
        self.max_history = max_history  # Keep last N messages

    def _trim_history(self):
        """Cut ``self.messages`` down to the newest ``max_history`` entries."""
        # Always keep at least the newest message. A plain ``messages[-0:]``
        # would return the whole list, i.e. a limit of 0 would trim nothing.
        keep = max(self.max_history, 1)
        if len(self.messages) <= keep:
            return

        window = self.messages[-keep:]
        # If the cut landed in the middle of an exchange, the window starts
        # with a model reply whose question is gone; skip ahead to the first
        # user turn so the retained history begins with a user message.
        first_user = next(
            (i for i, msg in enumerate(window) if msg["role"] == "user"), None
        )
        if first_user:
            window = window[first_user:]
        self.messages = window

    def get_response(self, temperature=0.7, max_tokens=None):
        """Trim the history, then delegate to ``Conversation.get_response``."""
        self._trim_history()
        return super().get_response(temperature, max_tokens)

# Test it: six turns against a window of four messages
limited = ConversationWithLimit(max_history=4)

for i in range(6):
    # `reply` (not `response`) so the API response object from the earlier
    # cell is not replaced by a plain string.
    reply = limited.chat(f"This is message number {i+1}")
    # Guard against a turn that produced no text before slicing.
    print(f"Turn {i+1}: {(reply or '')[:50]}...")

print("\n" + "="*50 + "\n")
print(f"Messages in memory: {len(limited.messages)}")
print("\nCurrent history:")
limited.display_history()

### Strategy 2: Summarize Old Context

In [None]:
def summarize_conversation(messages):
    """Ask the model for a 2-3 sentence summary of a conversation.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The summary text, or an empty string when there is nothing to
        summarise or the response carries no text.
    """
    # Nothing to summarise: skip the API call entirely.
    if not messages:
        return ""

    # Format the conversation as one "role: content" line per message
    convo_text = "\n".join([
        f"{msg['role']}: {msg['content']}" 
        for msg in messages
    ])
    
    summary_prompt = f"""Summarize this conversation in 2-3 sentences, 
    preserving key facts and context:
    
    {convo_text}
    """
    
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=summary_prompt,
        config=types.GenerateContentConfig(
            system_instruction="You create concise conversation summaries.",
            temperature=0.3
        )
    )
    
    # response.text can be None; callers print or store the result as a string.
    return response.text or ""

# Test it: summarize the history currently held by `limited` from the
# previous cell (already trimmed to its most recent messages).
print(summarize_conversation(limited.messages))

## Part 5: Practical Conversation Applications

### Application 1: Q&A Assistant

In [None]:
class QAAssistant:
    """Topic-focused question answering on top of a ``Conversation``.

    All questions go through one underlying conversation, so later answers
    can refer back to earlier ones.
    """

    def __init__(self, topic="general knowledge"):
        """Create the assistant with a system instruction built around ``topic``."""
        self.topic = topic
        persona_prompt = f"""You are a knowledgeable assistant specialized in {topic}. 
        Answer questions clearly and build on previous context in the conversation.
        If you don't know something, say so."""
        self.convo = Conversation(system_message=persona_prompt)

    def ask(self, question):
        """Send ``question`` to the conversation and return the reply."""
        answer = self.convo.chat(question)
        return answer

    def history(self):
        """Print the underlying conversation's history."""
        self.convo.display_history()

# A Q&A assistant scoped to Python programming
python_helper = QAAssistant(topic="Python programming")

# The second and third questions only make sense given the earlier answers.
python_questions = [
    "What are list comprehensions?",
    "Show me an example with filtering.",
    "How is that different from a regular for loop?",
]
for position, python_question in enumerate(python_questions):
    if position:
        # Divider between answers, none after the last.
        print("\n" + "="*50 + "\n")
    print(python_helper.ask(python_question))

### Application 2: Research Interview Assistant

In [None]:
class InterviewAssistant:
    """Interviewer persona that follows up on what an interviewee says."""

    def __init__(self, research_topic):
        """Set up the interviewer conversation for ``research_topic``."""
        self.topic = research_topic
        interviewer_prompt = f"""You are helping conduct a research interview about {research_topic}.
        Your role is to:
        1. Ask thoughtful follow-up questions
        2. Clarify ambiguous statements
        3. Probe for more details when needed
        4. Maintain a professional, curious tone
        """
        self.convo = Conversation(system_message=interviewer_prompt)

    def respond(self, interviewee_response):
        """Pass the interviewee's statement to the model and return its reply."""
        follow_up = self.convo.chat(interviewee_response)
        return follow_up

    def get_summary(self):
        """Summarize the interview so far via ``summarize_conversation``."""
        transcript = self.convo.messages
        return summarize_conversation(transcript)

# Example usage: two interviewee statements, then a summary of the exchange
interviewer = InterviewAssistant("patient experiences with telemedicine")

interviewee_statements = (
    "I started using telemedicine during COVID.",
    "It was convenient but I missed the personal connection.",
)
for statement in interviewee_statements:
    print("INTERVIEWER:", interviewer.respond(statement))
    print("\n" + "="*50 + "\n")

print("Interview Summary:")
print(interviewer.get_summary())

### Application 3: Debugging Assistant

In [None]:
# The user's first message: a snippet plus the error it produces
error_report = """I'm getting a KeyError in my Python code:
my_dict = {'name': 'Alice', 'age': 30}
print(my_dict['city'])
"""

# The system instruction lays out the steps the assistant should follow.
debugging_prompt = """You are a debugging assistant. When users share code and errors:
    1. Identify the likely cause
    2. Explain why it's happening
    3. Suggest a fix with code
    4. Ask clarifying questions if needed
    """
debugging_assistant = Conversation(system_message=debugging_prompt)

# Simulate debugging session: the report first, then a follow-up question
debug_turns = [error_report, "How can I check if a key exists before accessing it?"]
for position, debug_turn in enumerate(debug_turns):
    if position:
        print("\n" + "="*50 + "\n")
    print(debugging_assistant.chat(debug_turn))

## Part 6: Cost and Performance Considerations

In [None]:
# Compare conversation lengths: every turn resends the whole history, so the
# total grows faster than the number of turns.
short_convo = Conversation()
for i in range(3):
    short_convo.chat(f"Question {i+1}")

long_convo = Conversation()
for i in range(10):
    long_convo.chat(f"Question {i+1}")

short_tokens = short_convo.get_token_count()
long_tokens = long_convo.get_token_count()

print(f"Short conversation (3 turns): {short_tokens} tokens")
print(f"Long conversation (10 turns): {long_tokens} tokens")
if short_tokens:
    print(f"\nToken growth factor: {long_tokens / short_tokens:.2f}x")
else:
    # The counter stays at 0 when no usage metadata was reported; avoid
    # dividing by zero in that case.
    print("\nToken growth factor: n/a (no token usage was reported)")

## Key Takeaways

1. **Conversations are message lists** - Just add to the list to maintain context
2. **System instructions are powerful** - They shape the entire conversation behavior
3. **Context grows quickly** - Each turn includes all previous messages
4. **Manage conversation length** - Keep recent messages or summarize old ones
5. **Structure matters** - Clear roles and formatting help the model respond appropriately

## Next Week Preview

Next week, we'll explore **programmatic prompt engineering**:
- Building dynamic prompts
- Template systems
- Few-shot learning
- Output parsing

Complete the assignment to practice building conversational applications!