In [None]:
import re

import ollama  # Make sure you've `pip install ollama`

In [None]:
# Experiment 1: Specialized Task Delegation (Summarizer + Elaborator)

def summarize_and_elaborate(text):
    """Summarize `text` with phi3, then have llama3 expand on one point of that summary.

    Args:
        text: The passage to condense.

    Returns:
        None. The summary and the elaboration are printed as they arrive.
    """
    print("--- Experiment 1: Summarizer + Elaborator ---")

    # Stage 1: the smaller model condenses the input.
    print("\n[Phi-3] Summarizing text...")
    summarize_request = {
        'role': 'user',
        'content': f'Summarize the following text concisely:\n\n{text}',
    }
    summary = ollama.chat(model='phi3', messages=[summarize_request])['message']['content']
    print(f"Summary: {summary}")

    # Stage 2: the larger model receives only the summary, never the source text.
    print("\n[Llama 3] Elaborating on the summary...")
    elaborate_request = {
        'role': 'user',
        'content': f'The following is a summary: "{summary}". Expand on one key point from this summary, providing more detail or context.',
    }
    elaboration = ollama.chat(model='llama3', messages=[elaborate_request])['message']['content']
    print(f"Elaboration: {elaboration}")
    print("-" * 50)

# Test: run the summarize -> elaborate pipeline on a short passage about foundation models.
# The triple-quoted literal begins and ends with a newline, which is sent to the model as-is.
long_text = """
Foundation models are large-scale machine learning models trained on vast amounts of broad, unlabeled data (e.g., text, images, code). They are designed to be adaptable to a wide range of downstream tasks with minimal fine-tuning, often through techniques like few-shot or zero-shot prompting. Large Language Models (LLMs) like GPT-4 are the most prominent examples, but foundation models also exist for vision, code, audio, and increasingly, multiple modalities.
"""
summarize_and_elaborate(long_text)

In [None]:
# Experiment 2: Simple RAG (Retrieval Augmented Generation)

import chromadb # Make sure you've `pip install chromadb`

def simple_rag_experiment(query):
    """Answer `query` with a minimal retrieve-then-generate pipeline.

    The function makes sure a ChromaDB collection holds four hard-coded facts
    (embedded with nomic-embed-text), embeds the query the same way, fetches the
    single nearest document and asks llama3 to answer from that document alone.

    Args:
        query: The user's question.

    Returns:
        None. Progress messages and the final answer are printed.
    """
    print("--- Experiment 2: Simple RAG ---")

    # 1. The "knowledge base": a handful of standalone facts.
    documents = [
        "The capital of France is Paris.",
        "The Eiffel Tower is located in Paris.",
        "Berlin is the capital of Germany.",
        "Mount Everest is the highest mountain in the world."
    ]

    # 2. Obtain the collection, recreating it from scratch if the first attempt fails.
    client = chromadb.Client()
    collection_name = "my_ollama_rag_collection"
    try:
        collection = client.get_or_create_collection(name=collection_name)
    except Exception as e:
        print(f"Warning: Could not get or create collection directly. Trying to delete and recreate. Error: {e}")
        client.delete_collection(name=collection_name)
        collection = client.get_or_create_collection(name=collection_name)

    # Index the documents only when the collection is empty, so repeated calls reuse it.
    if collection.count() != 0:
        print(f"[ChromaDB] Collection '{collection_name}' already contains {collection.count()} documents.")
    else:
        print("[ChromaDB] Adding documents to collection (using nomic-embed-text for embeddings)...")
        doc_vectors = [
            ollama.embeddings(model='nomic-embed-text', prompt=doc)['embedding']
            for doc in documents
        ]
        doc_ids = [f"doc_{position}" for position, _ in enumerate(documents)]
        collection.add(
            embeddings=doc_vectors,
            documents=documents,
            ids=doc_ids
        )
        print(f"[ChromaDB] Added {len(documents)} documents.")

    # 3. Embed the query with the same embedding model used for the documents.
    print(f"\n[nomic-embed-text] Embedding query: '{query}'")
    query_embedding = ollama.embeddings(model='nomic-embed-text', prompt=query)['embedding']

    # 4. Nearest-neighbour lookup; only the top hit is used.
    print("[ChromaDB] Searching for relevant documents...")
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=1
    )
    hits = results['documents']
    if hits and hits[0]:
        relevant_doc = hits[0][0]
    else:
        relevant_doc = "No relevant document found."
    print(f"Relevant document found: '{relevant_doc}'")

    # 5. Ask llama3, instructing it to stay within the retrieved context.
    print("\n[Llama 3] Generating answer with context...")
    system_message = {
        'role': 'system',
        'content': "You are a helpful assistant. Use the provided context to answer the question. If the answer is not in the context, state that you don't know.",
    }
    user_message = {'role': 'user', 'content': f"Context: {relevant_doc}\nQuestion: {query}"}
    response = ollama.chat(model='llama3', messages=[system_message, user_message])
    print(f"Answer: {response['message']['content']}")
    print("-" * 50)

# Test: three questions the knowledge base covers, then one it does not.
for rag_question in (
    "What is the capital of France?",
    "Where is the Eiffel Tower located?",
    "What is the highest mountain?",
    "Who painted the Mona Lisa?",  # Should say it doesn't know
):
    simple_rag_experiment(rag_question)

In [None]:
# Experiment 3: Creative Collaboration (Idea Generator + Elaborator)

def creative_collaboration(theme):
    """Have phi3 pitch a story concept for `theme`, then have llama3 write it up.

    Args:
        theme: Subject of the story.

    Returns:
        None. The concept and the resulting short story are printed.
    """
    def _reply(model, prompt):
        # One single-turn chat; keep only the text of the answer.
        answer = ollama.chat(model=model, messages=[{'role': 'user', 'content': prompt}])
        return answer['message']['content']

    print("--- Experiment 3: Creative Collaboration ---")

    # Step 1: quick concept from the smaller model.
    print(f"\n[Phi-3] Generating a story idea about: {theme}")
    story_idea = _reply(
        'phi3',
        f"Generate a very brief, interesting concept for a short story about '{theme}'. Just the core idea.",
    )
    print(f"Story Idea: {story_idea}")

    # Step 2: the larger model turns that concept into a few sentences of narrative.
    print("\n[Llama 3] Expanding on the idea into a short narrative...")
    short_story = _reply(
        'llama3',
        f"Based on the following core idea, write a very short, engaging story (2-3 sentences):\n\nCore Idea: {story_idea}",
    )
    print(f"Short Story: {short_story}")
    print("-" * 50)

# Test: two unrelated themes.
for story_theme in (
    "a lost ancient artifact",
    "a futuristic city where dreams are shared",
):
    creative_collaboration(story_theme)

In [None]:
# Experiment 4: Self-Correction Loop (Simple Refinement)

def self_correction_experiment(initial_prompt):
    """Run one draft -> critique -> rewrite cycle, using llama3 for all three steps.

    Args:
        initial_prompt: The writing task handed to the model for the first draft.

    Returns:
        None. The draft, the critique and the rewrite are printed.
    """
    def _ask_llama(prompt):
        # Each step is an independent single-turn chat: no history is carried over,
        # so earlier outputs must be embedded in the next prompt explicitly.
        answer = ollama.chat(model='llama3', messages=[{'role': 'user', 'content': prompt}])
        return answer['message']['content']

    print("--- Experiment 4: Self-Correction Loop ---")

    # Step 1: first draft.
    print(f"\n[Llama 3] Initial generation for: '{initial_prompt}'")
    generated_text = _ask_llama(initial_prompt)
    print(f"Initial Text:\n{generated_text}")

    # Step 2: the same model reviews its own draft and proposes one improvement.
    print("\n[Llama 3] Critiquing the generated text...")
    critique = _ask_llama(f"""
    Here is a piece of text:
    "{generated_text}"

    Critique this text. Identify one specific area for improvement (e.g., make it more concise, add more detail, improve clarity, fix grammar, be more engaging). Provide a single, actionable suggestion for improvement.
    """)
    print(f"Critique: {critique}")

    # Step 3: rewrite the draft with that suggestion applied.
    print("\n[Llama 3] Refining text based on critique...")
    refined_text = _ask_llama(f"""
    Here is a piece of text:
    "{generated_text}"

    Here is a critique and suggestion for improvement:
    "{critique}"

    Rewrite the original text incorporating this specific suggestion.
    """)
    print(f"Refined Text:\n{refined_text}")
    print("-" * 50)

# Test: run one draft -> critique -> rewrite cycle on a short writing prompt.
self_correction_experiment("Write a short paragraph about the benefits of waking up early.")

In [None]:
# Experiment 5: Role-Based Q&A (Domain Expert Routing)

# Whole-word keyword patterns that select a route. Word boundaries prevent
# accidental hits such as "history" -> "story" or "barcode" -> "code", which a
# plain substring test would route to the wrong expert.
_CODE_QUESTION = re.compile(r"\b(?:codes?|coding|programming|functions?)\b", re.IGNORECASE)
_CREATIVE_QUESTION = re.compile(r"\b(?:story|stories|creative)\b", re.IGNORECASE)


def role_based_qa(question):
    """Route `question` to a model and role prompt chosen by keyword, then print the answer.

    Routing rules, checked in this order:
      * code keywords (code/codes/coding/programming/function/functions)
        -> llama3, prefixed with a programming-expert instruction;
      * creative keywords (story/stories/creative)
        -> llama3, prefixed with a creative-writer instruction;
      * anything else -> phi3 with the question sent unchanged.

    Keywords are matched case-insensitively as whole words only.

    Args:
        question: The user's question.

    Returns:
        None. The routing decision and the answer are printed.
    """
    print("--- Experiment 5: Role-Based Q&A ---")

    # Simple keyword-based routing (can be more complex with another LLM or NER)
    if _CODE_QUESTION.search(question):
        model_to_use = 'llama3'  # Llama 3 often handles code reasonably well
        print(f"\n[Routing] Question seems code-related, routing to {model_to_use}...")
        prompt = f"As a programming expert, answer the following: {question}"
    elif _CREATIVE_QUESTION.search(question):
        model_to_use = 'llama3'  # Llama 3 for creative tasks
        print(f"\n[Routing] Question seems creative, routing to {model_to_use}...")
        prompt = f"As a creative writer, respond to the following: {question}"
    else:
        model_to_use = 'phi3'  # Default or general purpose model
        print(f"\n[Routing] General question, routing to {model_to_use}...")
        prompt = question

    response = ollama.chat(
        model=model_to_use,
        messages=[{'role': 'user', 'content': prompt}]
    )
    print(f"Answer from {model_to_use}:\n{response['message']['content']}")
    print("-" * 50)

# Test: one question per route (code, creative, general).
for routed_question in (
    "Write a very short Python function for a Fibonacci sequence.",
    "Tell me a very short, imaginative story about a talking cat.",
    "What is the capital of Canada?",
):
    role_based_qa(routed_question)

In [None]:
# Experiment 6: A/B Testing Responses

def ab_test_responses(prompt, model_a='llama3', model_b='phi3'):
    """Send one prompt to two models and print both answers plus a brief comparison.

    Args:
        prompt: User message sent unchanged to both models.
        model_a: Ollama model name used for the first answer.
        model_b: Ollama model name used for the second answer.

    Returns:
        None. Everything is reported through print().
    """
    def _ask(model):
        # Single-turn chat; only the reply text is kept.
        reply = ollama.chat(model=model, messages=[{'role': 'user', 'content': prompt}])
        return reply['message']['content']

    print("--- Experiment 6: A/B Testing Responses ---")
    print(f"Prompt: '{prompt}'")

    # The models are queried one after the other, A first.
    print(f"\n[Model A: {model_a}] Generating response...")
    output_a = _ask(model_a)
    print(f"Response from {model_a}:\n{output_a}")

    print(f"\n[Model B: {model_b}] Generating response...")
    output_b = _ask(model_b)
    print(f"Response from {model_b}:\n{output_b}")

    # The comparison is deliberately crude: a whitespace word count and a 100-character preview.
    # You could add more sophisticated comparison here (e.g., keyword presence, sentiment, another LLM's critique)
    print("\n--- Comparison ---")
    words_a = len(output_a.split())
    words_b = len(output_b.split())
    print(f"Length Model A ({words_a} words): {output_a[:100]}...")
    print(f"Length Model B ({words_b} words): {output_b[:100]}...")
    print("-" * 50)

# Test: the first run names both models explicitly, the second relies on the defaults.
for ab_prompt, ab_models in (
    ("Describe a futuristic city with flying cars.", {'model_a': 'llama3', 'model_b': 'phi3'}),
    ("Explain the concept of quantum entanglement in simple terms.", {}),
):
    ab_test_responses(ab_prompt, **ab_models)

In [None]:
# Experiment 7: Content Moderation/Safety Filter

def content_moderation_experiment(user_input, model_to_generate='llama3', model_to_moderate='phi3'):
    """Generate content with one model and have a second model act as a safety filter.

    The moderator is told to answer "SAFE" or "FLAGGED" (plus an explanation).
    The verdict is read from the start of its reply:
      * reply opens with SAFE -> content is reported as safe, even if the
        explanation later mentions the word "flagged";
      * reply opens with FLAGGED, or mentions FLAGGED anywhere -> content is flagged;
      * anything else -> the verdict is unclear, so the content is NOT reported
        as safe (the filter fails closed) and manual review is requested.

    Args:
        user_input: Prompt given to the generating model.
        model_to_generate: Ollama model name that writes the content.
        model_to_moderate: Ollama model name that reviews the content.

    Returns:
        None. The content, the moderator's reply and the decision are printed.
    """
    print("--- Experiment 7: Content Moderation/Safety Filter ---")

    # LLM 1: Content Generation
    print(f"\n[{model_to_generate}] Generating content based on: '{user_input}'")
    generated_content_response = ollama.chat(
        model=model_to_generate,
        messages=[{'role': 'user', 'content': user_input}]
    )
    generated_content = generated_content_response['message']['content']
    print(f"Generated Content:\n{generated_content}")

    # LLM 2: Moderation/Safety Check
    print(f"\n[{model_to_moderate}] Moderating the generated content...")
    moderation_prompt = f"""
    Review the following text for any inappropriate, offensive, or harmful content.
    If it is safe, respond with "SAFE".
    If it contains inappropriate content, respond with "FLAGGED" and briefly explain why, suggesting a safer alternative if possible.

    Text to review:
    "{generated_content}"
    """
    moderation_response = ollama.chat(
        model=model_to_moderate,
        messages=[{'role': 'user', 'content': moderation_prompt}]
    )
    moderation_result = moderation_response['message']['content'].strip()
    print(f"Moderation Result: {moderation_result}")

    # Leading verdict word, ignoring wrapping quotes/markdown. The lookahead keeps
    # words that merely start with a keyword (e.g. "SAFETY") from counting.
    leading_verdict = re.match(r"[\W_]*(SAFE|FLAGGED)(?![A-Za-z])", moderation_result, re.IGNORECASE)
    if leading_verdict and leading_verdict.group(1).upper() == "SAFE":
        print("\nContent deemed safe.")
    elif leading_verdict or "FLAGGED" in moderation_result.upper():
        print("\nACTION REQUIRED: Content was flagged!")
    else:
        # Neither keyword was given: never treat an unreadable verdict as approval.
        print("\nACTION REQUIRED: Moderation verdict unclear; review the content manually.")

    print("-" * 50)

# Test inputs, in order:
#   1. a benign request;
#   2. a request that might produce something questionable. It is still mild and only aims
#      to show the moderation attempting to work (be careful with actual explicit content).
for moderation_input in (
    "Write a happy short poem about a cat.",
    "Write a frustrated rant about bad drivers.",
):
    content_moderation_experiment(moderation_input)

In [None]:
# Experiment 8: Persona-Based Dialogue

def persona_based_dialogue(topic, user_query):
    """Let llama3 invent an expert persona for `topic`, then let phi3 answer in that persona.

    Args:
        topic: Field of expertise the persona should have.
        user_query: Question the persona is asked to answer.

    Returns:
        None. The persona description and its answer are printed.
    """
    print("--- Experiment 8: Persona-Based Dialogue ---")

    # Step 1: persona description from the larger model.
    print(f"\n[Llama 3] Generating a persona for a discussion about '{topic}'...")
    persona_request = {
        'role': 'user',
        'content': f"Create a detailed and interesting persona for a helpful assistant who is an expert in '{topic}'. Describe their background, typical tone, and what kind of advice they'd give.",
    }
    persona_description = ollama.chat(model='llama3', messages=[persona_request])['message']['content']
    print(f"Generated Persona:\n{persona_description}")

    # Step 2: the persona text is pasted into the prompt of a second, independent chat.
    print(f"\n[Phi-3] Responding as the generated persona to query: '{user_query}'...")
    dialogue_prompt = f"""
    You are to act as the following persona:
    {persona_description}

    Now, respond to the following user query:
    "{user_query}"
    """
    # 'phi3' keeps this step fast; 'llama3' may suit more complex personas/responses.
    in_character = ollama.chat(
        model='phi3',
        messages=[{'role': 'user', 'content': dialogue_prompt}],
    )
    persona_response_text = in_character['message']['content']
    print(f"Persona's Response:\n{persona_response_text}")
    print("-" * 50)

# Test: (topic, question) pairs from two unrelated domains.
for persona_topic, persona_question in (
    ("gardening", "What's the best way to deal with aphids on roses?"),
    ("space travel", "What are the biggest challenges of long-duration space missions?"),
):
    persona_based_dialogue(persona_topic, persona_question)

In [None]:
# Experiment 9: Query Expansion / Reframing

def query_expansion_experiment(original_query):
    """Have phi3 rewrite a vague query into a fuller prompt, then have llama3 answer it.

    Args:
        original_query: The short query as typed by the user.

    Returns:
        None. The rewritten query and the final answer are printed.
    """
    def _ask(model, content):
        # Single-turn chat returning the raw reply text.
        reply = ollama.chat(model=model, messages=[{'role': 'user', 'content': content}])
        return reply['message']['content']

    print("--- Experiment 9: Query Expansion / Reframing ---")

    # Step 1: rewrite the query. Surrounding whitespace is stripped from the reply
    # because the text is forwarded verbatim as the next prompt.
    print(f"\n[Phi-3] Expanding/reframing the query: '{original_query}'")
    expansion_prompt = f"""
    The user has asked: "{original_query}"
    This query is a bit vague. Rephrase or expand this query into a more detailed and effective prompt that would get a better, more comprehensive answer from a sophisticated AI. Focus on clarifying intent or adding relevant context.
    The output should be just the refined prompt.
    """
    expanded_query = _ask('phi3', expansion_prompt).strip()
    print(f"Expanded Query: {expanded_query}")

    # Step 2: the answering model sees only the rewritten query, not the original.
    print("\n[Llama 3] Answering based on the expanded query...")
    final_answer = _ask('llama3', expanded_query)
    print(f"Final Answer:\n{final_answer}")
    print("-" * 50)

# Test: two deliberately underspecified queries.
for vague_query in ("tell me about AI", "benefits of exercise"):
    query_expansion_experiment(vague_query)

In [None]:
# Experiment 10: Asynchronous A/B Testing with Quality Scoring

import asyncio

def _parse_score(score_text):
    """Extract a 1-10 rating from the scoring model's reply; 0 means "no usable score".

    The first standalone whole number between 1 and 10 is used, so replies such
    as "8", "8/10", "Score: 8" or "**7** - solid answer" are all understood.
    Empty replies, replies without a number and out-of-range numbers yield 0.
    """
    # The lookarounds stop digits inside longer numbers or decimals (the 5 in
    # "15", the 8 in "0.8") from being read as a score.
    found = re.search(r"(?<![\d.])(10|[1-9])(?!\d)", score_text)
    return int(found.group(1)) if found else 0


async def generate_and_score(model_name, prompt, scoring_model='llama3'):
    """Generate a response with one model, then have a second model rate it from 1 to 10.

    Both blocking `ollama.chat` calls run via `asyncio.to_thread`, so several
    invocations of this coroutine can overlap when gathered.

    Args:
        model_name: Ollama model that answers `prompt`.
        prompt: The user prompt to answer and to rate against.
        scoring_model: Ollama model that rates the answer.

    Returns:
        A `(model_name, generated_text, score)` tuple. `score` is an int from
        1 to 10, or 0 when the scoring model's reply contains no usable rating.
    """
    print(f"  - [{model_name}] Generating response...")
    generation_response = await asyncio.to_thread(
        ollama.chat,
        model=model_name,
        messages=[{'role': 'user', 'content': prompt}]
    )
    generated_text = generation_response['message']['content']

    print(f"  - [{scoring_model}] Scoring response from {model_name}...")
    scoring_prompt = f"""
    Rate the following response to the prompt: "{prompt}" on a scale of 1 to 10 (1 being very poor, 10 being excellent) based on its helpfulness, accuracy, and completeness.
    Provide only the numerical score.

    Response to rate:
    "{generated_text}"
    """
    scoring_response = await asyncio.to_thread(
        ollama.chat,
        model=scoring_model,
        messages=[{'role': 'user', 'content': scoring_prompt}]
    )
    # Models often ignore "only the numerical score", so parse leniently
    # instead of requiring the whole first line to be an integer.
    score = _parse_score(scoring_response['message']['content'])

    return model_name, generated_text, score

async def async_ab_testing_with_scoring(prompt, models_to_test=['llama3', 'phi3']):
    """Run `generate_and_score` for several models concurrently and report the best one.

    Args:
        prompt: Prompt sent to every model under test.
        models_to_test: Iterable of Ollama model names. The default list is
            only read, never modified.

    Returns:
        None. Per-model scores and the winner are printed. When
        `models_to_test` is empty a short notice is printed instead of a winner.
    """
    print("--- Experiment 10: Asynchronous A/B Testing with Quality Scoring ---")
    print(f"Prompt: '{prompt}'")

    # Take a private snapshot: works for any iterable and leaves the shared default untouched.
    models = list(models_to_test)
    if not models:
        # Without this guard the summary would announce "None ... -1/10" as the winner.
        print("\nNo models to test; nothing to compare.")
        print("-" * 50)
        return

    results = await asyncio.gather(*(generate_and_score(model, prompt) for model in models))

    print("\n--- Results Summary ---")
    for model_name, generated_text, score in results:
        print(f"\nModel: {model_name}")
        print(f"  Score: {score}/10")
        print(f"  Response (first 100 chars): {generated_text[:100]}...")

    # max() returns the first of equally scored entries, so ties go to the
    # model listed earliest.
    best_model, _, best_score = max(results, key=lambda result: result[2])

    print(f"\nConclusion: {best_model} performed best with a score of {best_score}/10.")
    print("-" * 50)

async def main():
    """Run the async A/B scoring experiment on each demo prompt, one after the other."""
    demo_prompts = (
        "Explain the concept of recursion in programming in a simple way.",
        "Write a short, uplifting haiku about nature.",
    )
    # Each experiment is awaited before the next starts; only the models
    # inside a single experiment run concurrently.
    for demo_prompt in demo_prompts:
        await async_ab_testing_with_scoring(demo_prompt)

# --- NOTE: in a Jupyter notebook an event loop is already running, so main() is started
# with a top-level `await` here rather than with asyncio.run(main()). In a plain Python
# script, use asyncio.run(main()) instead. ---
await main()