In [1]:
import os
from typing import List, Dict, Tuple
import random
import asyncio
from openai import AsyncOpenAI
from anthropic import AsyncAnthropic
import google.generativeai as genai
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

load_dotenv()

openai_client = AsyncOpenAI(api_key=os.getenv("SELF_RAG_OPENAI_API_KEY"))
anthropic_client = AsyncAnthropic(api_key=os.getenv("NEZ_CLAUDE_API_KEY"))
genai.configure(api_key=os.getenv("GEMINI_API"))

In [10]:
class ModelDiscussionSystem:
    def __init__(self):
        self.model_map = {
            "R1": "OpenAI",
            "R2": "Claude",
            "R3": "Gemini"
        }
        # Reverse mapping for internal use
        self.reverse_map = {v: k for k, v in self.model_map.items()}
        
    async def get_initial_responses(self, prompt: str) -> Dict[str, str]:
        """Get initial responses from all three models."""
        tasks = [
            self._get_openai_response(prompt),
            self._get_claude_response(prompt),
            self._get_gemini_response(prompt)
        ]
        responses = await asyncio.gather(*tasks)
        
        # Randomly assign response labels
        labels = list(self.reverse_map.values())
        random.shuffle(labels)
        
        return {labels[i]: resp for i, resp in enumerate(responses)}

    async def facilitate_discussion(self, initial_responses: Dict[str, str]) -> Tuple[str, str]:
        """Facilitate discussion between models about the best response."""
        discussion_prompt = self._create_discussion_prompt(initial_responses)
        
        # Get discussion responses from each model
        tasks = [
            self._get_openai_response(discussion_prompt),
            self._get_claude_response(discussion_prompt),
            self._get_gemini_response(discussion_prompt)
        ]
        discussion_responses = await asyncio.gather(*tasks)
        
        # Analyze discussion to determine chosen response and refinements
        chosen_response_label, refinements = self._analyze_discussion(discussion_responses)
        return chosen_response_label, refinements

    async def get_final_response(self, prompt: str, chosen_label: str, refinements: str) -> str:
        """Get final refined response from the chosen model."""
        final_prompt = self._create_final_prompt(prompt, refinements)
        
        # Map the response label back to the actual model
        chosen_model = self.model_map[chosen_label]
        
        if chosen_model == "OpenAI":
            return await self._get_openai_response(final_prompt)
        elif chosen_model == "Claude":
            return await self._get_claude_response(final_prompt)
        else:  # Gemini
            return await self._get_gemini_response(final_prompt)

    async def _get_openai_response(self, prompt: str) -> str:
        """Get response from OpenAI's GPT model."""
        response = await openai_client.chat.completions.create(
            model="chatgpt-4o-latest",
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    async def _get_claude_response(self, prompt: str) -> str:
        """Get response from Anthropic's Claude model."""
        response = await anthropic_client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.content[0].text

    async def _get_gemini_response(self, prompt: str) -> str:
        """Get response from Google's Gemini model."""
        model = genai.GenerativeModel('gemini-2.0-pro-exp-02-05')
        response = await model.generate_content_async(prompt)
        return response.text

    def _create_discussion_prompt(self, responses: Dict[str, str]) -> str:
        """Create prompt for models to discuss the best response."""
        formatted_responses = "\n\n".join([f"{label}:\n{response}" for label, response in responses.items()])
        
        return f"""Analyze these three responses to the same prompt and provide a structured evaluation. 
        Don't reveal which model you are, and refer to responses only by their labels (R1, R2, R3).

        === Responses ===
        {formatted_responses}

        Please provide your analysis in EXACTLY this format:

        === EVALUATION ===
        CHOSEN RESPONSE: [State explicitly "I choose R1" or "I choose R2" or "I choose R3"]
        
        REASONING: [Explain why this response is the strongest]

        === IMPROVEMENTS ===
        Please improve the chosen response by addressing these points:
        1. [First improvement point]
        2. [Second improvement point]
        3. [Third improvement point]

        Keep your response structured exactly as above with these exact headings."""

    def _create_final_prompt(self, original_prompt: str, refinements: str) -> str:
        """Create prompt for final refined response."""
        return f"""Original prompt: {original_prompt}

        Please provide a refined response incorporating these suggested improvements:
        {refinements}

        Give your best, most comprehensive answer incorporating these refinements."""

    def _analyze_discussion(self, discussion_responses: List[str]) -> Tuple[str, str]:
        """Analyze discussion responses to determine chosen response and refinements."""
        chosen_labels = []
        all_refinements = []
        
        # Keywords that might indicate preference
        preference_indicators = [
            "chose", "prefer", "recommend", "select", "pick", "favor", "best",
            "strongest", "most comprehensive", "most accurate", "most detailed"
        ]
        
        for response in discussion_responses:
            response_lower = response.lower()
            
            # Extract chosen label using expanded preference indicators
            for label in ["R1", "R2", "R3"]:
                for indicator in preference_indicators:
                    if f"{indicator} {label.lower()}" in response_lower:
                        chosen_labels.append(label)
                    # Check for label mentioned near positive words
                    if label.lower() in response_lower:
                        before_label = response_lower.split(label.lower())[0][-50:]  
                        if any(indicator in before_label for indicator in preference_indicators):
                            chosen_labels.append(label)
            
            # Extract refinements with more patterns
            refinement_markers = [
                "improvements:", "refinements:", "suggestions:", 
                "could be better by:", "should add:", "would benefit from:"
            ]
            
            for marker in refinement_markers:
                if marker in response_lower:
                    parts = response_lower.split(marker)
                    if len(parts) > 1:
                        refinement_text = parts[1]
                        # Find the next marker in the text, if any
                        next_marker_positions = [
                            refinement_text.find(m) 
                            for m in refinement_markers 
                            if m in refinement_text
                        ]
                        # If we found any other markers, cut off at the earliest one
                        if next_marker_positions:
                            valid_positions = [pos for pos in next_marker_positions if pos > 0]
                            if valid_positions:
                                refinement_text = refinement_text[:min(valid_positions)]
                        
                        all_refinements.append(refinement_text.strip())

        # Handle case where no clear preference is found
        if not chosen_labels:
            # Default to the first label if no preference is detected
            chosen_label = "R1"
            print("Warning: No clear preference detected in discussion. Defaulting to R1.")
        else:
            # Choose the most frequently mentioned label
            chosen_label = max(set(chosen_labels), key=chosen_labels.count)
        
        # Combine unique refinements
        unique_refinements = list(set(all_refinements))
        if not unique_refinements:
            unique_refinements = ["Please provide a more detailed and comprehensive response."]
        combined_refinements = "\n".join(unique_refinements)
        
        return chosen_label, combined_refinements

In [11]:
system = ModelDiscussionSystem()

async def get_model_discussion(prompt: str):
    """Utility function to run the entire discussion process."""
    print("Getting initial responses...")
    initial_responses = await system.get_initial_responses(prompt)
    print("\nInitial Responses:")
    for label, response in initial_responses.items():
        print(f"\n{label}:\n{response}\n{'-'*50}")
    
    print("\nFacilitating discussion...")
    chosen_label, refinements = await system.facilitate_discussion(initial_responses)
    print(f"\nChosen Response: {chosen_label}")
    print(f"Suggested Refinements:\n{refinements}")
    
    print("\nGetting final response...")
    final_response = await system.get_final_response(prompt, chosen_label, refinements)
    print(f"\nFinal Response:\n{final_response}")
    
    return {
        'initial_responses': initial_responses,
        'chosen_label': chosen_label,
        'refinements': refinements,
        'final_response': final_response
    }

In [12]:
prompt = "Explain the concept of quantum entanglement"
result = await get_model_discussion(prompt)

Getting initial responses...

Initial Responses:

R2:
Quantum entanglement is a fundamental phenomenon in quantum mechanics where two or more particles become interconnected in such a way that the state of one particle is instantly correlated with the state of the other, no matter how far apart they are. This means that measuring a property (such as spin, polarization, or position) of one entangled particle immediately affects the corresponding property of the other, even if they are separated by vast distances.

### Key Aspects of Quantum Entanglement:

1. **Non-Local Correlation** – When two particles are entangled, their properties are linked in a way that defies classical physics. Any measurement of one particle instantly influences the other, regardless of the distance between them.

2. **Superposition** – Before measurement, each entangled particle exists in a superposition of states. For example, an entangled electron's spin can be both "up" and "down" until a measurement collap

{'initial_responses': {'R2': 'Quantum entanglement is a fundamental phenomenon in quantum mechanics where two or more particles become interconnected in such a way that the state of one particle is instantly correlated with the state of the other, no matter how far apart they are. This means that measuring a property (such as spin, polarization, or position) of one entangled particle immediately affects the corresponding property of the other, even if they are separated by vast distances.\n\n### Key Aspects of Quantum Entanglement:\n\n1. **Non-Local Correlation** – When two particles are entangled, their properties are linked in a way that defies classical physics. Any measurement of one particle instantly influences the other, regardless of the distance between them.\n\n2. **Superposition** – Before measurement, each entangled particle exists in a superposition of states. For example, an entangled electron\'s spin can be both "up" and "down" until a measurement collapses it into one s

In [None]:
# Access specific parts of the result
initial_responses = result['initial_responses']
final_response = result['final_response']