In [1]:
import lionagi as li
import logging
import random
from typing import List

# Set up the root logger: INFO and above, each record rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

In [2]:
# Model identifiers that get_random_config picks from at random.
model_list = [
    "gpt-3.5-turbo",
    "gpt-4o",
    "gpt-4-turbo",
]

def check_output_completeness(output: str) -> bool:
    """Heuristically decide whether ``output`` ends on a finished sentence.

    Trailing whitespace and trailing closing wrappers (quotes, brackets,
    Markdown emphasis/code markers) are ignored before the final character is
    inspected, so text that ends with a newline after its full stop, or with a
    closing quote after it, still counts as complete.

    Args:
        output: Text to inspect.

    Returns:
        True when the remaining text ends with ``.``, ``!`` or ``?``;
        False otherwise, including for empty or whitespace-only text.
    """
    # Characters that may legitimately follow the sentence-ending punctuation.
    trailing_noise = " \t\r\n\"'`*_)]}\u201d\u2019"
    return output.rstrip(trailing_noise).endswith((".", "!", "?"))

def get_random_config(
    model_list=model_list,
    temperature_range = (0.7, 1.2),
    top_p_range = (0.8, 1.0),
    frequency_penalty_range = (0.0, 0.5),
    presence_penalty_range = (0.0, 0.5),
    max_tokens = 100,
):
    """Build a randomly sampled model configuration.

    Args:
        model_list: Sequence of model names; one is chosen with ``random.choice``.
        temperature_range: Bounds passed to ``random.uniform`` for ``temperature``.
        top_p_range: Bounds passed to ``random.uniform`` for ``top_p``.
        frequency_penalty_range: Bounds passed to ``random.uniform`` for
            ``frequency_penalty``.
        presence_penalty_range: Bounds passed to ``random.uniform`` for
            ``presence_penalty``.
        max_tokens: Copied into the result unchanged.

    Returns:
        A dict with the keys ``model``, ``temperature``, ``top_p``,
        ``frequency_penalty``, ``presence_penalty`` and ``max_tokens``.
    """
    config = {"model": random.choice(model_list)}

    # Sampled in this fixed order so a seeded ``random`` yields the same draws.
    sampled_ranges = (
        ("temperature", temperature_range),
        ("top_p", top_p_range),
        ("frequency_penalty", frequency_penalty_range),
        ("presence_penalty", presence_penalty_range),
    )
    for name, bounds in sampled_ranges:
        config[name] = random.uniform(*bounds)

    config["max_tokens"] = max_tokens
    return config

In [3]:

import asyncio

class ExpertModel(li.iModel):
    """``li.iModel`` subclass that also carries a running reward value.

    On top of the inherited model it provides helpers to generate an answer,
    to rank candidate answers by averaged judge scores, and to grade a single
    answer on a five-level scale.
    """

    def __init__(self, total_reward=0, **kwargs):
        """Create the model.

        Args:
            total_reward: Initial value stored on ``self.total_reward``.
            **kwargs: Forwarded unchanged to ``li.iModel.__init__``.
        """
        super().__init__(**kwargs)
        self.total_reward = total_reward

    @classmethod
    def random_expert(
        cls,
        model_list=model_list,
        temperature_range = (0.7, 1.2),
        top_p_range = (0.8, 1.0),
        frequency_penalty_range = (0.0, 0.5),
        presence_penalty_range = (0.0, 0.5),
        max_tokens = 1000,
    ):
        """Instantiate the class from a configuration drawn by ``get_random_config``.

        All arguments are passed straight through to ``get_random_config``;
        the resulting dict is expanded into the constructor's keyword arguments.
        """
        config = get_random_config(
            model_list=model_list,
            temperature_range=temperature_range,
            top_p_range=top_p_range,
            frequency_penalty_range=frequency_penalty_range,
            presence_penalty_range=presence_penalty_range,
            max_tokens=max_tokens,
        )
        return cls(**config)

    async def generate_complete_output(
        self,
        instruction=None,
        context=None,
        system=None,
        idx=0,
    ):
        """Generate an answer, asking once for a continuation if it looks cut off.

        Args:
            instruction: Instruction forwarded to ``branch.chat``.
            context: Context forwarded to ``branch.chat``.
            system: System message used to create the ``li.Branch``.
            idx: Zero-based position of this expert; only used (one-based) in
                the printed progress and parameter messages.

        Returns:
            The generated text, with the continuation appended when
            ``check_output_completeness`` rejected the first reply.
        """
        branch = li.Branch(system=system, imodel=self)
        print(f"Generating output for Expert {idx+1}...")

        output = await branch.chat(instruction, context)
        if not check_output_completeness(output):
            # Single follow-up on the same branch so the model sees its own partial answer.
            output += await branch.chat('continue and complete the previous sentence')

        config = self.to_dict()
        print(f"\nLLM Parameters for Expert {idx+1}:")
        print("------------------------")
        print(f"Model: {config['model']}")
        print(f"Max Tokens per chunk: {config['max_tokens']}")
        print(f"Temperature: {config['temperature']}")
        print(f"Top P: {config['top_p']}")
        print(f"Frequency Penalty: {config['frequency_penalty']}")
        print(f"Presence Penalty: {config['presence_penalty']}")
        print("------------------------")

        return output

    async def select_best_output(
        self, candidates: List[str], instruction=None, context=None,
        system=None, n_judge=3,
    ):
        """Score every candidate ``n_judge`` times and return the best one.

        Args:
            candidates: Candidate outputs to compare.
            instruction: Judge instruction; a default scoring prompt is used
                when falsy.
            context: Task context, wrapped as ``{"context": context}`` for the judge.
            system: Judge system message; defaults to "Act as a critical judge".
            n_judge: Number of independent scoring calls averaged per candidate.

        Returns:
            ``(index, candidate, average_score)`` for the candidate with the
            highest average score; the earliest candidate wins ties.

        Raises:
            ValueError: If ``candidates`` is empty or ``n_judge`` is below 1.
        """
        if not candidates:
            raise ValueError("candidates must contain at least one output")
        if n_judge < 1:
            raise ValueError("n_judge must be at least 1")

        system = system or "Act as a critical judge"
        instruction = instruction or "Basing on context, score the model output for relevance and coherence"
        context = {"context": context}

        async def inner_score(candidate):
            # Fresh branch and context copy per call so judges do not share state.
            _context = context.copy()
            _context["candidate"] = candidate
            branch = li.Branch(system=system, imodel=self)
            form = await branch.direct(
                instruction=instruction,
                context = _context,
                score=True,
                score_range=(0, 1),
                score_num_digits=3,
            )
            return form.score

        async def get_avg_score(candidate):
            judge_scores = await asyncio.gather(
                *(inner_score(candidate) for _ in range(n_judge))
            )
            return sum(judge_scores) / n_judge

        print("Scoring the outputs...")
        scores = await asyncio.gather(
            *(get_avg_score(candidate) for candidate in candidates)
        )

        ranked = [
            (idx, candidate, scores[idx])
            for idx, candidate in enumerate(candidates)
        ]
        # max() returns the first maximal element, matching a stable descending sort.
        return max(ranked, key=lambda item: item[2])

    async def assign_intrinsic_reward(
        self, expert_output=None, instruction=None, context=None, system=None,
    ):
        """Grade one output on a five-level scale and map the label to a number.

        Args:
            expert_output: The text to grade; sent to the judge as ``candidate``.
            instruction: Judge instruction; a default prompt is used when falsy.
            context: Task context; sent to the judge as ``context``.
            system: Judge system message; defaults to "Act as a critical judge".

        Returns:
            1.0, 0.7, 0.5, 0.2 or 0.0 for "highly effective", "effective",
            "moderate", "poor" and "bad" respectively; 0.0 for any other
            selection.
        """
        system = system or "Act as a critical judge"
        instruction = instruction or "Basing on context, select a judgement for the model output"

        # Insertion order doubles as the order of choices offered to the judge.
        reward_by_label = {
            "highly effective": 1.0,
            "effective": 0.7,
            "moderate": 0.5,
            "poor": 0.2,
            "bad": 0.0,
        }

        # Run the selection on a branch bound to this model and system message,
        # so the judge prompt and this model's configuration are actually used.
        # NOTE(review): relies on li.direct.select accepting ``branch=`` -
        # confirm against the installed lionagi version.
        reward = await li.direct.select(
            instruction=instruction,
            context={"context": context, "candidate": expert_output},
            choices=list(reward_by_label),
            branch=li.Branch(system=system, imodel=self),
        )

        return reward_by_label.get(reward.selection, 0.0)

In [4]:
class MixtureOfExperts:
    """Coordinate several generating experts, a gating model and a reward model.

    Each round every expert produces a candidate, the gating model ranks the
    candidates, one candidate is chosen and graded, and the caller can then
    update the per-expert value estimates stored on ``expert.total_reward``.
    """

    def __init__(
        self,
        num_experts: int = 4,
        min_iterations: int = 4,
        learning_rate: float = 0.1,
        discount_factor: float = 0.99,
        exploration_rate: float = 0.2,
        max_tokens: int = 1000,
    ):
        """Create the experts and the two auxiliary models.

        Args:
            num_experts: Number of generating experts to create.
            min_iterations: Used by ``check_termination_condition`` and
                ``update_exploration_rate``.
            learning_rate: Step size used in ``update_expert_values``.
            discount_factor: Weight on the best current estimate in
                ``update_expert_values``.
            exploration_rate: Initial exploration rate.
            max_tokens: ``max_tokens`` given to each generating expert (the
                gating and reward models use ``random_expert`` defaults).
        """
        self.num_experts = num_experts
        self.experts = [
            ExpertModel.random_expert(max_tokens=max_tokens)
            for _ in range(num_experts)
        ]
        self.gating_model = ExpertModel.random_expert()
        self.reward_model = ExpertModel.random_expert()
        self.min_iterations = min_iterations
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        # NOTE(review): stored and updated, but generate_expert_outputs does not
        # consult it when deciding whether to explore.
        self.exploration_rate = exploration_rate
        # Indices returned by earlier rounds, in order.
        self.selection_history = []

    async def generate_expert_outputs(
        self, instruction, context, system=None, n_judge=3
    ):
        """Run one round: generate candidates, choose one, and grade it.

        Args:
            instruction: Instruction given to every expert.
            context: Context given to the experts, the gating model and the
                reward model.
            system: System message given to every expert.
            n_judge: Number of scoring calls per candidate in the gating model.

        Returns:
            ``(selected_output, selected_idx, intrinsic_reward)``, where
            ``selected_output`` is the candidate produced by expert
            ``selected_idx`` and ``intrinsic_reward`` is the grade of that
            same candidate.
        """
        tasks = [
            expert.generate_complete_output(instruction, context, system, idx)
            for idx, expert in enumerate(self.experts)
        ]
        candidates = await asyncio.gather(*tasks)

        selected_idx, selected_output, _ = await self.gating_model.select_best_output(
            candidates=candidates,
            context=context,
            n_judge=n_judge,
        )

        # If the top-ranked expert was already chosen in an earlier round, draw
        # a random expert instead. This happens BEFORE grading and the output is
        # switched with the index, so text, index and reward all refer to the
        # same expert.
        if selected_idx in self.selection_history:
            selected_idx = random.randint(0, self.num_experts - 1)
            selected_output = candidates[selected_idx]

        # Keyword arguments: the first positional parameter is the output text,
        # the second is the judge instruction.
        intrinsic_reward = await self.reward_model.assign_intrinsic_reward(
            expert_output=selected_output,
            context=context,
        )

        self.selection_history.append(selected_idx)
        print("Expert output generation complete!")
        logging.info("All expert outputs have been generated and the most relevant expert has been selected.")

        return selected_output, selected_idx, intrinsic_reward

    def check_termination_condition(self, iteration: int, total_reward: float) -> bool:
        """Return True once ``iteration >= min_iterations`` and ``total_reward >= 10.0``."""
        return iteration >= self.min_iterations and total_reward >= 10.0

    def update_exploration_rate(self, iteration: int):
        """Set the exploration rate to ``1 - iteration / min_iterations``, floored at 0.1."""
        self.exploration_rate = max(0.1, 1.0 - (iteration / self.min_iterations))

    def update_expert_values(self, selected_expert_index: int, reward: float):
        """Nudge the selected expert's value estimate toward the observed reward.

        The increment is
        ``learning_rate * (reward + discount_factor * best - current)``,
        where ``best`` is the highest ``total_reward`` among all experts and
        ``current`` is the selected expert's ``total_reward``.

        Args:
            selected_expert_index: Index into ``self.experts``.
            reward: Reward observed for that expert this round.
        """
        experts_values = [expert.total_reward for expert in self.experts]
        chosen = self.experts[selected_expert_index]
        delta = self.learning_rate * (
            reward
            + self.discount_factor * max(experts_values)
            - chosen.total_reward
        )
        chosen.total_reward += delta

        
        

In [5]:
# Background given to the experts, the gating model and the reward model.
context = (
    "Acme Corporation is exploring investment opportunities in emerging "
    "technologies. The board seeks insights into which technologies could "
    "potentially transform their industry over the next decade."
)

# Task that every expert is asked to answer.
instruction = (
    "Evaluate the potential impact and investment viability of artificial "
    "intelligence (AI), blockchain, quantum computing, and biotechnology."
)

In [6]:
moe = MixtureOfExperts()
max_tokens = 1000

for iteration in range(moe.min_iterations):
    final_output, selected_expert_index, intrinsic_reward = await moe.generate_expert_outputs(instruction, context)

    print(f"Iteration {iteration+1} - Selected Expert: {selected_expert_index}, Intrinsic Reward: {intrinsic_reward}")
    print("Expert Values:", [expert.total_reward for expert in moe.experts])
    print("Final Expert Output:")
    print(final_output)
    print("------------------------")

    # # Get reward from an external expert
    # expert_reward = float(input(f"Enter expert reward for iteration {iteration+1}: "))

    expert_reward = 1.0

    # Combine intrinsic and expert rewards
    total_reward = intrinsic_reward + expert_reward

    print(f"Expert Reward: {expert_reward}, Total Reward: {total_reward}")

    # Update the value estimate of the selected expert
    moe.update_expert_values(selected_expert_index, total_reward)

    # Update exploration rate based on the current iteration
    moe.update_exploration_rate(iteration)

    # Check termination condition
    if moe.check_termination_condition(iteration, total_reward):
        print("Termination condition met. Stopping the process.")
        break

Generating output for Expert 1...
Generating output for Expert 2...
Generating output for Expert 3...
Generating output for Expert 4...

LLM Parameters for Expert 1:
------------------------
Model: gpt-3.5-turbo
Max Tokens per chunk: 1000
Temperature: 1.0526775742767198
Top P: 0.8959571551577411
Frequency Penalty: 0.39196428409026557
Presence Penalty: 0.2606009125871069
------------------------

LLM Parameters for Expert 4:
------------------------
Model: gpt-3.5-turbo
Max Tokens per chunk: 1000
Temperature: 0.9254375686464608
Top P: 0.8833056011146395
Frequency Penalty: 0.09469047343387788
Presence Penalty: 0.07564385774137677
------------------------

LLM Parameters for Expert 2:
------------------------
Model: gpt-4-turbo
Max Tokens per chunk: 1000
Temperature: 1.0800509942674748
Top P: 0.9816173025798132
Frequency Penalty: 0.3784198737544594
Presence Penalty: 0.4019939531723521
------------------------

LLM Parameters for Expert 3:
------------------------
Model: gpt-4-turbo
Max To

2024-06-06 08:37:13,916 - INFO - All expert outputs have been generated and the most relevant expert has been selected.


Expert output generation complete!
Iteration 1 - Selected Expert: 1, Intrinsic Reward: 1.0
Expert Values: [0, 0, 0, 0]
Final Expert Output:
### 1. Artificial Intelligence (AI)

**Potential Impact:**
- AI is set to revolutionize industries by enhancing automation, data analytics, customer service, and decision-making processes.
- It can lead to significant cost savings and efficiency improvements in manufacturing, healthcare, finance, and more.
- Developments in AI could also lead to the creation of new products and services, as well as improvements in safety and quality of existing offerings.

**Investment Viability:**
- High potential for return on investment due to broad applicability across various sectors.
- Continuous advancements and a growing ecosystem of startups present numerous investment opportunities.
- However, regulatory challenges and ethical considerations need careful management.

### 2. Blockchain

**Potential Impact:**
- Blockchain technology offers immense opportuni

2024-06-06 08:38:00,791 - INFO - All expert outputs have been generated and the most relevant expert has been selected.


Expert output generation complete!
Iteration 2 - Selected Expert: 1, Intrinsic Reward: 1.0
Expert Values: [0, 0.2, 0, 0]
Final Expert Output:
For Acme Corporation, a strategic evaluation of emerging technologies such as artificial intelligence (AI), blockchain, quantum computing, and biotechnology is crucial in determining their investment viability and potential impact. Here’s a detailed analysis to guide decision-making:

### Artificial Intelligence (AI)
**Potential Impact:**
- **Automation and Efficiency:** AI can revolutionize sectors by automating complex processes, reducing costs, and improving efficiency.
- **Data Analysis:** Unprecedented capabilities in processing and interpreting large sets of data can provide insights that drive business innovation.
- **Customer Interaction:** AI-driven tools like chatbots can enhance customer service and engagement.

**Investment Viability:**
- **High Return Potential:** AI applications are broad and growing, suggesting robust growth potent

2024-06-06 08:38:47,208 - INFO - All expert outputs have been generated and the most relevant expert has been selected.


Expert output generation complete!
Iteration 3 - Selected Expert: 2, Intrinsic Reward: 1.0
Expert Values: [0, 0.39980000000000004, 0, 0]
Final Expert Output:
**Evaluation of Investment Viability in Emerging Technologies for Acme Corporation**

To assist Acme Corporation in navigating investment opportunities in emerging technologies, we will assess the impact and investment viability of four key technologies: artificial intelligence (AI), blockchain, quantum computing, and biotechnology. Each of these technologies has the potential to significantly transform industries, including yours, over the next decade.

### Artificial Intelligence (AI)

**Potential Impact:**
- **Automation and Efficiency:** AI can automate routine tasks, enhance efficiencies, and optimize operations in nearly every industry, from manufacturing to services.
- **Data Analysis and Decision Making:** Advanced analytics powered by AI can help companies make better decisions by providing deeper insights into customer b

CancelledError: 