In [9]:
# ============================== #
# Diversity Control Framework Implementation
# ============================== #

# ------------------------------
# 1. Setup and Imports
# ------------------------------

# Install required libraries (uncomment if not already installed)
# !pip install torch openai sentence-transformers scikit-learn pyyaml

import os
import math
from collections import Counter
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import openai
import time
import json
import yaml  # Required for loading secrets

def load_secrets(file_path="../supporting_files/secrets.yaml"):
    """
    Load API keys from the secrets file.

    Parameters:
    - file_path (str): Path to the YAML secrets file.

    Returns:
    - dict: The mapping parsed from the YAML document.

    Raises:
    - FileNotFoundError: If nothing exists at file_path.
    - RuntimeError: If the file cannot be read or parsed, or if its top-level
      content is not a mapping (for example, an empty file).
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Secrets file not found at: {file_path}")
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform default.
        with open(file_path, 'r', encoding='utf-8') as file:
            loaded = yaml.safe_load(file)
    except Exception as e:
        # Chain the original error so the traceback keeps the root cause.
        raise RuntimeError(f"Failed to load secrets file: {e}") from e
    # safe_load yields None for an empty document and may yield a list or a
    # scalar; callers index the result by key, so reject those shapes here
    # rather than failing later with an unrelated TypeError.
    if not isinstance(loaded, dict):
        raise RuntimeError(
            "Failed to load secrets file: expected a mapping at the top level, "
            f"got {type(loaded).__name__}"
        )
    return loaded

# Load secrets once at import/cell-execution time; load_secrets() raises if
# the default secrets file is missing or unreadable, which stops the cell here.
secrets = load_secrets()

# ------------------------------
# 2. Configuration
# ------------------------------

# Pull the OpenAI API key from the secrets file.
# The secrets file must define an 'openai_api_key' entry; a missing key
# raises KeyError on this line.
openai.api_key = secrets['openai_api_key']

# Initialize the sentence transformer model for SVS.
# calculate_svs() reads this module-level `model` to embed responses.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Define target entropy for diversity.
# NOTE(review): not referenced anywhere else in the visible part of this file;
# presumably meant as the target_entropy argument of dynamic_diversity_control.
TARGET_ENTROPY = 1.5  # Example value

# ------------------------------
# 3. Diversity Metrics Implementation
# ------------------------------

def calculate_entropy(responses):
    """
    Computes the Entropy Diversity (ED) of a batch of responses.

    Every distinct response string counts as one outcome. The result is the
    Shannon entropy of the observed frequency distribution, using the natural
    logarithm.

    Parameters:
    - responses (list of str): Response texts to evaluate.

    Returns:
    - float: Entropy of the exact-match response distribution
      (0 when the list is empty).
    """
    occurrences = Counter(responses)
    sample_size = len(responses)
    # Relative frequency of each distinct response.
    probabilities = [seen / sample_size for seen in occurrences.values()]
    weighted_logs = (p * math.log(p) for p in probabilities)
    return -sum(weighted_logs)

def calculate_svs(responses):
    """
    Computes the Semantic Variation Score (SVS) of a batch of responses.

    The responses are embedded with the module-level sentence-transformer
    `model`; the score is the mean cosine distance (1 - cosine similarity)
    over every unordered pair of responses.

    Parameters:
    - responses (list of str): Response texts to evaluate.

    Returns:
    - float: Mean pairwise cosine distance, or 0 when fewer than two
      responses are available.
    """
    if len(responses) < 2:
        return 0

    vectors = model.encode(responses)
    pairwise_similarity = cosine_similarity(vectors)

    distance_sum = 0
    pair_count = 0
    # Walk the strict upper triangle so each unordered pair is visited once.
    for row_index, row in enumerate(pairwise_similarity):
        for pair_similarity in row[row_index + 1:]:
            distance_sum += 1 - pair_similarity
            pair_count += 1

    if pair_count == 0:
        return 0
    return distance_sum / pair_count

# ------------------------------
# 4. Control Algorithms Development
# ------------------------------

def dynamic_diversity_control(current_entropy, target_entropy, adjustment_factor=0.1):
    """
    Moves the current entropy one step toward the target without overshooting.

    Parameters:
    - current_entropy (float): Current entropy value.
    - target_entropy (float): Desired target entropy.
    - adjustment_factor (float): Size of the step taken toward the target.

    Returns:
    - float: The stepped value, clamped so it never passes target_entropy.
    """
    if current_entropy < target_entropy:
        # Below target: step up, capped at the target.
        stepped_up = current_entropy + adjustment_factor
        return min(stepped_up, target_entropy)

    # At or above target: step down, floored at the target.
    stepped_down = current_entropy - adjustment_factor
    return max(stepped_down, target_entropy)

def context_aware_diversity_regulation(conversation_context, user_profile=None):
    """
    Picks a diversity level from the length of the user's input.

    Parameters:
    - conversation_context (str): Current user input.
    - user_profile (dict, optional): User profile data. Accepted for interface
      compatibility; this implementation does not read it.

    Returns:
    - float: 0.2 for inputs under 10 whitespace-separated words, 0.5 for
      inputs under 50 words, and 0.7 otherwise.
    """
    word_count = len(conversation_context.split())

    # (exclusive upper bound on word count, diversity level), in ascending order:
    # short topics get low diversity, medium-length topics get medium diversity.
    tiers = ((10, 0.2), (50, 0.5))
    for upper_bound, level in tiers:
        if word_count < upper_bound:
            return level

    # Anything longer falls through to high diversity.
    return 0.7

# ------------------------------
# 5. Integration with LLM (GPT-4)
# ------------------------------

def generate_response(user_content, diversity_level):
    """
    Generates a response from the LLM with controlled diversity.

    Parameters:
    - user_content (str): The user input content.
    - diversity_level (float): A value between 0 and 1 indicating desired
      diversity; it is passed to the API as the sampling temperature.

    Returns:
    - str: The generated response text. An empty string is returned when the
      request fails or when the model returns no text content, so callers
      can always treat the result as a string.
    """
    try:
        response = openai.chat.completions.create(
            # Any replacement must be a chat-capable model: this call uses the
            # chat completions endpoint, which legacy completion-only models
            # do not support.
            model="gpt-4o",
            messages=[
                {"role": "user", "content": user_content}
            ],
            max_tokens=150,  # Longer answers are cut off at this limit
            temperature=diversity_level,  # Temperature controls diversity
            top_p=0.95,
            frequency_penalty=0.0,
            presence_penalty=0.0
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: report the failure and keep the caller's
        # loop running with an empty response.
        print(f"Error generating response: {e}")
        return ""
    # The API may return a message whose content is None; normalize it so the
    # documented str return type always holds.
    return content if content is not None else ""

# ------------------------------
# 6. User Engagement Metrics Tracking
# ------------------------------

class EngagementMetrics:
    """Accumulates simple engagement statistics and reports their averages."""

    def __init__(self):
        # Word counts of the responses logged so far.
        self.response_lengths = []
        # Raw samples supplied by the caller; the report labels them as seconds.
        self.user_response_times = []
        self.interaction_durations = []
        # Counters behind the repeat-interaction rate.
        self.repeat_interaction_count = 0
        self.total_interactions = 0

    @staticmethod
    def _mean(samples):
        """Return the arithmetic mean of samples, or 0 when there are none."""
        if not samples:
            return 0
        return sum(samples) / len(samples)

    def log_response_length(self, response):
        """Record the number of whitespace-separated words in response."""
        word_count = len(response.split())
        self.response_lengths.append(word_count)

    def log_user_response_time(self, response_time):
        """Record one user response-time sample."""
        self.user_response_times.append(response_time)

    def log_interaction_duration(self, duration):
        """Record one interaction-duration sample."""
        self.interaction_durations.append(duration)

    def log_repeat_interaction(self):
        """Count one repeat interaction."""
        self.repeat_interaction_count += 1

    def log_interaction(self):
        """Count one interaction toward the total."""
        self.total_interactions += 1

    def get_metrics(self):
        """
        Summarize everything logged so far.

        Returns:
        - dict: Averages of the logged samples and the repeat-interaction
          rate; each value is 0 when nothing has been logged for it.
        """
        if self.total_interactions:
            repeat_rate = self.repeat_interaction_count / self.total_interactions
        else:
            repeat_rate = 0

        return {
            "Average Response Length": self._mean(self.response_lengths),
            "Average User Response Time (s)": self._mean(self.user_response_times),
            "Average Interaction Duration (s)": self._mean(self.interaction_durations),
            "Repeat Interaction Rate": repeat_rate,
        }

# ------------------------------
# 7. Sample Dataset Description
# ------------------------------

# Sample user turns used to exercise the framework. Each entry has a "user"
# message and a "context" string; the driver loop below reads only "user",
# and every "context" here is empty.
sample_conversations = [
    {"user": "Hi there! Can you tell me about renewable energy?", "context": ""},
    {"user": "I'm feeling down today. Any advice?", "context": ""},
    {"user": "Explain the theory of relativity.", "context": ""}
]

# ------------------------------
# 8. Running the Diversity Control Framework
# ------------------------------

# Driver: for each sample conversation, choose a diversity level from the
# input length, query the LLM, print the exchange and record the response.
metrics = EngagementMetrics()
# Collected responses, in conversation order.
# NOTE(review): the visible code never passes this list to calculate_entropy
# or calculate_svs — confirm whether a later cell does.
all_responses = []

for convo in sample_conversations:
    user_input = convo["user"]
    # The diversity level is used as the sampling temperature in generate_response.
    diversity_level = context_aware_diversity_regulation(user_input)
    # generate_response returns "" on failure, so the loop continues regardless.
    response = generate_response(user_input, diversity_level)
    print(f"User: {user_input}\nAgent: {response}\n")
    metrics.log_response_length(response)
    all_responses.append(response)

# Only response lengths are logged above; log_interaction() is never called
# here, so the other reported metrics stay at 0.
print("Engagement Metrics:")
print(metrics.get_metrics())

User: Hi there! Can you tell me about renewable energy?
Agent: Hello! Renewable energy refers to energy that is generated from natural resources that are replenished on a human timescale, such as sunlight, wind, rain, tides, waves, and geothermal heat. Unlike fossil fuels, which are finite and emit greenhouse gases when burned, renewable energy sources are generally more sustainable and environmentally friendly. Here are some of the main types of renewable energy:

1. **Solar Energy**: This involves capturing energy from the sun using solar panels or other technologies. Solar energy can be used for electricity generation, heating, and even powering vehicles.

2. **Wind Energy**: Wind turbines convert the kinetic energy from wind into mechanical power, which can then be converted into electricity. Wind farms can be located onshore or offshore.

3

User: I'm feeling down today. Any advice?
Agent: I'm sorry to hear that you're feeling down. Here are a few suggestions that might help lift 