diff --git a/ldai/__init__.py b/ldai/__init__.py index cb7e545..78125d7 100644 --- a/ldai/__init__.py +++ b/ldai/__init__.py @@ -1 +1,41 @@ __version__ = "0.10.1" # x-release-please-version + +# Export main client +# Export chat +from ldai.chat import TrackedChat +from ldai.client import LDAIClient +# Export judge +from ldai.judge import AIJudge +# Export models for convenience +from ldai.models import ( # Deprecated aliases for backward compatibility + AIAgentConfig, AIAgentConfigDefault, AIAgentConfigRequest, AIAgents, + AICompletionConfig, AICompletionConfigDefault, AIConfig, AIJudgeConfig, + AIJudgeConfigDefault, JudgeConfiguration, LDAIAgent, LDAIAgentConfig, + LDAIAgentDefaults, LDMessage, ModelConfig, ProviderConfig) +# Export judge types +from ldai.providers.types import EvalScore, JudgeResponse + +__all__ = [ + 'LDAIClient', + 'AIAgentConfig', + 'AIAgentConfigDefault', + 'AIAgentConfigRequest', + 'AIAgents', + 'AICompletionConfig', + 'AICompletionConfigDefault', + 'AIJudgeConfig', + 'AIJudgeConfigDefault', + 'AIJudge', + 'TrackedChat', + 'EvalScore', + 'JudgeConfiguration', + 'JudgeResponse', + 'LDMessage', + 'ModelConfig', + 'ProviderConfig', + # Deprecated exports + 'AIConfig', + 'LDAIAgent', + 'LDAIAgentConfig', + 'LDAIAgentDefaults', +] diff --git a/ldai/chat/__init__.py b/ldai/chat/__init__.py new file mode 100644 index 0000000..bcb4284 --- /dev/null +++ b/ldai/chat/__init__.py @@ -0,0 +1,188 @@ +"""TrackedChat implementation for managing AI chat conversations.""" + +import asyncio +from typing import Any, Dict, List, Optional + +from ldai.judge import AIJudge +from ldai.models import AICompletionConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import ChatResponse, JudgeResponse +from ldai.tracker import LDAIConfigTracker + + +class TrackedChat: + """ + Concrete implementation of TrackedChat that provides chat functionality + by delegating to an AIProvider implementation. + + This class handles conversation management and tracking, while delegating + the actual model invocation to the provider. + """ + + def __init__( + self, + ai_config: AICompletionConfig, + tracker: LDAIConfigTracker, + provider: AIProvider, + judges: Optional[Dict[str, AIJudge]] = None, + logger: Optional[Any] = None, + ): + """ + Initialize the TrackedChat. + + :param ai_config: The completion AI configuration + :param tracker: The tracker for the completion configuration + :param provider: The AI provider to use for chat + :param judges: Optional dictionary of judge instances keyed by their configuration keys + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._tracker = tracker + self._provider = provider + self._judges = judges or {} + self._logger = logger + self._messages: List[LDMessage] = [] + + async def invoke(self, prompt: str) -> ChatResponse: + """ + Invoke the chat model with a prompt string. + + This method handles conversation management and tracking, delegating to the provider's invoke_model method. 
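+
+        Example (illustrative sketch; assumes a ``TrackedChat`` named ``chat`` obtained from ``LDAIClient.create_chat``)::
+
+            response = await chat.invoke("What is the status of my order?")
+            print(response.message.content)
+
+            # Judge evaluations, when configured, run as background asyncio tasks:
+            if response.evaluations:
+                await asyncio.gather(*response.evaluations)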
+ + :param prompt: The user prompt to send to the chat model + :return: ChatResponse containing the model's response and metrics + """ + # Convert prompt string to LDMessage with role 'user' and add to conversation history + user_message: LDMessage = LDMessage(role='user', content=prompt) + self._messages.append(user_message) + + # Prepend config messages to conversation history for model invocation + config_messages = self._ai_config.messages or [] + all_messages = config_messages + self._messages + + # Delegate to provider-specific implementation with tracking + response = await self._tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._provider.invoke_model(all_messages), + ) + + # Start judge evaluations as async tasks (don't await them) + if ( + self._ai_config.judge_configuration + and self._ai_config.judge_configuration.judges + and len(self._ai_config.judge_configuration.judges) > 0 + ): + evaluation_tasks = self._start_judge_evaluations(self._messages, response) + response.evaluations = evaluation_tasks + + # Add the response message to conversation history + self._messages.append(response.message) + return response + + def _start_judge_evaluations( + self, + messages: List[LDMessage], + response: ChatResponse, + ) -> List[asyncio.Task[Optional[JudgeResponse]]]: + """ + Start judge evaluations as async tasks without awaiting them. + + Returns a list of async tasks that can be awaited later. + + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :return: List of async tasks that will return judge evaluation results + """ + if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges: + return [] + + judge_configs = self._ai_config.judge_configuration.judges + + # Start all judge evaluations as tasks + async def evaluate_judge(judge_config): + judge = self._judges.get(judge_config.key) + if not judge: + if self._logger: + self._logger.warn( + f"Judge configuration is not enabled: {judge_config.key}", + ) + return None + + eval_result = await judge.evaluate_messages( + messages, response, judge_config.sampling_rate + ) + + if eval_result and eval_result.success: + self._tracker.track_eval_scores(eval_result.evals) + + return eval_result + + # Create tasks for each judge evaluation + tasks = [ + asyncio.create_task(evaluate_judge(judge_config)) + for judge_config in judge_configs + ] + + return tasks + + def get_config(self) -> AICompletionConfig: + """ + Get the underlying AI configuration used to initialize this TrackedChat. + + :return: The AI completion configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Get the underlying AI configuration tracker used to initialize this TrackedChat. + + :return: The tracker instance + """ + return self._tracker + + def get_provider(self) -> AIProvider: + """ + Get the underlying AI provider instance. + + This provides direct access to the provider for advanced use cases. + + :return: The AI provider instance + """ + return self._provider + + def get_judges(self) -> Dict[str, AIJudge]: + """ + Get the judges associated with this TrackedChat. + + Returns a dictionary of judge instances keyed by their configuration keys. + + :return: Dictionary of judge instances + """ + return self._judges + + def append_messages(self, messages: List[LDMessage]) -> None: + """ + Append messages to the conversation history. 
+ + Adds messages to the conversation history without invoking the model, + which is useful for managing multi-turn conversations or injecting context. + + :param messages: Array of messages to append to the conversation history + """ + self._messages.extend(messages) + + def get_messages(self, include_config_messages: bool = False) -> List[LDMessage]: + """ + Get all messages in the conversation history. + + :param include_config_messages: Whether to include the config messages from the AIConfig. + Defaults to False. + :return: Array of messages. When include_config_messages is True, returns both config + messages and conversation history with config messages prepended. When False, + returns only the conversation history messages. + """ + if include_config_messages: + config_messages = self._ai_config.messages or [] + return config_messages + self._messages + return list(self._messages) diff --git a/ldai/client.py b/ldai/client.py index a8bd888..086e99b 100644 --- a/ldai/client.py +++ b/ldai/client.py @@ -1,245 +1,308 @@ -from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional, Tuple +import logging +from typing import Any, Dict, List, Optional, Tuple import chevron from ldclient import Context from ldclient.client import LDClient +from ldai.chat import TrackedChat +from ldai.judge import AIJudge +from ldai.models import (AIAgentConfig, AIAgentConfigDefault, + AIAgentConfigRequest, AIAgents, AICompletionConfig, + AICompletionConfigDefault, AIJudgeConfig, + AIJudgeConfigDefault, JudgeConfiguration, LDMessage, + ModelConfig, ProviderConfig) +from ldai.providers.ai_provider_factory import (AIProviderFactory, + SupportedAIProvider) from ldai.tracker import LDAIConfigTracker -@dataclass -class LDMessage: - role: Literal['system', 'user', 'assistant'] - content: str +class LDAIClient: + """The LaunchDarkly AI SDK client object.""" + + def __init__(self, client: LDClient): + self._client = client + self._logger = logging.getLogger('ldclient.ai') - def to_dict(self) -> dict: + def completion_config( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AICompletionConfig: """ - Render the given message as a dictionary object. + Get the value of a completion configuration. + + :param key: The key of the completion configuration. + :param context: The context to evaluate the completion configuration in. + :param default_value: The default value of the completion configuration. + :param variables: Additional variables for the completion configuration. + :return: The completion configuration with a tracker used for gathering metrics. """ - return { - 'role': self.role, - 'content': self.content, - } + self._client.track('$ld:ai:config:function:single', context, key, 1) + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) -class ModelConfig: - """ - Configuration related to the model. - """ + config = AICompletionConfig( + enabled=bool(enabled), + model=model, + messages=messages, + provider=provider, + tracker=tracker, + judge_configuration=judge_configuration, + ) - def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): - """ - :param name: The name of the model. - :param parameters: Additional model-specific parameters. - :param custom: Additional customer provided data. 
- """ - self._name = name - self._parameters = parameters - self._custom = custom + return config - @property - def name(self) -> str: - """ - The name of the model. + def config( + self, + key: str, + context: Context, + default_value: AICompletionConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AICompletionConfig: """ - return self._name + Get the value of a model configuration. - def get_parameter(self, key: str) -> Any: - """ - Retrieve model-specific parameters. + .. deprecated:: Use :meth:`completion_config` instead. This method will be removed in a future version. - Accessing a named, typed attribute (e.g. name) will result in the call - being delegated to the appropriate property. + :param key: The key of the model configuration. + :param context: The context to evaluate the model configuration in. + :param default_value: The default value of the model configuration. + :param variables: Additional variables for the model configuration. + :return: The value of the model configuration along with a tracker used for gathering metrics. """ - if key == 'name': - return self.name - - if self._parameters is None: - return None - - return self._parameters.get(key) + return self.completion_config(key, context, default_value, variables) - def get_custom(self, key: str) -> Any: - """ - Retrieve customer provided data. + def judge_config( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIJudgeConfig: """ - if self._custom is None: - return None - - return self._custom.get(key) + Get the value of a judge configuration. - def to_dict(self) -> dict: - """ - Render the given model config as a dictionary object. + :param key: The key of the judge configuration. + :param context: The context to evaluate the judge configuration in. + :param default_value: The default value of the judge configuration. + :param variables: Additional variables for the judge configuration. + :return: The judge configuration with a tracker used for gathering metrics. """ - return { - 'name': self._name, - 'parameters': self._parameters, - 'custom': self._custom, - } + self._client.track('$ld:ai:judge:function:single', context, key, 1) + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( + key, context, default_value.to_dict(), variables + ) -class ProviderConfig: - """ - Configuration related to the provider. - """ + # Extract evaluation_metric_keys from the variation + variation = self._client.variation(key, context, default_value.to_dict()) + evaluation_metric_keys = variation.get('evaluationMetricKeys', default_value.evaluation_metric_keys or []) - def __init__(self, name: str): - self._name = name + config = AIJudgeConfig( + enabled=bool(enabled), + evaluation_metric_keys=evaluation_metric_keys, + model=model, + messages=messages, + provider=provider, + tracker=tracker, + ) - @property - def name(self) -> str: - """ - The name of the provider. - """ - return self._name + return config - def to_dict(self) -> dict: - """ - Render the given provider config as a dictionary object. + async def create_judge( + self, + key: str, + context: Context, + default_value: AIJudgeConfigDefault, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIJudge]: """ - return { - 'name': self._name, - } - + Creates and returns a new Judge instance for AI evaluation. 
-@dataclass(frozen=True) -class AIConfig: - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - messages: Optional[List[LDMessage]] = None - provider: Optional[ProviderConfig] = None - - def to_dict(self) -> dict: - """ - Render the given default values as an AIConfig-compatible dictionary object. - """ - return { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'messages': [message.to_dict() for message in self.messages] if self.messages else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - - -@dataclass(frozen=True) -class LDAIAgent: - """ - Represents an AI agent configuration with instructions and model settings. - - An agent is similar to an AIConfig but focuses on instructions rather than messages, - making it suitable for AI assistant/agent use cases. - """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - instructions: Optional[str] = None - tracker: Optional[LDAIConfigTracker] = None - - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent as a dictionary object. - """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - if self.instructions is not None: - result['instructions'] = self.instructions - return result + :param key: The key identifying the AI judge configuration to use + :param context: Standard Context used when evaluating flags + :param default_value: A default value representing a standard AI config result + :param variables: Dictionary of values for instruction interpolation. + The variables `message_history` and `response_to_evaluate` are reserved for the judge and will be ignored. + :param default_ai_provider: Optional default AI provider to use. + :return: Judge instance or None if disabled/unsupported + Example:: -@dataclass(frozen=True) -class LDAIAgentDefaults: - """ - Default values for AI agent configurations. + judge = client.create_judge( + "relevance-judge", + context, + AIJudgeConfigDefault( + enabled=True, + model=ModelConfig("gpt-4"), + provider=ProviderConfig("openai"), + evaluation_metric_keys=['$ld:ai:judge:relevance'], + messages=[LDMessage(role='system', content='You are a relevance judge.')] + ), + variables={'metric': "relevance"} + ) - Similar to LDAIAgent but without tracker and with optional enabled field, - used as fallback values when agent configurations are not available. 
- """ - enabled: Optional[bool] = None - model: Optional[ModelConfig] = None - provider: Optional[ProviderConfig] = None - instructions: Optional[str] = None + if judge: + result = await judge.evaluate("User question", "AI response") + if result and result.evals: + relevance_eval = result.evals.get('$ld:ai:judge:relevance') + if relevance_eval: + print('Relevance score:', relevance_eval.score) + """ + self._client.track('$ld:ai:judge:function:createJudge', context, key, 1) + + try: + # Warn if reserved variables are provided + if variables: + if 'message_history' in variables: + # Note: Python doesn't have a logger on the client, but we could add one + pass # Would log warning if logger available + if 'response_to_evaluate' in variables: + pass # Would log warning if logger available + + # Overwrite reserved variables to ensure they remain as placeholders for judge evaluation + extended_variables = dict(variables) if variables else {} + extended_variables['message_history'] = '{{message_history}}' + extended_variables['response_to_evaluate'] = '{{response_to_evaluate}}' + + judge_config = self.judge_config(key, context, default_value, extended_variables) + + if not judge_config.enabled or not judge_config.tracker: + # Would log info if logger available + return None + + # Create AI provider for the judge + provider = await AIProviderFactory.create(judge_config, self._logger, default_ai_provider) + if not provider: + return None + + return AIJudge(judge_config, judge_config.tracker, provider, self._logger) + except Exception as error: + # Would log error if logger available + return None - def to_dict(self) -> Dict[str, Any]: - """ - Render the given agent defaults as a dictionary object. + async def _initialize_judges( + self, + judge_configs: List[JudgeConfiguration.Judge], + context: Context, + variables: Optional[Dict[str, Any]] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Dict[str, AIJudge]: """ - result: Dict[str, Any] = { - '_ldMeta': { - 'enabled': self.enabled or False, - }, - 'model': self.model.to_dict() if self.model else None, - 'provider': self.provider.to_dict() if self.provider else None, - } - if self.instructions is not None: - result['instructions'] = self.instructions - return result - + Initialize judges from judge configurations. -@dataclass -class LDAIAgentConfig: - """ - Configuration for individual agent in batch requests. + :param judge_configs: List of judge configurations + :param context: Standard Context used when evaluating flags + :param variables: Dictionary of values for instruction interpolation + :param default_ai_provider: Optional default AI provider to use + :return: Dictionary of judge instances keyed by their configuration keys + """ + judges: Dict[str, AIJudge] = {} - Combines agent key with its specific default configuration and variables. 
- """ - key: str - default_value: LDAIAgentDefaults - variables: Optional[Dict[str, Any]] = None + async def create_judge_for_config(judge_key: str): + judge = await self.create_judge( + judge_key, + context, + AIJudgeConfigDefault(enabled=False), + variables, + default_ai_provider, + ) + return judge_key, judge + judge_promises = [ + create_judge_for_config(judge_config.key) + for judge_config in judge_configs + ] -# Type alias for multiple agents -LDAIAgents = Dict[str, LDAIAgent] + import asyncio + results = await asyncio.gather(*judge_promises, return_exceptions=True) + for result in results: + if isinstance(result, Exception): + continue + judge_key, judge = result # type: ignore[misc] + if judge: + judges[judge_key] = judge -class LDAIClient: - """The LaunchDarkly AI SDK client object.""" + return judges - def __init__(self, client: LDClient): - self._client = client - - def config( + async def create_chat( self, key: str, context: Context, - default_value: AIConfig, + default_value: AICompletionConfigDefault, variables: Optional[Dict[str, Any]] = None, - ) -> Tuple[AIConfig, LDAIConfigTracker]: + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[TrackedChat]: """ - Get the value of a model configuration. + Creates and returns a new TrackedChat instance for AI chat conversations. - :param key: The key of the model configuration. - :param context: The context to evaluate the model configuration in. - :param default_value: The default value of the model configuration. - :param variables: Additional variables for the model configuration. - :return: The value of the model configuration along with a tracker used for gathering metrics. + :param key: The key identifying the AI completion configuration to use + :param context: Standard Context used when evaluating flags + :param default_value: A default value representing a standard AI config result + :param variables: Dictionary of values for instruction interpolation + :param default_ai_provider: Optional default AI provider to use + :return: TrackedChat instance or None if disabled/unsupported + + Example:: + + chat = await client.create_chat( + "customer-support-chat", + context, + AICompletionConfigDefault( + enabled=True, + model=ModelConfig("gpt-4"), + provider=ProviderConfig("openai"), + messages=[LDMessage(role='system', content='You are a helpful assistant.')] + ), + variables={'customerName': 'John'} + ) + + if chat: + response = await chat.invoke("I need help with my order") + print(response.message.content) + + # Access conversation history + messages = chat.get_messages() + print(f"Conversation has {len(messages)} messages") """ - self._client.track('$ld:ai:config:function:single', context, key, 1) + self._client.track('$ld:ai:config:function:createChat', context, key, 1) + if self._logger: + self._logger.debug(f"Creating chat for key: {key}") + config = self.completion_config(key, context, default_value, variables) - model, provider, messages, instructions, tracker, enabled = self.__evaluate(key, context, default_value.to_dict(), variables) + if not config.enabled or not config.tracker: + # Would log info if logger available + return None - config = AIConfig( - enabled=bool(enabled), - model=model, - messages=messages, - provider=provider, - ) + provider = await AIProviderFactory.create(config, self._logger, default_ai_provider) + if not provider: + return None - return config, tracker + judges = {} + if config.judge_configuration and config.judge_configuration.judges: + judges = await 
self._initialize_judges( + config.judge_configuration.judges, + context, + variables, + default_ai_provider, + ) - def agent( + return TrackedChat(config, config.tracker, provider, judges, self._logger) + + def agent_config( self, - config: LDAIAgentConfig, + key: str, context: Context, - ) -> LDAIAgent: + default_value: AIAgentConfigDefault, + variables: Optional[Dict[str, Any]] = None, + ) -> AIAgentConfig: """ Retrieve a single AI Config agent. @@ -248,39 +311,58 @@ def agent( Example:: - agent = client.agent(LDAIAgentConfig( - key='research_agent', - default_value=LDAIAgentDefaults( + agent = client.agent_config( + 'research_agent', + context, + AIAgentConfigDefault( enabled=True, model=ModelConfig('gpt-4'), instructions="You are a research assistant specializing in {{topic}}." ), variables={'topic': 'climate change'} - ), context) + ) if agent.enabled: research_result = agent.instructions # Interpolated instructions agent.tracker.track_success() - :param config: The agent configuration to use. + :param key: The agent configuration key. :param context: The context to evaluate the agent configuration in. - :return: Configured LDAIAgent instance. + :param default_value: Default agent values. + :param variables: Variables for interpolation. + :return: Configured AIAgentConfig instance. """ # Track single agent usage self._client.track( "$ld:ai:agent:function:single", context, - config.key, + key, 1 ) - return self.__evaluate_agent(config.key, context, config.default_value, config.variables) + return self.__evaluate_agent(key, context, default_value, variables) - def agents( + def agent( + self, + config: AIAgentConfigRequest, + context: Context, + ) -> AIAgentConfig: + """ + Retrieve a single AI Config agent. + + .. deprecated:: Use :meth:`agent_config` instead. This method will be removed in a future version. + + :param config: The agent configuration to use. + :param context: The context to evaluate the agent configuration in. + :return: Configured AIAgentConfig instance. + """ + return self.agent_config(config.key, context, config.default_value, config.variables) + + def agent_configs( self, - agent_configs: List[LDAIAgentConfig], + agent_configs: List[AIAgentConfigRequest], context: Context, - ) -> LDAIAgents: + ) -> AIAgents: """ Retrieve multiple AI agent configurations. @@ -290,18 +372,18 @@ def agents( Example:: - agents = client.agents([ - LDAIAgentConfig( + agents = client.agent_configs([ + AIAgentConfigRequest( key='research_agent', - default_value=LDAIAgentDefaults( + default_value=AIAgentConfigDefault( enabled=True, instructions='You are a research assistant.' ), variables={'topic': 'climate change'} ), - LDAIAgentConfig( + AIAgentConfigRequest( key='writing_agent', - default_value=LDAIAgentDefaults( + default_value=AIAgentConfigDefault( enabled=True, instructions='You are a writing assistant.' ), @@ -314,7 +396,7 @@ def agents( :param agent_configs: List of agent configurations to retrieve. :param context: The context to evaluate the agent configurations in. - :return: Dictionary mapping agent keys to their LDAIAgent configurations. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. 
""" # Track multiple agents usage agent_count = len(agent_configs) @@ -325,7 +407,7 @@ def agents( agent_count ) - result: LDAIAgents = {} + result: AIAgents = {} for config in agent_configs: agent = self.__evaluate_agent( @@ -338,13 +420,29 @@ def agents( return result + def agents( + self, + agent_configs: List[AIAgentConfigRequest], + context: Context, + ) -> AIAgents: + """ + Retrieve multiple AI agent configurations. + + .. deprecated:: Use :meth:`agent_configs` instead. This method will be removed in a future version. + + :param agent_configs: List of agent configurations to retrieve. + :param context: The context to evaluate the agent configurations in. + :return: Dictionary mapping agent keys to their AIAgentConfig configurations. + """ + return self.agent_configs(agent_configs, context) + def __evaluate( self, key: str, context: Context, default_dict: Dict[str, Any], variables: Optional[Dict[str, Any]] = None, - ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool]: + ) -> Tuple[Optional[ModelConfig], Optional[ProviderConfig], Optional[List[LDMessage]], Optional[str], LDAIConfigTracker, bool, Optional[Any]]: """ Internal method to evaluate a configuration and extract components. @@ -411,15 +509,31 @@ def __evaluate( enabled = variation.get('_ldMeta', {}).get('enabled', False) - return model, provider_config, messages, instructions, tracker, enabled + # Extract judge configuration + judge_configuration = None + if 'judgeConfiguration' in variation and isinstance(variation['judgeConfiguration'], dict): + judge_config = variation['judgeConfiguration'] + if 'judges' in judge_config and isinstance(judge_config['judges'], list): + judges = [ + JudgeConfiguration.Judge( + key=judge['key'], + sampling_rate=judge['samplingRate'] + ) + for judge in judge_config['judges'] + if isinstance(judge, dict) and 'key' in judge and 'samplingRate' in judge + ] + if judges: + judge_configuration = JudgeConfiguration(judges=judges) + + return model, provider_config, messages, instructions, tracker, enabled, judge_configuration def __evaluate_agent( self, key: str, context: Context, - default_value: LDAIAgentDefaults, + default_value: AIAgentConfigDefault, variables: Optional[Dict[str, Any]] = None, - ) -> LDAIAgent: + ) -> AIAgentConfig: """ Internal method to evaluate an agent configuration. @@ -427,21 +541,22 @@ def __evaluate_agent( :param context: The evaluation context. :param default_value: Default agent values. :param variables: Variables for interpolation. - :return: Configured LDAIAgent instance. + :return: Configured AIAgentConfig instance. 
""" - model, provider, messages, instructions, tracker, enabled = self.__evaluate( + model, provider, messages, instructions, tracker, enabled, judge_configuration = self.__evaluate( key, context, default_value.to_dict(), variables ) # For agents, prioritize instructions over messages final_instructions = instructions if instructions is not None else default_value.instructions - return LDAIAgent( - enabled=bool(enabled) if enabled is not None else default_value.enabled, + return AIAgentConfig( + enabled=bool(enabled) if enabled is not None else (default_value.enabled or False), model=model or default_value.model, provider=provider or default_value.provider, instructions=final_instructions, tracker=tracker, + judge_configuration=judge_configuration or default_value.judge_configuration, ) def __interpolate_template(self, template: str, variables: Dict[str, Any]) -> str: diff --git a/ldai/judge/__init__.py b/ldai/judge/__init__.py new file mode 100644 index 0000000..3caad65 --- /dev/null +++ b/ldai/judge/__init__.py @@ -0,0 +1,230 @@ +"""Judge implementation for AI evaluation.""" + +import random +from typing import Any, Dict, Optional + +import chevron + +from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder +from ldai.models import AIJudgeConfig, LDMessage +from ldai.providers.ai_provider import AIProvider +from ldai.providers.types import (ChatResponse, EvalScore, JudgeResponse, + StructuredResponse) +from ldai.tracker import LDAIConfigTracker + + +class AIJudge: + """ + Judge implementation that handles evaluation functionality and conversation management. + + According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate + other AI Configs using structured output. + """ + + def __init__( + self, + ai_config: AIJudgeConfig, + ai_config_tracker: LDAIConfigTracker, + ai_provider: AIProvider, + logger: Optional[Any] = None, + ): + """ + Initialize the Judge. + + :param ai_config: The judge AI configuration + :param ai_config_tracker: The tracker for the judge configuration + :param ai_provider: The AI provider to use for evaluation + :param logger: Optional logger for logging + """ + self._ai_config = ai_config + self._ai_config_tracker = ai_config_tracker + self._ai_provider = ai_provider + self._logger = logger + self._evaluation_response_structure = EvaluationSchemaBuilder.build( + ai_config.evaluation_metric_keys + ) + + async def evaluate( + self, + input_text: str, + output_text: str, + sampling_rate: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response using the judge's configuration. 
+ + :param input_text: The input prompt or question that was provided to the AI + :param output_text: The AI-generated response to be evaluated + :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + try: + if not self._ai_config.evaluation_metric_keys or len(self._ai_config.evaluation_metric_keys) == 0: + if self._logger: + self._logger.warn( + 'Judge configuration is missing required evaluationMetricKeys' + ) + return None + + if not self._ai_config.messages: + if self._logger: + self._logger.warn('Judge configuration must include messages') + return None + + if random.random() > sampling_rate: + if self._logger: + self._logger.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}') + return None + + messages = self._construct_evaluation_messages(input_text, output_text) + + # Track metrics of the structured model invocation + response = await self._ai_config_tracker.track_metrics_of( + lambda result: result.metrics, + lambda: self._ai_provider.invoke_structured_model(messages, self._evaluation_response_structure) + ) + + success = response.metrics.success + + evals = self._parse_evaluation_response(response.data) + + if len(evals) != len(self._ai_config.evaluation_metric_keys): + if self._logger: + self._logger.warn('Judge evaluation did not return all evaluations') + success = False + + return JudgeResponse( + evals=evals, + success=success, + ) + except Exception as error: + if self._logger: + self._logger.error(f'Judge evaluation failed: {error}') + return JudgeResponse( + evals={}, + success=False, + error=str(error) if isinstance(error, Exception) else 'Unknown error', + ) + + async def evaluate_messages( + self, + messages: list[LDMessage], + response: ChatResponse, + sampling_ratio: float = 1.0, + ) -> Optional[JudgeResponse]: + """ + Evaluates an AI response from chat messages and response. + + :param messages: Array of messages representing the conversation history + :param response: The AI response to be evaluated + :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) + :return: Evaluation results or None if not sampled + """ + input_text = '\r\n'.join([msg.content for msg in messages]) if messages else '' + output_text = response.message.content + + return await self.evaluate(input_text, output_text, sampling_ratio) + + def get_ai_config(self) -> AIJudgeConfig: + """ + Returns the AI Config used by this judge. + + :return: The judge AI configuration + """ + return self._ai_config + + def get_tracker(self) -> LDAIConfigTracker: + """ + Returns the tracker associated with this judge. + + :return: The tracker for the judge configuration + """ + return self._ai_config_tracker + + def get_provider(self) -> AIProvider: + """ + Returns the AI provider used by this judge. + + :return: The AI provider + """ + return self._ai_provider + + def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]: + """ + Constructs evaluation messages by combining judge's config messages with input/output. 
+ + :param input_text: The input text + :param output_text: The output text to evaluate + :return: List of messages for evaluation + """ + if not self._ai_config.messages: + return [] + + messages: list[LDMessage] = [] + for msg in self._ai_config.messages: + # Interpolate message content with reserved variables + content = self._interpolate_message(msg.content, { + 'message_history': input_text, + 'response_to_evaluate': output_text, + }) + messages.append(LDMessage(role=msg.role, content=content)) + + return messages + + def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: + """ + Interpolates message content with variables using Mustache templating. + + :param content: The message content template + :param variables: Variables to interpolate + :return: Interpolated message content + """ + # Use chevron (Mustache) for templating, with no escaping + return chevron.render(content, variables) + + def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: + """ + Parses the structured evaluation response from the AI provider. + + :param data: The structured response data + :return: Dictionary of evaluation scores keyed by metric key + """ + results: Dict[str, EvalScore] = {} + + if not data.get('evaluations') or not isinstance(data['evaluations'], dict): + if self._logger: + self._logger.warn('Invalid response: missing or invalid evaluations object') + return results + + evaluations = data['evaluations'] + + for metric_key in self._ai_config.evaluation_metric_keys: + evaluation = evaluations.get(metric_key) + + if not evaluation or not isinstance(evaluation, dict): + if self._logger: + self._logger.warn(f'Missing evaluation for metric key: {metric_key}') + continue + + score = evaluation.get('score') + reasoning = evaluation.get('reasoning') + + if not isinstance(score, (int, float)) or score < 0 or score > 1: + if self._logger: + self._logger.warn( + f'Invalid score evaluated for {metric_key}: {score}. ' + 'Score must be a number between 0 and 1 inclusive' + ) + continue + + if not isinstance(reasoning, str): + if self._logger: + self._logger.warn( + f'Invalid reasoning evaluated for {metric_key}: {reasoning}. ' + 'Reasoning must be a string' + ) + continue + + results[metric_key] = EvalScore(score=float(score), reasoning=reasoning) + + return results diff --git a/ldai/judge/evaluation_schema_builder.py b/ldai/judge/evaluation_schema_builder.py new file mode 100644 index 0000000..c996f08 --- /dev/null +++ b/ldai/judge/evaluation_schema_builder.py @@ -0,0 +1,74 @@ +"""Internal class for building dynamic evaluation response schemas.""" + +from typing import Any, Dict + + +class EvaluationSchemaBuilder: + """ + Internal class for building dynamic evaluation response schemas. + Not exported - only used internally by Judge. + """ + + @staticmethod + def build(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build an evaluation response schema from evaluation metric keys. 
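+
+        Example (abridged, illustrative output shape)::
+
+            EvaluationSchemaBuilder.build(['$ld:ai:judge:relevance'])
+            # Produces a JSON-Schema-style dict roughly like:
+            # {'type': 'object',
+            #  'properties': {'evaluations': {'type': 'object',
+            #      'properties': {'$ld:ai:judge:relevance': {
+            #          'type': 'object',
+            #          'properties': {'score': {'type': 'number', ...},
+            #                         'reasoning': {'type': 'string', ...}},
+            #          'required': ['score', 'reasoning']}},
+            #      'required': ['$ld:ai:judge:relevance']}},
+            #  'required': ['evaluations']}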
+ + :param evaluation_metric_keys: List of evaluation metric keys + :return: Schema dictionary for structured output + """ + return { + 'title': 'EvaluationResponse', + 'description': f"Response containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", + 'type': 'object', + 'properties': { + 'evaluations': { + 'type': 'object', + 'description': f"Object containing evaluation results for {', '.join(evaluation_metric_keys)} metrics", + 'properties': EvaluationSchemaBuilder._build_key_properties(evaluation_metric_keys), + 'required': evaluation_metric_keys, + 'additionalProperties': False, + }, + }, + 'required': ['evaluations'], + 'additionalProperties': False, + } + + @staticmethod + def _build_key_properties(evaluation_metric_keys: list[str]) -> Dict[str, Any]: + """ + Build properties for each evaluation metric key. + + :param evaluation_metric_keys: List of evaluation metric keys + :return: Dictionary of properties for each key + """ + result: Dict[str, Any] = {} + for key in evaluation_metric_keys: + result[key] = EvaluationSchemaBuilder._build_key_schema(key) + return result + + @staticmethod + def _build_key_schema(key: str) -> Dict[str, Any]: + """ + Build schema for a single evaluation metric key. + + :param key: Evaluation metric key + :return: Schema dictionary for the key + """ + return { + 'type': 'object', + 'properties': { + 'score': { + 'type': 'number', + 'minimum': 0, + 'maximum': 1, + 'description': f'Score between 0.0 and 1.0 for {key}', + }, + 'reasoning': { + 'type': 'string', + 'description': f'Reasoning behind the score for {key}', + }, + }, + 'required': ['score', 'reasoning'], + 'additionalProperties': False, + } diff --git a/ldai/models.py b/ldai/models.py new file mode 100644 index 0000000..c075dcf --- /dev/null +++ b/ldai/models.py @@ -0,0 +1,357 @@ +import warnings +from dataclasses import dataclass, field +from typing import Any, Dict, List, Literal, Optional, Union + +from ldai.tracker import LDAIConfigTracker + + +@dataclass +class LDMessage: + role: Literal['system', 'user', 'assistant'] + content: str + + def to_dict(self) -> dict: + """ + Render the given message as a dictionary object. + """ + return { + 'role': self.role, + 'content': self.content, + } + + +class ModelConfig: + """ + Configuration related to the model. + """ + + def __init__(self, name: str, parameters: Optional[Dict[str, Any]] = None, custom: Optional[Dict[str, Any]] = None): + """ + :param name: The name of the model. + :param parameters: Additional model-specific parameters. + :param custom: Additional customer provided data. + """ + self._name = name + self._parameters = parameters + self._custom = custom + + @property + def name(self) -> str: + """ + The name of the model. + """ + return self._name + + def get_parameter(self, key: str) -> Any: + """ + Retrieve model-specific parameters. + + Accessing a named, typed attribute (e.g. name) will result in the call + being delegated to the appropriate property. + """ + if key == 'name': + return self.name + + if self._parameters is None: + return None + + return self._parameters.get(key) + + def get_custom(self, key: str) -> Any: + """ + Retrieve customer provided data. + """ + if self._custom is None: + return None + + return self._custom.get(key) + + def to_dict(self) -> dict: + """ + Render the given model config as a dictionary object. + """ + return { + 'name': self._name, + 'parameters': self._parameters, + 'custom': self._custom, + } + + +class ProviderConfig: + """ + Configuration related to the provider. 
+ """ + + def __init__(self, name: str): + self._name = name + + @property + def name(self) -> str: + """ + The name of the provider. + """ + return self._name + + def to_dict(self) -> dict: + """ + Render the given provider config as a dictionary object. + """ + return { + 'name': self._name, + } + + +# ============================================================================ +# Judge Types +# ============================================================================ + +@dataclass(frozen=True) +class JudgeConfiguration: + """ + Configuration for judge attachment to AI Configs. + """ + + @dataclass(frozen=True) + class Judge: + """ + Configuration for a single judge attachment. + """ + key: str + sampling_rate: float + + def to_dict(self) -> dict: + """ + Render the judge as a dictionary object. + """ + return { + 'key': self.key, + 'samplingRate': self.sampling_rate, + } + + judges: List['JudgeConfiguration.Judge'] + + def to_dict(self) -> dict: + """ + Render the judge configuration as a dictionary object. + """ + return { + 'judges': [judge.to_dict() for judge in self.judges], + } + + +# ============================================================================ +# Base AI Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AIConfigDefault: + """ + Base AI Config interface for default implementations with optional enabled property. + """ + enabled: Optional[bool] = None + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + + def _base_to_dict(self) -> Dict[str, Any]: + """ + Render the base config fields as a dictionary object. + """ + return { + '_ldMeta': { + 'enabled': self.enabled or False, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + + +@dataclass(frozen=True) +class AIConfig: + """ + Base AI Config interface without mode-specific fields. + """ + enabled: bool + model: Optional[ModelConfig] = None + provider: Optional[ProviderConfig] = None + tracker: Optional[LDAIConfigTracker] = None + + def _base_to_dict(self) -> Dict[str, Any]: + """ + Render the base config fields as a dictionary object. + """ + return { + '_ldMeta': { + 'enabled': self.enabled, + }, + 'model': self.model.to_dict() if self.model else None, + 'provider': self.provider.to_dict() if self.provider else None, + } + + +# ============================================================================ +# Completion Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AICompletionConfigDefault(AIConfigDefault): + """ + Default Completion AI Config (default mode). + """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given default values as an AICompletionConfigDefault-compatible dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +@dataclass(frozen=True) +class AICompletionConfig(AIConfig): + """ + Completion AI Config (default mode). 
+ """ + messages: Optional[List[LDMessage]] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> dict: + """ + Render the given completion config as a dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +# ============================================================================ +# Agent Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AIAgentConfigDefault(AIConfigDefault): + """ + Default Agent-specific AI Config with instructions. + """ + instructions: Optional[str] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent config default as a dictionary object. + """ + result = self._base_to_dict() + if self.instructions is not None: + result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +@dataclass(frozen=True) +class AIAgentConfig(AIConfig): + """ + Agent-specific AI Config with instructions. + """ + instructions: Optional[str] = None + judge_configuration: Optional[JudgeConfiguration] = None + + def to_dict(self) -> Dict[str, Any]: + """ + Render the given agent config as a dictionary object. + """ + result = self._base_to_dict() + if self.instructions is not None: + result['instructions'] = self.instructions + if self.judge_configuration is not None: + result['judgeConfiguration'] = self.judge_configuration.to_dict() + return result + + +# ============================================================================ +# Judge Config Types +# ============================================================================ + +@dataclass(frozen=True) +class AIJudgeConfigDefault(AIConfigDefault): + """ + Default Judge-specific AI Config with required evaluation metric key. + """ + messages: Optional[List[LDMessage]] = None + evaluation_metric_keys: Optional[List[str]] = None + + def to_dict(self) -> dict: + """ + Render the given judge config default as a dictionary object. + """ + result = self._base_to_dict() + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + if self.evaluation_metric_keys is not None: + result['evaluationMetricKeys'] = self.evaluation_metric_keys + return result + + +@dataclass(frozen=True) +class AIJudgeConfig(AIConfig): + """ + Judge-specific AI Config with required evaluation metric key. + """ + evaluation_metric_keys: List[str] = field(default_factory=list) + messages: Optional[List[LDMessage]] = None + + def to_dict(self) -> dict: + """ + Render the given judge config as a dictionary object. + """ + result = self._base_to_dict() + result['evaluationMetricKeys'] = self.evaluation_metric_keys + result['messages'] = [message.to_dict() for message in self.messages] if self.messages else None + return result + + +# ============================================================================ +# Agent Request Config +# ============================================================================ + +@dataclass +class AIAgentConfigRequest: + """ + Configuration for a single agent request. + + Combines agent key with its specific default configuration and variables. 
+ """ + key: str + default_value: AIAgentConfigDefault + variables: Optional[Dict[str, Any]] = None + + +# Type alias for multiple agents +AIAgents = Dict[str, AIAgentConfig] + +# Type alias for all AI Config variants +AIConfigKind = Union[AIAgentConfig, AICompletionConfig, AIJudgeConfig] + + +# ============================================================================ +# Deprecated Type Aliases for Backward Compatibility +# ============================================================================ + +# Note: AIConfig is now defined above as a base class (line 169). +# For backward compatibility, code should migrate to: +# - Use AICompletionConfigDefault for default/input values +# - Use AICompletionConfig for return values + +# Deprecated: Use AIAgentConfigDefault instead +LDAIAgentDefaults = AIAgentConfigDefault + +# Deprecated: Use AIAgentConfigRequest instead +LDAIAgentConfig = AIAgentConfigRequest + +# Deprecated: Use AIAgentConfig instead (note: this was the old return type) +LDAIAgent = AIAgentConfig diff --git a/ldai/providers/__init__.py b/ldai/providers/__init__.py new file mode 100644 index 0000000..48152cc --- /dev/null +++ b/ldai/providers/__init__.py @@ -0,0 +1,28 @@ +"""AI Provider interfaces and factory for LaunchDarkly AI SDK.""" + +from ldai.providers.ai_provider import AIProvider +from ldai.providers.ai_provider_factory import (AIProviderFactory, + SupportedAIProvider) + +# Export LangChain provider if available +# TODO: Uncomment when langchain provider package is introduced +# try: +# from ldai.providers.langchain import LangChainProvider +# __all__ = [ +# 'AIProvider', +# 'AIProviderFactory', +# 'LangChainProvider', +# 'SupportedAIProvider', +# ] +# except ImportError: +# __all__ = [ +# 'AIProvider', +# 'AIProviderFactory', +# 'SupportedAIProvider', +# ] + +__all__ = [ + 'AIProvider', + 'AIProviderFactory', + 'SupportedAIProvider', +] diff --git a/ldai/providers/ai_provider.py b/ldai/providers/ai_provider.py new file mode 100644 index 0000000..cc7b21e --- /dev/null +++ b/ldai/providers/ai_provider.py @@ -0,0 +1,95 @@ +"""Abstract base class for AI providers.""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Union + +from ldai.models import AIConfigKind, LDMessage +from ldai.providers.types import ChatResponse, StructuredResponse + + +class AIProvider(ABC): + """ + Abstract base class for AI providers that implement chat model functionality. + + This class provides the contract that all provider implementations must follow + to integrate with LaunchDarkly's tracking and configuration capabilities. + + Following the AICHAT spec recommendation to use base classes with non-abstract methods + for better extensibility and backwards compatibility. + """ + + def __init__(self, logger: Optional[Any] = None): + """ + Initialize the AI provider. + + :param logger: Optional logger for logging provider operations. + """ + self.logger = logger + + async def invoke_model(self, messages: List[LDMessage]) -> ChatResponse: + """ + Invoke the chat model with an array of messages. + + This method should convert messages to provider format, invoke the model, + and return a ChatResponse with the result and metrics. + + Default implementation takes no action and returns a placeholder response. + Provider implementations should override this method. 
+ + :param messages: Array of LDMessage objects representing the conversation + :return: ChatResponse containing the model's response + """ + if self.logger: + self.logger.warn('invokeModel not implemented by this provider') + + from ldai.models import LDMessage + from ldai.providers.types import LDAIMetrics + + return ChatResponse( + message=LDMessage(role='assistant', content=''), + metrics=LDAIMetrics(success=False, usage=None), + ) + + async def invoke_structured_model( + self, + messages: List[LDMessage], + response_structure: Dict[str, Any], + ) -> StructuredResponse: + """ + Invoke the chat model with structured output support. + + This method should convert messages to provider format, invoke the model with + structured output configuration, and return a structured response. + + Default implementation takes no action and returns a placeholder response. + Provider implementations should override this method. + + :param messages: Array of LDMessage objects representing the conversation + :param response_structure: Dictionary of output configurations keyed by output name + :return: StructuredResponse containing the structured data + """ + if self.logger: + self.logger.warn('invokeStructuredModel not implemented by this provider') + + from ldai.providers.types import LDAIMetrics + + return StructuredResponse( + data={}, + raw_response='', + metrics=LDAIMetrics(success=False, usage=None), + ) + + @staticmethod + @abstractmethod + async def create(ai_config: AIConfigKind, logger: Optional[Any] = None) -> 'AIProvider': + """ + Static method that constructs an instance of the provider. + + Each provider implementation must provide their own static create method + that accepts an AIConfigKind and returns a configured instance. + + :param ai_config: The LaunchDarkly AI configuration + :param logger: Optional logger for the provider + :return: Configured provider instance + """ + raise NotImplementedError('Provider implementations must override the static create method') diff --git a/ldai/providers/ai_provider_factory.py b/ldai/providers/ai_provider_factory.py new file mode 100644 index 0000000..3fd0f50 --- /dev/null +++ b/ldai/providers/ai_provider_factory.py @@ -0,0 +1,171 @@ +"""Factory for creating AIProvider instances based on the provider configuration.""" + +import importlib +from typing import Any, Dict, List, Literal, Optional, Tuple, Type + +from ldai.models import AIConfigKind +from ldai.providers.ai_provider import AIProvider + +# List of supported AI providers +SUPPORTED_AI_PROVIDERS: List[str] = [ + # Multi-provider packages should be last in the list + # 'langchain', # TODO: Uncomment when langchain provider package is introduced +] + +# Type representing the supported AI providers +# TODO: Update this type when provider packages are introduced +# SupportedAIProvider = Literal['langchain'] +SupportedAIProvider = Literal['none'] # Placeholder until providers are added + + +class AIProviderFactory: + """ + Factory for creating AIProvider instances based on the provider configuration. + """ + + @staticmethod + async def create( + ai_config: AIConfigKind, + logger: Optional[Any] = None, + default_ai_provider: Optional[SupportedAIProvider] = None, + ) -> Optional[AIProvider]: + """ + Create an AIProvider instance based on the AI configuration. + + This method attempts to load provider-specific implementations dynamically. + Returns None if the provider is not supported. 
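+
+        Example (illustrative)::
+
+            provider = await AIProviderFactory.create(ai_config, logger)
+            if provider is None:
+                # No supported provider package is installed or configured.
+                ...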
+ + :param ai_config: The AI configuration + :param logger: Optional logger for logging provider initialization + :param default_ai_provider: Optional default AI provider to use + :return: AIProvider instance or None if not supported + """ + provider_name = ai_config.provider.name.lower() if ai_config.provider else None + # Determine which providers to try based on default_ai_provider + providers_to_try = AIProviderFactory._get_providers_to_try(default_ai_provider, provider_name) + + # Try each provider in order + for provider_type in providers_to_try: + provider = await AIProviderFactory._try_create_provider(provider_type, ai_config, logger) + if provider: + return provider + + # If no provider was successfully created, log a warning + if logger: + logger.warn( + f"Provider is not supported or failed to initialize: {provider_name or 'unknown'}" + ) + return None + + @staticmethod + def _get_providers_to_try( + default_ai_provider: Optional[SupportedAIProvider], + provider_name: Optional[str], + ) -> List[SupportedAIProvider]: + """ + Determine which providers to try based on default_ai_provider and provider_name. + + :param default_ai_provider: Optional default provider to use + :param provider_name: Optional provider name from config + :return: List of providers to try in order + """ + # If default_ai_provider is set, only try that specific provider + if default_ai_provider: + return [default_ai_provider] + + # If no default_ai_provider is set, try all providers in order + provider_set = set() + + # First try the specific provider if it's supported + if provider_name and provider_name in SUPPORTED_AI_PROVIDERS: + provider_set.add(provider_name) # type: ignore + + # Then try multi-provider packages, but avoid duplicates + # TODO: Uncomment when langchain provider package is introduced + # multi_provider_packages: List[SupportedAIProvider] = ['langchain'] + # for provider in multi_provider_packages: + # provider_set.add(provider) + + # Return list of providers, converting from set + # The set contains strings that should be valid SupportedAIProvider values + return list(provider_set) # type: ignore[arg-type] + + @staticmethod + async def _try_create_provider( + provider_type: SupportedAIProvider, + ai_config: AIConfigKind, + logger: Optional[Any] = None, + ) -> Optional[AIProvider]: + """ + Try to create a provider of the specified type. + + :param provider_type: Type of provider to create + :param ai_config: AI configuration + :param logger: Optional logger + :return: AIProvider instance or None if creation failed + """ + # Handle built-in providers (part of this package) + # TODO: Uncomment when langchain provider package is introduced + # if provider_type == 'langchain': + # try: + # from ldai.providers.langchain import LangChainProvider + # return await LangChainProvider.create(ai_config, logger) + # except ImportError as error: + # if logger: + # logger.warn( + # f"Error creating LangChainProvider: {error}. " + # f"Make sure langchain and langchain-core packages are installed." 
+        #             )
+        #     return None
+
+        # For future external providers, use dynamic import
+        provider_mappings: Dict[str, Tuple[str, str]] = {
+            # 'openai': ('launchdarkly_server_sdk_ai_openai', 'OpenAIProvider'),
+            # 'vercel': ('launchdarkly_server_sdk_ai_vercel', 'VercelProvider'),
+        }
+
+        if provider_type not in provider_mappings:
+            return None
+
+        package_name, provider_class_name = provider_mappings[provider_type]
+        return await AIProviderFactory._create_provider(
+            package_name, provider_class_name, ai_config, logger
+        )
+
+    @staticmethod
+    async def _create_provider(
+        package_name: str,
+        provider_class_name: str,
+        ai_config: AIConfigKind,
+        logger: Optional[Any] = None,
+    ) -> Optional[AIProvider]:
+        """
+        Create a provider instance dynamically.
+
+        :param package_name: Name of the package containing the provider
+        :param provider_class_name: Name of the provider class
+        :param ai_config: AI configuration
+        :param logger: Optional logger
+        :return: AIProvider instance or None if creation failed
+        """
+        try:
+            # Try to dynamically import the provider
+            # This will work if the package is installed
+            module = importlib.import_module(package_name)
+            provider_class: Type[AIProvider] = getattr(module, provider_class_name)
+
+            provider = await provider_class.create(ai_config, logger)
+            if logger:
+                logger.debug(
+                    f"Successfully created AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} "
+                    f"with package {package_name}"
+                )
+            return provider
+        except Exception as error:
+            # If the provider is not available or creation fails, return None
+            if logger:
+                logger.warn(
+                    f"Error creating AIProvider for: {ai_config.provider.name if ai_config.provider else 'unknown'} "
+                    f"with package {package_name}: {error}"
+                )
+            return None
diff --git a/ldai/providers/types.py b/ldai/providers/types.py
new file mode 100644
index 0000000..de54698
--- /dev/null
+++ b/ldai/providers/types.py
@@ -0,0 +1,91 @@
+"""Types for AI provider responses."""
+
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+from ldai.models import LDMessage
+from ldai.tracker import TokenUsage
+
+
+@dataclass
+class LDAIMetrics:
+    """
+    Metrics information for AI operations that includes success status and token usage.
+    """
+    success: bool
+    usage: Optional[TokenUsage] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Render the metrics as a dictionary object.
+        """
+        result: Dict[str, Any] = {
+            'success': self.success,
+        }
+        if self.usage is not None:
+            result['usage'] = {
+                'total': self.usage.total,
+                'input': self.usage.input,
+                'output': self.usage.output,
+            }
+        return result
+
+
+@dataclass
+class ChatResponse:
+    """
+    Chat response structure.
+    """
+    message: LDMessage
+    metrics: LDAIMetrics
+    evaluations: Optional[List[Any]] = None  # Pending judge evaluation tasks that resolve to JudgeResponse objects
+
+
+@dataclass
+class StructuredResponse:
+    """
+    Structured response from AI models.
+    """
+    data: Dict[str, Any]
+    raw_response: str
+    metrics: LDAIMetrics
+
+
+@dataclass
+class EvalScore:
+    """
+    Score and reasoning for a single evaluation metric.
+    """
+    score: float  # Score between 0.0 and 1.0
+    reasoning: str  # Reasoning behind the provided score
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Render the evaluation score as a dictionary object.
+        """
+        return {
+            'score': self.score,
+            'reasoning': self.reasoning,
+        }
+
+
+@dataclass
+class JudgeResponse:
+    """
+    Response from a judge evaluation containing scores and reasoning for multiple metrics.
+ """ + evals: Dict[str, EvalScore] # Dictionary where keys are metric names and values contain score and reasoning + success: bool # Whether the evaluation completed successfully + error: Optional[str] = None # Error message if evaluation failed + + def to_dict(self) -> Dict[str, Any]: + """ + Render the judge response as a dictionary object. + """ + result: Dict[str, Any] = { + 'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()}, + 'success': self.success, + } + if self.error is not None: + result['error'] = self.error + return result diff --git a/ldai/testing/test_agents.py b/ldai/testing/test_agents.py index b2e80c0..755f2e5 100644 --- a/ldai/testing/test_agents.py +++ b/ldai/testing/test_agents.py @@ -2,8 +2,8 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai.client import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, - ModelConfig, ProviderConfig) +from ldai import (LDAIAgentConfig, LDAIAgentDefaults, LDAIClient, ModelConfig, + ProviderConfig) @pytest.fixture diff --git a/ldai/testing/test_model_config.py b/ldai/testing/test_model_config.py index 1ffc033..26a02c9 100644 --- a/ldai/testing/test_model_config.py +++ b/ldai/testing/test_model_config.py @@ -2,7 +2,8 @@ from ldclient import Config, Context, LDClient from ldclient.integrations.test_data import TestData -from ldai.client import AIConfig, LDAIClient, LDMessage, ModelConfig +from ldai import LDAIClient, LDMessage, ModelConfig +from ldai.models import AICompletionConfigDefault @pytest.fixture @@ -133,14 +134,14 @@ def test_model_config_handles_custom(): def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig( + default_value = AICompletionConfigDefault( enabled=True, model=ModelConfig('fakeModel', parameters={'temperature': 0.5, 'maxTokens': 4096}), messages=[LDMessage(role='system', content='Hello, {{name}}!')], ) variables = {'name': 'World'} - config, _ = ldai_client.config('missing-flag', context, default_value, variables) + config = ldai_client.config('missing-flag', context, default_value, variables) assert config.messages is not None assert len(config.messages) > 0 @@ -155,14 +156,14 @@ def test_uses_default_on_invalid_flag(ldai_client: LDAIClient): def test_model_config_interpolation(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig( + default_value = AICompletionConfigDefault( enabled=True, model=ModelConfig('fakeModel'), messages=[LDMessage(role='system', content='Hello, {{name}}!')], ) variables = {'name': 'World'} - config, _ = ldai_client.config('model-config', context, default_value, variables) + config = ldai_client.config('model-config', context, default_value, variables) assert config.messages is not None assert len(config.messages) > 0 @@ -177,9 +178,9 @@ def test_model_config_interpolation(ldai_client: LDAIClient): def test_model_config_no_variables(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('model-config', context, default_value, {}) + config = ldai_client.config('model-config', context, default_value, {}) assert config.messages is not None assert len(config.messages) > 0 @@ -194,10 +195,10 @@ def test_model_config_no_variables(ldai_client: LDAIClient): def 
test_provider_config_handling(ldai_client: LDAIClient): context = Context.builder('user-key').name("Sandy").build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config('model-config', context, default_value, variables) + config = ldai_client.config('model-config', context, default_value, variables) assert config.provider is not None assert config.provider.name == 'fakeProvider' @@ -205,10 +206,10 @@ def test_provider_config_handling(ldai_client: LDAIClient): def test_context_interpolation(ldai_client: LDAIClient): context = Context.builder('user-key').name("Sandy").set('last', 'Beaches').build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config( + config = ldai_client.config( 'ctx-interpolation', context, default_value, variables ) @@ -228,10 +229,10 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): user_context = Context.builder('user-key').name("Sandy").build() org_context = Context.builder('org-key').kind('org').name("LaunchDarkly").set('shortname', 'LD').build() context = Context.multi_builder().add(user_context).add(org_context).build() - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World'} - config, _ = ldai_client.config( + config = ldai_client.config( 'multi-ctx-interpolation', context, default_value, variables ) @@ -249,10 +250,10 @@ def test_multi_context_interpolation(ldai_client: LDAIClient): def test_model_config_multiple(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=True, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=True, model=ModelConfig('fake-model'), messages=[]) variables = {'name': 'World', 'day': 'Monday'} - config, _ = ldai_client.config( + config = ldai_client.config( 'multiple-messages', context, default_value, variables ) @@ -270,9 +271,9 @@ def test_model_config_multiple(ldai_client: LDAIClient): def test_model_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('off-config', context, default_value, {}) + config = ldai_client.config('off-config', context, default_value, {}) assert config.model is not None assert config.enabled is False @@ -283,9 +284,9 @@ def test_model_config_disabled(ldai_client: LDAIClient): def test_model_initial_config_disabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('initial-config-disabled', context, default_value, {}) + config = ldai_client.config('initial-config-disabled', context, default_value, {}) assert config.enabled is False assert config.model is None @@ 
-295,9 +296,9 @@ def test_model_initial_config_disabled(ldai_client: LDAIClient): def test_model_initial_config_enabled(ldai_client: LDAIClient): context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, _ = ldai_client.config('initial-config-enabled', context, default_value, {}) + config = ldai_client.config('initial-config-enabled', context, default_value, {}) assert config.enabled is True assert config.model is None @@ -318,9 +319,9 @@ def test_config_method_tracking(ldai_client: LDAIClient): client = LDAIClient(mock_client) context = Context.create('user-key') - default_value = AIConfig(enabled=False, model=ModelConfig('fake-model'), messages=[]) + default_value = AICompletionConfigDefault(enabled=False, model=ModelConfig('fake-model'), messages=[]) - config, tracker = client.config('test-config-key', context, default_value) + config = client.config('test-config-key', context, default_value) mock_client.track.assert_called_once_with( '$ld:ai:config:function:single', diff --git a/ldai/testing/test_tracker.py b/ldai/testing/test_tracker.py index 19c8161..2e39d98 100644 --- a/ldai/testing/test_tracker.py +++ b/ldai/testing/test_tracker.py @@ -276,7 +276,8 @@ def test_tracks_bedrock_metrics_with_error(client: LDClient): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -def test_tracks_openai_metrics(client: LDClient): +@pytest.mark.asyncio +async def test_tracks_openai_metrics(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) @@ -292,7 +293,10 @@ def to_dict(self): "completion_tokens": 110, } - tracker.track_openai_metrics(lambda: Result()) + async def get_result(): + return Result() + + await tracker.track_openai_metrics(get_result) calls = [ call( @@ -326,15 +330,16 @@ def to_dict(self): assert tracker.get_summary().usage == TokenUsage(330, 220, 110) -def test_tracks_openai_metrics_with_exception(client: LDClient): +@pytest.mark.asyncio +async def test_tracks_openai_metrics_with_exception(client: LDClient): context = Context.create("user-key") tracker = LDAIConfigTracker(client, "variation-key", "config-key", 3, "fakeModel", "fakeProvider", context) - def raise_exception(): + async def raise_exception(): raise ValueError("Something went wrong") try: - tracker.track_openai_metrics(raise_exception) + await tracker.track_openai_metrics(raise_exception) assert False, "Should have thrown an exception" except ValueError: pass diff --git a/ldai/tracker.py b/ldai/tracker.py index a049952..11b846a 100644 --- a/ldai/tracker.py +++ b/ldai/tracker.py @@ -1,7 +1,7 @@ import time from dataclasses import dataclass from enum import Enum -from typing import Dict, Optional +from typing import Any, Dict, Optional from ldclient import Context, LDClient @@ -144,7 +144,7 @@ def track_duration_of(self, func): An exception occurring during the execution of the function will still track the duration. The exception will be re-thrown. - :param func: Function to track. + :param func: Function to track (synchronous only). :return: Result of the tracked function. """ start_time = time.time() @@ -157,6 +157,90 @@ def track_duration_of(self, func): return result + async def track_metrics_of(self, metrics_extractor, func): + """ + Track metrics for a generic AI operation. 
+
+        This function will track the duration of the operation, extract metrics using the provided
+        metrics extractor function, and track success or error status accordingly.
+
+        If the provided function throws, this method records the duration and an error,
+        then re-raises the exception. A failed operation will not have any token usage data.
+
+        :param metrics_extractor: Function that extracts LDAIMetrics from the operation result
+        :param func: Async function which executes the operation
+        :return: The result of the operation
+        """
+        start_time = time.time()
+        result = None
+        try:
+            result = await func()
+        except Exception as err:
+            end_time = time.time()
+            duration = int((end_time - start_time) * 1000)
+            self.track_duration(duration)
+            self.track_error()
+            raise err
+
+        # Track duration after successful call
+        end_time = time.time()
+        duration = int((end_time - start_time) * 1000)
+        self.track_duration(duration)
+
+        # Extract metrics from the result of the successful AI call
+        metrics = metrics_extractor(result)
+
+        # Track success/error based on metrics
+        if metrics.success:
+            self.track_success()
+        else:
+            self.track_error()
+
+        # Track token usage if available
+        if metrics.usage:
+            self.track_tokens(metrics.usage)
+
+        return result
+
+    def track_eval_scores(self, scores: Dict[str, Any]) -> None:
+        """
+        Track evaluation scores for multiple metrics.
+
+        :param scores: Dictionary mapping metric keys to their evaluation scores (EvalScore objects)
+        """
+        from ldai.providers.types import EvalScore
+
+        # Track each evaluation score individually
+        for metric_key, eval_score in scores.items():
+            if isinstance(eval_score, EvalScore):
+                self._ld_client.track(
+                    metric_key,
+                    self._context,
+                    self.__get_track_data(),
+                    eval_score.score
+                )
+
+    def track_judge_response(self, judge_response: Any) -> None:
+        """
+        Track a judge response, including evaluation scores and success status.
+
+        :param judge_response: JudgeResponse object containing evals and success status
+        """
+        from ldai.providers.types import JudgeResponse
+
+        if isinstance(judge_response, JudgeResponse):
+            # Track evaluation scores
+            if judge_response.evals:
+                self.track_eval_scores(judge_response.evals)
+
+            # Track success/error based on judge response
+            if judge_response.success:
+                self.track_success()
+            else:
+                self.track_error()
+
     def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None:
         """
         Track user feedback for an AI operation.
@@ -197,7 +281,7 @@ def track_error(self) -> None:
             "$ld:ai:generation:error", self._context, self.__get_track_data(), 1
         )
 
-    def track_openai_metrics(self, func):
+    async def track_openai_metrics(self, func):
         """
         Track OpenAI-specific operations.
 
@@ -211,15 +295,22 @@ def to_dict(self):
 
         A failed operation will not have any token usage data.
 
-        :param func: Function to track.
+        :param func: Async function to track.
         :return: Result of the tracked function.
""" + start_time = time.time() try: - result = self.track_duration_of(func) + result = await func() + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) self.track_success() if hasattr(result, "usage") and hasattr(result.usage, "to_dict"): self.track_tokens(_openai_to_token_usage(result.usage.to_dict())) except Exception: + end_time = time.time() + duration = int((end_time - start_time) * 1000) + self.track_duration(duration) self.track_error() raise diff --git a/pyproject.toml b/pyproject.toml index 200215c..9c1f44a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ chevron = "=0.14.0" pytest = ">=2.8" pytest-cov = ">=2.4.0" pytest-mypy = "==1.0.1" +pytest-asyncio = ">=0.21.0" mypy = "==1.18.2" pycodestyle = "^2.12.1" isort = ">=5.13.2,<7.0.0" diff --git a/setup.cfg b/setup.cfg index c178190..1fb1827 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,2 @@ [pycodestyle] -ignore = E501 +ignore = E501,W503