From 84fda071f64f4bc34730dcb41955c53e4fccd53a Mon Sep 17 00:00:00 2001
From: Vinicius Mello
Date: Tue, 4 Nov 2025 20:05:19 -0300
Subject: [PATCH] feat: add examples for creating and managing LLM projects and
 tests

- Introduced `create_llm_project_with_tests.py` to demonstrate LLM project
  creation and various test types including rubric, character length, PII
  detection, and row count tests.
- Added `create_project.py` to showcase project creation for different task
  types (LLM, tabular classification, regression, and text classification)
  and listing/filtering projects.
- Included detailed print statements for user feedback during execution.
---
 .../rest-api/create_llm_project_with_tests.py | 413 ++++++++++++++++++
 examples/rest-api/create_project.py           | 259 +++++++++++
 2 files changed, 672 insertions(+)
 create mode 100644 examples/rest-api/create_llm_project_with_tests.py
 create mode 100644 examples/rest-api/create_project.py

diff --git a/examples/rest-api/create_llm_project_with_tests.py b/examples/rest-api/create_llm_project_with_tests.py
new file mode 100644
index 00000000..80723ea3
--- /dev/null
+++ b/examples/rest-api/create_llm_project_with_tests.py
@@ -0,0 +1,413 @@
+"""Example demonstrating how to create an LLM project and add tests to it.
+
+This script shows:
+- Creating an LLM project
+- Adding various types of tests suitable for LLM monitoring:
+  - LLM Rubric tests (evaluating quality criteria)
+  - Character length tests (checking response length)
+  - PII detection tests (ensuring no personal information leakage)
+  - Row count tests (monitoring data volume)
+- Listing tests to verify creation
+- Displaying test details
+
+Requirements:
+    - Set OPENLAYER_API_KEY environment variable
+"""
+
+import os
+from typing import Any
+
+from openlayer import Openlayer
+from openlayer.types.project_create_response import ProjectCreateResponse
+from openlayer.types.projects.test_create_response import TestCreateResponse
+from openlayer.types.projects.test_list_response import TestListResponse
+
+
+def create_llm_project(client: Openlayer) -> ProjectCreateResponse:
+    """Create an LLM-based project.
+
+    Args:
+        client: Initialized Openlayer client.
+
+    Returns:
+        ProjectCreateResponse containing project details.
+    """
+    print("\n" + "=" * 70)
+    print("Step 1: Creating LLM Project")
+    print("=" * 70)
+
+    project = client.projects.create(
+        name="CX Chatbot",
+        task_type="llm-base",
+        description="LLM project for monitoring customer support chatbot quality and safety",
+    )
+
+    print(f"āœ… Created LLM project: {project.name}")
+    print(f"   Project ID: {project.id}")
+    print(f"   Task Type: {project.task_type}")
+    print(f"   App Link: {project.links.app}")
+
+    return project
+
+
+def create_llm_rubric_test(client: Openlayer, project_id: str) -> TestCreateResponse:
+    """Create an LLM rubric test to evaluate response quality.
+
+    This test uses LLM-based evaluation to check if responses meet quality criteria.
+
+    Args:
+        client: Initialized Openlayer client.
+        project_id: The ID of the project to add the test to.
+
+    Returns:
+        TestCreateResponse containing test details.
+    """
+    print("\nšŸ“ Creating LLM Rubric Test...")
+
+    test = client.projects.tests.create(
+        project_id=project_id,
+        name="Response Quality Rubric",
+        description="Evaluate if chatbot responses are helpful, accurate, and professional",
+        type="performance",
+        subtype="llmRubricThresholdV2",
+        thresholds=[
+            {
+                "insight_name": "llmRubricV2",
+                "measurement": "passRate",
+                "operator": ">=",
+                "value": 0.8,
+                "threshold_mode": "manual",
+                "insight_parameters": [
+                    {
+                        "name": "rubric",
+                        "value": "The response should be helpful, accurate, and maintain a professional tone.",
+                    }
+                ],
+            }
+        ],
+        uses_ml_model=True,
+        uses_production_data=False,
+        uses_reference_dataset=False,
+        uses_training_dataset=False,
+        uses_validation_dataset=False,
+    )
+
+    print(f"āœ… Created test: {test.name}")
+    print(f"   Test ID: {test.id}")
+    print(f"   Type: {test.type}")
+    print(f"   Subtype: {test.subtype}")
+
+    return test
+
+
+def create_character_length_test(client: Openlayer, project_id: str) -> TestCreateResponse:
+    """Create a character length test to ensure responses aren't too short or too long.
+
+    Args:
+        client: Initialized Openlayer client.
+        project_id: The ID of the project to add the test to.
+
+    Returns:
+        TestCreateResponse containing test details.
+    """
+    print("\nšŸ“ Creating Character Length Test...")
+
+    test = client.projects.tests.create(
+        project_id=project_id,
+        name="Response Length Check",
+        description="Ensure responses are between 50 and 2000 characters",
+        type="integrity",
+        subtype="characterLength",
+        thresholds=[
+            {
+                "insight_name": "characterLength",
+                "measurement": "averageCharacterLength",
+                "operator": ">=",
+                "value": 50.0,
+                "threshold_mode": "manual",
+                "insight_parameters": [{"name": "column_name", "value": "output"}],
+            },
+            {
+                "insight_name": "characterLength",
+                "measurement": "averageCharacterLength",
+                "operator": "<=",
+                "value": 2000.0,
+                "threshold_mode": "manual",
+                "insight_parameters": [{"name": "column_name", "value": "output"}],
+            },
+        ],
+        uses_ml_model=False,
+        uses_production_data=False,
+        uses_reference_dataset=False,
+        uses_training_dataset=False,
+        uses_validation_dataset=False,
+    )
+
+    print(f"āœ… Created test: {test.name}")
+    print(f"   Test ID: {test.id}")
+    print(f"   Type: {test.type}")
+    print(f"   Subtype: {test.subtype}")
+
+    return test
+
+
+def create_pii_detection_test(client: Openlayer, project_id: str) -> TestCreateResponse:
+    """Create a PII detection test to ensure no personal information is leaked.
+
+    Args:
+        client: Initialized Openlayer client.
+        project_id: The ID of the project to add the test to.
+
+    Returns:
+        TestCreateResponse containing test details.
+    """
+    print("\nšŸ”’ Creating PII Detection Test...")
+
+    test = client.projects.tests.create(
+        project_id=project_id,
+        name="PII Leakage Prevention",
+        description="Ensure chatbot responses don't contain personal identifiable information",
+        type="integrity",
+        subtype="containsPii",
+        thresholds=[
+            {
+                "insight_name": "containsPii",
+                "measurement": "piiRowCount",
+                "operator": "is",
+                "value": 0.0,
+                "threshold_mode": "manual",
+                "insight_parameters": [{"name": "column_name", "value": "output"}],
+            }
+        ],
+        uses_ml_model=False,
+        uses_production_data=False,
+        uses_reference_dataset=False,
+        uses_training_dataset=False,
+        uses_validation_dataset=False,
+    )
+
+    print(f"āœ… Created test: {test.name}")
+    print(f"   Test ID: {test.id}")
+    print(f"   Type: {test.type}")
+    print(f"   Subtype: {test.subtype}")
+
+    return test
+
+
+def create_row_count_test(
+    client: Openlayer, project_id: str, monitoring: bool = True
+) -> TestCreateResponse:
+    """Create a row count test to monitor data volume.
+
+    Args:
+        client: Initialized Openlayer client.
+        project_id: The ID of the project to add the test to.
+        monitoring: Whether this is a monitoring test (uses production data).
+
+    Returns:
+        TestCreateResponse containing test details.
+    """
+    print("\nšŸ“Š Creating Row Count Test...")
+
+    test_config: dict[str, Any] = {
+        "project_id": project_id,
+        "name": "Minimum Daily Interactions",
+        "description": "Ensure we have sufficient data for monitoring (at least 100 rows per day)",
+        "type": "integrity",
+        "subtype": "rowCount",
+        "thresholds": [
+            {
+                "insight_name": "metrics",
+                "measurement": "rowCount",
+                "operator": ">=",
+                "value": 100.0,
+                "threshold_mode": "manual",
+            }
+        ],
+    }
+
+    if monitoring:
+        # For monitoring mode, add production data parameters
+        test_config["uses_production_data"] = True
+        test_config["uses_reference_dataset"] = False
+        test_config["evaluation_window"] = 86400.0  # 24 hours in seconds
+        test_config["delay_window"] = 3600.0  # 1 hour in seconds
+    else:
+        test_config["uses_production_data"] = False
+        test_config["uses_reference_dataset"] = False
+
+    # Add common required fields
+    test_config["uses_ml_model"] = False
+    test_config["uses_training_dataset"] = False
+    test_config["uses_validation_dataset"] = False
+
+    test = client.projects.tests.create(**test_config)
+
+    print(f"āœ… Created test: {test.name}")
+    print(f"   Test ID: {test.id}")
+    print(f"   Type: {test.type}")
+    print(f"   Subtype: {test.subtype}")
+    if monitoring:
+        print("   Uses Production Data: Yes")
+        print(f"   Evaluation Window: {test.evaluation_window}s (24 hours)")
+
+    return test
+
+
+def create_sentence_length_test(client: Openlayer, project_id: str) -> TestCreateResponse:
+    """Create a sentence length test to ensure responses are concise.
+
+    Args:
+        client: Initialized Openlayer client.
+        project_id: The ID of the project to add the test to.
+
+    Returns:
+        TestCreateResponse containing test details.
+    """
+    print("\nšŸ“ Creating Sentence Length Test...")
+
+    test = client.projects.tests.create(
+        project_id=project_id,
+        name="Sentence Length Check",
+        description="Ensure responses have appropriate sentence length (not too verbose)",
+        type="integrity",
+        subtype="sentenceLength",
+        thresholds=[
+            {
+                "insight_name": "sentenceLength",
+                "measurement": "averageWordsPerSentence",
+                "operator": "<=",
+                "value": 30.0,
+                "threshold_mode": "manual",
+                "insight_parameters": [{"name": "column_name", "value": "output"}],
+            }
+        ],
+        uses_ml_model=False,
+        uses_production_data=False,
+        uses_reference_dataset=False,
+        uses_training_dataset=False,
+        uses_validation_dataset=False,
+    )
+
+    print(f"āœ… Created test: {test.name}")
+    print(f"   Test ID: {test.id}")
+    print(f"   Type: {test.type}")
+    print(f"   Subtype: {test.subtype}")
+
+    return test
+
+
+def list_project_tests(client: Openlayer, project_id: str) -> TestListResponse:
+    """List all tests for a project.
+
+    Args:
+        client: Initialized Openlayer client.
+        project_id: The ID of the project to list tests for.
+
+    Returns:
+        TestListResponse containing list of tests.
+    """
+    print("\n" + "=" * 70)
+    print("Step 3: Listing All Tests")
+    print("=" * 70)
+
+    tests = client.projects.tests.list(project_id=project_id)
+
+    print(f"\nšŸ“‹ Found {len(tests.items)} test(s) in the project:")
+    for test in tests.items:
+        print(f"\n  • {test.name}")
+        print(f"    ID: {test.id}")
+        print(f"    Type: {test.type}")
+        print(f"    Subtype: {test.subtype}")
+        print(f"    Description: {test.description}")
+
+    return tests
+
+
+def display_test_summary(tests: TestListResponse) -> None:
+    """Display a summary of tests by type.
+
+    Args:
+        tests: TestListResponse containing list of tests.
+    """
+    print("\n" + "=" * 70)
+    print("Test Summary")
+    print("=" * 70)
+
+    # Count tests by type
+    test_types: dict[str, int] = {}
+    test_subtypes: dict[str, int] = {}
+
+    for test in tests.items:
+        test_types[test.type] = test_types.get(test.type, 0) + 1
+        test_subtypes[test.subtype] = test_subtypes.get(test.subtype, 0) + 1
+
+    print("\nšŸ“Š Tests by Type:")
+    for test_type, count in test_types.items():
+        print(f"   {test_type}: {count}")
+
+    print("\nšŸ” Tests by Subtype:")
+    for subtype, count in test_subtypes.items():
+        print(f"   {subtype}: {count}")
+
+
+def main() -> None:
+    """Main function demonstrating LLM project creation with tests.
+
+    This function demonstrates:
+    - Creating an LLM project
+    - Adding various types of tests suitable for LLM monitoring
+    - Listing and summarizing created tests
+    """
+    # Initialize the Openlayer client
+    client = Openlayer(
+        api_key=os.environ.get("OPENLAYER_API_KEY")
+    )
+
+    print("=" * 70)
+    print("Openlayer: Create LLM Project with Tests")
+    print("=" * 70)
+
+    try:
+        # Step 1: Create LLM project
+        project = create_llm_project(client)
+
+        # Step 2: Create various tests for the project
+        print("\n" + "=" * 70)
+        print("Step 2: Creating Tests")
+        print("=" * 70)
+
+        # Create LLM-specific rubric test
+        create_llm_rubric_test(client, project.id)
+
+        # Create character length test
+        create_character_length_test(client, project.id)
+
+        # Create PII detection test
+        create_pii_detection_test(client, project.id)
+
+        # Create sentence length test
+        create_sentence_length_test(client, project.id)
+
+        # Create row count test (monitoring mode)
+        create_row_count_test(client, project.id, monitoring=True)
+
+        # Step 3: List all tests
+        tests = list_project_tests(client, project.id)
+
+        # Step 4: Display summary
+        display_test_summary(tests)
+
+        print("\n" + "=" * 70)
+        print("āœ… Successfully created LLM project with tests!")
+        print("=" * 70)
+        print(f"\nšŸ”— View your project at: {project.links.app}")
+
+    except Exception as e:
+        print(f"\nāŒ Error: {e}")
+        raise
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/rest-api/create_project.py b/examples/rest-api/create_project.py
new file mode 100644
index 00000000..2c65ab07
--- /dev/null
+++ b/examples/rest-api/create_project.py
@@ -0,0 +1,259 @@
+"""Example demonstrating how to create and list projects using the Openlayer SDK.
+
+This script shows:
+- Creating projects with different task types
+- Adding optional descriptions
+- Listing all projects
+- Filtering projects by name and task type
+- Handling API responses
+
+Requirements:
+    - Set OPENLAYER_API_KEY environment variable
+"""
+
+import os
+from typing import Optional
+
+from openlayer import Openlayer
+from openlayer.types.project_create_response import ProjectCreateResponse
+from openlayer.types.project_list_response import ProjectListResponse
+
+
+def create_llm_project(client: Openlayer, name: str, description: Optional[str] = None) -> ProjectCreateResponse:
+    """Create an LLM-based project.
+
+    Args:
+        client: Initialized Openlayer client.
+        name: Name for the project.
+        description: Optional description for the project.
+
+    Returns:
+        ProjectCreateResponse containing project details.
+    """
+    print(f"\nšŸ“ Creating LLM project: {name}")
+    project = client.projects.create(
+        name=name,
+        task_type="llm-base",
+        description=description,
+    )
+    print(f"āœ… Created project with ID: {project.id}")
+    print(f"   App link: {project.links.app}")
+    return project
+
+
+def create_tabular_classification_project(
+    client: Openlayer, name: str, description: Optional[str] = None
+) -> ProjectCreateResponse:
+    """Create a tabular classification project.
+
+    Args:
+        client: Initialized Openlayer client.
+        name: Name for the project.
+        description: Optional description for the project.
+
+    Returns:
+        ProjectCreateResponse containing project details.
+    """
+    print(f"\nšŸ“Š Creating tabular classification project: {name}")
+    project = client.projects.create(
+        name=name,
+        task_type="tabular-classification",
+        description=description,
+    )
+    print(f"āœ… Created project with ID: {project.id}")
+    print(f"   App link: {project.links.app}")
+    return project
+
+
+def create_tabular_regression_project(
+    client: Openlayer, name: str, description: Optional[str] = None
+) -> ProjectCreateResponse:
+    """Create a tabular regression project.
+
+    Args:
+        client: Initialized Openlayer client.
+        name: Name for the project.
+        description: Optional description for the project.
+
+    Returns:
+        ProjectCreateResponse containing project details.
+    """
+    print(f"\nšŸ“ˆ Creating tabular regression project: {name}")
+    project = client.projects.create(
+        name=name,
+        task_type="tabular-regression",
+        description=description,
+    )
+    print(f"āœ… Created project with ID: {project.id}")
+    print(f"   App link: {project.links.app}")
+    return project
+
+
+def create_text_classification_project(
+    client: Openlayer, name: str, description: Optional[str] = None
+) -> ProjectCreateResponse:
+    """Create a text classification project.
+
+    Args:
+        client: Initialized Openlayer client.
+        name: Name for the project.
+        description: Optional description for the project.
+
+    Returns:
+        ProjectCreateResponse containing project details.
+    """
+    print(f"\nšŸ“„ Creating text classification project: {name}")
+    project = client.projects.create(
+        name=name,
+        task_type="text-classification",
+        description=description,
+    )
+    print(f"āœ… Created project with ID: {project.id}")
+    print(f"   App link: {project.links.app}")
+    return project
+
+
+def list_all_projects(client: Openlayer) -> ProjectListResponse:
+    """List all projects in the workspace.
+
+    Args:
+        client: Initialized Openlayer client.
+
+    Returns:
+        ProjectListResponse containing list of projects.
+    """
+    print("\nšŸ“‹ Listing all projects...")
+    projects = client.projects.list()
+    print(f"   Found {len(projects.items)} project(s)")
+    for project in projects.items:
+        print(f"   - {project.name} ({project.task_type}) - ID: {project.id}")
+    return projects
+
+
+def list_projects_by_name(client: Openlayer, name: str) -> ProjectListResponse:
+    """List projects filtered by name.
+
+    Args:
+        client: Initialized Openlayer client.
+        name: Name filter for projects.
+
+    Returns:
+        ProjectListResponse containing filtered projects.
+    """
+    print(f"\nšŸ” Listing projects with name containing: {name}")
+    projects = client.projects.list(name=name)
+    print(f"   Found {len(projects.items)} project(s)")
+    for project in projects.items:
+        print(f"   - {project.name} ({project.task_type}) - ID: {project.id}")
+    return projects
+
+
+def list_projects_by_task_type(client: Openlayer, task_type: str) -> ProjectListResponse:
+    """List projects filtered by task type.
+
+    Args:
+        client: Initialized Openlayer client.
+        task_type: Task type filter (e.g., 'llm-base', 'tabular-classification').
+
+    Returns:
+        ProjectListResponse containing filtered projects.
+    """
+    print(f"\nšŸŽÆ Listing projects with task type: {task_type}")
+    projects = client.projects.list(task_type=task_type)  # type: ignore
+    print(f"   Found {len(projects.items)} project(s)")
+    for project in projects.items:
+        print(f"   - {project.name} ({project.task_type}) - ID: {project.id}")
+    return projects
+
+
+def display_project_details(project: ProjectCreateResponse) -> None:
+    """Display detailed information about a project.
+
+    Args:
+        project: Project response object.
+    """
+    print("\nšŸ“‹ Project Details:")
+    print(f"   ID: {project.id}")
+    print(f"   Name: {project.name}")
+    print(f"   Task Type: {project.task_type}")
+    print(f"   Description: {project.description or 'N/A'}")
+    print(f"   Created: {project.date_created}")
+    print(f"   Updated: {project.date_updated}")
+    print(f"   Workspace ID: {project.workspace_id}")
+    print(f"   Inference Pipelines: {project.inference_pipeline_count}")
+    print(f"   Total Tests: {project.goal_count}")
+    print(f"   Development Tests: {project.development_goal_count}")
+    print(f"   Monitoring Tests: {project.monitoring_goal_count}")
+    print(f"   Versions: {project.version_count}")
+    print(f"   App Link: {project.links.app}")
+
+
+def main() -> None:
+    """Main function demonstrating project creation and management.
+
+    This function demonstrates:
+    - Creating projects with all supported task types
+    - Using optional descriptions
+    - Listing projects with various filters
+    - Displaying project details
+    """
+    # Initialize the Openlayer client
+    client = Openlayer(
+        api_key=os.environ.get("OPENLAYER_API_KEY"),
+    )
+
+    print("=" * 70)
+    print("Openlayer Project Creation Examples")
+    print("=" * 70)
+
+    try:
+        # Example 1: Create an LLM project with description
+        llm_project = create_llm_project(
+            client,
+            name="Customer Support Chatbot",
+            description="LLM project for monitoring customer support chatbot interactions",
+        )
+        display_project_details(llm_project)
+
+        # Example 2: Create a tabular classification project
+        classification_project = create_tabular_classification_project(
+            client,
+            name="Churn Prediction Model",
+            description="Predict customer churn based on usage patterns",
+        )
+
+        # Example 3: Create a tabular regression project
+        regression_project = create_tabular_regression_project(
+            client,
+            name="Revenue Forecasting",
+            description="Predict monthly revenue based on historical data",
+        )
+
+        # Example 4: Create a text classification project
+        text_project = create_text_classification_project(
+            client,
+            name="Sentiment Analysis",
+            description="Classify customer feedback sentiment",
+        )
+
+        # Example 5: List all projects
+        list_all_projects(client)
+
+        # Example 6: Filter projects by name
+        list_projects_by_name(client, name="Customer")
+
+        # Example 7: Filter projects by task type
+        list_projects_by_task_type(client, task_type="llm-base")
+
+        print("\n" + "=" * 70)
+        print("āœ… All examples completed successfully!")
+        print("=" * 70)
+
+    except Exception as e:
+        print(f"\nāŒ Error: {e}")
+        raise
+
+
+if __name__ == "__main__":
+    main()