diff --git a/img/monitor-dark.png b/img/monitor-dark.png
deleted file mode 100644
index e776c80..0000000
Binary files a/img/monitor-dark.png and /dev/null differ
diff --git a/img/monitor-light.png b/img/monitor-light.png
deleted file mode 100644
index 713cb39..0000000
Binary files a/img/monitor-light.png and /dev/null differ
diff --git a/img/monitor/monitor-filter-dark.png b/img/monitor/monitor-filter-dark.png
new file mode 100644
index 0000000..f14c7be
Binary files /dev/null and b/img/monitor/monitor-filter-dark.png differ
diff --git a/img/monitor/monitor-filter-light.png b/img/monitor/monitor-filter-light.png
new file mode 100644
index 0000000..6b03fb8
Binary files /dev/null and b/img/monitor/monitor-filter-light.png differ
diff --git a/img/monitor/monitor-filter-options-dark.png b/img/monitor/monitor-filter-options-dark.png
new file mode 100644
index 0000000..0fbb638
Binary files /dev/null and b/img/monitor/monitor-filter-options-dark.png differ
diff --git a/img/monitor/monitor-filter-options-light.png b/img/monitor/monitor-filter-options-light.png
new file mode 100644
index 0000000..19d44f6
Binary files /dev/null and b/img/monitor/monitor-filter-options-light.png differ
diff --git a/img/monitor/monitor-json-dark.png b/img/monitor/monitor-json-dark.png
new file mode 100644
index 0000000..ec88218
Binary files /dev/null and b/img/monitor/monitor-json-dark.png differ
diff --git a/img/monitor/monitor-json-light.png b/img/monitor/monitor-json-light.png
new file mode 100644
index 0000000..757d84a
Binary files /dev/null and b/img/monitor/monitor-json-light.png differ
diff --git a/img/monitor/monitor-list-dark.png b/img/monitor/monitor-list-dark.png
new file mode 100644
index 0000000..5247b45
Binary files /dev/null and b/img/monitor/monitor-list-dark.png differ
diff --git a/img/monitor/monitor-list-light.png b/img/monitor/monitor-list-light.png
new file mode 100644
index 0000000..e2d1b3b
Binary files /dev/null and b/img/monitor/monitor-list-light.png differ
diff --git a/img/monitor/monitor-page-buckets-dark.png b/img/monitor/monitor-page-buckets-dark.png
new file mode 100644
index 0000000..1f1759c
Binary files /dev/null and b/img/monitor/monitor-page-buckets-dark.png differ
diff --git a/img/monitor/monitor-page-buckets-light.png b/img/monitor/monitor-page-buckets-light.png
new file mode 100644
index 0000000..b8cfb85
Binary files /dev/null and b/img/monitor/monitor-page-buckets-light.png differ
diff --git a/img/monitor/monitor-page-line-dark.png b/img/monitor/monitor-page-line-dark.png
new file mode 100644
index 0000000..c680618
Binary files /dev/null and b/img/monitor/monitor-page-line-dark.png differ
diff --git a/img/monitor/monitor-page-line-light.png b/img/monitor/monitor-page-line-light.png
new file mode 100644
index 0000000..fd72c48
Binary files /dev/null and b/img/monitor/monitor-page-line-light.png differ
diff --git a/img/monitor/monitor-regex-dark.png b/img/monitor/monitor-regex-dark.png
new file mode 100644
index 0000000..fcceb86
Binary files /dev/null and b/img/monitor/monitor-regex-dark.png differ
diff --git a/img/monitor/monitor-regex-light.png b/img/monitor/monitor-regex-light.png
new file mode 100644
index 0000000..c05e28c
Binary files /dev/null and b/img/monitor/monitor-regex-light.png differ
diff --git a/img/monitor/monitor-settings-dark.png b/img/monitor/monitor-settings-dark.png
new file mode 100644
index 0000000..d8c93cf
Binary files /dev/null and b/img/monitor/monitor-settings-dark.png differ
diff --git a/img/monitor/monitor-settings-light.png b/img/monitor/monitor-settings-light.png
new file mode 100644
index 0000000..302065f
Binary files /dev/null and b/img/monitor/monitor-settings-light.png differ
diff --git a/img/trace-dark.png b/img/trace/trace-dark.png
similarity index 100%
rename from img/trace-dark.png
rename to img/trace/trace-dark.png
diff --git a/img/trace-light.png b/img/trace/trace-light.png
similarity index 100%
rename from img/trace-light.png
rename to img/trace/trace-light.png
diff --git a/mint.json b/mint.json
index 8c8e528..7fc6288 100644
--- a/mint.json
+++ b/mint.json
@@ -144,12 +144,8 @@
"pages": ["hub/getting-started", "hub/configuration"]
},
{
- "group": "Monitoring",
- "pages": ["monitoring/introduction"]
- },
- {
- "group": "Prompt Management",
- "pages": ["prompts/quick-start", "prompts/registry", "prompts/sdk-usage"]
+ "group": "Datasets",
+ "pages": ["datasets/quick-start", "datasets/sdk-usage"]
},
{
"group": "Playgrounds",
@@ -165,10 +161,6 @@
}
]
},
- {
- "group": "Datasets",
- "pages": ["datasets/quick-start", "datasets/sdk-usage"]
- },
{
"group": "Evaluators",
"pages": ["evaluators/intro", "evaluators/custom-evaluator", "evaluators/made-by-traceloop"]
@@ -177,6 +169,14 @@
"group": "Experiments",
"pages": ["experiments/introduction", "experiments/result-overview", "experiments/running-from-code"]
},
+ {
+ "group": "Monitoring",
+ "pages": ["monitoring/introduction", "monitoring/defining-monitors", "monitoring/using-monitors"]
+ },
+ {
+ "group": "Prompt Management",
+ "pages": ["prompts/quick-start", "prompts/registry", "prompts/sdk-usage"]
+ },
{
"group": "Integrations",
"pages": ["integrations/posthog"]
diff --git a/monitoring/defining-monitors.mdx b/monitoring/defining-monitors.mdx
new file mode 100644
index 0000000..9950c58
--- /dev/null
+++ b/monitoring/defining-monitors.mdx
@@ -0,0 +1,104 @@
+---
+title: "Defining Monitors"
+description: "Learn how to create and configure monitors to evaluate your LLM outputs"
+---
+
+Monitors in Traceloop allow you to continuously evaluate your LLM outputs in real time. This guide walks you through the process of creating and configuring monitors for your specific use cases.
+
+## Creating a Monitor
+
+To create a monitor, you need to complete these steps:
+
+1. Connect the SDK to your system and add decorators to your flow. See [OpenLLMetry](/openllmetry/introduction) for setup instructions; a minimal Python sketch follows this list.
+2. Select the evaluation logic that will run on matching spans. You can define your own custom evaluators or use the ones pre-built by Traceloop. See [Evaluators](/evaluators/intro) for more details.
+3. Set criteria that determine which spans the monitor will evaluate.
+4. Configure how the monitor operates, including sampling rates and other advanced options.
+
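+A minimal setup sketch for a Python service is shown below. The app name, workflow name, and model are illustrative placeholders, and the API key is assumed to be set via the `TRACELOOP_API_KEY` environment variable.
+
+```python
+from openai import OpenAI
+from traceloop.sdk import Traceloop
+from traceloop.sdk.decorators import workflow
+
+# Initialize OpenLLMetry so spans are sent to Traceloop.
+Traceloop.init(app_name="joke_service")
+
+client = OpenAI()
+
+# Annotated workflows become named spans you can filter on when defining a monitor.
+@workflow(name="joke_generation")
+def generate_joke(topic: str) -> str:
+    completion = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": f"Tell me a joke about {topic}"}],
+    )
+    return completion.choices[0].message.content
+```
+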
+### Basic Monitor Setup
+
+Navigate to the Monitors page and click the **New** button to open the Evaluator Library. Choose the evaluator you want to run in your monitor.
+Next, configure which spans will be monitored.
+
+## Span Filtering
+
+The span filtering modal shows the actual spans from your system, letting you see how your chosen filters apply to real data.
+Add filters by clicking the **+** button.
+
+
+
+
+
+
+### Filter Options
+
+- **Environment**: Filter by a specific environment
+- **Workflow Name**: Filter by the workflow name defined in your system
+- **Service Name**: Target spans from specific services or applications
+- **AI Data**: Filter based on LLM-specific attributes like model name, token usage, streaming status, and other AI-related metadata
+- **Attributes**: Filter based on span attributes
+
+
+
+
+
+
+## Monitor Settings
+
+### Map Input
+
+You need to map the appropriate span fields to the evaluator’s input schema.
+Browse through the available span field options; once you select a field, the real data is displayed immediately so you can see how it maps to the input.
+
+
+
+
+
+
+When the field data is not plain text, you can use JSON key mapping or a regex to extract the specific content you need.
+
+For example, suppose your content is an array and you want to extract the "text" field from its first object:
+
+```json
+[{"type":"text","text":"explain who are you and what can you do in one sentence"}]
+```
+
+You can use a JSON key mapping like `0.text` to extract just the text content. The mapping is applied to the Preview table, allowing you to see the extracted result in real time.
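+
+In other words, the `0.text` key path indexes into the parsed JSON and reads the `text` field, as this small Python sketch illustrates (using the example content above):
+
+```python
+import json
+
+# Example span content from above.
+content = '[{"type":"text","text":"explain who are you and what can you do in one sentence"}]'
+
+# The key path "0.text" means: take element 0 of the array, then read its "text" field.
+extracted = json.loads(content)[0]["text"]
+print(extracted)  # explain who are you and what can you do in one sentence
+```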
+
+
+
+
+
+
+You can use a regex like `text":"(.+?)"` to extract just the text content. The regex is applied to the Preview table, allowing you to see the extracted result in real time.
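+
+The sketch below shows, in Python, what this non-greedy capture pulls out of the same example content:
+
+```python
+import re
+
+content = '[{"type":"text","text":"explain who are you and what can you do in one sentence"}]'
+
+# Non-greedy capture of everything between `text":"` and the next double quote.
+match = re.search(r'text":"(.+?)"', content)
+if match:
+    print(match.group(1))  # explain who are you and what can you do in one sentence
+```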
+
+
+
+
+
+
+### Advanced
+
+You can set a **Rate sample** to control the percentage of spans within the selected filter group that the monitor will run on.
\ No newline at end of file
diff --git a/monitoring/introduction.mdx b/monitoring/introduction.mdx
index 22c9d75..6857e1f 100644
--- a/monitoring/introduction.mdx
+++ b/monitoring/introduction.mdx
@@ -3,39 +3,23 @@ title: "Introduction"
description: "Detect hallucinations and regressions in the quality of your LLMs"
---
-One of the key features of Traceloop is the ability to monitor the quality of your LLM outputs. It helps you to detect hallucinations and regressions in the quality of your models and prompts.
+One of the key features of Traceloop is the ability to monitor the quality of your LLM outputs in **real time**. It helps you detect hallucinations and regressions in the quality of your models and prompts.
To start monitoring your LLM outputs, make sure you installed OpenLLMetry and configured it to send data to Traceloop. If you haven't done that yet, you can follow the instructions in the [Getting Started](/openllmetry/getting-started) guide.
Next, if you're not using a [supported LLM framework](/openllmetry/tracing/supported#frameworks), [make sure to annotate workflows and tasks](/openllmetry/tracing/annotations).
-You can then define any of the following [monitors](https://app.traceloop.com/monitors/prd) to track the quality of your LLM outputs.
-
-
+
+
-## Semantic Metrics
-- **QA Relevancy:** Asses the relevant of an answer generated by a model with respect to a question. This is especially useful when running RAG pipelines.
-- **Faithfulness:** Checks whether some generated content was inferred or deducted from a given context. Relevant for RAG pipelines, entity extraction, summarization, and many other text-related tasks.
-- **Text Quality:** Evaluates the overall readability and coherence of text.
-- **Grammar Correctness:** Checks for grammatical errors in generated texts.
-- **Redundancy Detection:** Identifies repetitive content.
-- **Focus Assessment:** Measures whether a given paragraph focuses on a single subject or "jumps" between multiple ones.
+## What is a Monitor?
-## Syntactic TextMetrics
+A monitor is an evaluator that runs in real time on a defined group of spans with specific characteristics. For every span that matches the group filter, the monitor runs the evaluator and logs the result. This allows you to continuously assess the quality and performance of your LLM outputs as they are generated in production.
-- **Text Length:** Checks if the length of the generated text is within a given range (constant or with respect to an input).
-- **Word Count:** Checks if the number of words in the generated text is within a given range.
+Monitors can use two types of evaluators:
-## Safety Metrics
+- **LLM-as-a-Judge**: uses a large language model to evaluate outputs based on semantic qualities. You can create custom evaluators with this method by writing prompts that capture your own criteria.
+- **Traceloop built-in evaluators**: deterministic evaluations for structural validation, safety checks, and syntactic analysis.
-- **PII Detection:** Identifies personally identifiable information in generated texts or input prompts.
-- **Secret Detection:** Identifies secrets and API keys in generated texts or input prompts.
-- **Toxicity Detection:** Identifies toxic content in generated texts or input prompts.
-
-## Structural Metrics
-
-- **Regex Validation**: Ensures that the output of a model matches a given regular expression.
-- **SQL Validation**: Ensures SQL queries are syntactically correct.
-- **JSON Schema Validation**: Ensures that the output of a model matches a given JSON schema.
-- **Code Validation**: Ensures that the output of a model is valid code in a given language.
+All monitors connect to our comprehensive [Evaluators](/evaluators/intro) library, allowing you to choose the right evaluation approach for your specific use case.
diff --git a/monitoring/using-monitors.mdx b/monitoring/using-monitors.mdx
new file mode 100644
index 0000000..773ad2f
--- /dev/null
+++ b/monitoring/using-monitors.mdx
@@ -0,0 +1,79 @@
+---
+title: "Using Monitors"
+description: "Learn how to view, analyze, and act on monitor results in your LLM applications"
+---
+
+Once you've created monitors, Traceloop continuously evaluates your LLM outputs and provides insights into their performance. This guide explains how to interpret and act on monitor results.
+
+## Monitor Dashboard
+
+The Monitor Dashboard provides an overview of all active monitors and their current status.
+It shows each monitor’s health, the number of times it has run, and the most recent execution time.
+
+
+
+
+
+
+
+## Viewing Monitor Results
+
+### Real-time Monitoring
+
+Monitor results are displayed in real time as your LLM applications generate new spans. You can view:
+
+- **Run Details**: The span value that was evaluated and its result
+- **Trend Analysis**: Performance over time
+- **Volume Metrics**: Number of evaluations performed
+- **Evaluator Output Rates**: Such as success rates for threshold-based evaluators
+
+### Monitor Results Page
+
+Click on any monitor to access its detailed results page. The monitor page provides comprehensive analytics and span-level details.
+
+#### Chart Visualizations
+
+The monitor page includes multiple chart views to help you analyze your data. You can switch between chart types using the selector in the top-right corner.
+
+**Line Chart View** - Shows evaluation trends over time:
+
+
+
+
+
+**Bar Chart View** - Displays evaluation results in time buckets:
+
+
+
+
+
+#### Filtering and Time Controls
+
+The top toolbar provides filtering options:
+- **Environment**: Filter by production, staging, etc.
+- **Time Range**: 24h, 7d, 14d, or custom ranges
+- **Metric**: Select which evaluator output property to measure
+- **Bucket Size**: 6h, Hourly, Daily, etc.
+- **Aggregation**: Choose average, median, sum, min, max, or count
+
+#### Matching Spans Table
+
+The bottom section shows all spans that matched your monitor's filter criteria:
+- **Timestamp**: When the evaluation occurred
+- **Input**: The actual content that was mapped to be evaluated
+- **Output**: The evaluation result/score
+- **Completed Runs**: Total evaluations that finished, whether successful or with errors
+- **Error Runs**: Failed evaluation attempts
+
+Each row includes a link icon to view the full span details in the trace explorer:
+
+
+
+
+
+
+For further information on tracing, refer to [OpenLLMetry](/openllmetry/introduction).
+
+
+Ready to set up an evaluator for your monitor? Learn more about creating and configuring evaluators in the [Evaluators](/evaluators/intro) section.
+
diff --git a/openllmetry/integrations/traceloop.mdx b/openllmetry/integrations/traceloop.mdx
index 34e1eeb..05a6140 100644
--- a/openllmetry/integrations/traceloop.mdx
+++ b/openllmetry/integrations/traceloop.mdx
@@ -4,8 +4,8 @@ sidebarTitle: "Traceloop"
---
-
-
+
+
[Traceloop](https://app.traceloop.com) is a platform for observability and evaluation of LLM outputs.
diff --git a/openllmetry/introduction.mdx b/openllmetry/introduction.mdx
index 6b90e29..435e0e7 100644
--- a/openllmetry/introduction.mdx
+++ b/openllmetry/introduction.mdx
@@ -3,8 +3,8 @@ title: "What is OpenLLMetry?"
---
-
-
+
+
OpenLLMetry is an open source project that allows you to easily start monitoring and debugging the execution of your LLM app.