Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cmd/thv-operator/api/v1alpha1/mcpserver_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,13 @@ type OpenTelemetryConfig struct {
// +optional
Insecure bool `json:"insecure,omitempty"`

// UsageAnalyticsEnabled controls whether anonymous usage analytics are sent to Stacklok.
// When true, anonymous tool call metrics are sent to Stacklok's collector for product analytics.
// When false, no usage analytics are collected. This setting is independent of the Endpoint above.
// +kubebuilder:default=true
// +optional
UsageAnalyticsEnabled *bool `json:"usageAnalyticsEnabled,omitempty"`

// Metrics defines OpenTelemetry metrics-specific configuration
// +optional
Metrics *OpenTelemetryMetricsConfig `json:"metrics,omitempty"`
Expand Down
126 changes: 126 additions & 0 deletions cmd/thv-operator/controllers/mcpserver_analytics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package controllers

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1"
"github.com/stacklok/toolhive/pkg/registry"
"github.com/stacklok/toolhive/pkg/runner"
)

// TestAddTelemetryConfigOptions_UsageAnalytics checks how the UsageAnalyticsEnabled
// value of the CRD telemetry spec is reflected on the run configuration built from
// the options that addTelemetryConfigOptions produces.
func TestAddTelemetryConfigOptions_UsageAnalytics(t *testing.T) {
	t.Parallel()

	// otelEnabled returns a telemetry spec with OpenTelemetry switched on and the
	// given usage analytics flag; passing nil leaves the flag unset.
	otelEnabled := func(analytics *bool) *v1alpha1.TelemetryConfig {
		return &v1alpha1.TelemetryConfig{
			OpenTelemetry: &v1alpha1.OpenTelemetryConfig{
				Enabled:               true,
				Endpoint:              "otel-collector:4317",
				UsageAnalyticsEnabled: analytics,
			},
		}
	}

	cases := []struct {
		name              string
		telemetryConfig   *v1alpha1.TelemetryConfig
		expectedAnalytics *bool // nil means no explicit expectation
	}{
		{
			name:              "usage analytics explicitly enabled",
			telemetryConfig:   otelEnabled(boolPtr(true)),
			expectedAnalytics: boolPtr(true),
		},
		{
			name:              "usage analytics explicitly disabled",
			telemetryConfig:   otelEnabled(boolPtr(false)),
			expectedAnalytics: boolPtr(false),
		},
		{
			name:              "usage analytics not specified - uses default",
			telemetryConfig:   otelEnabled(nil),
			expectedAnalytics: nil,
		},
		{
			name:              "no telemetry config",
			telemetryConfig:   nil,
			expectedAnalytics: nil,
		},
		{
			name: "telemetry disabled",
			telemetryConfig: &v1alpha1.TelemetryConfig{
				OpenTelemetry: &v1alpha1.OpenTelemetryConfig{
					Enabled: false,
				},
			},
			expectedAnalytics: nil,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			var opts []runner.RunConfigBuilderOption
			addTelemetryConfigOptions(&opts, tc.telemetryConfig, "test-server")

			// Build a run config from the collected options, using empty image
			// metadata, no env vars and the detached env var validator.
			runCfg, err := runner.NewRunConfigBuilder(
				context.Background(),
				&registry.ImageMetadata{},
				map[string]string{},
				&runner.DetachedEnvVarValidator{},
				opts...,
			)
			require.NoError(t, err)

			// Explicit expectation: the telemetry config must exist and carry the value.
			if want := tc.expectedAnalytics; want != nil {
				require.NotNil(t, runCfg.TelemetryConfig, "TelemetryConfig should be created when telemetry is enabled")
				assert.Equal(t, *want, runCfg.TelemetryConfig.UsageAnalyticsEnabled)
				return
			}

			// No explicit expectation: the default is only checked when OpenTelemetry
			// is enabled in the spec and a telemetry config was actually produced.
			spec := tc.telemetryConfig
			otelOn := spec != nil && spec.OpenTelemetry != nil && spec.OpenTelemetry.Enabled
			if !otelOn || runCfg.TelemetryConfig == nil {
				return
			}
			assert.True(t, runCfg.TelemetryConfig.UsageAnalyticsEnabled, "Should use default value when not explicitly set")
		})
	}
}

// TestMCPServerSpec_UsageAnalyticsField verifies that UsageAnalyticsEnabled can be
// set on an MCPServer spec and read back through the nested telemetry configuration.
func TestMCPServerSpec_UsageAnalyticsField(t *testing.T) {
	t.Parallel()

	mcpServer := &v1alpha1.MCPServer{
		Spec: v1alpha1.MCPServerSpec{
			Image: "test-image:latest",
			Telemetry: &v1alpha1.TelemetryConfig{
				OpenTelemetry: &v1alpha1.OpenTelemetryConfig{
					Enabled:               true,
					Endpoint:              "otel-collector:4317",
					UsageAnalyticsEnabled: boolPtr(false),
				},
			},
		},
	}

	// Use require for the nil checks: every following line dereferences the pointer
	// just checked, so the test must stop at the first nil rather than continue
	// into a nil-pointer panic.
	require.NotNil(t, mcpServer.Spec.Telemetry)
	require.NotNil(t, mcpServer.Spec.Telemetry.OpenTelemetry)
	require.NotNil(t, mcpServer.Spec.Telemetry.OpenTelemetry.UsageAnalyticsEnabled)
	assert.False(t, *mcpServer.Spec.Telemetry.OpenTelemetry.UsageAnalyticsEnabled)
}
9 changes: 9 additions & 0 deletions cmd/thv-operator/controllers/mcpserver_runconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ func addTelemetryConfigOptions(
var otelHeaders []string
var otelInsecure bool
var otelEnvironmentVariables []string
var usageAnalyticsEnabled *bool

// Process OpenTelemetry configuration
if telemetryConfig.OpenTelemetry != nil && telemetryConfig.OpenTelemetry.Enabled {
Expand Down Expand Up @@ -616,6 +617,9 @@ func addTelemetryConfigOptions(
if otel.Metrics != nil {
otelMetricsEnabled = otel.Metrics.Enabled
}

// Handle usage analytics configuration
usageAnalyticsEnabled = otel.UsageAnalyticsEnabled
}

// Process Prometheus configuration
Expand All @@ -635,6 +639,11 @@ func addTelemetryConfigOptions(
otelInsecure,
otelEnvironmentVariables,
))

// Add usage analytics configuration if explicitly set in CRD
if usageAnalyticsEnabled != nil {
*options = append(*options, runner.WithUsageAnalyticsEnabled(*usageAnalyticsEnabled))
}
}

// addAuthzConfigOptions adds authorization configuration options to the builder options
Expand Down
14 changes: 11 additions & 3 deletions cmd/thv/app/run_flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ func setupTelemetryConfiguration(cmd *cobra.Command, runFlags *RunFlags) *teleme

return createTelemetryConfig(finalOtelEndpoint, finalOtelEnablePrometheusMetricsPath,
runFlags.OtelServiceName, runFlags.OtelTracingEnabled, runFlags.OtelMetricsEnabled, finalOtelSamplingRate,
runFlags.OtelHeaders, finalOtelInsecure, finalOtelEnvironmentVariables)
runFlags.OtelHeaders, finalOtelInsecure, finalOtelEnvironmentVariables, config)
}

// setupRuntimeAndValidation creates container runtime and selects environment variable validator
Expand Down Expand Up @@ -634,7 +634,7 @@ func createOIDCConfig(oidcIssuer, oidcAudience, oidcJwksURL, oidcIntrospectionUR
// createTelemetryConfig creates a telemetry configuration if any telemetry parameters are provided
func createTelemetryConfig(otelEndpoint string, otelEnablePrometheusMetricsPath bool,
otelServiceName string, otelTracingEnabled bool, otelMetricsEnabled bool, otelSamplingRate float64, otelHeaders []string,
otelInsecure bool, otelEnvironmentVariables []string) *telemetry.Config {
otelInsecure bool, otelEnvironmentVariables []string, _ *cfg.Config) *telemetry.Config {
if otelEndpoint == "" && !otelEnablePrometheusMetricsPath {
return nil
}
Expand Down Expand Up @@ -667,16 +667,24 @@ func createTelemetryConfig(otelEndpoint string, otelEnablePrometheusMetricsPath
}
}

defaultConfig := telemetry.DefaultConfig()

// Usage analytics defaults to enabled unless explicitly disabled in config
usageAnalyticsEnabled := defaultConfig.UsageAnalyticsEnabled
analyticsEndpoint := defaultConfig.AnalyticsEndpoint

return &telemetry.Config{
Endpoint: otelEndpoint,
ServiceName: serviceName,
ServiceVersion: telemetry.DefaultConfig().ServiceVersion,
ServiceVersion: defaultConfig.ServiceVersion,
TracingEnabled: otelTracingEnabled,
MetricsEnabled: otelMetricsEnabled,
SamplingRate: otelSamplingRate,
Headers: headers,
Insecure: otelInsecure,
EnablePrometheusMetricsPath: otelEnablePrometheusMetricsPath,
EnvironmentVariables: processedEnvVars,
UsageAnalyticsEnabled: usageAnalyticsEnabled,
AnalyticsEndpoint: analyticsEndpoint,
}
}
31 changes: 31 additions & 0 deletions docs/observability.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ operations through:
debugging
4. **Protocol-aware instrumentation**: MCP-specific insights beyond generic HTTP
metrics
5. **Privacy-first usage analytics**: Anonymous tool call metrics for product
improvement (can be disabled)

See [the original design document](./proposals/otel-integration-proposal.md) for
more details on the design and goals of this observability architecture.
Expand Down Expand Up @@ -84,3 +86,32 @@ The telemetry middleware:

This provides end-to-end visibility across the entire request lifecycle while
maintaining the modular architecture of ToolHive's middleware system.

## Usage Analytics

ToolHive includes privacy-first usage analytics that collect anonymous tool call
metrics for product improvement. This feature uses a dual-endpoint architecture
to ensure user telemetry remains unaffected.

**Key Features:**
- **Anonymous**: Only tool call counts with success/error status
- **Privacy-first**: No server names, tool names, or sensitive data collected
- **Dual-endpoint**: Separate from user's telemetry configuration
- **Opt-out**: Can be disabled via configuration
- **Default enabled**: Helps improve ToolHive for all users

**Architecture:**
```mermaid
graph TD
A[Telemetry Middleware] --> B[User OTLP Endpoint]
A --> C[Analytics Collector]

B --> D[User's Observability Platform]
C --> E[Stacklok Analytics]

A --> F[Anonymous Metrics Only]
F --> C
```

For detailed information on usage analytics, including privacy policy and
configuration options, see [Usage Analytics Documentation](./usage-analytics.md).
141 changes: 141 additions & 0 deletions docs/usage-analytics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Usage Analytics

ToolHive includes privacy-first usage analytics to help improve the product while protecting user privacy. This document explains what data is collected, how it's used, and how to control or disable this feature.

## Overview

ToolHive collects anonymous usage analytics to understand how the MCP servers are being used in aggregate. This helps the Stacklok team prioritize development efforts and identify potential issues.

**Key Privacy Principles:**
- **Anonymous**: No personally identifiable information is collected
- **Minimal**: Only essential metrics are collected
- **Transparent**: This documentation explains exactly what is collected
- **Opt-out**: Usage analytics can be easily disabled
- **Dual-endpoint**: User's own telemetry configuration is preserved and unaffected

## What Data is Collected

Usage analytics collect only the following anonymous metrics:

### Tool Call Counts
- **Metric**: `toolhive_usage_tool_calls_total`
- **Data**: Count of MCP tool calls with success/error status
- **Attributes**: Only `status` (success or error)

### What is NOT Collected
- Server names or identifiers
- Tool names or tool types
- Command arguments or parameters
- File paths or content
- User identifiers or client information
- Request/response payloads
- Environment variables
- Host information
- IP addresses

## How Analytics Work

ToolHive uses a dual-endpoint telemetry architecture:

1. **User Telemetry**: Your configured OTEL endpoint (if any) receives full telemetry with all details for your observability needs
2. **Usage Analytics**: Stacklok's analytics collector receives only anonymous tool call counts

This ensures that enabling usage analytics doesn't interfere with your existing observability setup.

## Configuration

### Default Behavior
- **Usage analytics are enabled by default**
- Anonymous metrics are sent to `https://analytics.toolhive.stacklok.dev/v1/traces`
- Your existing telemetry configuration remains unaffected

### CLI Configuration

#### Disable Usage Analytics
To disable usage analytics, update your configuration file (`~/.toolhive/config.yaml`):

```yaml
otel:
  usage-analytics-enabled: false
```

#### Check Current Setting
```bash
# View current configuration
thv config otel get usage-analytics-enabled
```

### Kubernetes Operator Configuration

For the ToolHive operator, you can disable usage analytics per MCPServer:

```yaml
apiVersion: toolhive.stacklok.dev/v1alpha1
kind: MCPServer
metadata:
  name: example-server
spec:
  image: example/mcp-server:latest
  telemetry:
    openTelemetry:
      enabled: true
      endpoint: "otel-collector:4317" # Your own telemetry
      usageAnalyticsEnabled: false # Disable analytics for this server
```

## Data Retention and Usage

- **Retention**: Analytics data is retained for up to 2 years for trend analysis
- **Purpose**: Data is used solely for product development and improvement
- **Access**: Only authorized Stacklok personnel have access to aggregated analytics data
- **Sharing**: Analytics data is never shared with third parties

## Privacy Compliance

ToolHive's usage analytics are designed to comply with privacy regulations:
- **GDPR**: No personal data is collected; anonymous usage metrics fall outside GDPR scope
- **CCPA**: No personal information is collected or sold
- **SOC2**: Analytics infrastructure follows Stacklok's security and privacy controls

## Technical Implementation

### Architecture
- Separate OTLP endpoint for analytics (`https://analytics.toolhive.stacklok.dev/v1/traces`)
- Dedicated `AnalyticsMeterProvider` that only records tool call counts
- Middleware filters out all sensitive information before recording analytics metrics

### Security
- HTTPS/TLS encryption for all analytics data transmission
- No authentication headers needed (anonymous metrics)
- Separate from user's telemetry configuration to prevent cross-contamination

## Frequently Asked Questions

### Q: Can I see what analytics data is being sent?
A: Yes, you can enable debug logging to see the minimal metrics being sent. The only metric is `toolhive_usage_tool_calls_total` with a `status` attribute.

### Q: Will this affect my existing telemetry?
A: No. Usage analytics use a completely separate telemetry pipeline. Your existing OTEL configuration for traces, metrics, and logs remains unchanged.

### Q: How do I know if analytics are enabled?
A: Check your configuration with `thv config otel get usage-analytics-enabled` or look for `usage-analytics-enabled: true` in your config file.

### Q: What happens if I disable analytics?
A: When disabled, no usage analytics are collected or sent. Only your regular telemetry (if configured) continues to work.

### Q: Can I use custom analytics endpoints?
A: No. The analytics endpoint is fixed and is not user-configurable. This ensures data goes to Stacklok's analytics infrastructure for product improvement.

## Support

If you have questions about usage analytics or privacy concerns:
- **Documentation**: https://docs.toolhive.dev
- **Issues**: https://github.com/stacklok/toolhive/issues
- **Email**: toolhive-support@stacklok.com

## Changes to This Policy

This documentation may be updated to reflect changes in the usage analytics system. Changes will be:
- Documented in the ToolHive release notes
- Committed to the repository with version control history
- Made available at https://docs.toolhive.dev/usage-analytics
Loading
Loading