#!/usr/bin/env bash
# otel-system-wide-java-extended.sh
# Install OpenTelemetry Java agent for host JVMs, provide docker-run wrapper,
# and patch Kubernetes controllers to inject agent into Java containers.
#
# Provenance: content of git patch 3c532e0f2208ece0b8524ffe6194b06e0ab3b0e0
# ("Added Java Auto-instrumentation script with tomcat support"), which adds
# scripts/auto-instrumentation/instrumentation-java-so-tomcat.sh.
#
# Must be run as root when performing system changes.
# shellcheck disable=SC2317
set -euo pipefail

# Defaults (override by exporting before running)
OTEL_DIR="${OTEL_DIR:-/usr/lib/opentelemetry}"
AGENT_NAME="${AGENT_NAME:-opentelemetry-javaagent.jar}"
AGENT_PATH="$OTEL_DIR/$AGENT_NAME"
# AGENT_URL="${AGENT_URL:-https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/latest/download/opentelemetry-javaagent.jar}"
AGENT_URL="${AGENT_URL:-https://github.com/middleware-labs/opentelemetry-java-instrumentation/releases/download/v1.8.1/middleware-javaagent.jar}"

# OpenTelemetry configuration (can be overridden by environment variables).
# Endpoint priority: OTEL_EXPORTER_OTLP_ENDPOINT > MW_TARGET > localhost:4317
OTEL_EXPORTER_OTLP_ENDPOINT="${OTEL_EXPORTER_OTLP_ENDPOINT:-${MW_TARGET:-http://localhost:4317}}"

# Header priority: OTEL_EXPORTER_OTLP_HEADERS > MW_API_KEY > none.
# No credential is baked into the script: when neither variable is provided the
# header stays empty and the operator is told so.
if [[ -z "${OTEL_EXPORTER_OTLP_HEADERS:-}" ]]; then
  if [[ -n "${MW_API_KEY:-}" ]]; then
    OTEL_EXPORTER_OTLP_HEADERS="authorization=${MW_API_KEY}"
  else
    OTEL_EXPORTER_OTLP_HEADERS=""
    echo "WARN: neither OTEL_EXPORTER_OTLP_HEADERS nor MW_API_KEY is set; exporting without an authorization header" >&2
  fi
fi
OTEL_SERVICE_NAME="${OTEL_SERVICE_NAME:-}"
OTEL_RESOURCE_ATTRIBUTES="${OTEL_RESOURCE_ATTRIBUTES:-}"
OTEL_TRACES_EXPORTER="${OTEL_TRACES_EXPORTER:-otlp}"
OTEL_METRICS_EXPORTER="${OTEL_METRICS_EXPORTER:-otlp}"
OTEL_LOGS_EXPORTER="${OTEL_LOGS_EXPORTER:-otlp}"

FORCE="${FORCE:-0}"
DRY_RUN="${DRY_RUN:-0}"                 # if 1, don't apply changes (for Kubernetes patches)
K8S_NAMESPACE="${K8S_NAMESPACE:-all}"   # or single namespace
DOCKER_WRAPPER_PATH="${DOCKER_WRAPPER_PATH:-/usr/local/bin/docker-run-otel}"
AUTO_UPDATE_SERVICES="${AUTO_UPDATE_SERVICES:-1}" # if 1, automatically update existing Java services

# Logging helpers.
# err  - print an error to stderr and terminate the script with status 1.
# info - print an informational message to stdout.
# warn - print a warning to stderr so it never mixes with data that functions
#        return on stdout.
err() { echo "ERROR: $*" >&2; exit 1; }
info() { echo "INFO: $*"; }
warn() { echo "WARN: $*" >&2; }

#######################################
# Function to detect Java services.
# Lists active systemd service units whose ExecStart contains "java".
# Outputs: one unit name per line on stdout; nothing when none are found, so
#          `mapfile -t` yields an empty array.
# Returns: 0 always.
#######################################
detect_java_services() {
  local unit exec_start
  while IFS= read -r unit; do
    [[ -n "$unit" ]] || continue
    # A unit may vanish between listing and inspection; treat that as "not Java"
    # instead of letting set -e abort the whole script.
    exec_start=$(systemctl show "$unit" --property=ExecStart --no-pager 2>/dev/null \
      | cut -d'=' -f2- | tr -d '"') || exec_start=""
    if [[ "$exec_start" == *java* ]]; then
      printf '%s\n' "$unit"
    fi
  done < <(systemctl list-units --type=service --state=active --no-pager --no-legend 2>/dev/null \
    | awk '{print $1}' | grep -E '\.service$' || true)
  return 0
}

#######################################
# Function to detect Java Docker containers.
# Lists running containers whose configured entrypoint or command contains
# "java".
# Outputs: one container name per line on stdout; nothing when none are found.
# Returns: 0 always.
#######################################
detect_java_containers() {
  local name launch
  while IFS= read -r name; do
    # Skip empty container names
    [[ -n "$name" ]] || continue
    # Look at both ENTRYPOINT and CMD: either may carry the java invocation.
    launch=$(docker inspect "$name" \
      --format '{{.Config.Entrypoint}} {{.Config.Cmd}}' 2>/dev/null) || continue
    if [[ "$launch" == *java* ]]; then
      printf '%s\n' "$name"
    fi
  done < <(docker ps --format '{{.Names}}' 2>/dev/null || true)
  return 0
}
# Print the value of environment variable $2 of process $1 (first match only).
# Prints nothing when /proc/<pid>/environ is unreadable or the variable is absent.
_tomcat_proc_env() {
  local pid="$1" name="$2"
  [[ -r "/proc/$pid/environ" ]] || return 0
  { tr '\0' '\n' <"/proc/$pid/environ" | sed -n "s/^${name}=//p" | head -n 1; } \
    2>/dev/null || true
}

# Print the value of the JVM system property $2 (given as -D<prop>=<value>)
# found in the space-joined command line $1; prints nothing when absent.
_tomcat_cmdline_prop() {
  local cmdline="$1" prop="$2"
  local re="-D${prop//./\\.}=([^[:space:]]*)"
  if [[ "$cmdline" =~ $re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  fi
  return 0
}

#######################################
# Function to detect Tomcat servers and applications.
# Scans running processes (and active systemd services) that look like Tomcat
# and lists the web applications deployed under their webapps directory.
# Outputs: one record per line on stdout, '|'-separated:
#            id|pid|catalina_home|catalina_base|app_name|app_path
#          where id is tomcat-<pid>-<app_name>. A Tomcat process without
#          non-default apps is reported once as tomcat-<pid>-server; a systemd
#          unit whose MainPID was not already seen as tomcat-<pid>-service.
#          Nothing is printed when no Tomcat is found.
# Returns: 0 always.
#######################################
detect_tomcat_apps() {
  local -a tomcat_apps=()
  local pid cmdline catalina_home catalina_base candidate webapps_dir
  local app app_name apps_for_pid

  # pgrep matches with an extended regex, so alternation is a plain '|'.
  while IFS= read -r pid; do
    [[ -n "$pid" && -r "/proc/$pid/cmdline" ]] || continue
    cmdline=$({ tr '\0' ' ' <"/proc/$pid/cmdline"; } 2>/dev/null) || cmdline=""
    [[ "$cmdline" =~ java.*(tomcat|catalina) ]] || continue

    # CATALINA_HOME: process environment, then -Dcatalina.home, then well-known
    # install locations (globs left unquoted on purpose so they expand).
    catalina_home=$(_tomcat_proc_env "$pid" CATALINA_HOME)
    if [[ -z "$catalina_home" ]]; then
      catalina_home=$(_tomcat_cmdline_prop "$cmdline" catalina.home)
    fi
    if [[ -z "$catalina_home" ]]; then
      for candidate in /opt/tomcat /usr/local/tomcat /var/lib/tomcat* /opt/apache-tomcat*; do
        if [[ -d "$candidate" ]]; then
          catalina_home="$candidate"
          break
        fi
      done
    fi

    # CATALINA_BASE: environment, then -Dcatalina.base, then CATALINA_HOME.
    catalina_base=$(_tomcat_proc_env "$pid" CATALINA_BASE)
    if [[ -z "$catalina_base" ]]; then
      catalina_base=$(_tomcat_cmdline_prop "$cmdline" catalina.base)
    fi
    if [[ -z "$catalina_base" ]]; then
      catalina_base="$catalina_home"
    fi

    webapps_dir=""
    if [[ -n "$catalina_base" && -d "$catalina_base" ]]; then
      webapps_dir="$catalina_base/webapps"
    elif [[ -n "$catalina_home" && -d "$catalina_home" ]]; then
      webapps_dir="$catalina_home/webapps"
    fi

    # Deployed applications are directories or WAR files, minus the apps that
    # ship with Tomcat itself.
    apps_for_pid=0
    if [[ -n "$webapps_dir" && -d "$webapps_dir" ]]; then
      for app in "$webapps_dir"/*; do
        [[ -d "$app" || "$app" == *.war ]] || continue
        app_name=${app##*/}
        case "$app_name" in
          ROOT | manager | host-manager | docs | examples) continue ;;
        esac
        tomcat_apps+=("tomcat-${pid}-${app_name}|$pid|$catalina_home|$catalina_base|$app_name|$app")
        apps_for_pid=$((apps_for_pid + 1))
      done
    fi

    # If no specific apps were found, report the Tomcat server itself.
    if ((apps_for_pid == 0)); then
      tomcat_apps+=("tomcat-${pid}-server|$pid|$catalina_home|$catalina_base|server|")
    fi
  done < <(pgrep -f 'tomcat|catalina' 2>/dev/null || true)

  # Also check systemd services whose ExecStart mentions Tomcat.
  local unit exec_start service_pid entry known
  while IFS= read -r unit; do
    [[ -n "$unit" ]] || continue
    exec_start=$(systemctl show "$unit" --property=ExecStart --no-pager 2>/dev/null \
      | cut -d'=' -f2- | tr -d '"') || exec_start=""
    [[ "$exec_start" == *tomcat* || "$exec_start" == *catalina* ]] || continue

    service_pid=$(systemctl show "$unit" --property=MainPID --no-pager 2>/dev/null \
      | cut -d'=' -f2-) || service_pid=""
    [[ -n "$service_pid" && "$service_pid" != "0" ]] || continue

    # Skip PIDs already reported by the process scan above.
    known=false
    for entry in ${tomcat_apps[@]+"${tomcat_apps[@]}"}; do
      if [[ "$entry" == "tomcat-${service_pid}-"* ]]; then
        known=true
        break
      fi
    done
    [[ "$known" == false ]] || continue

    cmdline=""
    catalina_home=""
    catalina_base=""
    if [[ -r "/proc/$service_pid/cmdline" ]]; then
      cmdline=$({ tr '\0' ' ' <"/proc/$service_pid/cmdline"; } 2>/dev/null) || cmdline=""
    fi
    if [[ -n "$cmdline" ]]; then
      catalina_home=$(_tomcat_cmdline_prop "$cmdline" catalina.home)
      catalina_base=$(_tomcat_cmdline_prop "$cmdline" catalina.base)
      if [[ -z "$catalina_base" ]]; then
        catalina_base="$catalina_home"
      fi
    fi
    tomcat_apps+=("tomcat-${service_pid}-service|$service_pid|$catalina_home|$catalina_base|service|")
  done < <(systemctl list-units --type=service --state=active --no-pager --no-legend 2>/dev/null \
    | awk '{print $1}' | grep -E '\.service$' || true)

  # One record per line so callers can use `mapfile -t`; print nothing when
  # empty so the resulting array is empty too.
  if ((${#tomcat_apps[@]} > 0)); then
    printf '%s\n' "${tomcat_apps[@]}"
  fi
  return 0
}
#######################################
# Function to get container configuration for restart.
# Arguments: $1 - container name or ID
# Outputs:   KEY=value lines on stdout:
#              IMAGE=<image>
#              PORTS="-p <host>:<container> ..."
#              ENV_VARS="-e K=V ..."
#              VOLUMES="-v <source>:<destination> ..."
#              WORKING_DIR=<dir>
#              COMMAND="<cmd words>"
# Returns:   0 always; fields that cannot be read are left empty.
#######################################
get_container_config() {
  local container="$1"
  local image ports="" mapping line host_port container_port
  local env_vars volumes working_dir command

  image=$(docker inspect "$container" --format '{{.Config.Image}}' 2>/dev/null) || image=""

  # `docker port` prints lines such as "9090/tcp -> 0.0.0.0:8082".
  # The loop reads from process substitution (not a pipe) so that $ports
  # survives the loop.
  while IFS= read -r line; do
    [[ "$line" == *"->"* ]] || continue
    container_port=${line%%/*}   # text before the "/proto" suffix
    host_port=${line##*:}        # text after the last ':' (works for [::]:8082)
    mapping="-p ${host_port}:${container_port}"
    # IPv4 and IPv6 bindings of the same port are listed separately; keep one.
    if [[ " $ports " != *" $mapping "* ]]; then
      ports="${ports:+$ports }$mapping"
    fi
  done < <(docker port "$container" 2>/dev/null || true)

  # sed (rather than grep -v) drops blank lines without failing on empty input,
  # which would otherwise abort the script under `set -e -o pipefail`.
  env_vars=$(docker inspect "$container" \
    --format '{{range .Config.Env}}{{println .}}{{end}}' 2>/dev/null \
    | sed -e '/^$/d' -e 's/^/-e /' | tr '\n' ' ') || env_vars=""
  volumes=$(docker inspect "$container" \
    --format '{{range .Mounts}}{{print .Source ":" .Destination "\n"}}{{end}}' 2>/dev/null \
    | sed -e '/^$/d' -e 's/^/-v /' | tr '\n' ' ') || volumes=""
  working_dir=$(docker inspect "$container" --format '{{.Config.WorkingDir}}' 2>/dev/null) \
    || working_dir=""
  command=$(docker inspect "$container" --format '{{.Config.Cmd}}' 2>/dev/null \
    | tr -d '[]') || command=""

  printf 'IMAGE=%s\n' "$image"
  printf 'PORTS="%s"\n' "$ports"
  printf 'ENV_VARS="%s"\n' "$env_vars"
  printf 'VOLUMES="%s"\n' "$volumes"
  printf 'WORKING_DIR=%s\n' "$working_dir"
  printf 'COMMAND="%s"\n' "$command"
}
#######################################
# Function to update Docker containers with OTEL wrapper.
# Re-launches every running Java container through the docker-run-otel wrapper.
# Globals:   AUTO_UPDATE_SERVICES (read) - must be "1" for anything to happen
#            DOCKER_WRAPPER_PATH (read)  - wrapper is installed when missing
# Returns:   0; per-container failures are reported as warnings.
#######################################
update_docker_containers() {
  if [[ "$AUTO_UPDATE_SERVICES" != "1" ]]; then
    info "AUTO_UPDATE_SERVICES is disabled, skipping Docker container updates"
    return 0
  fi

  info "Detecting Java Docker containers..."
  # Container names cannot contain whitespace, so splitting on any whitespace
  # accepts both one-name-per-line and space-separated detector output.
  local -a java_containers=()
  local name
  while IFS= read -r name; do
    if [[ -n "$name" ]]; then
      java_containers+=("$name")
    fi
  done < <(detect_java_containers | tr -s '[:space:]' '\n')

  if ((${#java_containers[@]} == 0)); then
    info "No running Java containers detected"
    return 0
  fi

  info "Found Java containers: ${java_containers[*]}"

  # Check if docker-run-otel wrapper exists, install if missing
  if [[ ! -f "$DOCKER_WRAPPER_PATH" ]]; then
    info "Docker wrapper not found at $DOCKER_WRAPPER_PATH, installing it..."
    install_agent
    install_docker_wrapper
  fi

  local container
  for container in "${java_containers[@]}"; do
    info "Processing container: $container"
    # Keep going with the remaining containers when one of them fails.
    update_docker_container "$container" \
      || warn "Failed to update container: $container"
  done
  return 0
}
#######################################
# Function to update a specific Docker container.
# Stops the container and starts a replacement named "<container>-otel" through
# the docker-run-otel wrapper with the OTEL_* settings of this script. If the
# replacement is not running shortly afterwards, the original is started again.
# Globals:   OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_HEADERS,
#            OTEL_TRACES_EXPORTER, OTEL_METRICS_EXPORTER, OTEL_LOGS_EXPORTER,
#            MW_TARGET (read); DOCKER_WRAPPER_PATH (read, falls back to
#            "docker-run-otel" on PATH)
# Arguments: $1 - name of a running container
# Returns:   0 when the container is already configured, was replaced, or was
#            rolled back; non-zero when it could not be stopped or the
#            rollback itself failed.
# NOTE(review): only published ports and the OTEL_* variables are carried over
# to the replacement; other environment variables, volumes and the command of
# the original container are not. Confirm this is intended before relying on
# it for stateful containers.
#######################################
update_docker_container() {
  local container="$1"
  local wrapper="${DOCKER_WRAPPER_PATH:-docker-run-otel}"

  info "Using endpoint for Docker container: ${OTEL_EXPORTER_OTLP_ENDPOINT}"
  # The headers carry the authorization token: never print their value.
  info "Using headers for Docker container: ${OTEL_EXPORTER_OTLP_HEADERS:+<redacted>}"
  info "Debug: MW_TARGET=${MW_TARGET:-not set}"
  info "Debug: OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT}"

  # Check if container already has complete OTEL configuration
  local env_dump has_endpoint="no" has_service_name="no" has_java_tool_options="no"
  env_dump=$(docker exec "$container" env 2>/dev/null) || env_dump=""
  if [[ "$env_dump" == *OTEL_EXPORTER_OTLP_ENDPOINT* ]]; then has_endpoint="yes"; fi
  if [[ "$env_dump" == *OTEL_SERVICE_NAME* ]]; then has_service_name="yes"; fi
  if [[ "$env_dump" == *JAVA_TOOL_OPTIONS* ]]; then has_java_tool_options="yes"; fi

  if [[ "$has_endpoint" == "yes" && "$has_service_name" == "yes" && "$has_java_tool_options" == "yes" ]]; then
    info "Container $container already has complete OTEL configuration"
    return 0
  elif [[ "$has_endpoint" == "yes" && "$has_java_tool_options" == "no" ]]; then
    info "Container $container missing JAVA_TOOL_OPTIONS, updating configuration"
  elif [[ "$has_endpoint" == "yes" && "$has_service_name" == "no" ]]; then
    info "Container $container has partial OTEL configuration, updating service name"
  else
    info "Container $container needs OTEL configuration"
  fi

  info "Updating container: $container"

  # Get basic container info
  local image
  image=$(docker inspect "$container" --format '{{.Config.Image}}' 2>/dev/null) || image=""
  if [[ -z "$image" ]]; then
    warn "Cannot determine image of container $container; leaving it untouched"
    return 0
  fi

  # Collect every published port. Lines look like "9090/tcp -> 0.0.0.0:8082";
  # IPv4 and IPv6 bindings of one port appear twice and are de-duplicated.
  local -a port_args=()
  local port_line host_port container_port mapping
  while IFS= read -r port_line; do
    [[ "$port_line" == *"->"* ]] || continue
    container_port=${port_line%%/*}
    host_port=${port_line##*:}
    mapping="${host_port}:${container_port}"
    if [[ " ${port_args[*]-} " != *" $mapping "* ]]; then
      port_args+=(-p "$mapping")
    fi
  done < <(docker port "$container" 2>/dev/null || true)

  # Create new container name
  local new_name="${container}-otel"

  # Use container name as service name (remove any suffixes like -otel)
  local service_name="${container%-otel}"

  # Build the command as an array so that no value is ever re-parsed by the
  # shell (no eval, no word splitting of names, images or header values).
  local -a docker_cmd=("$wrapper")
  docker_cmd+=(${port_args[@]+"${port_args[@]}"})
  docker_cmd+=(
    -e "OTEL_SERVICE_NAME=$service_name"
    -e "OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT}"
    -e "OTEL_EXPORTER_OTLP_HEADERS=${OTEL_EXPORTER_OTLP_HEADERS}"
    -e "OTEL_TRACES_EXPORTER=${OTEL_TRACES_EXPORTER:-otlp}"
    -e "OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-otlp}"
    -e "OTEL_LOGS_EXPORTER=${OTEL_LOGS_EXPORTER:-otlp}"
    --name "$new_name" "$image"
  )

  # Stop the old container
  info "Stopping container: $container"
  if ! docker stop "$container" >/dev/null 2>&1; then
    warn "Failed to stop container $container; leaving it untouched"
    return 1
  fi

  # Log the command with the header value masked.
  local arg shown=""
  for arg in "${docker_cmd[@]}"; do
    if [[ "$arg" == OTEL_EXPORTER_OTLP_HEADERS=* ]]; then
      arg="OTEL_EXPORTER_OTLP_HEADERS=<redacted>"
    fi
    shown+="$(printf '%q' "$arg") "
  done
  info "Starting container with OTEL instrumentation: $new_name"
  info "Service name will be: $service_name"
  info "Command: $shown"

  # Execute the command in background to ensure detached mode
  "${docker_cmd[@]}" &

  # Wait a moment for container to start
  sleep 3

  # Check if container is running. The name list is captured first so that an
  # early-exiting grep cannot fail the pipeline under `pipefail`.
  local running
  running=$(docker ps --format '{{.Names}}' 2>/dev/null) || running=""
  if grep -Fxq -- "$new_name" <<<"$running"; then
    info "✓ Successfully restarted container with OTEL instrumentation: $new_name"
    info "✓ Service name set to: $service_name"
    # Remove old container
    docker rm "$container" >/dev/null 2>&1 \
      || warn "Could not remove old container: $container"
  else
    warn "Failed to restart container with OTEL instrumentation"
    # Restart original container
    if ! docker start "$container" >/dev/null 2>&1; then
      warn "Could not restart original container: $container"
      return 1
    fi
  fi
  return 0
}
#######################################
# Function to update existing Java services.
# Adds the OTEL configuration to every detected Java systemd service.
# Globals:   AUTO_UPDATE_SERVICES (read) - must be "1" for anything to happen
# Returns:   0; per-service failures are reported as warnings.
#######################################
update_java_services() {
  if [[ "$AUTO_UPDATE_SERVICES" != "1" ]]; then
    info "AUTO_UPDATE_SERVICES is disabled, skipping service updates"
    return 0
  fi

  info "Detecting existing Java services..."
  # Unit names cannot contain whitespace, so splitting on any whitespace
  # accepts both one-unit-per-line and space-separated detector output and
  # never produces an empty element.
  local -a java_services=()
  local service
  while IFS= read -r service; do
    if [[ -n "$service" ]]; then
      java_services+=("$service")
    fi
  done < <(detect_java_services | tr -s '[:space:]' '\n')

  if ((${#java_services[@]} == 0)); then
    info "No running Java services detected"
    return 0
  fi

  info "Found Java services: ${java_services[*]}"

  for service in "${java_services[@]}"; do
    info "Updating service: $service"
    if update_service_config "$service"; then
      info "✓ Successfully updated service: $service"
    else
      warn "✗ Failed to update service: $service"
    fi
  done
  return 0
}

#######################################
# Function to update existing Tomcat applications.
# Runs update_tomcat_app for every record reported by detect_tomcat_apps
# (format: id|pid|catalina_home|catalina_base|app_name|app_path).
# Globals:   AUTO_UPDATE_SERVICES (read) - must be "1" for anything to happen
# Returns:   0; per-application failures are reported as warnings.
#######################################
update_tomcat_apps() {
  if [[ "$AUTO_UPDATE_SERVICES" != "1" ]]; then
    info "AUTO_UPDATE_SERVICES is disabled, skipping Tomcat app updates"
    return 0
  fi

  info "Detecting existing Tomcat applications..."
  # Records may arrive one per line or space-separated on a single line; a new
  # record is recognised by its "tomcat-<pid>-" prefix so that paths containing
  # spaces stay intact.
  local -a tomcat_apps=()
  local app_info
  while IFS= read -r app_info; do
    if [[ -n "$app_info" ]]; then
      tomcat_apps+=("$app_info")
    fi
  done < <(detect_tomcat_apps | sed 's/ \(tomcat-[0-9][0-9]*-\)/\n\1/g')

  if ((${#tomcat_apps[@]} == 0)); then
    info "No running Tomcat applications detected"
    return 0
  fi

  info "Found Tomcat applications: ${#tomcat_apps[@]}"

  local app_id pid catalina_home catalina_base app_name app_path
  for app_info in "${tomcat_apps[@]}"; do
    IFS='|' read -r app_id pid catalina_home catalina_base app_name app_path <<<"$app_info"
    info "Processing Tomcat app: $app_name (PID: $pid)"
    if update_tomcat_app "$app_id" "$pid" "$catalina_home" "$catalina_base" "$app_name" "$app_path"; then
      info "✓ Successfully updated Tomcat app: $app_name"
    else
      warn "✗ Failed to update Tomcat app: $app_name"
    fi
  done
  return 0
}
+ local tomcat_apps + mapfile -t tomcat_apps < <(detect_tomcat_apps) + + if [ ${#tomcat_apps[@]} -eq 0 ]; then + info "No running Tomcat applications detected" + return 0 + fi + + info "Found Tomcat applications: ${#tomcat_apps[@]}" + + for app_info in "${tomcat_apps[@]}"; do + IFS='|' read -r app_id pid catalina_home catalina_base app_name app_path <<< "$app_info" + info "Processing Tomcat app: $app_name (PID: $pid)" + if update_tomcat_app "$app_id" "$pid" "$catalina_home" "$catalina_base" "$app_name" "$app_path"; then + info "✓ Successfully updated Tomcat app: $app_name" + else + warn "✗ Failed to update Tomcat app: $app_name" + fi + done +} + +# Function to update a specific Tomcat application +update_tomcat_app() { + local app_id="$1" + local pid="$2" + local catalina_home="$3" + local catalina_base="$4" + local app_name="$5" + local app_path="$6" + + info "Updating Tomcat application: $app_name (PID: $pid)" + + info "Using endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT}" + info "Using headers: ${OTEL_EXPORTER_OTLP_HEADERS}" + + # Check if this Tomcat instance is already instrumented + local env_output + env_output=$(cat "/proc/$pid/environ" 2>/dev/null | tr '\0' '\n' | grep -E "OTEL_EXPORTER_OTLP_ENDPOINT|JAVA_TOOL_OPTIONS" || echo "") + + if echo "$env_output" | grep -q "OTEL_EXPORTER_OTLP_ENDPOINT"; then + info "Tomcat application $app_name is already instrumented" + return 0 + fi + + # Create backup of Tomcat configuration + local backup_dir="/tmp/tomcat-otel-backup-$(date +%Y%m%d_%H%M%S)" + mkdir -p "$backup_dir" + + # Find and backup setenv.sh or catalina.sh + local setenv_file="" + local catalina_script="" + + if [ -n "$catalina_base" ] && [ -d "$catalina_base" ]; then + setenv_file="$catalina_base/bin/setenv.sh" + catalina_script="$catalina_base/bin/catalina.sh" + elif [ -n "$catalina_home" ] && [ -d "$catalina_home" ]; then + setenv_file="$catalina_home/bin/setenv.sh" + catalina_script="$catalina_home/bin/catalina.sh" + fi + + # Create or update setenv.sh 
with OTEL configuration + if [ -n "$setenv_file" ]; then + # Backup existing setenv.sh if it exists + if [ -f "$setenv_file" ]; then + cp "$setenv_file" "$backup_dir/setenv.sh.backup" + info "Backed up existing setenv.sh to $backup_dir" + fi + + # Create or update setenv.sh with OTEL configuration + cat > "$setenv_file" </dev/null | cut -d'=' -f2- || echo "") + if [ "$service_pid" = "$pid" ]; then + service_name="$service" + break + fi + done + + if [ -n "$service_name" ]; then + info "Found systemd service for Tomcat: $service_name" + update_service_config "$service_name" + else + # For standalone Tomcat, we need to restart it to pick up the new setenv.sh + info "Tomcat is not managed by systemd, manual restart required" + info "Please restart Tomcat to apply OpenTelemetry instrumentation:" + info " - Stop: kill $pid" + info " - Start: $catalina_script start" + warn "Manual restart required for Tomcat application: $app_name" + warn "Note: The script will continue but Tomcat instrumentation will not be active until manual restart" + fi +} + +# Function to instrument a specific Tomcat application +instrument_tomcat_app() { + local app_id="$1" + + if [ -z "$app_id" ]; then + err "Tomcat application ID is required" + return 1 + fi + + info "Adding OTEL instrumentation to Tomcat application: $app_id" + + # Parse the app_id to get components + IFS='-' read -r prefix pid suffix <<< "$app_id" + if [ "$prefix" != "tomcat" ]; then + err "Invalid Tomcat application ID format: $app_id" + return 1 + fi + + # Find the Tomcat app details + local tomcat_apps + mapfile -t tomcat_apps < <(detect_tomcat_apps) + + local app_info="" + for app in "${tomcat_apps[@]}"; do + if [[ "$app" == "$app_id|"* ]]; then + app_info="$app" + break + fi + done + + if [ -z "$app_info" ]; then + err "Tomcat application $app_id not found or not running" + return 1 + fi + + IFS='|' read -r found_app_id found_pid catalina_home catalina_base app_name app_path <<< "$app_info" + + # Check if already 
#######################################
# Function to instrument a specific Tomcat application.
# Looks the application up among the records of detect_tomcat_apps and hands
# it to update_tomcat_app unless its process already has
# OTEL_EXPORTER_OTLP_ENDPOINT in its environment.
# Arguments: $1 - application id as printed by detect_tomcat_apps
#                 (tomcat-<pid>-<name>)
# Returns:   0 on success or when already instrumented; errors are reported
#            through err.
#######################################
instrument_tomcat_app() {
  local app_id="$1"

  if [[ -z "$app_id" ]]; then
    err "Tomcat application ID is required"
    return 1
  fi

  info "Adding OTEL instrumentation to Tomcat application: $app_id"

  # The id must start with the "tomcat" component.
  if [[ "${app_id%%-*}" != "tomcat" ]]; then
    err "Invalid Tomcat application ID format: $app_id"
    return 1
  fi

  # Find the Tomcat app details. Records may arrive one per line or
  # space-separated; a new record starts at each "tomcat-<pid>-" prefix.
  local candidate app_info=""
  while IFS= read -r candidate; do
    if [[ "$candidate" == "$app_id|"* ]]; then
      app_info="$candidate"
      break
    fi
  done < <(detect_tomcat_apps | sed 's/ \(tomcat-[0-9][0-9]*-\)/\n\1/g')

  if [[ -z "$app_info" ]]; then
    err "Tomcat application $app_id not found or not running"
    return 1
  fi

  local found_app_id found_pid catalina_home catalina_base app_name app_path
  IFS='|' read -r found_app_id found_pid catalina_home catalina_base app_name app_path <<<"$app_info"

  # Check if already instrumented
  local env_output=""
  if [[ -r "/proc/$found_pid/environ" ]]; then
    env_output=$({ tr '\0' '\n' <"/proc/$found_pid/environ" \
      | grep -E "OTEL_EXPORTER_OTLP_ENDPOINT"; } 2>/dev/null) || env_output=""
  fi
  if [[ -n "$env_output" ]]; then
    info "Tomcat application $app_name is already instrumented"
    return 0
  fi

  # Update the Tomcat application. The call is the `if` condition so that a
  # failure reaches the error branch instead of tripping `set -e` first.
  if update_tomcat_app "$found_app_id" "$found_pid" "$catalina_home" "$catalina_base" "$app_name" "$app_path"; then
    info "✓ Successfully added OTEL instrumentation to Tomcat application: $app_name"
  else
    err "Failed to add OTEL instrumentation to Tomcat application: $app_name"
    return 1
  fi
}

# Print one quoted systemd assignment: Environment="<name>=<value>".
# Backslashes and double quotes are escaped, and '%' is doubled because
# systemd expands %-specifiers in unit files.
_otel_unit_env_line() {
  local value="$2"
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  value=${value//%/%%}
  printf 'Environment="%s=%s"\n' "$1" "$value"
}

# Print the block of Environment= lines that enables the Java agent.
# OTEL_SERVICE_NAME and OTEL_RESOURCE_ATTRIBUTES are included only when set.
_otel_unit_env_block() {
  echo ""
  echo "# OpenTelemetry instrumentation (auto-added)"
  _otel_unit_env_line JAVA_TOOL_OPTIONS "-javaagent:${AGENT_PATH}"
  _otel_unit_env_line OTEL_EXPORTER_OTLP_ENDPOINT "${OTEL_EXPORTER_OTLP_ENDPOINT}"
  _otel_unit_env_line OTEL_EXPORTER_OTLP_HEADERS "${OTEL_EXPORTER_OTLP_HEADERS}"
  _otel_unit_env_line OTEL_TRACES_EXPORTER "${OTEL_TRACES_EXPORTER}"
  _otel_unit_env_line OTEL_METRICS_EXPORTER "${OTEL_METRICS_EXPORTER}"
  _otel_unit_env_line OTEL_LOGS_EXPORTER "${OTEL_LOGS_EXPORTER}"
  if [[ -n "${OTEL_SERVICE_NAME:-}" ]]; then
    _otel_unit_env_line OTEL_SERVICE_NAME "${OTEL_SERVICE_NAME}"
  fi
  if [[ -n "${OTEL_RESOURCE_ATTRIBUTES:-}" ]]; then
    _otel_unit_env_line OTEL_RESOURCE_ATTRIBUTES "${OTEL_RESOURCE_ATTRIBUTES}"
  fi
  return 0
}

#######################################
# Function to update a specific service configuration.
# Inserts the OTEL Environment= block into the [Service] section of the unit
# file (at its first blank line, or at the end of the section), then reloads
# systemd and restarts the service if it is active.
# Globals:   AGENT_PATH, OTEL_* (read); SYSTEMD_UNIT_DIR (read, optional
#            override of the unit directory, default /etc/systemd/system)
# Arguments: $1 - unit name including the ".service" suffix
# Returns:   0 when the unit was updated or already configured;
#            1 when the unit file is missing, has no [Service] section, or
#            cannot be written.
#######################################
update_service_config() {
  local service="$1"
  local unit_dir="${SYSTEMD_UNIT_DIR:-/etc/systemd/system}"
  local service_file="${unit_dir}/${service}"

  if [[ -z "$service" || ! -f "$service_file" ]]; then
    warn "Service file not found: $service_file"
    return 1
  fi

  info "Using endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT}"
  # The headers carry the authorization token: never print their value.
  info "Using headers: ${OTEL_EXPORTER_OTLP_HEADERS:+<redacted>}"

  # Check if service already has OTEL configuration
  if grep -q "OTEL_EXPORTER_OTLP_ENDPOINT" "$service_file"; then
    info "Service $service already has OTEL configuration"
    return 0
  fi

  local temp_file
  temp_file=$(mktemp) || {
    warn "Could not create a temporary file"
    return 1
  }

  # Copy the unit line by line, emitting the OTEL block once inside [Service].
  # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
  local line in_service_section=false env_added=false
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" == "[Service]" ]]; then
      in_service_section=true
    elif [[ "$in_service_section" == true && "$env_added" == false ]] \
      && [[ -z "$line" || "$line" == "["*"]" ]]; then
      # First blank line of [Service], or the header of the next section.
      _otel_unit_env_block
      env_added=true
    fi
    if [[ "$line" == "["*"]" && "$line" != "[Service]" ]]; then
      in_service_section=false
    fi
    printf '%s\n' "$line"
  done <"$service_file" >"$temp_file"

  if [[ "$env_added" == false ]]; then
    if [[ "$in_service_section" == true ]]; then
      # [Service] was the last section and had no blank line: append.
      _otel_unit_env_block >>"$temp_file"
    else
      warn "No [Service] section found in $service_file; leaving it unchanged"
      rm -f -- "$temp_file"
      return 1
    fi
  fi

  # Back up under a name that is computed once, so the logged name is the
  # file that actually exists.
  local backup_file
  backup_file="${service_file}.backup.$(date +%Y%m%d_%H%M%S)"
  if ! cp -p -- "$service_file" "$backup_file"; then
    warn "Could not create backup: $backup_file"
    rm -f -- "$temp_file"
    return 1
  fi
  info "Created backup: $backup_file"

  # Write into the existing file instead of moving the temp file over it, so
  # the unit keeps its mode, owner and security label.
  if ! cat "$temp_file" >"$service_file"; then
    warn "Could not write $service_file"
    rm -f -- "$temp_file"
    return 1
  fi
  rm -f -- "$temp_file"

  info "Updated service configuration: $service"

  # Reload systemd and restart service
  systemctl daemon-reload
  if systemctl is-active --quiet "$service"; then
    info "Restarting service: $service (this may take a moment)..."
    # Add timeout to prevent hanging and redirect output to avoid terminal interference
    if timeout 30 systemctl restart "$service" >/dev/null 2>&1; then
      info "✓ Service $service restarted successfully"
    else
      warn "Service $service restart timed out or failed"
      # Check if service is still running
      if systemctl is-active --quiet "$service"; then
        info "Service $service is still running despite timeout"
      else
        warn "Service $service may have failed to start"
      fi
    fi
  fi
  return 0
}
#######################################
# Function to instrument a specific service.
# Adds the OTEL configuration to one running systemd service by delegating the
# unit-file edit, daemon reload and restart to update_service_config, then
# verifies that the service is active.
# Arguments: $1 - unit name including the ".service" suffix
# Returns:   0 on success or when the service already carries
#            OTEL_EXPORTER_OTLP_ENDPOINT in its environment; every failure is
#            reported through err.
#######################################
instrument_service() {
  local service_name="$1"

  if [[ -z "$service_name" ]]; then
    err "Service name is required"
    return 1
  fi

  info "Adding OTEL instrumentation to service: $service_name"
  info "Using endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT}"
  # The headers carry the authorization token: never print their value.
  info "Using headers: ${OTEL_EXPORTER_OTLP_HEADERS:+<redacted>}"

  # Check if service exists and is running
  if ! systemctl is-active --quiet "$service_name" 2>/dev/null; then
    err "Service $service_name is not running or does not exist"
    return 1
  fi

  # Check if service is already instrumented
  local env_output
  env_output=$(systemctl show "$service_name" --property=Environment --no-pager 2>/dev/null) \
    || env_output=""
  if [[ "$env_output" == *OTEL_EXPORTER_OTLP_ENDPOINT* ]]; then
    info "Service $service_name is already instrumented"
    return 0
  fi

  # Backup, unit-file edit, daemon-reload and restart live in one place.
  # A missing unit file is a failure here rather than a silent "success".
  if ! update_service_config "$service_name"; then
    err "Failed to add OTEL instrumentation to $service_name"
    return 1
  fi

  # If the unit file already contained the settings but the running service
  # does not show them yet, it still needs a reload and restart.
  env_output=$(systemctl show "$service_name" --property=Environment --no-pager 2>/dev/null) \
    || env_output=""
  if [[ "$env_output" != *OTEL_EXPORTER_OTLP_ENDPOINT* ]]; then
    systemctl daemon-reload
    # Add timeout to prevent hanging and redirect output to avoid terminal interference
    if ! timeout 30 systemctl restart "$service_name" >/dev/null 2>&1; then
      warn "Service $service_name restart timed out or failed"
    fi
  fi

  if systemctl is-active --quiet "$service_name"; then
    info "✓ Successfully added OTEL instrumentation to $service_name"
  else
    err "Failed to restart service $service_name after adding instrumentation"
    return 1
  fi
}
service $service_name after adding instrumentation" + return 1 + fi +} + +# Function to instrument a specific container +instrument_container() { + local container_name="$1" + + if [ -z "$container_name" ]; then + err "Container name is required" + return 1 + fi + + info "Adding OTEL instrumentation to container: $container_name" + info "Using endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT}" + info "Using headers: ${OTEL_EXPORTER_OTLP_HEADERS}" + + # Check if docker-run-otel wrapper exists, install if missing + if [ ! -f "$DOCKER_WRAPPER_PATH" ]; then + info "Docker wrapper not found at $DOCKER_WRAPPER_PATH, installing it..." + install_agent + install_docker_wrapper + fi + + # Check if container exists and is running + if ! docker ps --format "{{.Names}}" | grep -q "^${container_name}$"; then + err "Container $container_name is not running or does not exist" + return 1 + fi + + # Check if container is already instrumented + local has_endpoint + has_endpoint=$(docker exec "$container_name" env 2>/dev/null | grep -q "OTEL_EXPORTER_OTLP_ENDPOINT" && echo "yes" || echo "no") + if [ "$has_endpoint" = "yes" ]; then + info "Container $container_name is already instrumented" + return 0 + fi + + # Get container configuration with better error handling + info "Getting container image..." + local image + if ! 
image=$(timeout 15 docker inspect "$container_name" --format '{{.Config.Image}}' 2>&1); then + err "Failed to get image for container $container_name (timeout or error)" + return 1 + fi + if [ -z "$image" ]; then + err "Failed to get image for container $container_name (empty result)" + return 1 + fi + info "Container image: $image" + + # Get port mapping with better error handling + info "Getting port mapping for container: $container_name" + local ports="" + local port_output + if port_output=$(timeout 15 docker port "$container_name" 2>&1); then + info "Port output: $port_output" + if [ -n "$port_output" ]; then + # Parse all port mappings, not just the first one + while IFS= read -r port_line; do + if [ -n "$port_line" ]; then + local host_port + local container_port + # Parse format like "9090/tcp -> 0.0.0.0:8082" + if [[ "$port_line" == *"->"* ]]; then + host_port=$(echo "$port_line" | awk -F: '{print $2}') + container_port=$(echo "$port_line" | awk '{print $1}' | awk -F/ '{print $1}') + if [ -n "$host_port" ] && [ -n "$container_port" ]; then + ports="$ports -p $host_port:$container_port" + info "Found port mapping: $host_port:$container_port" + fi + fi + fi + done <<< "$port_output" + fi + else + warn "Failed to get port mapping for container $container_name (timeout or error)" + fi + info "Final ports: $ports" + + # Get environment variables (excluding OTEL ones) with better error handling + info "Getting environment variables for container: $container_name" + local env_vars="" + local env_output + if env_output=$(timeout 15 docker inspect "$container_name" --format '{{range .Config.Env}}{{println .}}{{end}}' 2>&1); then + info "Environment output length: ${#env_output}" + if [ -n "$env_output" ]; then + env_vars=$(echo "$env_output" | grep -v "^$" | grep -v "OTEL_" | grep -v "JAVA_TOOL_OPTIONS" | while read -r env_var; do + if [ -n "$env_var" ]; then + echo "-e $env_var" + fi + done | tr '\n' ' ') + fi + else + warn "Failed to get environment variables 
for container $container_name (timeout or error)" + fi + info "Final env_vars: $env_vars" + + # Get volumes (excluding OTEL agent volume) with better error handling + info "Getting volume information for container: $container_name" + local volumes="" + local volume_output + if volume_output=$(timeout 15 docker inspect "$container_name" --format '{{range .Mounts}}{{print .Source ":" .Destination "\n"}}{{end}}' 2>&1); then + info "Volume output length: ${#volume_output}" + info "Volume output: $volume_output" + if [ -n "$volume_output" ]; then + # Process volumes line by line to avoid hanging + while IFS= read -r volume_line; do + if [ -n "$volume_line" ] && [[ "$volume_line" != *":/otel"* ]]; then + volumes="$volumes -v $volume_line" + info "Added volume: $volume_line" + fi + done <<< "$volume_output" + fi + else + warn "Failed to get volume information for container $container_name (timeout or error)" + fi + info "Final volumes: $volumes" + + # Get working directory + info "Getting working directory for container: $container_name" + local working_dir + if ! working_dir=$(timeout 15 docker inspect "$container_name" --format '{{.Config.WorkingDir}}' 2>&1); then + warn "Failed to get working directory for container $container_name (timeout or error)" + working_dir="" + fi + info "Working directory: $working_dir" + + # Get command + info "Getting command for container: $container_name" + local command + if ! command=$(timeout 15 docker inspect "$container_name" --format '{{.Config.Cmd}}' 2>&1 | tr -d '[]' | tr ',' ' '); then + warn "Failed to get command for container $container_name (timeout or error)" + command="" + fi + info "Command: $command" + + # Create new container name with -otel suffix + local new_name="${container_name}-otel" + + # Check if target container already exists and remove it + if docker ps -a --format "{{.Names}}" | grep -q "^${new_name}$"; then + info "Removing existing container: $new_name" + if ! 
docker rm "$new_name" >/dev/null 2>&1; then + warn "Failed to remove existing container $new_name, continuing anyway" + fi + fi + + # Stop the old container + info "Stopping container: $container_name" + if ! docker stop "$container_name" >/dev/null 2>&1; then + err "Failed to stop container $container_name" + return 1 + fi + + # Build docker-run-otel command with OTEL instrumentation + # Ensure detached mode is always used + local docker_cmd="docker-run-otel -d" + + # Add ports if available + if [ -n "$ports" ]; then + docker_cmd="$docker_cmd $ports" + fi + + # Add environment variables (excluding OTEL ones, they'll be added by docker-run-otel) + if [ -n "$env_vars" ]; then + docker_cmd="$docker_cmd $env_vars" + fi + + # Add volumes if available + if [ -n "$volumes" ]; then + docker_cmd="$docker_cmd $volumes" + fi + + # Add working directory if available + if [ -n "$working_dir" ] && [ "$working_dir" != "/" ]; then + docker_cmd="$docker_cmd -w $working_dir" + fi + + # Add service name environment variable + docker_cmd="$docker_cmd -e OTEL_SERVICE_NAME=$container_name" + + # Add OpenTelemetry environment variables + docker_cmd="$docker_cmd -e OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT}" + docker_cmd="$docker_cmd -e OTEL_EXPORTER_OTLP_HEADERS=${OTEL_EXPORTER_OTLP_HEADERS}" + docker_cmd="$docker_cmd -e OTEL_TRACES_EXPORTER=${OTEL_TRACES_EXPORTER:-otlp}" + docker_cmd="$docker_cmd -e OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-otlp}" + docker_cmd="$docker_cmd -e OTEL_LOGS_EXPORTER=${OTEL_LOGS_EXPORTER:-otlp}" + + # Add JVM options to fix cgroup-related issues with OpenTelemetry agent + docker_cmd="$docker_cmd -e OTEL_JAVAAGENT_ENABLE_RUNTIME_METRICS=false" + docker_cmd="$docker_cmd -e OTEL_JAVAAGENT_ENABLE_EXPERIMENTAL_RUNTIME_METRICS=false" + + # Add additional JVM options to prevent cgroup-related failures + docker_cmd="$docker_cmd -e JAVA_TOOL_OPTIONS=\"-javaagent:/otel/opentelemetry-javaagent.jar -Dotel.javaagent.enable.runtime.metrics=false 
-Dotel.javaagent.enable.experimental.runtime.metrics=false -XX:+DisableAttachMechanism\"" + + # Add name and image + docker_cmd="$docker_cmd --name $new_name $image" + + # Add command if available + if [ -n "$command" ]; then + docker_cmd="$docker_cmd $command" + fi + + info "Starting container with OTEL instrumentation: $new_name" + info "Service name will be: $container_name" + info "Container will start in detached mode" + info "Command: $docker_cmd" + + # Execute the command and capture output with timeout + local run_output + local exit_code=0 + info "Executing docker command with timeout..." + # Capture the status with "||": inside "if ! cmd; then" $? is the status of the negated condition (always 0), so the timeout status 124 could never be seen + run_output=$(timeout 30 bash -c "eval '$docker_cmd'" 2>&1) || exit_code=$? + if [ "$exit_code" -ne 0 ]; then + if [ "$exit_code" -eq 124 ]; then + err "Docker command timed out after 30 seconds" + else + # Check if the container was actually created despite the error + if docker ps -a --format "{{.Names}}" | grep -q "^${new_name}$"; then + info "Container was created despite error, checking status..." + # Container exists, check if it's running + if docker ps --format "{{.Names}}" | grep -q "^${new_name}$"; then + info "Container is running, continuing with verification..." 
+ else + err "Container was created but is not running (exit code: $exit_code): $run_output" + # Try to restart original container + info "Attempting to restart original container: $container_name" + if docker start "$container_name" >/dev/null 2>&1; then + info "✓ Restarted original container: $container_name" + else + err "Failed to restart original container: $container_name" + fi + return 1 + fi + else + err "Failed to start new container (exit code: $exit_code): $run_output" + # Try to restart original container + info "Attempting to restart original container: $container_name" + if docker start "$container_name" >/dev/null 2>&1; then + info "✓ Restarted original container: $container_name" + else + err "Failed to restart original container: $container_name" + fi + return 1 + fi + fi + else + info "Docker command executed successfully: $run_output" + fi + + # Wait a moment for container to start + sleep 5 + + # Check if container is running + if docker ps --format "{{.Names}}" | grep -q "^${new_name}$"; then + info "✓ Successfully restarted container with OTEL instrumentation: $new_name" + info "✓ Service name set to: $container_name" + # Check container logs for any issues + info "Checking container logs for any issues..." 
+ local log_output + log_output=$(docker logs "$new_name" 2>&1 | tail -10) + if echo "$log_output" | grep -q "OpenTelemetry Javaagent failed to start"; then + warn "OpenTelemetry agent had initialization issues, but container is running" + info "This is often due to cgroup configuration issues but does not affect application functionality" + fi + if echo "$log_output" | grep -q "Started.*Application"; then + info "✓ Application started successfully" + fi + # Remove old container + if docker rm "$container_name" >/dev/null 2>&1; then + info "✓ Removed old container: $container_name" + else + warn "Failed to remove old container: $container_name" + fi + else + err "Failed to restart container with OTEL instrumentation" + # Check what happened to the container + if docker ps -a --format "{{.Names}}" | grep -q "^${new_name}$"; then + info "Container exists but not running. Checking logs..." + docker logs "$new_name" 2>&1 | tail -20 + fi + # Try to restart original container + info "Attempting to restart original container: $container_name" + if docker start "$container_name" >/dev/null 2>&1; then + info "✓ Restarted original container: $container_name" + else + err "Failed to restart original container: $container_name" + fi + return 1 + fi +} + +# Function to list all instrumented Java apps +list_instrumented_apps() { + info "Listing all instrumented Java applications..." 
+ + local instrumented_count=0 + + # Check systemd services + info "=== Systemd Services ===" + local java_services + mapfile -t java_services < <(detect_java_services) + + for service in "${java_services[@]}"; do + local env_output + env_output=$(systemctl show "$service" --property=Environment --no-pager 2>/dev/null) + if echo "$env_output" | grep -q "OTEL_EXPORTER_OTLP_ENDPOINT"; then + info "✓ Service: $service (instrumented)" + instrumented_count=$((instrumented_count + 1)) + else + info "✗ Service: $service (not instrumented)" + fi + done + + # Check Docker containers + info "=== Docker Containers ===" + local java_containers + # Get containers and handle empty results properly + local java_containers_output + java_containers_output=$(detect_java_containers) + if [ -n "$java_containers_output" ]; then + mapfile -t java_containers <<< "$java_containers_output" + else + java_containers=() + fi + + for container in "${java_containers[@]}"; do + local has_endpoint + has_endpoint=$(docker exec "$container" env 2>/dev/null | grep -q "OTEL_EXPORTER_OTLP_ENDPOINT" && echo "yes" || echo "no") + if [ "$has_endpoint" = "yes" ]; then + info "✓ Container: $container (instrumented)" + instrumented_count=$((instrumented_count + 1)) + else + info "✗ Container: $container (not instrumented)" + fi + done + + # Check Tomcat applications + info "=== Tomcat Applications ===" + local tomcat_apps + mapfile -t tomcat_apps < <(detect_tomcat_apps) + + for app_info in "${tomcat_apps[@]}"; do + IFS='|' read -r app_id pid catalina_home catalina_base app_name app_path <<< "$app_info" + local env_output + env_output=$(cat "/proc/$pid/environ" 2>/dev/null | tr '\0' '\n' | grep -E "OTEL_EXPORTER_OTLP_ENDPOINT" || echo "") + if [ -n "$env_output" ]; then + info "✓ Tomcat App: $app_name (PID: $pid) (instrumented) - App ID: $app_id" + instrumented_count=$((instrumented_count + 1)) + else + info "✗ Tomcat App: $app_name (PID: $pid) (not instrumented) - App ID: $app_id" + fi + done + + info 
"Total instrumented Java applications: $instrumented_count" +} + +# Function to remove OTEL instrumentation from a specific service +# Arguments: $1 - systemd unit name; its unit file is looked up at /etc/systemd/system/<name> +# Returns: 0 when the service is active after the restart; otherwise reports via err and returns 1 +uninstrument_service() { + local service_name="${1:-}" + + if [ -z "$service_name" ]; then + err "Service name is required" + return 1 + fi + + info "Removing OTEL instrumentation from service: $service_name" + + # Check if service exists and is running + if ! systemctl is-active --quiet "$service_name" 2>/dev/null; then + err "Service $service_name is not running or does not exist" + return 1 + fi + + local service_file="/etc/systemd/system/$service_name" + if [ -f "$service_file" ]; then + # Create backup of service file; build the name once so the logged path is the file actually written + local backup_file + backup_file="${service_file}.backup.$(date +%Y%m%d_%H%M%S)" + cp "$service_file" "$backup_file" + info "Created backup: $backup_file" + + # Remove OTEL environment variables from service file + sed -i '/Environment=JAVA_TOOL_OPTIONS.*opentelemetry/d' "$service_file" + sed -i '/Environment=OTEL_/d' "$service_file" + info "Removed OTEL environment variables from $service_file" + fi + + # Reload systemd and restart service + systemctl daemon-reload + # Guard the restart so a failure does not abort the script under "set -e" before the status check below can report it + systemctl restart "$service_name" || warn "Service $service_name restart command failed" + + if systemctl is-active --quiet "$service_name"; then + info "✓ Successfully removed OTEL instrumentation from $service_name" + else + err "Failed to restart service $service_name after removing instrumentation" + return 1 + fi +} + +# Function to remove OTEL instrumentation from a specific container +uninstrument_container() { + local container_name="$1" + + if [ -z "$container_name" ]; then + err "Container name is required" + return 1 + fi + + info "Removing OTEL instrumentation from container: $container_name" + + # Check if container exists and is running + if ! 
docker ps --format "{{.Names}}" | grep -q "^${container_name}$"; then + err "Container $container_name is not running or does not exist" + return 1 + fi + + # Get container configuration with better error handling + local image + if ! image=$(timeout 15 docker inspect "$container_name" --format '{{.Config.Image}}' 2>/dev/null); then + err "Failed to get image for container $container_name (timeout or error)" + return 1 + fi + if [ -z "$image" ]; then + err "Failed to get image for container $container_name (empty result)" + return 1 + fi + + info "Container image: $image" + + # Get port mapping with better error handling + info "Getting port mapping for container: $container_name" + local ports="" + local port_output + if port_output=$(timeout 15 docker port "$container_name" 2>&1); then + info "Port output: $port_output" + if [ -n "$port_output" ]; then + # Parse all port mappings, not just the first one + while IFS= read -r port_line; do + if [ -n "$port_line" ]; then + local host_port + local container_port + # Parse format like "9090/tcp -> 0.0.0.0:8082" + if [[ "$port_line" == *"->"* ]]; then + host_port=$(echo "$port_line" | awk -F: '{print $2}') + container_port=$(echo "$port_line" | awk '{print $1}' | awk -F/ '{print $1}') + if [ -n "$host_port" ] && [ -n "$container_port" ]; then + ports="$ports -p $host_port:$container_port" + info "Found port mapping: $host_port:$container_port" + fi + fi + fi + done <<< "$port_output" + fi + else + warn "Failed to get port mapping for container $container_name (timeout or error)" + fi + info "Final ports: $ports" + + # Get environment variables (excluding OTEL ones) with better error handling + info "Getting environment variables for container: $container_name" + local env_vars="" + local env_output + if env_output=$(timeout 15 docker inspect "$container_name" --format '{{range .Config.Env}}{{println .}}{{end}}' 2>&1); then + info "Environment output length: ${#env_output}" + if [ -n "$env_output" ]; then + 
env_vars=$(echo "$env_output" | grep -v "^$" | grep -v "OTEL_" | grep -v "JAVA_TOOL_OPTIONS" | while read -r env_var; do + if [ -n "$env_var" ]; then + echo "-e $env_var" + fi + done | tr '\n' ' ') + fi + else + warn "Failed to get environment variables for container $container_name (timeout or error)" + fi + info "Final env_vars: $env_vars" + + # Get volumes (excluding OTEL agent volume) with better error handling + info "Getting volume information for container: $container_name" + local volumes="" + local volume_output + if volume_output=$(timeout 15 docker inspect "$container_name" --format '{{range .Mounts}}{{print .Source ":" .Destination "\n"}}{{end}}' 2>&1); then + info "Volume output length: ${#volume_output}" + info "Volume output: $volume_output" + if [ -n "$volume_output" ]; then + # Process volumes line by line to avoid hanging + while IFS= read -r volume_line; do + if [ -n "$volume_line" ] && [[ "$volume_line" != *":/otel"* ]]; then + volumes="$volumes -v $volume_line" + info "Added volume: $volume_line" + fi + done <<< "$volume_output" + fi + else + warn "Failed to get volume information for container $container_name (timeout or error)" + fi + info "Final volumes: $volumes" + + # Get working directory + info "Getting working directory for container: $container_name" + local working_dir + if ! working_dir=$(timeout 15 docker inspect "$container_name" --format '{{.Config.WorkingDir}}' 2>&1); then + warn "Failed to get working directory for container $container_name (timeout or error)" + working_dir="" + fi + info "Working directory: $working_dir" + + # Get command + info "Getting command for container: $container_name" + local command + if ! 
command=$(timeout 15 docker inspect "$container_name" --format '{{.Config.Cmd}}' 2>&1 | tr -d '[]' | tr ',' ' '); then + warn "Failed to get command for container $container_name (timeout or error)" + command="" + fi + info "Command: $command" + + # Create new container name (remove -otel suffix if present) + local new_name="${container_name%-otel}" + + info "Container configuration:" + info " Image: $image" + info " Ports: $ports" + info " Working directory: $working_dir" + info " Command: $command" + + # Check if target container already exists and remove it + if docker ps -a --format "{{.Names}}" | grep -q "^${new_name}$"; then + info "Removing existing container: $new_name" + if ! docker rm "$new_name" >/dev/null 2>&1; then + warn "Failed to remove existing container $new_name, continuing anyway" + fi + fi + + # Stop the old container + info "Stopping container: $container_name" + if ! docker stop "$container_name" >/dev/null 2>&1; then + err "Failed to stop container $container_name" + return 1 + fi + + # Build docker run command without OTEL instrumentation + local docker_cmd="docker run -d" + + # Add ports if available + if [ -n "$ports" ]; then + docker_cmd="$docker_cmd $ports" + fi + + # Add environment variables (excluding OTEL ones) + if [ -n "$env_vars" ]; then + docker_cmd="$docker_cmd $env_vars" + fi + + # Add volumes if available + if [ -n "$volumes" ]; then + docker_cmd="$docker_cmd $volumes" + fi + + # Add working directory if available + if [ -n "$working_dir" ] && [ "$working_dir" != "/" ]; then + docker_cmd="$docker_cmd -w $working_dir" + fi + + # Add name and image + docker_cmd="$docker_cmd --name $new_name $image" + + # Add command if available + if [ -n "$command" ]; then + docker_cmd="$docker_cmd $command" + fi + + info "Starting container without OTEL instrumentation: $new_name" + info "Command: $docker_cmd" + + # Execute the command and capture output + local run_output + if ! 
run_output=$(eval "$docker_cmd" 2>&1); then + err "Failed to start new container: $run_output" + # Try to restart original container + info "Attempting to restart original container: $container_name" + if docker start "$container_name" >/dev/null 2>&1; then + info "✓ Restarted original container: $container_name" + else + err "Failed to restart original container: $container_name" + fi + return 1 + fi + + # Wait a moment for container to start + sleep 3 + + # Check if container is running + if docker ps --format "{{.Names}}" | grep -q "^${new_name}$"; then + info "✓ Successfully restarted container without OTEL instrumentation: $new_name" + # Remove old container + if docker rm "$container_name" >/dev/null 2>&1; then + info "✓ Removed old container: $container_name" + else + warn "Failed to remove old container: $container_name" + fi + else + err "Failed to restart container without OTEL instrumentation" + # Try to restart original container + info "Attempting to restart original container: $container_name" + if docker start "$container_name" >/dev/null 2>&1; then + info "✓ Restarted original container: $container_name" + else + err "Failed to restart original container: $container_name" + fi + return 1 + fi +} + +# Function to remove OTEL instrumentation from a specific Tomcat application +uninstrument_tomcat_app() { + local app_id="$1" + + if [ -z "$app_id" ]; then + err "Tomcat application ID is required" + return 1 + fi + + info "Removing OTEL instrumentation from Tomcat application: $app_id" + + # Parse the app_id to get components + IFS='-' read -r prefix pid suffix <<< "$app_id" + if [ "$prefix" != "tomcat" ]; then + err "Invalid Tomcat application ID format: $app_id" + return 1 + fi + + # Find the Tomcat app details + local tomcat_apps + mapfile -t tomcat_apps < <(detect_tomcat_apps) + + local app_info="" + for app in "${tomcat_apps[@]}"; do + if [[ "$app" == "$app_id|"* ]]; then + app_info="$app" + break + fi + done + + if [ -z "$app_info" ]; then + err 
"Tomcat application $app_id not found or not running" + return 1 + fi + + IFS='|' read -r found_app_id found_pid catalina_home catalina_base app_name app_path <<< "$app_info" + + # Check if already uninstrumented + local env_output + env_output=$(cat "/proc/$found_pid/environ" 2>/dev/null | tr '\0' '\n' | grep -E "OTEL_EXPORTER_OTLP_ENDPOINT" || echo "") + + if [ -z "$env_output" ]; then + info "Tomcat application $app_name is not instrumented" + return 0 + fi + + # Find and remove setenv.sh or restore backup + local setenv_file="" + if [ -n "$catalina_base" ] && [ -d "$catalina_base" ]; then + setenv_file="$catalina_base/bin/setenv.sh" + elif [ -n "$catalina_home" ] && [ -d "$catalina_home" ]; then + setenv_file="$catalina_home/bin/setenv.sh" + fi + + if [ -n "$setenv_file" ] && [ -f "$setenv_file" ]; then + # Check if this is our auto-generated setenv.sh + if grep -q "OpenTelemetry instrumentation for Tomcat (auto-generated)" "$setenv_file"; then + # Remove the auto-generated setenv.sh + rm -f "$setenv_file" + info "Removed auto-generated setenv.sh: $setenv_file" + + # Look for backup and restore it + local backup_dir="/tmp/tomcat-otel-backup-*" + for backup in $backup_dir; do + if [ -d "$backup" ] && [ -f "$backup/setenv.sh.backup" ]; then + cp "$backup/setenv.sh.backup" "$setenv_file" + chmod +x "$setenv_file" + info "Restored original setenv.sh from backup" + break + fi + done + else + # Remove OTEL-related lines from existing setenv.sh + local temp_file + temp_file=$(mktemp) + grep -v -E "OTEL_|javaagent.*opentelemetry|otel\.javaagent" "$setenv_file" > "$temp_file" + mv "$temp_file" "$setenv_file" + chmod +x "$setenv_file" + info "Removed OTEL configuration from setenv.sh: $setenv_file" + fi + fi + + # If this is a systemd service, also remove from service file + local service_name="" + for service in $(systemctl list-units --type=service --state=active --no-pager --no-legend | awk '{print $1}' | grep -E '\.(service)$'); do + local service_pid + 
service_pid=$(systemctl show "$service" --property=MainPID --no-pager 2>/dev/null | cut -d'=' -f2- || echo "") + if [ "$service_pid" = "$found_pid" ]; then + service_name="$service" + break + fi + done + + if [ -n "$service_name" ]; then + info "Found systemd service for Tomcat: $service_name" + uninstrument_service "$service_name" + else + # For standalone Tomcat, we need to restart it to pick up the changes + info "Tomcat is not managed by systemd, manual restart required" + info "Please restart Tomcat to remove OpenTelemetry instrumentation:" + info " - Stop: kill $found_pid" + info " - Start: $catalina_script start" + warn "Manual restart required for Tomcat application: $app_name" + fi + + info "✓ Successfully removed OTEL instrumentation from Tomcat application: $app_name" +} + + +# Function to install system-wide binary +install_binary() { + local binary_name="mw-instrument" + local binary_path="/usr/local/bin/$binary_name" + local script_path="$0" + + info "Installing system-wide binary: $binary_name" + + # Create a wrapper script that calls the original script + cat > "$binary_path" </dev/null)"; do + if [ -f "\$path" ]; then + ORIGINAL_SCRIPT="\$path" + break + fi +done + +if [ -z "\$ORIGINAL_SCRIPT" ]; then + echo "ERROR: Could not find the original instrumentation script" >&2 + echo "Please ensure the script is in one of these locations:" >&2 + echo " - $script_path" >&2 + echo " - /opt/middleware/scripts/auto-instrumentation/instrument-java-so-stable-tomcat.sh" >&2 + echo " - /usr/local/share/middleware/scripts/auto-instrumentation/instrument-java-so-stable-tomcat.sh" >&2 + echo " - /usr/local/bin/instrument-java-so-stable-tomcat.sh" >&2 + exit 1 +fi + +# Execute the original script with all arguments +exec "\$ORIGINAL_SCRIPT" "\$@" +EOF + + chmod +x "$binary_path" + info "✓ Installed binary: $binary_path" + info "You can now use: $binary_name " +} + +# Function to remove system-wide binary +uninstall_binary() { + local binary_name="mw-instrument" + 
local binary_path="/usr/local/bin/$binary_name" + + if [ -f "$binary_path" ]; then + info "Removing system-wide binary: $binary_name" + rm -f "$binary_path" + info "✓ Removed binary: $binary_path" + else + info "Binary $binary_name not found at $binary_path" + fi +} +# Function to remove OTEL instrumentation from all Java apps +uninstrument_all() { + info "Removing OTEL instrumentation from all Java applications..." + + local total_removed=0 + + # Remove from systemd services + info "=== Removing from Systemd Services ===" + local java_services + mapfile -t java_services < <(detect_java_services) + + for service in "${java_services[@]}"; do + local env_output + env_output=$(systemctl show "$service" --property=Environment --no-pager 2>/dev/null) + if echo "$env_output" | grep -q "OTEL_EXPORTER_OTLP_ENDPOINT"; then + info "Removing instrumentation from service: $service" + if uninstrument_service "$service"; then + total_removed=$((total_removed + 1)) + fi + else + info "Service $service is not instrumented, skipping" + fi + done + + # Remove from Docker containers + info "=== Removing from Docker Containers ===" + local java_containers + # Get containers and handle empty results properly + local java_containers_output + java_containers_output=$(detect_java_containers) + if [ -n "$java_containers_output" ]; then + mapfile -t java_containers <<< "$java_containers_output" + else + java_containers=() + fi + + for container in "${java_containers[@]}"; do + local has_endpoint + has_endpoint=$(docker exec "$container" env 2>/dev/null | grep -q "OTEL_EXPORTER_OTLP_ENDPOINT" && echo "yes" || echo "no") + if [ "$has_endpoint" = "yes" ]; then + info "Removing instrumentation from container: $container" + if uninstrument_container "$container"; then + total_removed=$((total_removed + 1)) + fi + else + info "Container $container is not instrumented, skipping" + fi + done + + # Remove from Tomcat applications + info "=== Removing from Tomcat Applications ===" + local tomcat_apps 
+ mapfile -t tomcat_apps < <(detect_tomcat_apps) + + for app_info in "${tomcat_apps[@]}"; do + IFS='|' read -r app_id pid catalina_home catalina_base app_name app_path <<< "$app_info" + local env_output + env_output=$(cat "/proc/$pid/environ" 2>/dev/null | tr '\0' '\n' | grep -E "OTEL_EXPORTER_OTLP_ENDPOINT" || echo "") + if [ -n "$env_output" ]; then + info "Removing instrumentation from Tomcat app: $app_name" + if uninstrument_tomcat_app "$app_id"; then + total_removed=$((total_removed + 1)) + fi + else + info "Tomcat app $app_name is not instrumented, skipping" + fi + done + + # Remove systemd drop-in + local systemd_dropin="/usr/lib/systemd/system.conf.d/00-otelinject-instrumentation.conf" + if [ -f "$systemd_dropin" ]; then + info "Removing systemd drop-in: $systemd_dropin" + rm -f "$systemd_dropin" + systemctl daemon-reload + fi + + # Remove profile snippet + local profile_snippet="/etc/profile.d/otelinject-instrumentation.sh" + if [ -f "$profile_snippet" ]; then + info "Removing profile snippet: $profile_snippet" + rm -f "$profile_snippet" + fi + + # Remove Docker wrapper + if [ -f "$DOCKER_WRAPPER_PATH" ]; then + info "Removing Docker wrapper: $DOCKER_WRAPPER_PATH" + rm -f "$DOCKER_WRAPPER_PATH" + fi + + info "✓ Successfully removed OTEL instrumentation from $total_removed applications" + info "✓ Removed systemd drop-in, profile snippet, and Docker wrapper" +} + +# Function to validate instrumentation +validate_instrumentation() { + info "Validating OpenTelemetry instrumentation..." 
+ + local total_apps=0 + local instrumented_apps=0 + + # Check systemd services + local java_services + mapfile -t java_services < <(detect_java_services) + total_apps=$((total_apps + ${#java_services[@]})) + + for service in "${java_services[@]}"; do + info "Checking service: $service" + + # Check if service has OTEL environment variables + local env_output + env_output=$(systemctl show "$service" --property=Environment --no-pager 2>/dev/null) + if echo "$env_output" | grep -q "OTEL_EXPORTER_OTLP_ENDPOINT"; then + info "✓ Service $service has OTEL configuration" + instrumented_apps=$((instrumented_apps + 1)) + else + warn "✗ Service $service missing OTEL configuration" + fi + done + + # Check Docker containers + local java_containers + # Get containers and handle empty results properly + local java_containers_output + java_containers_output=$(detect_java_containers) + if [ -n "$java_containers_output" ]; then + mapfile -t java_containers <<< "$java_containers_output" + else + java_containers=() + fi + total_apps=$((total_apps + ${#java_containers[@]})) + + for container in "${java_containers[@]}"; do + info "Checking container: $container" + + local has_endpoint + has_endpoint=$(docker exec "$container" env 2>/dev/null | grep -q "OTEL_EXPORTER_OTLP_ENDPOINT" && echo "yes" || echo "no") + if [ "$has_endpoint" = "yes" ]; then + info "✓ Container $container has OTEL configuration" + instrumented_apps=$((instrumented_apps + 1)) + else + warn "✗ Container $container missing OTEL configuration" + fi + done + + # Check Tomcat applications + local tomcat_apps + mapfile -t tomcat_apps < <(detect_tomcat_apps) + total_apps=$((total_apps + ${#tomcat_apps[@]})) + + for app_info in "${tomcat_apps[@]}"; do + IFS='|' read -r app_id pid catalina_home catalina_base app_name app_path <<< "$app_info" + info "Checking Tomcat app: $app_name (PID: $pid)" + + local env_output + env_output=$(cat "/proc/$pid/environ" 2>/dev/null | tr '\0' '\n' | grep -E "OTEL_EXPORTER_OTLP_ENDPOINT" || 
#######################################
# Download the Java agent jar into the OTEL directory.
#
# The jar is fetched into a temporary file next to the final location and
# renamed into place only after curl succeeds, so a failed or interrupted
# download can never leave a truncated jar at $AGENT_PATH (which later runs
# would otherwise accept as "already exists").
#
# Globals:
#   OTEL_DIR   (read) directory that holds the agent
#   AGENT_PATH (read) final path of the agent jar
#   AGENT_URL  (read) download URL
#   FORCE      (read) "1" forces a re-download; treated as "0" when unset
# Returns:
#   0 on success; reports through err (and returns 1) on failure.
#######################################
install_agent() {
  local tmp_agent

  info "Creating $OTEL_DIR ..."
  mkdir -p "$OTEL_DIR"
  chmod 0755 "$OTEL_DIR"

  # -s (exists and non-empty) instead of -f: a zero-byte leftover is not a usable agent.
  if [ -s "$AGENT_PATH" ] && [ "${FORCE:-0}" != "1" ]; then
    info "Agent already exists at $AGENT_PATH (use FORCE=1 to re-download)."
    return 0
  fi

  info "Downloading OpenTelemetry Java agent from: $AGENT_URL"

  # Same directory as the target so the final mv is a rename on one filesystem.
  if ! tmp_agent=$(mktemp "${AGENT_PATH}.XXXXXX"); then
    err "Failed to create temporary file in $OTEL_DIR"
    return 1
  fi

  if ! curl -fL --progress-bar -o "$tmp_agent" "$AGENT_URL"; then
    rm -f -- "$tmp_agent"
    err "Failed to download java agent"
    return 1
  fi

  chmod 0644 "$tmp_agent"
  mv -f -- "$tmp_agent" "$AGENT_PATH"
  info "Downloaded agent to $AGENT_PATH"
}
+# Usage: docker-run-otel [docker run args... ] image [cmd...] +OTEL_HOST_DIR="/usr/lib/opentelemetry" +OTEL_CONTAINER_PATH="/otel" +AGENT_NAME="opentelemetry-javaagent.jar" +AGENT_FULL_PATH="${OTEL_HOST_DIR}/${AGENT_NAME}" + +if [ ! -f "${AGENT_FULL_PATH}" ]; then + echo "Agent not found at ${AGENT_FULL_PATH}. Place agent there or run installer." >&2 + exit 1 +fi + +# We will add: +# - a read-only mount of the agent dir to /otel in the container +# - JAVA_TOOL_OPTIONS to include the javaagent path +# If caller already passes -e JAVA_TOOL_OPTIONS or -v containing AGENT path, we do not override. +args=() +skip_envset=0 +skip_mount=0 + +# quick parse for env or volume that mention JAVA_TOOL_OPTIONS or -v /otel (not bulletproof) +for i in "$@"; do + case "$i" in + *JAVA_TOOL_OPTIONS*) + skip_envset=1 + ;; + *":/otel"*) + skip_mount=1 + ;; + esac +done + +if [ "$skip_mount" -eq 0 ]; then + args+=( -v "${OTEL_HOST_DIR}:${OTEL_CONTAINER_PATH}:ro" ) +fi + +if [ "$skip_envset" -eq 0 ]; then + # Add JVM options to fix cgroup-related issues and ensure proper agent initialization + java_tool_options="-javaagent:${OTEL_CONTAINER_PATH}/${AGENT_NAME}" + java_tool_options="${java_tool_options} -Dotel.javaagent.enable.runtime.metrics=false" + java_tool_options="${java_tool_options} -Dotel.javaagent.enable.experimental.runtime.metrics=false" + java_tool_options="${java_tool_options} -XX:+DisableAttachMechanism" + java_tool_options="${java_tool_options} ${JAVA_TOOL_OPTIONS:-}" + args+=( -e "JAVA_TOOL_OPTIONS=${java_tool_options}" ) +fi + +# Add detached mode if not already specified +if ! 
echo "$@" | grep -q "\-d\|--detach"; then + args+=( -d ) +fi + +# Apply the same fallback logic for OTEL_EXPORTER_OTLP_ENDPOINT +# Priority: OTEL_EXPORTER_OTLP_ENDPOINT > MW_TARGET > fallback to localhost:4317 +echo "Docker wrapper debug: OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT:-not set}" +echo "Docker wrapper debug: MW_TARGET=${MW_TARGET:-not set}" +if [ -n "${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]; then + # Use explicitly set OTEL_EXPORTER_OTLP_ENDPOINT + FINAL_OTEL_ENDPOINT="${OTEL_EXPORTER_OTLP_ENDPOINT}" + echo "Docker wrapper: Using explicit OTEL_EXPORTER_OTLP_ENDPOINT=${FINAL_OTEL_ENDPOINT}" +elif [ -n "${MW_TARGET:-}" ]; then + # Use MW_TARGET if OTEL_EXPORTER_OTLP_ENDPOINT is not set + FINAL_OTEL_ENDPOINT="${MW_TARGET}" + echo "Docker wrapper: Using MW_TARGET fallback=${FINAL_OTEL_ENDPOINT}" +else + # Fallback to localhost:4317 + FINAL_OTEL_ENDPOINT="http://localhost:4317" + echo "Docker wrapper: Using localhost fallback=${FINAL_OTEL_ENDPOINT}" +fi + +# Apply the same fallback logic for OTEL_EXPORTER_OTLP_HEADERS +# Priority: OTEL_EXPORTER_OTLP_HEADERS > MW_API_KEY > fallback to default +if [ -n "${OTEL_EXPORTER_OTLP_HEADERS:-}" ]; then + # Use explicitly set OTEL_EXPORTER_OTLP_HEADERS + FINAL_OTEL_HEADERS="${OTEL_EXPORTER_OTLP_HEADERS}" +elif [ -n "${MW_API_KEY:-}" ]; then + # Use MW_API_KEY if OTEL_EXPORTER_OTLP_HEADERS is not set + FINAL_OTEL_HEADERS="authorization=${MW_API_KEY}" +else + # Fallback to default + FINAL_OTEL_HEADERS="authorization=5xrocjh0p5ir233mvi34dvl5bepnyqri3rqb" +fi + +# Add OpenTelemetry environment variables +args+=( -e "OTEL_EXPORTER_OTLP_ENDPOINT=${FINAL_OTEL_ENDPOINT}" ) +args+=( -e "OTEL_EXPORTER_OTLP_HEADERS=${FINAL_OTEL_HEADERS}" ) +args+=( -e "OTEL_TRACES_EXPORTER=${OTEL_TRACES_EXPORTER:-otlp}" ) +args+=( -e "OTEL_METRICS_EXPORTER=${OTEL_METRICS_EXPORTER:-otlp}" ) +args+=( -e "OTEL_LOGS_EXPORTER=${OTEL_LOGS_EXPORTER:-otlp}" ) + +# Add JVM options to fix cgroup-related issues with OpenTelemetry agent +args+=( -e 
#######################################
# Kubernetes patcher: inject the Java agent into Java-looking workloads.
# Requires kubectl (configured for the target cluster) and jq.
#
# For every Deployment/StatefulSet/DaemonSet in the selected namespace(s)
# whose pod template has at least one container that looks like Java (image
# name matches java|openjdk|jdk|jre, or command/args mention "java"):
#   - ensure an emptyDir volume "otel-agent" exists
#   - ensure an init container "otel-download" exists that fetches the agent
#   - for each Java-like container, mount the volume at /otel and make
#     JAVA_TOOL_OPTIONS start with the -javaagent flag
# The rewrite is idempotent: re-running does not add a second volumeMount or
# prepend the -javaagent flag again.
#
# Globals:
#   K8S_NAMESPACE (read) a namespace name, or "all"
#   DRY_RUN       (read) "1" prints the patched object instead of applying it
#   AGENT_URL     (read) URL the init container downloads the agent from
#######################################
patch_k8s_controllers() {
  local ns_list ns kind names name obj is_java patch
  local java_like_def patch_prog

  if ! command -v kubectl >/dev/null 2>&1; then
    err "kubectl required for Kubernetes patching. Install/configure kubectl to continue."
  fi

  # Shared jq predicate, evaluated with "." bound to a container object.
  # Every operand is parenthesised: jq's pipe binds looser than "or", so an
  # unparenthesised `.image | test(..) or (.command ..)` would evaluate
  # `.command` against the image *string* and fail for non-matching images.
  java_like_def='
    def java_like:
      ((.image // "") | test("java|openjdk|jdk|jre"; "i"))
      or ((.command // []) | join(" ") | test("java"; "i"))
      or ((.args // []) | join(" ") | test("java"; "i"));
  '

  patch_prog='
    def agent_flag: "-javaagent:/otel/opentelemetry-javaagent.jar";
    def otel_volume: {"name":"otel-agent","emptyDir":{}};
    def otel_mount: {"name":"otel-agent","mountPath":"/otel"};
    def otel_init:
      {
        "name":"otel-download",
        "image":"curlimages/curl:latest",
        "command":["sh","-c"],
        "args":["set -e; mkdir -p /otel; curl -fsSL -o /otel/opentelemetry-javaagent.jar \($AGENT_URL | @sh)"],
        "volumeMounts":[otel_mount]
      };

    # Append $item to a (possibly null) list unless an entry with that name exists.
    def ensure_named($item):
      (. // [])
      | if any(.[]; .name == $item.name) then . else . + [$item] end;

    # Add JAVA_TOOL_OPTIONS, or prepend the agent flag to an existing value
    # unless that value already carries it.
    def ensure_java_tool_options:
      (. // [])
      | if any(.[]; .name == "JAVA_TOOL_OPTIONS") then
          map(
            if .name == "JAVA_TOOL_OPTIONS"
               and (((.value // "") | contains(agent_flag)) | not)
            then .value = (agent_flag + " " + (.value // ""))
            else .
            end
          )
        else
          . + [{"name":"JAVA_TOOL_OPTIONS","value":agent_flag}]
        end;

    .spec.template.spec.volumes |= ensure_named(otel_volume)
    | .spec.template.spec.initContainers |= ensure_named(otel_init)
    | .spec.template.spec.containers |= map(
        if java_like then
          (.volumeMounts |= ensure_named(otel_mount))
          | (.env |= ensure_java_tool_options)
        else .
        end
      )
  '

  if [ "$K8S_NAMESPACE" = "all" ]; then
    ns_list=$(kubectl get ns -o jsonpath='{.items[*].metadata.name}')
  else
    ns_list="$K8S_NAMESPACE"
  fi

  # Word-splitting of $ns_list / $names is intentional: Kubernetes object
  # names cannot contain whitespace.
  for ns in $ns_list; do
    info "Scanning namespace: $ns"
    for kind in deployments statefulsets daemonsets; do
      info "Checking ${kind} in ${ns}..."
      names=$(kubectl -n "$ns" get "$kind" \
        -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || echo "")
      for name in $names; do
        info "Inspecting $kind/$name ..."
        obj=$(kubectl -n "$ns" get "$kind" "$name" -o json)

        # Decide whether any container looks like Java (image or command/args).
        is_java=$(jq -r "${java_like_def}"'
          .spec.template.spec.containers | map(java_like) | any(. == true)
        ' <<<"$obj")
        if [ "$is_java" != "true" ]; then
          info "Skipping $kind/$name (no Java-like container detected)."
          continue
        fi

        info "Preparing patch for $kind/$name ..."
        patch=$(jq --arg AGENT_URL "$AGENT_URL" "${java_like_def}${patch_prog}" <<<"$obj")

        # Apply patch (dry-run option available)
        if [ "${DRY_RUN:-0}" = "1" ]; then
          echo "DRY-RUN patch for $kind/$name in $ns:"
          echo "$patch" | jq .
        else
          info "Applying patched $kind/$name..."
          echo "$patch" | kubectl -n "$ns" apply -f -
          info "Patched $kind/$name applied."
        fi
      done
    done
  done
}
#######################################
# Fail unless a non-empty value was supplied for a required argument.
# Arguments:
#   $1 - error message to report
#   $2 - the argument value (may be empty or missing)
#######################################
_require_arg() {
  if [ -z "${2:-}" ]; then
    err "$1"
    # Reached only if err returns instead of exiting.
    usage
    exit 1
  fi
}

#######################################
# Command dispatcher.
# Arguments:
#   $1 - sub-command (empty: install the binary if missing, else show help)
#   $2 - target name for the instrument-*/uninstrument-* sub-commands
# Returns:
#   Exits 2 on an unknown command; otherwise the status of the handler.
#######################################
main() {
  # Default both positionals: the script runs under `set -u`, where a bare
  # "$2" aborts with "unbound variable" before the friendly error is shown.
  local cmd="${1:-}"
  local target="${2:-}"

  case "$cmd" in
    install-binary) install_binary ;;
    uninstall-binary) uninstall_binary ;;
    install-agent) install_agent ;;
    install-host)
      install_agent
      install_systemd_dropin
      install_profile_snippet
      update_java_services
      update_docker_containers
      update_tomcat_apps
      validate_instrumentation
      ;;
    docker-wrapper)
      install_agent
      install_docker_wrapper
      ;;
    patch-k8s) patch_k8s_controllers ;;
    update-services) update_java_services ;;
    update-docker) update_docker_containers ;;
    update-tomcat) update_tomcat_apps ;;
    validate) validate_instrumentation ;;
    all | instrument-all)
      # Both sub-commands run the same full sequence.
      install_agent
      install_systemd_dropin
      install_profile_snippet
      install_docker_wrapper
      update_java_services
      update_docker_containers
      update_tomcat_apps
      validate_instrumentation
      ;;
    list-instrumented) list_instrumented_apps ;;
    instrument-service)
      _require_arg "Service name is required for instrument-service" "$target"
      instrument_service "$target"
      ;;
    instrument-container)
      _require_arg "Container name is required for instrument-container" "$target"
      instrument_container "$target"
      ;;
    instrument-tomcat)
      _require_arg "Tomcat application ID is required for instrument-tomcat" "$target"
      instrument_tomcat_app "$target"
      ;;
    uninstrument-all) uninstrument_all ;;
    uninstrument-service)
      _require_arg "Service name is required for uninstrument-service" "$target"
      uninstrument_service "$target"
      ;;
    uninstrument-container)
      _require_arg "Container name is required for uninstrument-container" "$target"
      uninstrument_container "$target"
      ;;
    uninstrument-tomcat)
      _require_arg "Tomcat application ID is required for uninstrument-tomcat" "$target"
      uninstrument_tomcat_app "$target"
      ;;
    "")
      # Default behavior: install-binary if not installed, otherwise show help
      if [ -f "/usr/local/bin/mw-instrument" ]; then
        usage
      else
        install_binary
      fi
      ;;
    help | -h | --help) usage ;;
    *)
      printf 'Unknown command: %s\n' "$cmd" >&2
      usage
      exit 2
      ;;
  esac
}

main "$@"