Skip to content
This repository was archived by the owner on Mar 6, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions collector/hive/examples/compose/collector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# OpenTelemetry Collector configuration for the Hive example.
# Scrapes the Prometheus endpoints served by the JMX exporter java agent and
# forwards the metrics to the console and to an OTLP backend.
receivers:
  prometheus:
    config:
      scrape_configs:
        - job_name: 'hive'
          scrape_interval: 10s
          static_configs:
            - targets:
                # HiveServer2 JMX exporter port.
                - 'hive-server:9008'
                # Metastore JMX exporter port.
                # NOTE(review): the compose file runs the metastore in the
                # separate hive-metastore container; confirm something is
                # actually listening on hive-server:9028.
                - 'hive-server:9028'

processors:
  batch:

exporters:
  logging:
    # `verbosity` replaces the deprecated `loglevel` setting;
    # `detailed` is the equivalent of the former `debug` level.
    verbosity: detailed
  otlp:
    endpoint: ingest.lightstep.com:443
    # Headers are a mapping of header name to value, not a list.
    headers:
      lightstep-access-token: "${LS_ACCESS_TOKEN}"

service:
  pipelines:
    metrics:
      receivers: [prometheus]
      processors: [batch]
      exporters: [logging, otlp]
94 changes: 94 additions & 0 deletions collector/hive/examples/compose/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Docker Compose stack for the Hive example: HDFS (namenode + datanode),
# the Hive metastore with its PostgreSQL database, HiveServer2 built with the
# JMX exporter agent, and an OpenTelemetry Collector that scrapes it.
version: '3.7'
services:

  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
    container_name: namenode
    volumes:
      - ./hdfs/namenode:/hadoop/dfs/name
    environment:
      CLUSTER_NAME: hive
    env_file:
      - ./hadoop-hive.env
    ports:
      - "50070:50070"
    networks:
      - integrations

  datanode:
    image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
    container_name: datanode
    volumes:
      - ./hdfs/datanode:/hadoop/dfs/data
    env_file:
      - ./hadoop-hive.env
    environment:
      SERVICE_PRECONDITION: "namenode:50070"
    depends_on:
      - namenode
    ports:
      - "50075:50075"
    networks:
      - integrations

  # HiveServer2, built from ./hive-server so the image carries the
  # Prometheus JMX exporter java agent.
  hive-server:
    container_name: hive-server
    build:
      context: ./hive-server
    env_file:
      - ./hadoop-hive.env
    environment:
      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
      SERVICE_PRECONDITION: "hive-metastore:9083"
    depends_on:
      - hive-metastore
    ports:
      - "10000:10000"
      - "8080:8080"
      # 9005 / 9025: JMX remote ports; 9008 / 9028: JMX exporter HTTP ports
      # (see hive-server/Dockerfile).
      - "9005:9005"
      - "9008:9008"
      - "9025:9025"
      - "9028:9028"
    networks:
      - integrations

  hive-metastore:
    image: bde2020/hive:2.3.2-postgresql-metastore
    container_name: hive-metastore
    env_file:
      - ./hadoop-hive.env
    command: /opt/hive/bin/hive --service metastore
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 hive-metastore-postgresql:5432"
    depends_on:
      - hive-metastore-postgresql
    ports:
      - "9083:9083"
    networks:
      - integrations

  hive-metastore-postgresql:
    image: bde2020/hive-metastore-postgresql:2.3.0
    container_name: hive-metastore-postgresql
    volumes:
      - ./metastore-postgresql/postgresql/data:/var/lib/postgresql/data
    depends_on:
      - datanode
    networks:
      - integrations

  otel-collector:
    container_name: otel-collector
    image: otel/opentelemetry-collector-contrib:0.81.0
    hostname: otel-collector
    restart: always
    command: ["--config=/conf/collector.yaml"]
    volumes:
      # The collector only reads its configuration, so mount it read-only.
      - ./collector.yaml:/conf/collector.yaml:ro
    environment:
      # Passed through from the host shell / .env file; referenced by the
      # otlp exporter headers in collector.yaml.
      LS_ACCESS_TOKEN: "${LS_ACCESS_TOKEN}"
    networks:
      - integrations

networks:
  integrations:
29 changes: 29 additions & 0 deletions collector/hive/examples/compose/hadoop-hive.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Shared environment for the Hadoop/Hive containers; loaded through `env_file`
# by the services in docker-compose.yaml.
# NOTE(review): the *_CONF_ prefixes are presumably translated into the matching
# *-site.xml properties by the bde2020 image entrypoint - confirm.

# Hive metastore database connection (JDBC URL, driver, credentials) and the
# Thrift URI on which the metastore service is reached.
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083

# Hadoop core settings: default filesystem, static HTTP user and the
# proxy-user rules for `hue`.
CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*

# HDFS settings: WebHDFS on, permission checks off.
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false

# YARN settings.
# NOTE(review): these point at `resourcemanager` and `historyserver` hosts, but
# no such services are defined in the accompanying docker-compose.yaml -
# confirm whether this section is needed for the example.
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
27 changes: 27 additions & 0 deletions collector/hive/examples/compose/hive-server/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Stage 1: download the Prometheus JMX exporter java agent from Maven Central.
FROM curlimages/curl:7.82.0 AS curler
ARG JMX_PROM_VERSION=0.20.0
# Presumably needed so curl may write the jar to the filesystem root.
USER root

# --fail aborts the build on an HTTP error (e.g. a 404 for a mistyped version)
# instead of silently saving the error page as the jar.
RUN curl --fail --silent --show-error --location \
    --output /jmx_prometheus_javaagent.jar \
    "https://repo.maven.apache.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_PROM_VERSION}/jmx_prometheus_javaagent-${JMX_PROM_VERSION}.jar"


# Stage 2: the runtime image, based on the stock Hive image.
FROM bde2020/hive:2.3.2-postgresql-metastore

# Copy the jmx_prometheus_javaagent jar and jmx_exporter.yaml config
COPY --from=curler /jmx_prometheus_javaagent.jar /opt/hive/
COPY jmx_exporter.yaml /opt/hive/conf/

# Append script to hive-env.sh
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One way to clean this up would be to put this in a script in the build context and just append a call to that script in hive-env.sh. Take 30 minutes max to clean that up for readability. If it takes longer, we have people who specialize in that for you ;)

# Extend hive-env.sh so that, depending on $SERVICE, the JVM is started with
# JMX remote enabled and the Prometheus JMX exporter agent attached:
#   hiveserver2 -> JMX port 9005, exporter port 9008 (via HADOOP_CLIENT_OPTS)
#   metastore   -> JMX port 9025, exporter port 9028 (via HADOOP_OPTS)
# Each printf argument becomes one line appended to the file.
RUN printf '%s\n' \
    'if [ "$SERVICE" = "hiveserver2" ]; then' \
    ' export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=9005 -Dcom.sun.management.jmxremote.local.only=false -Dcom.sun.management.jmxremote.authenticate=false -javaagent:/opt/hive/jmx_prometheus_javaagent.jar=9008:/opt/hive/conf/jmx_exporter.yaml -Dcom.sun.management.jmxremote.ssl=false"' \
    'fi' \
    'if [ "$SERVICE" = "metastore" ]; then' \
    ' export HADOOP_OPTS="$HADOOP_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=9025 -Dcom.sun.management.jmxremote.local.only=false -Dcom.sun.management.jmxremote.authenticate=false -javaagent:/opt/hive/jmx_prometheus_javaagent.jar=9028:/opt/hive/conf/jmx_exporter.yaml -Dcom.sun.management.jmxremote.ssl=false"' \
    'fi' \
    >> /opt/hive/conf/hive-env.sh

# HiveServer2 (10000), 8080, and the JMX / JMX-exporter ports configured above.
EXPOSE 10000 8080 9005 9008 9025 9028

# Start HiveServer2 by default.
CMD ["/opt/hive/bin/hive", "--service", "hiveserver2"]
12 changes: 12 additions & 0 deletions collector/hive/examples/compose/hive-server/jmx_exporter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# Prometheus JMX exporter rules for Hive.
# Each rule matches MBean attributes with `pattern`; `$N` in `name` refers to
# the N-th capture group of that pattern, so every referenced group must exist.
lowercaseOutputName: true
lowercaseOutputLabelNames: true
rules:
  # HiveServer2 beans: the bean name is captured and used as the metric suffix.
  - pattern: 'org.apache.hadoop.hive<type=HiveServer2, name=(.+)><>Value'
    name: hive_jmx_$1
    type: COUNTER
  # Metastore beans: same scheme under a metastore-specific prefix.
  - pattern: 'org.apache.hadoop.hive<type=HiveMetaStore, name=(.+)><>Value'
    name: hive_metastore_jmx_$1
    type: COUNTER
  # Metrics-system beans: exported as hive_<bean name>.
  - pattern: 'org.apache.hadoop.hive<type=MetricsSystem, name=(.+)><>Value'
    name: hive_$1
45 changes: 45 additions & 0 deletions collector/hive/examples/compose/metrics.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
jmx_config_reload_failure_created,"Number of times configuration have failed to be reloaded.",,Sum
jmx_config_reload_failure_total,"Number of times configuration have failed to be reloaded.",,Sum
jmx_config_reload_success_created,"Number of times configuration have successfully been reloaded.",,Sum
jmx_config_reload_success_total,"Number of times configuration have successfully been reloaded.",,Sum
jmx_exporter_build_info,"A metric with a constant '1' value labeled with the version of the JMX exporter.",,Gauge
jmx_scrape_cached_beans,"Number of beans with their matching rule cached",,Gauge
jmx_scrape_duration_seconds,"Time this JMX scrape took, in seconds.",,Gauge
jmx_scrape_error,"Non-zero if this scrape failed.",,Gauge
jvm,"VM version info",,Sum
jvm_buffer_pool_capacity_bytes,"Bytes capacity of a given JVM buffer pool.",,Gauge
jvm_buffer_pool_used_buffers,"Used buffers of a given JVM buffer pool.",,Gauge
jvm_buffer_pool_used_bytes,"Used bytes of a given JVM buffer pool.",,Gauge
jvm_classes_currently_loaded,"The number of classes that are currently loaded in the JVM",,Gauge
jvm_classes_loaded_total,"The total number of classes that have been loaded since the JVM has started execution",,Sum
jvm_classes_unloaded_total,"The total number of classes that have been unloaded since the JVM has started execution",,Sum
jvm_gc_collection_seconds,"Time spent in a given JVM garbage collector in seconds.",,Summary
jvm_memory_bytes_committed,"Committed (bytes) of a given JVM memory area.",,Gauge
jvm_memory_bytes_init,"Initial bytes of a given JVM memory area.",,Gauge
jvm_memory_bytes_max,"Max (bytes) of a given JVM memory area.",,Gauge
jvm_memory_bytes_used,"Used bytes of a given JVM memory area.",,Gauge
jvm_memory_objects_pending_finalization,"The number of objects waiting in the finalizer queue.",,Gauge
jvm_memory_pool_allocated_bytes_created,"Total bytes allocated in a given JVM memory pool. Only updated after GC, not continuously.",,Sum
jvm_memory_pool_allocated_bytes_total,"Total bytes allocated in a given JVM memory pool. Only updated after GC, not continuously.",,Sum
jvm_memory_pool_bytes_committed,"Committed bytes of a given JVM memory pool.",,Gauge
jvm_memory_pool_bytes_init,"Initial bytes of a given JVM memory pool.",,Gauge
jvm_memory_pool_bytes_max,"Max bytes of a given JVM memory pool.",,Gauge
jvm_memory_pool_bytes_used,"Used bytes of a given JVM memory pool.",,Gauge
jvm_memory_pool_collection_committed_bytes,"Committed after last collection bytes of a given JVM memory pool.",,Gauge
jvm_memory_pool_collection_init_bytes,"Initial after last collection bytes of a given JVM memory pool.",,Gauge
jvm_memory_pool_collection_max_bytes,"Max bytes after last collection of a given JVM memory pool.",,Gauge
jvm_memory_pool_collection_used_bytes,"Used bytes after last collection of a given JVM memory pool.",,Gauge
jvm_threads_current,"Current thread count of a JVM",,Gauge
jvm_threads_daemon,"Daemon thread count of a JVM",,Gauge
jvm_threads_deadlocked,"Cycles of JVM-threads that are in deadlock waiting to acquire object monitors or ownable synchronizers",,Gauge
jvm_threads_deadlocked_monitor,"Cycles of JVM-threads that are in deadlock waiting to acquire object monitors",,Gauge
jvm_threads_peak,"Peak thread count of a JVM",,Gauge
jvm_threads_started_total,"Started thread count of a JVM",,Sum
jvm_threads_state,"Current count of threads by state",,Gauge
process_cpu_seconds_total,"Total user and system CPU time spent in seconds.",,Sum
process_max_fds,"Maximum number of open file descriptors.",,Gauge
process_open_fds,"Number of open file descriptors.",,Gauge
process_resident_memory_bytes,"Resident memory size in bytes.",,Gauge
process_start_time_seconds,"Start time of the process since unix epoch in seconds.",,Gauge
process_virtual_memory_bytes,"Virtual memory size in bytes.",,Gauge
up,"The scraping was successful",,Gauge