# replicante.example.yaml

# The section below is for the API interface configuration.
api:
  # The network interface and port to bind the API server onto.
  #
  # By default, only bind to the loopback interface.
  # Production environments should place an HTTPS proxy in front of the API.
  bind: '127.0.0.1:16016'

  # The health checks refresh frequency (in seconds).
  #
  # Health checks are an internal indicator of the ability of replicante processes
  # to interact with external dependencies.
  # Since they can be expensive and have a small cost to each dependency they are run
  # periodically in the background with the latest result made visible through the API
  # and metrics.
  #
  # This option configures the amount of time (in seconds) to wait between checks.
  healthcheck_refresh: 10

  # The number of request handling threads.
  #
  # By default this is 8 * number of CPUs.
  threads_count: null

  # API server timeouts.
  timeouts:
    # Controls the timeout, in seconds, for keep alive connections.
    #
    # NOTE: Setting this to null (~) will turn off keep alive.
    keep_alive: 5

    # Controls the timeout, in seconds, for reads on existing connections.
    #
    # NOTE: Setting this to null (~) will turn off read timeouts.
    read: 5

    # Controls the timeout, in seconds, for writes on existing connections.
    #
    # NOTE: Setting this to null (~) will turn off write timeouts.
    write: 1

  # TLS (for HTTPS) certificates configuration.
  #
  # By default, the server starts without TLS enabled and serves requests over HTTP.
  # This is an insecure channel and anyone could be making requests.
  #
  # HTTPS without mutual authentication can also be enabled by providing the server
  # with a TLS certificate and a TLS private key.
  #
  # To enable TLS, remove the `null` value (leaving `tls:`) and uncomment the
  # options needed below so that `tls` becomes a mapping.
  tls: null
    # Optional path to a PEM bundle of trusted CAs for client authentication.
    #clients_ca_bundle: null

    # Path to a PEM file with the server's PUBLIC certificate.
    #
    # Required if the `tls` option is set (not null).
    #server_cert: '/path/to/certs/server-cert.pem'

    # Path to a PEM file with the server's PRIVATE key.
    #
    # Required if the `tls` option is set (not null).
    #server_key: '/path/to/certs/server-key.pem'

  # Enable/disable entire API trees.
  #
  # Useful for advanced operators that wish to control access to experimental or legacy
  # API versions or reduce the attack surface by removing endpoints that are not needed.
  #
  # Example use cases are:
  #
  #   * Upgrade prep: testing new API versions while having a quick rollback plan.
  #   * Controlled rollout: be prepared for when versions are no longer supported.
  #   * Disable unstable/experimental APIs: to ensure integrated tools only use stable APIs.
  trees:
    # Enable/disable the introspection APIs.
    #
    # The introspection API is very useful to gain insight into the system.
    # It can also be used to monitor the system for failures or performance degradation.
    introspect: true

    # Enable/disable the unstable APIs.
    #
    # The unstable APIs are for endpoints in the early development cycle
    # where the attributes and parameters can change a lot and often.
    unstable: true


# Components enabling configuration.
#
# For Replicante to function correctly ALL components need to be running
# (unless explicitly stated otherwise).
#
# By default, all required components are enabled on all nodes.
#
# While this allows the configuration to stay simple (all nodes in the cluster are identical),
# more advanced configuration options based around specialised node roles can be used to
# scale to larger clusters in a more efficient way (because roles can be scaled based on
# each role's needs rather than scaling all roles based on the bottleneck).
#
# # Exceptions to the ALL components rule
# Some components are not required for essential operations.
# These include new features being beta-tested or features that you may not need.
#
# For example the `grafana` component can be used to enable grafana endpoints annotations.
# If these are not needed, the endpoint can be left disabled.
#
# # Advanced configurations
# Advanced configurations are aimed at large clusters or other specialised needs.
# By default, start with all components enabled on all nodes and move to roles if and when needed.
#
# ## Dedicated Web/API nodes
# The first thing to split aside are the API and Web nodes.
# API/Web nodes are the ones that receive requests from users and have less
# predictable resource requirements.
#
# Separating them from other components means that:
#
#   * Users can use all resources without background tasks impacting them.
#   * Other components are not impacted by user requests.
#
#     # For the API/Web nodes
#     components:
#       _default: false
#       grafana: true  # Optional
#       webui: true
#
#     # For all other nodes
#     components:
#       _default: true
#       grafana: false
#       webui: false
#
# ## Fully dedicated components
# If more control is needed, further roles can be defined by disabling components in the
# general case (named "all other nodes" in the example above) and enabling them on their own
# in a new, specialised, role.
components:
  # Default status for all components that are not explicitly configured.
  _default: true

  # Enable Replicante Core API endpoints.
  core_api: null

  # Enable discovery periodic scheduler.
  discovery: null

  # Enable Grafana Annotations API endpoints (optional).
  grafana: null

  # Enable cluster orchestrator periodic scheduler.
  orchestrator: null

  # Enable the update checker (optional).
  #
  # The check is performed only once in the background as the process starts.
  # If a new version is available a notice will be logged and captured as a sentry event.
  #
  # This component is disabled by default and does not follow the `_default` attribute
  # to ensure user privacy is respected (HTTP requests can be tracked).
  # If this option is not enabled, you will have to make sure you keep replicante up to date.
  update_checker: false

  # Enable the view DB updater.
  viewupdater: null

  # Enable the WebUI API endpoints (optional).
  #
  # Without them, the nodejs WebUI will NOT work.
  webui: null

  # Enable the task workers component to process tasks.
  workers: null


# Distributed coordinator configuration options.
coordinator:
  # The distributed coordination system to use for node coordination.
  #
  # !!! DO NOT CHANGE AFTER INITIAL CONFIGURATION !!!
  # This option is to allow users to choose a supported software that best fits
  # their use and environment.
  #
  # To change this option, follow the instructions to rebuild your system:
  #   https://www.replicante.io/docs/manual/docs/upgrades-fresh/
  #
  # Available options:
  #
  #   * 'zookeeper' (recommended)
  backend: 'zookeeper'

  # User specified key/value map attached to node IDs.
  #
  # This data is not used by the system and is provided to help users debug
  # and otherwise label nodes for whatever needs they may have.
  node_attributes: {}

  # Any backend-specific option is set here.
  # The available options vary from backend to backend and are documented below.
  #
  # Zookeeper options:
  options:
    # Zookeeper ensemble connection string.
    ensemble: 'localhost:2181/replicante'

    # Zookeeper background cleaner configuration.
    #
    # The background cleaner thread ensures that znodes no longer needed are removed
    # from zookeeper to keep the number of znodes to a minimum.
    cleanup:
      # Maximum number of nodes to delete in a single cleanup cycle.
      limit: 1000

      # Seconds to wait between cleanup cycles.
      interval: 3600

      # Number of cycles before this node will re-run an election, 0 to disable re-runs.
      #
      # Having the system re-run elections continuously ensures that failover procedures are
      # exercised constantly and not just in case of errors.
      # You do not want to discover that failover does not work when a primary fails
      # and nothing picks up after it.
      #
      # With the default interval (3600 seconds), this default (6 cycles) will cause
      # a re-election about every 6 hours.
      term: 6

    # Zookeeper session timeout (in seconds).
    timeout: 10


# The section below is for DiscoverySettings scheduling configuration.
#
# Discovery is the way the clusters that should be managed are found.
discovery:
  # Interval (in seconds) to wait between checks for pending DiscoverySettings to schedule.
  interval: 15

  # Number of cycles before this node will re-run an election, 0 to disable re-runs.
  #
  # Having the system re-run elections continuously ensures that failover procedures are
  # exercised constantly and not just in case of errors.
  # You do not want to discover that failover does not work when a primary fails
  # and nothing picks up after it.
  #
  # With the default interval (15 seconds), 43200 cycles amount to about 7.5 days
  # between re-elections (43200 * 15 seconds).
  # NOTE(review): this comment used to state "about every 3 hours", which would be 720 cycles
  # at a 15 seconds interval; confirm whether the value or the description is the intended one.
  term: 43200


# Configuration of the streaming platform events are emitted to.
events:
  # The backend to emit events to.
  #
  # !!! DO NOT CHANGE AFTER INITIAL CONFIGURATION !!!
  # This option is to allow users to choose a supported software that best fits
  # their use and environment.
  #
  # To change this option, follow the instructions to rebuild your system:
  #   https://www.replicante.io/docs/manual/docs/upgrades-fresh/
  #
  # Available options are:
  #
  #   * 'kafka' (recommended)
  backend: kafka

  # Any backend-specific option is set here.
  # The available options vary from backend to backend and are documented below.
  #
  # Kafka options:
  options:
    # Acknowledgement level for published messages.
    #
    # Possible values are:
    #
    #   * 'all': all in-sync-replicas for the partition published to must ack messages
    #            (default; required for consistency; see kafka documentation for details).
    #   * 'leader_only': only the leader of the partition published to must ack messages.
    #   * 'none': no acknowledgements are required for produced messages.
    ack_level: 'all'

    # (required) Comma separated list of seed brokers.
    brokers: 'localhost:9092'

    # Client session keepalive heartbeat interval (in milliseconds).
    heartbeat: 3000

    # Kafka-specific timeout options.
    #
    # NOTE(review): the `request` and `session` descriptions below mention tasks and workers,
    # the same wording used in the `tasks` section; for events they presumably apply to
    # published messages and to this client - confirm.
    timeouts:
      # Timeout (in milliseconds) for non-topic requests.
      metadata: 60000

      # Timeout (in milliseconds) for tasks to be acknowledged.
      request: 5000

      # Timeout (in milliseconds) after which workers are presumed dead by the brokers.
      session: 10000

      # Default timeout (in milliseconds) for network requests.
      socket: 60000

    # Prefix for the stream's topic name.
    topic_prefix: 'stream'


# The section below is for logging configuration.
logging:
  # Flush logs asynchronously.
  #
  # Pro:
  #     Async log flushing is more efficient as processes
  #     are not blocked waiting for logging backends to complete.
  #
  # Con:
  #     If the process crashes, logs in the buffer may be lost.
  #
  # Recommendation:
  #     Keep async logging enabled unless replicante is crashing
  #     and the logs don't have any indications of why.
  #
  #     Async logging may also be disabled in testing, debugging,
  #     or developing environments.
  async: true

  # Logging backend configuration.
  backend:
    # The backend to send logs to.
    # This option also determines the format and destination of logs.
    #
    # Available options:
    #
    #   * 'json': prints JSON formatted logs to standard output.
    #   * 'journald': sends logs to systemd journal (if enabled at compile time).
    name: json

    # Any backend-specific option is set here.
    # The available options vary from backend to backend and are documented below.
    #
    # *** None available at this time ***
    #options:

  # Include replicante's version information in every log record.
  #
  # This feature can be helpful when many different versions are logging to
  # the same central location as well as while performing upgrades.
  #
  # For general reporting on which versions are in use, see the `replicore_info` metric.
  include_version: false

  # The minimum logging level.
  #
  # Available options:
  #
  #   * 'critical'
  #   * 'error'
  #   * 'warning'
  #   * 'info'
  #   * 'debug' (only available in debug builds)
  level: info

  # Advanced level configuration by module prefix.
  #
  # The keys in this map are used as prefix matches against log event modules.
  # If a match is found the mapped level is used for the event.
  # If no match is found the `level` value is used as the filter.
  #
  # Example:
  #
  #     modules:
  #       'hyper::server': debug
  #       'rdkafka': error
  #
  # To find out what modules are available you can set `level` to DEBUG
  # and enable `verbose` logging to see all logs.
  # Once you know what logs you are looking for you can undo the changes to `level` and `verbose`
  # and add the module prefix you need to the `modules` option.
  modules: {}

  # Enable verbose debug logs.
  #
  # When DEBUG level is enabled, things can get loud pretty easily.
  # To allow DEBUG level to be more useful, only application events are emitted at
  # DEBUG level while dependency events are emitted at INFO level.
  #
  # Verbose mode can be used in cases where DEBUG level should be enabled by default
  # on all events and not just the application logs.
  verbose: false


# The section below is for cluster orchestration scheduling configuration.
#
# The orchestrator scheduler periodically checks for clusters pending orchestration
# and schedules them.
orchestrator:
  # Interval (in seconds) to wait between checks for pending clusters to schedule.
  interval: 15

  # Number of cycles before this node will re-run an election, 0 to disable re-runs.
  #
  # Having the system re-run elections continuously ensures that failover procedures are
  # exercised constantly and not just in case of errors.
  # You do not want to discover that failover does not work when a primary fails
  # and nothing picks up after it.
  #
  # With the default interval (15 seconds), 43200 cycles amount to about 7.5 days
  # between re-elections (43200 * 15 seconds).
  # NOTE(review): this comment used to state "about every 3 hours", which would be 720 cycles
  # at a 15 seconds interval; confirm whether the value or the description is the intended one.
  term: 43200


# Optional sentry.io integration configuration (disabled by default).
#
# Set a DSN parameter to enable centralised error reporting.
#
# To enable it, uncomment the block below AND remove the `sentry: null` line that
# follows it, otherwise the `sentry` key would be defined twice.
#sentry:
#  # Sentry API response capture filter.
#  #
#  # When sentry is enabled, API error responses returned to clients can be recorded
#  # as sentry events to detect and debug issues.
#  #
#  # This option sets the severity level of responses that are reported:
#  #  * `no`: disable sentry capturing of API error responses.
#  #  * `client`: report client side errors and above (status code >= 400).
#  #  * `server`: only report server side errors (status code >= 500).
#  #
#  # Keep the value quoted: an unquoted `no` is read as a boolean by YAML 1.1 parsers.
#  capture_api_errors: 'server'
#
#  # (required) The DSN to use to configure sentry.
#  dsn: 'https://key@server.domain:port/project'
sentry: null


# The section below is for storage configuration.
storage:
  # The primary store where data needed by the core Replicante logic is stored.
  primary:
    # The database to persist data in.
    #
    # !!! DO NOT CHANGE AFTER INITIAL CONFIGURATION !!!
    # This option is to allow users to choose a supported database that best fits
    # their use and environment.
    #
    # To change this option, follow the instructions to rebuild your system:
    #   https://www.replicante.io/docs/manual/docs/upgrades-fresh/
    #
    # Available options:
    #
    #   * 'mongodb' (recommended)
    backend: mongodb

    # Any backend-specific option is set here.
    # The available options vary from backend to backend and are documented below.
    #
    # MongoDB options:
    options:
      # Name of the MongoDB database to use for persistence.
      #
      # To change this option you will need to "Update by rebuild".
      # See the documentation for more details on this process.
      db: replicore  # (recommended)

      # URI of the MongoDB Replica Set or sharded cluster to connect to.
      uri: mongodb://localhost:27017/

  # The view store where data used to answer user queries is stored.
  view:
    # The database to persist data in.
    #
    # !!! DO NOT CHANGE AFTER INITIAL CONFIGURATION !!!
    # This option is to allow users to choose a supported database that best fits
    # their use and environment.
    #
    # To change this option, follow the instructions to rebuild your system:
    #   https://www.replicante.io/docs/manual/docs/upgrades-fresh/
    #
    # Available options:
    #
    #   * 'mongodb' (recommended)
    backend: mongodb

    # Any backend-specific option is set here.
    # The available options vary from backend to backend and are documented below.
    #
    # MongoDB options:
    options:
      # Name of the MongoDB database to use for persistence.
      #
      # To change this option you will need to "Update by rebuild".
      # See the documentation for more details on this process.
      db: repliview  # (recommended)

      # URI of the MongoDB Replica Set or sharded cluster to connect to.
      uri: mongodb://localhost:27017/


# Task workers enabling configuration.
#
# For Replicante to function correctly ALL queues need to be worked on.
# By default, all queues are enabled on all nodes.
#
# While this allows the configuration to stay simple (all nodes in the cluster are identical),
# more advanced configuration options based around specialised node roles can be used to
# scale replicante deployments in a more efficient way (because roles can be scaled based on
# each role's needs rather than scaling all roles to the needs of the biggest role).
#
# This is similar to the `components` configuration section but targeted at queues.
# Processes MUST have the `workers` component enabled for tasks to be processed.
task_workers:
  # Default status for all task workers that are not explicitly enabled/disabled.
  _default: true

  # Enable handling of cluster state refresh and aggregation tasks.
  cluster_refresh: null

  # Enable handling of clusters discovery tasks.
  discover_clusters: null

  # Enable handling of cluster orchestration tasks.
  orchestrate_cluster: null


# The section below is for the tasks system configuration.
tasks:
  # The queue system to use for async tasks.
  #
  # !!! DO NOT CHANGE AFTER INITIAL CONFIGURATION !!!
  # This option is to allow users to choose a supported software that best fits
  # their use and environment.
  #
  # To change this option, follow the instructions to rebuild your system:
  #   https://www.replicante.io/docs/manual/docs/upgrades-fresh/
  #
  # Available options:
  #
  #   * 'kafka' (recommended)
  backend: 'kafka'

  # Any backend-specific option is set here.
  # The available options vary from backend to backend and are documented below.
  #
  # Kafka options:
  options:
    # Acknowledgement level for published messages.
    #
    # Possible values are:
    #
    #   * 'all': all in-sync-replicas for the partition published to must ack messages
    #            (default; required for consistency; see kafka documentation for details).
    #   * 'leader_only': only the leader of the partition published to must ack messages.
    #   * 'none': no acknowledgements are required for produced messages.
    ack_level: 'all'

    # Comma separated list of seed brokers.
    brokers: 'localhost:9092'

    # Number of attempts to commit offsets before giving up and recreating the client.
    commit_retries: 10

    # Worker session keepalive heartbeat interval (in milliseconds).
    heartbeat: 3000

    # Prefix to be placed in front of queue names to derive topic names.
    queue_prefix: 'task'

    # Kafka-specific timeout options.
    timeouts:
      # Timeout (in milliseconds) for non-topic requests.
      metadata: 60000

      # Timeout (in milliseconds) for tasks to be acknowledged.
      request: 5000

      # Timeout (in milliseconds) after which workers are presumed dead by the brokers.
      session: 10000

      # Default timeout (in milliseconds) for network requests.
      socket: 60000

  # Number of task processing threads to spawn.
  #
  # The line below is a placeholder, not a valid value: replace "number of CPUs" with an
  # integer when uncommenting it (presumably the number of CPUs is the default - confirm).
  #threads_count: number of CPUs


# Timeouts configured here are used throughout the system for various reasons.
timeouts:
  # Timeout (in seconds) after which API requests to agents are failed.
  agents_api: 15

      
# The section below is for distributed tracing configuration.
tracing:
  # The distributed tracing backend to integrate with.
  #
  # Available options:
  #
  #   * 'noop'
  #   * 'zipkin'
  backend: noop

  # Any backend-specific option is set here.
  # The available options vary from tracer to tracer and are documented below.
  #
  # Zipkin options
  #
  # NOTE: the example below shows two alternative nested `options` blocks (one for the HTTP
  # transport and one for the Kafka transport); uncomment only the one matching `transport`
  # to avoid defining the same key twice.
  #options:
  #  # (required) The transport to send tracing information to zipkin.
  #  #
  #  # Available options:
  #  #
  #  #  * 'http'
  #  #  * 'kafka'
  #  transport: 'http'
  #
  #  # Any transport-specific option is set here.
  #  # The available options vary and are documented below.
  #  #
  #  # HTTP transport options
  #  options:
  #    # Number of buffered spans that should trigger a flush.
  #    #
  #    # This option is a best-effort configuration and the size of the buffer may grow
  #    # slightly above this threshold.
  #    flush_count: 100
  #
  #    # Maximum delay between span flushes in milliseconds.
  #    #
  #    # This option is a best-effort configuration and the delay between flushes may
  #    # presumably grow slightly above this threshold (TODO confirm).
  #    flush_timeout_millis: 2000
  #
  #    # Custom headers to attach to POST requests.
  #    headers: {}
  #
  #    # (required) Target URL to POST spans to.
  #    url: 'https://zipkin.corp/'
  #
  #  # Kafka transport options
  #  options:
  #    # (required) List of kafka seed hostnames.
  #    kafka:
  #      - HOST1:9092
  #      - HOST2:9092
  #
  #    # The kafka topic to publish spans to.
  #    topic: zipkin