### Default (baseline) configuration parameters.
### DO NOT ever change this config, use the -c command-line option instead!


collectors:
  # Modules that collect the actual datapoints to be sent

  _default: # used as a base for all other sections here
    enabled: true
    # debug: # auto-filled from global "debug" section, if not specified

  ping:
    # Reports average (ewma) icmp ping rtt and packet loss (if any) for each specified host.
    interval: 5 # seconds between sending-out pings
    ewma_factor: 0.3 # ewma factor for rtt values
    resolve:
      no_reply: 30 # re-resolve hostnames after 30 seconds w/o reply
      time: 600 # re-resolve hostnames after fixed 600s intervals
      # "max_retries" restarts ping subprocess (e.g. to apply changes to
      # /etc/hosts or other libc resolver configuration) after N name resolution failures.
      # Also, if the resolver still fails after a restart (e.g. right on start), warnings
      # are disabled after that number of retries, with a message issued on the next success.
      max_retries: 5
    hosts: # explicitly split into ipv4/ipv6 to control how hostnames are resolved
      ipv4:
        # google_com: google.com
        # google_dns: 8.8.8.8
      ipv6:
        # ipv6_google_com: ipv6.google.com
        # ipv6_tunnelbroker_net: ipv6.tunnelbroker.net

  cron_log:
    # Reports start/stop, run time and errors for cron jobs from a logfile.
    # I use simple wrappers for cron-jobs to produce these logs (among other things):
    # https://github.com/mk-fg/fgtk#task https://github.com/mk-fg/fgtk/tree/master/task
    source: # must be filled with path to a log file
    aliases: # either [alias, regexp] or ["_" + regexp_group, regexp], see "_script" example below
      # - ['logrotate', '(^|\b)logrotate\b']
      # - ['locate', '(^|\b)updatedb\b']
      # - ['_script', '/etc/cron\.\w+/*(?P<script>\S+)(\s+|$)']
    lines: # within each pattern only the named regexp groups are mandatory; all line patterns themselves are optional
      init: 'task\[(\d+|-)\]:\s+Queued\b[^:]*: (?P<job>.*)$'
      start: 'task\[(\d+|-)\]:\s+Started\b[^:]*: (?P<job>.*)$'
      finish: 'task\[(\d+|-)\]:\s+Finished\b[^:]*: (?P<job>.*)$'
      duration: 'task\[(\d+|-)\]:\s+Finished \([^):]*\bduration=(?P<val>\d+)[,)][^:]*: (?P<job>.*)$'
      error: 'task\[(\d+|-)\]:\s+Finished \([^):]*\bstatus=0*[^0]+0*[,)][^:]*: (?P<job>.*)$'
    xattr_name: user.collectd.logtail.pos # used to mark "last position" in the source log
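    # Hypothetical log lines (not from the actual wrapper) that the patterns above would match:
    #   task[1234]: Started (queued for 2s): /etc/cron.daily/logrotate
    #   task[1234]: Finished (duration=3, status=0): /etc/cron.daily/logrotate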

  slabinfo:
    # Reports RAM usage by kernel, allocated via slab subsystem.
    include_prefixes: # takes priority over exclude_prefixes
    exclude_prefixes: ['kmalloc-', 'kmem_cache', 'dma-kmalloc-']
    pass_zeroes: false # skip creating a lot of metrics for slab counts that are always 0 on a particular host

  cgacct:
    # Accounting of cpu/mem/io for systemd-created per-service cgroups.
    cg_root: /sys/fs/cgroup
    resource_controllers: ['cpuacct', 'memory', 'blkio'] # mapped to methods in cgacct.py

  sysstat:
    # Processing of sysstat logs - cpu, io, network, temperatures, etc.
    # See collectors/sysstat.py for full list of parameters.
    force_interval: true # skip intervals of a different length than core.interval
    force_interval_fuzz: 10 # +/- % to consider acceptable interval fuzz
    sa_path: /var/log/sa
    rate: # see "graphite_metrics.collectors.rate_limit"
      limiting_enabled: true
      max_interval: 30 # cycles
      sampling: 3
    skip:
      redundant: true # skip metrics, redundant with other default collectors
      sections: # optional list of sections in "sadf -j -- -A" output to skip, example: ['disk', 'cpu-load-all']
      older_than_days: 4 # do not check sysstat logs older than this number of days on each run
    xattr_name: user.sa_carbon.pos # used to mark "last position" in sa logs

    # Max timespan to dump with "sadf -j", in seconds.
    # Use this if the resulting json is too large to process in one go (e.g. ram-wise).
    max_dump_span: # example: 7200

  iptables_counts:
    # Packet/byte counters from iptables/ip6tables.
    # In my case, these bindings are generated from higher-level configuration
    # by trilobite script (https://github.com/mk-fg/trilobite).
    rule_metrics_path:
      # Paths to files with "table_name chain_name rule_no metric_name"
      # lines for iptables/ip6tables.
      # Example line in such files: "filter FORWARD 30 network.services.tor.out"
      ipv4: # example: /var/lib/iptables/metrics.list
      ipv6: # example: /var/lib/ip6tables/metrics.list
    # One of: pkt, bytes, both (metric.pkt + metric.bytes), both_flat (metric_pkt + metric_bytes)
    units: both_flat
    # Consider counter invalid (and skip it) if rule has changed without rule_metrics file update
    discard_changed_rules: true
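    # Illustrative example, following the naming scheme described above: with units=both_flat,
    # a "network.services.tor.out" rule metric would produce
    # "network.services.tor.out_pkt" and "network.services.tor.out_bytes".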

  irq:
    # Interrupt counters (/proc/interrupts, /proc/softirqs) processing.
    # No configuration.
  memstats:
    # System memory usage statistics (/proc/vmstat, /proc/meminfo).
    # No configuration.
  memfrag:
    # Memory fragmentation statistics (/proc/buddyinfo, /proc/pagetypeinfo).
    # No configuration.
  stats:
    # General system statistics (/proc/stat) - irq.total.{hard,soft}, processes.forks, etc.
    # No configuration.

  cjdns_peer_stats:
    # Traffic/state stats for cjdns daemon - https://github.com/cjdelisle/cjdns/
    # Collects these via InterfaceController_peerStats() admin interface call.
    # Requires bencode python module to be installed.
    # Doesn't need/use the threaded cjdnsadmin module that comes with it.
    enabled: false # less commonly needed than other stats
    # How to get peer metric name.
    # Can be either "pubkey", "ipv6" or whatever key (e.g. "user")
    # that cjdns returns, or a list of these, to use the first one available (e.g. ["user", "ipv6"]).
    # Note that "pubkey" and "ipv6" keys are synthetic and always available.
    peer_id: ipv6
    # Path to standard cjdcmd/cjdmaid/cjdnsadmin configuration file,
    # which should contain address, port and password keys.
    # See https://github.com/cjdelisle/cjdns/blob/master/contrib/python/cjdnsadminmaker.py
    cjdnsadmin_conf: ~/.cjdnsadmin
    # Prefix under which to create "<peer_id>.{bytes_in,bytes_out}" counters
    prefix: network.services.cjdns.peers
    filter:
      # Log stats only for peers with the following connection properties.
      direction: any # one of "any", "incoming", "outgoing"
      established_only: true # don't send byte counters of configured but disconnected peers
    # Some extra metrics to pass along with byte counters.
    # Each one can be set to null or false to skip sending it.
    special_metrics:
      # Add specified key for each peer, set to 0 or 1, depending on connection state.
      peer_link: link
      # Total number of configured peers.
      count: network.services.cjdns.peer_state.total
      # Prefix for counts of peers by state (e.g. "established", "unresponsive", etc).
      count_state: network.services.cjdns.peer_state
    timeout: 2 # how long to wait for cjdns responses

  # self_profiling: # TODO
  #   main_loop: true
  #   collectors: true


processors:
  # Modules that process the datapoints before they are passed to sinks.
  # Datapoints are passed to processors in the same order as they're specified here,
  # followed by any entry points without a config section, in no particular order.
  # Each processor is also passed the list of sinks along with the datapoints,
  # so it can facilitate filtering by dropping particular sinks from that list.

  _default: # used as a base for all other sections here
    enabled: true
    # debug: # auto-filled from global "debug" section, if not specified

  hostname_prefix:
    hostname: # uname(), if unset
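    # Hypothetical explicit value (the uname() hostname is used when left unset):
    # hostname: myhost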


sinks:
  _default: # used as a base for all other sections here
    # Default host/port for sinks can be overridden by CLI flags
    host: localhost # can be specified as "host[:port]"
    default_port: 2003
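    # host: carbon.example.com:2003 # hypothetical example of the combined "host:port" form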

    enabled: false # should be explicitly enabled
    # debug: # auto-filled from global "debug" section, if not specified

  carbon_socket:
    enabled: true # the only sink enabled by default
    max_reconnects: # before bailing out with an error
    reconnect_delay: 5 # seconds

  librato_metrics: # see http://dev.librato.com/v1/post/metrics

    http_parameters:
      # See http://docs.python-requests.org/en/latest/api/#main-interface for a complete list
      url: https://metrics-api.librato.com/v1/metrics
      auth: ['example@librato.com:', '75AFDB82'] # override with the actual values, no url-encoding needed
      timeout: # defaults to half of the loop.interval or 30, if the former is unavailable
      # Might be useful in some setups:
      # proxies:
      # cert:
      # verify: false
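      # Hypothetical values for the optional parameters above (presumably passed through to requests):
      # proxies: {https: 'http://proxy.example.com:3128'}
      # cert: /etc/ssl/private/librato-client.pem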

    # Derive "source" field from first component of metric name
    # See also "hostname_prefix" processor
    source_from_prefix: true
    # Explicit source specification, overrides "source_from_prefix", if set
    # If neither "source" nor "source_from_prefix" is set, the field won't be sent at all
    source:
    # Discard "measure_time" field of individual metrics,
    # sending just one value (when data reached the sink)
    # Saves quite a bit of traffic (roughly 1/3),
    # but MUST NOT be used with historical data collectors, like sysstat
    unified_measure_time: false
    # Split measurement submissions into concurrent requests, as suggested by docs
    # The goal is to minimize overall submission time given the current api limitations.
    # Uses the async api of the requests module, which requires gevent (gevent.org);
    # will be disabled with a warning (or fail, if enabled explicitly) if that's unavailable.
    chunk_data:
      # Can be explicitly disabled (enabled: false) to remove gevent-related
      # warnings on init, or enabled (=true) to fail if async api is unavailable
      enabled:
      max_chunk_size: 500
      max_concurrent_requests: 10 # 0 or false to remove this limit

  # dump: # just logs all the datapoints with level=INFO for testing purposes
  #   enabled: true


loop:
  name: basic # entry point name to use, only one loop can be used
  interval: 60 # seconds


core:
  # Emulate filesystem extended attributes (used in some collectors
  # like sysstat or cron_log), storing per-path data in a simple shelve db.
  # Done by faking the "xattr" module. Attached data will be lost on path changes.
  # Specify a path to a db file (it will be created) to use it.
  xattr_emulation:
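  # Hypothetical example: xattr_emulation: /var/lib/harvestd/xattr.db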

debug: # values here can be overridden by special CLI flags
  dry_run: false


logging: # see http://docs.python.org/library/logging.config.html
  # "custom" level means WARNING or DEBUG, depending on CLI options
  warnings: true # capture python warnings
  tracebacks: true # much easier to debug with these, but noisy and multiline
  version: 1
  formatters:
    basic:
      format: '%(asctime)s :: %(levelname)s :: %(name)s: %(message)s'
      datefmt: '%Y-%m-%d %H:%M:%S'
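      # Illustrative line this format would produce (hypothetical message):
      #   2015-06-01 12:00:00 :: INFO :: graphite_metrics.collectors.irq: some message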
  handlers:
    console:
      class: logging.StreamHandler
      stream: ext://sys.stdout
      formatter: basic
      level: custom
    # file:
    #   class: logging.handlers.WatchedFileHandler
    #   filename: /var/log/harvestd.log
    #   formatter: basic
    #   encoding: utf-8
    #   level: DEBUG
  # loggers:
  #   graphite_metrics.collectors.irq:
  #     level: ERROR
  root:
    handlers: [console]
    level: custom