Permalink
Browse files

Drops sigar.

We now rely on existing monitoring tools readily
available for containerized environments.
  • Loading branch information...
andyHa committed Jul 16, 2018
1 parent f9f32a3 commit c46bd7145c9d1e2937ab42d9558fc01ab9e569b7
View
@@ -42,13 +42,6 @@
<version>3.0.1</version>
</dependency>
<!-- Include sigar bindings for system monitoring -->
<dependency>
<groupId>org.fusesource</groupId>
<artifactId>sigar</artifactId>
<version>1.6.4</version>
</dependency>
<!-- Required logging bridge to make slf4j log to log4j -->
<dependency>
<groupId>org.slf4j</groupId>
@@ -8,27 +8,20 @@
package sirius.kernel.health.metrics;
import org.hyperic.sigar.CpuPerc;
import org.hyperic.sigar.FileSystem;
import org.hyperic.sigar.FileSystemUsage;
import org.hyperic.sigar.Mem;
import org.hyperic.sigar.NetInterfaceStat;
import org.hyperic.sigar.ProcCpu;
import org.hyperic.sigar.Sigar;
import org.hyperic.sigar.SigarException;
import sirius.kernel.Sirius;
import sirius.kernel.async.CallContext;
import sirius.kernel.commons.Monoflop;
import sirius.kernel.di.std.ConfigValue;
import sirius.kernel.di.std.Part;
import sirius.kernel.di.std.Register;
import sirius.kernel.health.Exceptions;
import sirius.kernel.health.Log;
import sirius.kernel.health.MemoryBasedHealthMonitor;
import java.io.IOException;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryPoolMXBean;
import java.nio.file.FileStore;
import java.nio.file.FileSystems;
import java.util.List;
/**
@@ -39,22 +32,15 @@
// Garbage collector beans of the running JVM, obtained once from the ManagementFactory.
private List<GarbageCollectorMXBean> gcs = ManagementFactory.getGarbageCollectorMXBeans();
// Memory pool beans of the running JVM, obtained once from the ManagementFactory.
private List<MemoryPoolMXBean> pools = ManagementFactory.getMemoryPoolMXBeans();
// Handle to the native sigar library used to read OS level metrics.
private Sigar sigar = new Sigar();
// Switched to false once loading the native sigar bindings failed with an UnsatisfiedLinkError,
// so that no further sigar based metrics are gathered.
private volatile boolean sigarEnabled = true;
// Ensures that the open files limit is only checked on the first metrics run.
private Monoflop openFilesChecked = Monoflop.create();
// Logger used for all sigar related messages.
private static final Log LOG = Log.get("sigar");
// Injected via @Part - NOTE(review): not referenced in the visible part of this class.
@Part
private MemoryBasedHealthMonitor monitor;
// Minimal acceptable limit of open files, read from the config key "health.minimalOpenFilesLimit".
// A value <= 0 disables the warning in checkMaxNumberOfOpenFiles.
@ConfigValue("health.minimalOpenFilesLimit")
private long minimalOpenFilesLimit;
@Override
public void gather(MetricsCollector collector) {
gatherMemoryMetrics(collector);
gatherGCMetrics(collector);
gatherOSMetrics(collector);
gatherFS(collector);
collector.differentialMetric("sys-interactions",
"sys-interactions",
@@ -75,48 +61,6 @@ public void gather(MetricsCollector collector) {
"MB");
}
/*
 * Gathers all metrics which are obtained via sigar: CPU and memory usage, network statistics
 * and file system usage. Also triggers the check of the open files limit.
 *
 * Does nothing once sigar has been disabled. A SigarException is handed to the exception
 * handling using LOG, whereas an UnsatisfiedLinkError (native bindings could not be loaded)
 * is ignored and permanently disables all sigar based metrics.
 */
private void gatherOSMetrics(MetricsCollector collector) {
    if (!sigarEnabled) {
        return;
    }

    try {
        gatherCPUandMem(collector);
        gatherNetworkStats(collector);
        gatherFS(collector);
        checkMaxNumberOfOpenFiles();
    } catch (SigarException sigarFailure) {
        Exceptions.handle(LOG, sigarFailure);
    } catch (UnsatisfiedLinkError missingNativeLibrary) {
        Exceptions.ignore(missingNativeLibrary);
        sigarEnabled = false;
    }
}
/*
 * We check the max number of open files for the underlying system as this is commonly too
 * low on many linux machines and causes ugly errors.
 *
 * As we rely on sigar anyway and don't want to risk a link error / exception on startup, we check
 * this value on the first run of the metrics collector.
 *
 * The check is performed only once (guarded by openFilesChecked). An error is reported if both
 * the detected limit and the configured minimum are positive and the limit is below the minimum.
 * In all other cases an info message containing both values is logged.
 *
 * Throws a SigarException if the resource limits cannot be determined.
 */
private void checkMaxNumberOfOpenFiles() throws SigarException {
    if (openFilesChecked.firstCall()) {
        long maxOpenFiles = sigar.getResourceLimit().getOpenFilesMax();
        if (maxOpenFiles > 0 && minimalOpenFilesLimit > 0 && maxOpenFiles < minimalOpenFilesLimit) {
            // Note: the number of open files is controlled by "ulimit -n" ("-f" would be the max. file size).
            Exceptions.handle()
                      .withSystemErrorMessage(
                              "The ulimit -n (number of open files) is too low: %d - It should be at least: %d",
                              maxOpenFiles,
                              minimalOpenFilesLimit)
                      .to(LOG)
                      .handle();
        } else {
            LOG.INFO("The maximal number of open files on this system is good (%d, Required are at least: %d)",
                     maxOpenFiles,
                     minimalOpenFilesLimit);
        }
    }
}
private void gatherGCMetrics(MetricsCollector collector) {
for (GarbageCollectorMXBean gc : gcs) {
collector.differentialMetric("jvm-gc-" + gc.getName(),
@@ -138,38 +82,13 @@ private void gatherMemoryMetrics(MetricsCollector collector) {
}
}
/*
 * Sums up the received and transmitted bytes of all network interfaces reported by sigar
 * and publishes both totals as differential metrics ("sys-eth-tx" and "sys-eth-rx").
 *
 * The byte counts are converted to KB and divided by 60 - presumably because the collector
 * is invoked once per minute, which would yield KB/s (TODO confirm against the scheduler).
 *
 * Throws a SigarException if the interface list or an interface statistic cannot be read.
 */
private void gatherNetworkStats(MetricsCollector collector) throws SigarException {
    long totalBytesReceived = 0;
    long totalBytesSent = 0;

    for (String interfaceName : sigar.getNetInterfaceList()) {
        NetInterfaceStat interfaceStat = sigar.getNetInterfaceStat(interfaceName);
        totalBytesReceived += interfaceStat.getRxBytes();
        totalBytesSent += interfaceStat.getTxBytes();
    }

    double sentValue = totalBytesSent / 1024d / 60;
    double receivedValue = totalBytesReceived / 1024d / 60;

    collector.differentialMetric("sys-eth-tx", "sys-eth-tx", "Network Bytes-Out", sentValue, "KB/s");
    collector.differentialMetric("sys-eth-rx", "sys-eth-rx", "Network Bytes-In", receivedValue, "KB/s");
}
/*
 * Publishes CPU and memory related metrics:
 *   sys-cpu  - combined CPU usage of the system as reported by sigar, scaled by 100
 *   sys-mem  - used memory of the system in percent as reported by sigar
 *   jvm-cpu  - CPU usage of this process as reported by sigar
 *   jvm-heap - used heap (total - free) in relation to the max heap of the JVM
 *
 * Throws a SigarException if sigar fails to provide one of the values.
 */
private void gatherCPUandMem(MetricsCollector collector) throws SigarException {
    CpuPerc systemCpu = sigar.getCpuPerc();
    double systemCpuUsage = systemCpu.getCombined() * 100d;
    collector.metric("sys-cpu", "System CPU Usage", systemCpuUsage, "%");

    Mem systemMemory = sigar.getMem();
    systemMemory.gather(sigar);
    collector.metric("sys-mem", "System Memory Usage", systemMemory.getUsedPercent(), "%");

    // NOTE(review): in contrast to getCombined() above, this value is not multiplied by 100 -
    // verify whether ProcCpu.getPercent() yields a fraction (0..1) rather than a percentage.
    ProcCpu processCpu = sigar.getProcCpu(sigar.getPid());
    collector.metric("jvm-cpu", "JVM CPU Usage", processCpu.getPercent(), "%");

    Runtime runtime = Runtime.getRuntime();
    long usedHeap = runtime.totalMemory() - runtime.freeMemory();
    double heapUsage = (double) usedHeap / runtime.maxMemory() * 100d;
    collector.metric("jvm-heap", "JVM Heap Usage", heapUsage, "%");
}
private void gatherFS(MetricsCollector collector) throws SigarException {
for (FileSystem fs : sigar.getFileSystemList()) {
if (fs.getType() == FileSystem.TYPE_LOCAL_DISK) {
FileSystemUsage fsu = sigar.getMountedFileSystemUsage(fs.getDirName());
collector.metric("sys-fs", "FS: Usage of " + fs.getDirName(), fsu.getUsePercent() * 100d, "%");
/*
 * Reports the usage in percent for each file store of the default file system as "sys-fs" metric.
 *
 * The usage is computed as 100 - (usable space / total space * 100). File stores which report
 * no total space (e.g. virtual / pseudo file systems) are skipped, as the computation would
 * otherwise yield NaN (0 / 0) and pollute the metrics.
 *
 * File stores which cannot be queried (IOException) are silently skipped, as this is a
 * best-effort measurement.
 */
private void gatherFS(MetricsCollector collector) {
    for (FileStore store : FileSystems.getDefault().getFileStores()) {
        try {
            long totalSpace = store.getTotalSpace();
            if (totalSpace <= 0) {
                // Nothing to measure - and dividing by zero would result in NaN
                continue;
            }

            double usage = 100d - (100d * store.getUsableSpace() / totalSpace);
            collector.metric("sys-fs", "FS: Usage of " + store.name(), usage, "%");
        } catch (IOException e) {
            Exceptions.ignore(e);
        }
    }
}
@@ -161,8 +161,6 @@ async {
# Configures the system health monitoring
health {
minimalOpenFilesLimit = 64K
# Determines settings for the MemoryBasedHealthMonitor, which provides logs and error reporting
# if no other LogTap or ExceptionHandler is installed. This is also used to report how many
# logs / min and exceptions / min occur.
@@ -198,16 +196,6 @@ health {
sys-unique-incidents.warning = 10
sys-unique-incidents.error = 25
# Overall CPU utilization in %
sys-cpu.gray = 15
sys-cpu.warning = 95
sys-cpu.error = 0
# Ram utilization in %
# We ignore most of the values as memory is always used as buffer etc. So one has to read this
# metric with a grain of salt
sys-mem.gray = 90
# Size of all log files in mb
sys-log-size.gray = 32
sys-log-size.warning = 128
@@ -218,16 +206,6 @@ health {
sys-fs.warning = 80
sys-fs.error = 95
# Outgoing network bandwidth
sys-eth-tx.gray = 500
sys-eth-tx.yellow = 0
sys-eth-tx.red = 0
# Incoming network bandwidth
sys-eth-rx.gray = 500
sys-eth-rx.yellow = 0
sys-eth-rx.red = 0
# CPU utilization caused by the JVM in %
jvm-cpu.gray = 10
jvm-cpu.warning = 80

0 comments on commit c46bd71

Please sign in to comment.