Skip to content

Commit

Permalink
[enhancement]: add VM measurement with state and CPU/Memory capacity …
Browse files Browse the repository at this point in the history
…information
  • Loading branch information
Tesifonte Belda committed Aug 26, 2022
1 parent bab4f47 commit d68b61e
Show file tree
Hide file tree
Showing 7 changed files with 276 additions and 28 deletions.
22 changes: 21 additions & 1 deletion METRICS.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
- connection_state_code (int) 0-connected, 1-notresponding, 2-disconnected
- memory_size (int) in bytes
- num_cpus (int)
- cpu_freq (int)
- cpu_freq (int) in MHz
- vcstat_host_esxcli
- tags:
- esxhostname
Expand Down Expand Up @@ -134,6 +134,26 @@
- status (string)
- status_code (int) 0-green, 1-gray, 2-yellow, 3-red
- num_ports (int)
- vcstat_vm
- tags:
- esxhostname
- moid
- vcenter
- dcname
- clustername
- vmname
- fields:
- status (string)
- status_code (int) 0-green, 1-gray, 2-yellow, 3-red
- consolidation_needed (bool)
- memory_size (int) in bytes
- memory_overhead (int) in bytes
- num_eth_cards (int)
- num_vdisks (int)
- num_vcpus (int)
- power_state (string)
- power_state_code (int) 0-on, 1-suspended, 2-off, 3-other
- template (bool)
- internal_vcstat
- tags:
- vcenter
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,9 @@ vcstat_host_nic,dcname=MyDC,clustername=MyCluster-01,device=vmnic0,driver=ntg3,e
vcstat_host_esxcli,dcname=MyDC,clustername=MyCluster-01,esxhostname=myesxi01.local,moid=host-706,vcenter=vcenter.local responding_code=0i,response_time_ns=109185876i 1653060681000000000
vcstat_net_dvs,dcname=MyDC,dvs=DSwitch-E1,moid=dvs-e1,vcenter=vcenter.local num_standalone_ports=0i,status="green",status_code=0i,num_ports=421i,max_ports=2147483647i 1653060682000000000
vcstat_net_dvp,dcname=MyDC,dvp=DSwitch-E1-DVUplinks-e1,moid=dvportgroup-e1,uplink=true,vcenter=vcenter.local status="green",status_code=0i,num_ports=16i 1653060682000000000
vcstat_datastore,dcname=MyDC,dsname=DS_Departement1,moid=datastore-725,type=VMFS,vcenter=vcenter.local accessible=true,capacity=2198754820096i,freespace=730054262784i,uncommitted=20511i,maintenance_mode="normal" 1653060682000000000
internal_vcstat,vcenter=vcenter.local sessions_created=1i,gather_time_ns=1764839000i,notresponding_esxcli_hosts=0i 1653060682000000000
vcstat_datastore,dcname=MyDC,dsname=DS_Departement1,moid=datastore-725,type=VMFS,vcenter=vcenter.local accessible=true,capacity=2198754820096i,freespace=730054262784i,uncommitted=20511i,maintenance_mode="normal" 1653060682000000000
vcstat_vm,clustername=MyCluster-01,dcname=MyDC,esxhostname=myesxi01.local,moid=vm-4524,vcenter=vcenter.local,vmname=vmserver01 consolidation_needed=false,memory_overhead=0i,num_eth_cards=2i,status="green",memory_size=25769803776i,num_vdisks=8i,num_vcpus=4i,power_state="poweredOn",power_state_code=0i,status_code=0i,template=false 1653060683000000000
internal_vcstat,vcenter=vcenter.local sessions_created=1i,gather_time_ns=1764839000i,notresponding_esxcli_hosts=0i 1653060683000000000
```

# Metrics
Expand Down
14 changes: 8 additions & 6 deletions etc/vcstat.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,21 @@
# insecure_skip_verify = false

#### you may enable or disable data collection per instance type ####
## collect cluster measurements (vcstat_cluster)
## collect cluster measurement (vcstat_cluster)
# cluster_instances = true
## collect datastore measurement (vcstat_datastore)
# datastore_instances = false
## collect host status measurements (vcstat_host)
## collect host status measurement (vcstat_host)
# host_instances = true
## collect host firewall measurement (vcstat_host_firewall)
# host_firewall_instances = false
## collect host bus adapter measurements (vcstat_host_hba)
## collect host bus adapter measurement (vcstat_host_hba)
# host_hba_instances = false
## collect host network interface measurements (vcstat_host_nic)
## collect host network interface measurement (vcstat_host_nic)
# host_nic_instances = false
## collect network distributed virtual switch measurements (vcstat_net_dvs)
## collect network distributed virtual switch measurement (vcstat_net_dvs)
# net_dvs_instances = true
## collect network distributed virtual portgroup measurements (vcstat_net_dvp)
## collect network distributed virtual portgroup measurement (vcstat_net_dvp)
# net_dvp_instances = false
## collect virtual machine measurement (vcstat_vm)
# vm_instances = false
35 changes: 35 additions & 0 deletions pkg/vccollector/vccollector_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ type VcCache struct {
lastCHUpdate time.Time //nolint
lastDsUpdate time.Time //nolint
lastNtUpdate time.Time //nolint
lastVmUpdate time.Time //nolint
dcs []*object.Datacenter //nolint
clusters [][]*object.ClusterComputeResource //nolint
dss [][]*object.Datastore //nolint
hosts [][]*object.HostSystem //nolint
hostStates [][]hostState //nolint
nets [][]object.NetworkReference //nolint
vms [][]*object.VirtualMachine //nolint
}

func (c *VcCollector) getDatacenters(ctx context.Context) error {
Expand Down Expand Up @@ -179,6 +181,39 @@ func (c *VcCollector) getAllDatacentersDatastores(ctx context.Context) error {
return nil
}

// getAllDatacentersVMs refreshes the cached per-datacenter virtual machine
// lists (c.vms), indexed in the same order as c.dcs.
//
// The refresh is skipped while the previous one is younger than
// c.dataDuration. Otherwise the datacenter/cluster/host cache is refreshed
// first and every datacenter is then queried for all of its VMs. A finder
// error whose text contains StrErrorNotFoud is tolerated and leaves that
// datacenter with whatever (possibly empty) list the finder returned; any
// other error aborts the refresh and is returned wrapped.
func (c *VcCollector) getAllDatacentersVMs(ctx context.Context) error {
	if time.Since(c.lastVmUpdate) < c.dataDuration {
		return nil
	}
	if err := c.getAllDatacentersClustersAndHosts(ctx); err != nil {
		return err
	}

	// Re-shape the cache only when the number of datacenters has changed.
	numdcs := len(c.dcs)
	if numdcs != len(c.vms) {
		c.vms = nil
		if numdcs > 0 {
			c.vms = make([][]*object.VirtualMachine, numdcs)
		}
	}

	for i, dc := range c.dcs {
		finder := find.NewFinder(c.client.Client, false)
		finder.SetDatacenter(dc)

		vms, err := finder.VirtualMachineList(ctx, StrAsterisk)
		// Store the result before inspecting the error, as the list is
		// overwritten for this datacenter in every case.
		c.vms[i] = vms
		if err != nil && !strings.Contains(err.Error(), StrErrorNotFoud) {
			return fmt.Errorf("Could not get virtual machine list: %w", err)
		}
	}
	c.lastVmUpdate = time.Now()

	return nil
}

func (c *VcCollector) IsHostConnected(dc *object.Datacenter, host *object.HostSystem) bool {
for i, searcdc := range c.dcs {
if searcdc == dc {
Expand Down
28 changes: 21 additions & 7 deletions pkg/vccollector/vccollector_host.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func (c *VcCollector) CollectHostInfo(
hstags := getHostTags(
c.client.Client.URL().Host,
dc.Name(),
c.getClusterFromHost(i, host),
c.getClusternameFromHost(i, host),
host.Name(),
host.Reference().Value,
)
Expand Down Expand Up @@ -164,7 +164,7 @@ func (c *VcCollector) CollectHostHBA(
hbatags := getHbaTags(
c.client.Client.URL().Host,
dc.Name(),
c.getClusterFromHost(i, host),
c.getClusternameFromHost(i, host),
host.Name(),
rv["HBAName"][0],
rv["Driver"][0],
Expand Down Expand Up @@ -244,7 +244,7 @@ func (c *VcCollector) CollectHostNIC(
nictags := getNicTags(
c.client.Client.URL().Host,
dc.Name(),
c.getClusterFromHost(i, host),
c.getClusternameFromHost(i, host),
host.Name(),
rv["Name"][0],
rv["Driver"][0],
Expand Down Expand Up @@ -326,7 +326,7 @@ func (c *VcCollector) CollectHostFw(
fwtags := getFirewallTags(
c.client.Client.URL().Host,
dc.Name(),
c.getClusterFromHost(i, host),
c.getClusternameFromHost(i, host),
host.Name(),
)
enabled, err := strconv.ParseBool(res.Values[0]["Enabled"][0])
Expand Down Expand Up @@ -388,7 +388,7 @@ func (c *VcCollector) ReportHostEsxcliResponse(
hstags := getHostTags(
c.client.Client.URL().Host,
dc.Name(),
c.getClusterFromHost(i, host),
c.getClusternameFromHost(i, host),
host.Name(),
host.Reference().Value,
)
Expand All @@ -412,7 +412,7 @@ func (c *VcCollector) ReportHostEsxcliResponse(
return nil
}

func (c *VcCollector) getClusterFromHost(dcindex int, host *object.HostSystem) string {
func (c *VcCollector) getClusternameFromHost(dcindex int, host *object.HostSystem) string {
for _, cluster := range c.clusters[dcindex] {
if strings.HasPrefix(host.InventoryPath, cluster.InventoryPath+"/") {
return cluster.Name()
Expand All @@ -422,7 +422,21 @@ func (c *VcCollector) getClusterFromHost(dcindex int, host *object.HostSystem) s
return ""
}

// hostConnectionStateCode converts types.HostSystemConnectionState to int16 for easy alerting from telegraf metrics
// getHostObjectFromReference looks up, among the cached hosts of the
// datacenter at index dcindex, the host whose managed object reference has the
// same type and value as r.
//
// It returns nil when r is nil, when dcindex is outside the host cache, or
// when no cached host matches.
func (c *VcCollector) getHostObjectFromReference(
	dcindex int,
	r *types.ManagedObjectReference,
) *object.HostSystem {
	// r is a pointer and callers may hold an unset reference; guard it and the
	// cache index instead of panicking.
	if r == nil || dcindex < 0 || dcindex >= len(c.hosts) {
		return nil
	}

	for _, host := range c.hosts[dcindex] {
		ref := host.Reference()
		if ref.Type == r.Type && ref.Value == r.Value {
			return host
		}
	}

	return nil
}

// hostConnectionStateCode converts types.HostSystemConnectionState to int16 for easy
// alerting from telegraf metrics
func hostConnectionStateCode(state types.HostSystemConnectionState) int16 {
switch state {
case types.HostSystemConnectionStateConnected:
Expand Down
150 changes: 150 additions & 0 deletions pkg/vccollector/vccollector_vm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// This file contains vccollector methods to gather stats at vm level
//
// Author: Tesifonte Belda
// License: The MIT License (MIT)

package vccollector

import (
"context"
"fmt"
"time"

"github.com/influxdata/telegraf"

"github.com/vmware/govmomi/object"
"github.com/vmware/govmomi/vim25/mo"
)

// CollectVmsInfo gathers basic virtual machine info and adds one "vcstat_vm"
// metric per virtual machine to acc.
//
// For every cached VM the "summary" property is retrieved and turned into tags
// (vcenter, datacenter, cluster, ESX host, moid, VM name, guest hostname) and
// fields (status, power state, memory, vCPU/NIC/disk counts, template and
// consolidation flags). Optional summary parts that are not set (runtime host,
// guest summary, consolidation flag) yield empty tags / false instead of a
// nil pointer dereference.
//
// It returns an error when there is no client, when the VM list cannot be
// obtained, or when a property query fails in a way govQueryError marks as
// fatal; other per-VM query errors are reported through acc.AddError and the
// VM is skipped.
func (c *VcCollector) CollectVmsInfo(
	ctx context.Context,
	acc telegraf.Accumulator,
) error {
	if c.client == nil {
		// Constant format string: the message itself must never be
		// interpreted as a format.
		return fmt.Errorf("%s", string(Error_NoClient))
	}

	if err := c.getAllDatacentersVMs(ctx); err != nil {
		return fmt.Errorf("Could not get virtual machine entity list: %w", err)
	}

	for i, dc := range c.dcs {
		// NOTE(review): the VM cache is refreshed on its own timer, so it is
		// assumed it could be shorter than the datacenter list — confirm.
		if i >= len(c.vms) {
			break
		}
		for _, vm := range c.vms[i] {
			// A fresh struct per VM keeps optional properties of a previous
			// VM from leaking into this one.
			var vmMo mo.VirtualMachine
			err := vm.Properties(ctx, vm.Reference(), []string{"summary"}, &vmMo)
			if err != nil {
				if qerr, exit := govQueryError(err); exit {
					return fmt.Errorf(
						"Could not get vm %s summary property: %w",
						vm.Name(),
						qerr,
					)
				}
				acc.AddError(
					fmt.Errorf(
						"Could not get vm %s summary property: %w",
						vm.Name(),
						err,
					),
				)
				continue
			}

			s := vmMo.Summary
			r := s.Runtime
			t := s.Config

			// The runtime host reference is a pointer and may be unset.
			var host *object.HostSystem
			hostname, clustername := "", ""
			if r.Host != nil {
				host = c.getHostObjectFromReference(i, r.Host)
			}
			if host != nil {
				hostname = host.Name()
				clustername = c.getClusternameFromHost(i, host)
			}

			// The guest summary is a pointer and may be unset.
			guesthostname := ""
			if s.Guest != nil {
				guesthostname = s.Guest.HostName
			}

			// ConsolidationNeeded is a *bool; treat "not reported" as false.
			consolidationNeeded := r.ConsolidationNeeded != nil && *r.ConsolidationNeeded

			vmtags := getVmTags(
				c.client.Client.URL().Host,
				dc.Name(),
				clustername,
				hostname,
				vm.Reference().Value,
				t.Name,
				guesthostname,
			)
			vmfields := getVmFields(
				string(s.OverallStatus),
				entityStatusCode(s.OverallStatus),
				string(r.PowerState),
				vmPowerStateCode(string(r.PowerState)),
				r.MemoryOverhead,
				int64(t.MemorySizeMB)*(1024*1024), // MB -> bytes
				t.NumCpu,
				t.NumEthernetCards,
				t.NumVirtualDisks,
				t.Template,
				consolidationNeeded,
			)
			acc.AddFields("vcstat_vm", vmfields, vmtags, time.Now())
		}
	}

	return nil
}

// getVmTags assembles the tag set attached to a vcstat_vm metric from the
// given vCenter, datacenter, cluster, ESX host, managed object id, VM name and
// guest hostname.
func getVmTags(
	vcenter, dcname, cluster, hostname, moid, vmname, guesthostname string,
) map[string]string {
	tags := make(map[string]string, 7)

	// Location of the VM in the inventory.
	tags["vcenter"] = vcenter
	tags["dcname"] = dcname
	tags["clustername"] = cluster
	tags["esxhostname"] = hostname

	// Identity of the VM itself.
	tags["moid"] = moid
	tags["vmname"] = vmname
	tags["guesthostname"] = guesthostname

	return tags
}

// getVmFields assembles the field set of a vcstat_vm metric: overall status
// and its numeric code, power state and its numeric code, memory size and
// overhead, vCPU / ethernet card / virtual disk counts, and the template and
// consolidation-needed flags.
func getVmFields(
	overallstatus string,
	vmstatuscode int16,
	powerstate string,
	powerstatecode int16,
	memoryoverhead, memorysize int64,
	numcpu, numeth, numvdisk int32,
	template, consolidationneeded bool,
) map[string]interface{} {
	fields := make(map[string]interface{}, 11)

	// Health and power state, as text and as alert-friendly codes.
	fields["status"] = overallstatus
	fields["status_code"] = vmstatuscode
	fields["power_state"] = powerstate
	fields["power_state_code"] = powerstatecode

	// Capacity.
	fields["memory_size"] = memorysize
	fields["memory_overhead"] = memoryoverhead
	fields["num_vcpus"] = numcpu
	fields["num_eth_cards"] = numeth
	fields["num_vdisks"] = numvdisk

	// Flags.
	fields["template"] = template
	fields["consolidation_needed"] = consolidationneeded

	return fields
}

// vmPowerStateCode maps a VM power state name to an int16 code for easy
// alerting: 0 for "poweredOn", 1 for "suspended", 2 for "poweredOff" and 3 for
// any other value.
func vmPowerStateCode(state string) int16 {
	const (
		codeOn int16 = iota
		codeSuspended
		codeOff
		codeOther
	)

	if state == "poweredOn" {
		return codeOn
	}
	if state == "suspended" {
		return codeSuspended
	}
	if state == "poweredOff" {
		return codeOff
	}

	return codeOther
}
Loading

0 comments on commit d68b61e

Please sign in to comment.