Skip to content

Commit

Permalink
[feature] New optional metrics for host services
Browse files Browse the repository at this point in the history
  • Loading branch information
Tesifonte Belda committed Jan 6, 2023
1 parent 32037a2 commit a81cc66
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 4 deletions.
12 changes: 12 additions & 0 deletions METRICS.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@
- duplex (string)
- speed (int)
- mac (string)
- vcstat_host_service
- tags:
- key
- esxhostname
- vcenter
- dcname
- clustername
- fields:
- label (string)
- policy (string)
- required (boolean)
- running (boolean)
- vcstat_net_dvs
- tags:
- dvs
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ Use telegraf v1.14 or above so that execd input is available.
# host_hba_instances = false
## collect host network interface measurement (vcstat_host_nic)
# host_nic_instances = false
## collect host services measurement (vcstat_host_service)
# host_service_instances = false
## collect network distributed virtual switch measurement (vcstat_net_dvs)
# net_dvs_instances = true
## collect network distributed virtual portgroup measurement (vcstat_net_dvp)
Expand Down Expand Up @@ -105,6 +107,8 @@ vcstat_host_firewall,dcname=MyDC,clustername=MyCluster-01,esxhostname=myesxi01.l
vcstat_host_hba,dcname=MyDC,clustername=MyCluster-01,device=vmhba0,driver=lpfc,esxhostname=myesxi01.local,vcenter=vcenter.local status="link-n/a",status_code=1i 1653060681000000000
vcstat_host_nic,dcname=MyDC,clustername=MyCluster-01,device=vmnic0,driver=ntg3,esxhostname=myesxi01.local,vcenter=vcenter.local link_status="Down",link_status_code=2i 1653060681000000000
vcstat_host_esxcli,dcname=MyDC,clustername=MyCluster-01,esxhostname=myesxi01.local,moid=host-706,vcenter=vcenter.local responding_code=0i,response_time_ns=109185876i 1653060681000000000
vcstat_host_service,dcname=MyDC,clustername=MyCluster-01,esxhostname=myesxi01.local,key=ntpd,vcenter=vcenter.local label="NTP Daemon",policy="on",required=false,running=true 1653060681000000000
vcstat_host_service,dcname=MyDC,clustername=MyCluster-01,esxhostname=myesxi01.local,key=vpxa,vcenter=vcenter.local label="VMware vCenter Agent",policy="on",required=false,running=true 1653060681000000000
vcstat_net_dvs,dcname=MyDC,dvs=DSwitch-E1,moid=dvs-e1,vcenter=vcenter.local num_standalone_ports=0i,status="green",status_code=0i,num_ports=421i,max_ports=2147483647i 1653060682000000000
vcstat_net_dvp,dcname=MyDC,dvp=DSwitch-E1-DVUplinks-e1,moid=dvportgroup-e1,uplink=true,vcenter=vcenter.local status="green",status_code=0i,num_ports=16i 1653060682000000000
vcstat_datastore,dcname=MyDC,dsname=DS_Departement1,moid=datastore-725,type=VMFS,vcenter=vcenter.local accessible=true,capacity=2198754820096i,freespace=730054262784i,uncommitted=20511i,maintenance_mode="normal"
Expand Down
2 changes: 2 additions & 0 deletions etc/vcstat.conf
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
# host_hba_instances = false
## collect host network interface measurement (vcstat_host_nic)
# host_nic_instances = false
## collect host services measurement (vcstat_host_service)
# host_service_instances = false
## collect network distributed virtual switch measurement (vcstat_net_dvs)
# net_dvs_instances = true
## collect network distributed virtual portgroup measurement (vcstat_net_dvp)
Expand Down
86 changes: 86 additions & 0 deletions internal/vccollector/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,92 @@ func (c *VcCollector) CollectHostFw(
return nil
}

// CollectHostServices gathers host services info (like govc: host.service.ls)
// and adds one vcstat_host_service point per service of every known host.
//
// Tags are clustername, dcname, esxhostname, vcenter and key (the service
// key); fields are label, policy, required and running.
//
// It returns an error when the collector has no client, when the entity list
// cannot be refreshed, when a host's service system cannot be obtained, or
// when a property query fails with what govplus.IsHardQueryError classifies
// as a hard error. Other query errors and unresolved hosts are reported
// through acc.AddError and collection continues.
func (c *VcCollector) CollectHostServices(
	ctx context.Context,
	acc telegraf.Accumulator,
) error {
	var (
		hstags   = make(map[string]string)
		hsfields = make(map[string]interface{})
		s        *object.HostServiceSystem
		host     *object.HostSystem
		t        time.Time
		err      error
	)

	if c.client == nil || c.coll == nil {
		return fmt.Errorf("Could not get host services info: %w", govplus.ErrorNoClient)
	}
	if err = c.getAllDatacentersClustersAndHosts(ctx); err != nil {
		return fmt.Errorf("Could not get cluster and host entity list: %w", err)
	}

	for i, dc := range c.dcs {
		// Per-datacenter state: the reference list and the service->host
		// lookup must start empty for each datacenter, otherwise services of
		// previously visited datacenters would be queried and reported again
		// under the wrong dcname.
		srefs := make([]types.ManagedObjectReference, 0, len(c.hosts[i]))
		hostOfService := make(
			map[types.ManagedObjectReference]types.ManagedObjectReference,
			len(c.hosts[i]),
		)

		// get HostServiceSystem references and split the list into chunks
		for j, h := range c.hosts[i] {
			if c.getHostStateIdx(i, j) == nil {
				acc.AddError(fmt.Errorf("Could not find host state for %s", h.Name()))
				continue
			}
			s, err = h.ConfigManager().ServiceSystem(ctx)
			if err != nil {
				return fmt.Errorf("Could not get host service system: %w", err)
			}
			srefs = append(srefs, s.Reference())
			hostOfService[s.Reference()] = h.Reference()
		}
		chunks := chunckMoRefSlice(srefs, c.queryBulkSize)

		for _, refs := range chunks {
			// Use a fresh destination slice for every query.
			// NOTE(review): govmomi's Retrieve appears to append to the
			// destination slice rather than replace it, so reusing one slice
			// would re-emit the services of earlier chunks — confirm against
			// govmomi mo.LoadObjectContent.
			var hsMos []mo.HostServiceSystem

			err = c.coll.Retrieve(ctx, refs, []string{"serviceInfo.service"}, &hsMos)
			if err != nil {
				if err, exit := govplus.IsHardQueryError(err); exit {
					return err
				}
				acc.AddError(
					fmt.Errorf("Could not retrieve info for host service reference list: %w", err),
				)
				continue
			}
			t = time.Now()

			for _, hsMo := range hsMos {
				// find host of this service
				sref := hsMo.Self.Reference()
				hsref, found := hostOfService[sref]
				if !found {
					acc.AddError(
						fmt.Errorf("Could not find host for service system %s", sref.Value),
					)
					continue
				}
				// Guard against a nil host so that an unresolved reference
				// is reported instead of causing a nil pointer dereference.
				host = c.getHostObjectFromReference(i, &hsref)
				if host == nil {
					acc.AddError(
						fmt.Errorf("Could not find host object for %s", hsref.Value),
					)
					continue
				}
				hstags["clustername"] = c.getClusternameFromHost(i, host)
				hstags["dcname"] = dc.Name()
				hstags["esxhostname"] = host.Name()
				hstags["vcenter"] = c.client.Client.URL().Host

				for _, service := range hsMo.ServiceInfo.Service {
					hstags["key"] = service.Key
					hsfields["label"] = service.Label
					hsfields["policy"] = service.Policy
					hsfields["required"] = service.Required
					hsfields["running"] = service.Running
					acc.AddFields("vcstat_host_service", hsfields, hstags, t)
				}
			}
		}
	}

	return nil
}

// ReportHostEsxcliResponse reports metrics about host esxcli command responses
func (c *VcCollector) ReportHostEsxcliResponse(
ctx context.Context,
Expand Down
18 changes: 14 additions & 4 deletions plugins/inputs/vcstat/vcstat.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ type VCstatConfig struct {
Password string `toml:"password"`
Timeout config.Duration
IntSkipNotRespondig int16 `toml:"intervals_skip_notresponding_esxcli_hosts"`
QueryBulkSize int `toml:"query_bulk_size"`
QueryBulkSize int `toml:"query_bulk_size"`
Log telegraf.Logger `toml:"-"`

ClusterInstances bool `toml:"cluster_instances"`
Expand All @@ -39,6 +39,7 @@ type VCstatConfig struct {
HostNICInstances bool `toml:"host_nic_instances"`
HostFwInstances bool `toml:"host_firewall_instances"`
HostGraphics bool `toml:"host_graphics_instances"`
HostServices bool `toml:"host_service_instances"`
NetDVSInstances bool `toml:"net_dvs_instances"`
NetDVPInstances bool `toml:"net_dvp_instances"`
VMInstances bool `toml:"vm_instances"`
Expand Down Expand Up @@ -85,6 +86,8 @@ var sampleConfig = `
# host_hba_instances = false
## collect host network interface measurement (vcstat_host_nic)
# host_nic_instances = false
## collect host services measurement (vcstat_host_service)
# host_service_instances = false
## collect network distributed virtual switch measurement (vcstat_net_dvs)
# net_dvs_instances = true
## collect network distributed virtual portgroup measurement (vcstat_net_dvp)
Expand All @@ -101,13 +104,14 @@ func init() {
Username: "user@corp.local",
Password: "secret",
Timeout: config.Duration(time.Second * 0),
QueryBulkSize: 100,
QueryBulkSize: 100,
IntSkipNotRespondig: 20,
ClusterInstances: true,
DatastoreInstances: false,
HostInstances: true,
HostFwInstances: false,
HostGraphics: false,
HostServices: false,
HostHBAInstances: false,
HostNICInstances: false,
NetDVSInstances: true,
Expand Down Expand Up @@ -231,7 +235,7 @@ func (vcs *VCstatConfig) Gather(acc telegraf.Accumulator) error {

// selfmonitoring
vcs.GatherTime.Set(int64(time.Since(startTime).Nanoseconds()))
if vcs.HostHBAInstances || vcs.HostNICInstances || vcs.HostFwInstances || vcs.HostGraphics {
if vcs.HostHBAInstances || vcs.HostNICInstances || vcs.HostFwInstances {
vcs.NotRespondingHosts.Set(int64(vcs.vcc.GetNumberNotRespondingHosts()))
}
for _, m := range selfstat.Metrics() {
Expand Down Expand Up @@ -338,7 +342,13 @@ func (vcs *VCstatConfig) gatherHost(ctx context.Context, acc telegraf.Accumulato
}
}

if vcs.HostHBAInstances || vcs.HostNICInstances || vcs.HostFwInstances || vcs.HostGraphics {
if vcs.HostServices {
if err = col.CollectHostServices(ctx, acc); err != nil {
return err
}
}

if vcs.HostHBAInstances || vcs.HostNICInstances || vcs.HostFwInstances {
if err = col.ReportHostEsxcliResponse(ctx, acc); err != nil {
return err
}
Expand Down

0 comments on commit a81cc66

Please sign in to comment.