Skip to content

Commit

Permalink
feat(services): add database metrics interface (#1739)
Browse files Browse the repository at this point in the history
* feat(services): add database metrics interface

* remove some metrics
  • Loading branch information
HUAHUAI23 committed Dec 25, 2023
1 parent bda02f5 commit 74518b9
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 22 deletions.
6 changes: 6 additions & 0 deletions build/charts/laf-server/templates/rumtime-exporter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ spec:
value: {{ .Values.default_region.runtime_exporter_secret | quote }}
- name: NAMESPACE
value: {{ .Release.Namespace | quote }}
- name: DB_NAMESPACE
value: {{ .Release.Namespace | quote }}
---
apiVersion: v1
kind: Service
Expand Down Expand Up @@ -69,6 +71,10 @@ spec:
path: "/runtime/metrics/{{ .Values.default_region.runtime_exporter_secret}}"
scrapeTimeout: 10s
honorLabels: true
- interval: 60s
path: "/database/metrics/{{ .Values.default_region.runtime_exporter_secret}}"
scrapeTimeout: 10s
honorLabels: true
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
Expand Down
3 changes: 3 additions & 0 deletions services/runtime-exporter/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ export default class Config {
/**
 * Namespace configured through the NAMESPACE environment variable.
 *
 * @returns the variable's value, or an empty string when it is unset or empty
 */
static get NAMESPACE(): string {
  const namespace = process.env.NAMESPACE
  return namespace ? namespace : ''
}
/**
 * Namespace configured through the DB_NAMESPACE environment variable.
 *
 * @returns the variable's value, or an empty string when it is unset or empty
 */
static get DB_NAMESPACE(): string {
  const dbNamespace = process.env.DB_NAMESPACE
  return dbNamespace ? dbNamespace : ''
}

static get API_SECRET(): string {
if (!process.env.API_SECRET) {
Expand Down
58 changes: 58 additions & 0 deletions services/runtime-exporter/src/handler/get-db-metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { RequestHandler } from 'express'
import { ClusterService, Metric } from '../helper/cluster.service'
import Config from '../config'
import * as prom from 'prom-client'

// Dedicated registry: the handler in this module serialises only the gauges
// registered here.
const register = new prom.Registry()

/**
 * Creates a gauge on `register` carrying the container / pod / appid label set
 * shared by every database metric in this module.
 */
const createDatabaseGauge = (name: string, help: string) =>
  new prom.Gauge({
    labelNames: ['container', 'pod', 'appid'],
    registers: [register],
    help,
    name,
  })

// Per-container CPU usage reported for database pods.
const DATABASE_CPU = createDatabaseGauge('laf_mongo_cpu', 'the cpu of the mongo')

// Per-container memory usage reported for database pods.
const DATABASE_MEMORY = createDatabaseGauge(
  'laf_mongo_memory',
  'the memory of the mongo',
)

/**
 * Writes one container sample into both database gauges.
 *
 * @param metric - sample whose containerName, podName and appid are used as the
 *   label values (in that order) and whose cpu / memory are the gauge values
 */
function updateMetrics(metric: Metric) {
  const { containerName, podName, appid, cpu, memory } = metric
  // Same label tuple for both gauges, in the order declared in `labelNames`.
  const labelValues = [containerName, podName, appid]

  DATABASE_CPU.labels(...labelValues).set(cpu)
  DATABASE_MEMORY.labels(...labelValues).set(memory)
}

/**
 * Express handler that serves the database gauges as plain text.
 *
 * Replies `403 forbidden` unless the `token` route parameter equals
 * `Config.API_SECRET`. Otherwise it clears both gauges, repopulates them from
 * `ClusterService.getPodMetrics` (database namespace and database label
 * selector), and sends the serialised registry.
 */
const getDatabaseMetrics: RequestHandler = async (req, res) => {
  const { token } = req.params

  // An empty token is rejected before Config.API_SECRET is ever read.
  const authorized = Boolean(token) && token === Config.API_SECRET
  if (!authorized) {
    return res.status(403).send('forbidden')
  }

  // Drop every series left over from the previous scrape so pods that have
  // disappeared are no longer reported.
  for (const gauge of [DATABASE_CPU, DATABASE_MEMORY]) {
    gauge.reset()
  }

  const samples = await ClusterService.getPodMetrics(
    ClusterService.DB_NAMESPACE,
    ClusterService.LABEL_DATABASE,
    'DATABASE',
  )
  samples.forEach((sample) => updateMetrics(sample))

  res.set('Content-Type', 'text/plain')
  const exposition = await register.metrics()
  res.send(exposition)
}

export default getDatabaseMetrics
7 changes: 5 additions & 2 deletions services/runtime-exporter/src/handler/get-runtime-metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,11 @@ const getRuntimeMetrics: RequestHandler = async (req, res) => {
RUNTIME_MEMORY.reset()
RUNTIME_MEMORY_LIMIT.reset()

const runtimeMetrics =
await ClusterService.getRuntimePodMetricsForAllNamespaces()
const runtimeMetrics = await ClusterService.getPodMetrics(
ClusterService.NAMESPACE,
ClusterService.LABEL_KEY_APP_ID,
'RUNTIME',
)

for (const metric of runtimeMetrics) {
updateMetrics(metric)
Expand Down
71 changes: 51 additions & 20 deletions services/runtime-exporter/src/helper/cluster.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ export class ClusterService {
* - if kubeconfig is not empty, load from string
*/
// Pod label key holding the application id; also passed as the label selector
// when runtime pod metrics are requested.
static LABEL_KEY_APP_ID = 'laf.dev/appid'
// Label selector (key=value form) passed when database pod metrics are requested.
static LABEL_DATABASE = 'app.kubernetes.io/managed-by=kubeblocks'
// Static initialisers run once, so these capture the Config values at class
// load time; later changes to the environment are not picked up.
static NAMESPACE = Config.NAMESPACE
static DB_NAMESPACE = Config.DB_NAMESPACE

static loadKubeConfig() {
const conf = Config.KUBECONF
Expand Down Expand Up @@ -55,42 +57,71 @@ export class ClusterService {
return new k8s.Metrics(kc)
}

static async getRuntimePodMetricsForAllNamespaces(): Promise<Metric[]> {
static async getPodMetrics(
namespace: string,
label: string,
app: string,
): Promise<Metric[]> {
const metricsClient = this.getMetricsClient()
let res: any
if (ClusterService.NAMESPACE) {
if (namespace) {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
res = await metricsClient.metricsApiRequest(
`/apis/metrics.k8s.io/v1beta1/namespaces/${ClusterService.NAMESPACE}/pods?labelSelector=laf.dev/appid`,
`/apis/metrics.k8s.io/v1beta1/namespaces/${namespace}/pods?labelSelector=${label}`,
)
} else {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
res = await metricsClient.metricsApiRequest(
'/apis/metrics.k8s.io/v1beta1/pods?labelSelector=laf.dev/appid',
`/apis/metrics.k8s.io/v1beta1/pods?labelSelector=${label}`,
)
}

const metricsList: Metric[] = []
for (const item of res.items) {
const appid = item.metadata.labels[ClusterService.LABEL_KEY_APP_ID]
const podName = item.metadata.name
for (const container of item.containers) {
const containerName = container.name
// millicores
const cpu = Number(quantityToScalar(container.usage.cpu || 0))
// bytes
const memory = Number(quantityToScalar(container.usage.memory || 0))

const metric: Metric = {
cpu: cpu,
memory: memory,
appid: appid,
containerName: containerName,
podName: podName,
if (app === 'RUNTIME') {
for (const item of res.items) {
const appid: string =
item.metadata.labels[ClusterService.LABEL_KEY_APP_ID]
const podName = item.metadata.name
for (const container of item.containers) {
// millicores
const cpu = Number(quantityToScalar(container.usage.cpu || 0))
// bytes
const memory = Number(quantityToScalar(container.usage.memory || 0))

const metric: Metric = {
cpu: cpu,
memory: memory,
appid: appid,
containerName: container.name,
podName: podName,
}
metricsList.push(metric)
}
}
} else {
for (const item of res.items) {
const appid: string = item.metadata.labels['app.kubernetes.io/instance']
const podName = item.metadata.name
for (const container of item.containers) {
if (container.name === 'mongodb') {
// millicores
const cpu = Number(quantityToScalar(container.usage.cpu || 0))
// bytes
const memory = Number(quantityToScalar(container.usage.memory || 0))

const metric: Metric = {
cpu: cpu,
memory: memory,
appid: appid,
containerName: container.name,
podName: podName,
}
metricsList.push(metric)
}
}
metricsList.push(metric)
}
}

Expand Down
2 changes: 2 additions & 0 deletions services/runtime-exporter/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import express, { Request, Response } from 'express'
import Config from './config'
import { logger } from './logger'
import getRuntimeMetrics from './handler/get-runtime-metrics'
import getDatabaseMetrics from './handler/get-db-metrics'

require('express-async-errors')
const app = express()
Expand All @@ -14,6 +15,7 @@ process.on('uncaughtException', (err: Error) => {
})

// Metrics endpoints: the trailing `:token` path segment is exposed to the
// handler as req.params.token. The database handler compares it against
// Config.API_SECRET; presumably the runtime handler does the same (its
// check is not visible here — confirm).
app.get('/runtime/metrics/:token', getRuntimeMetrics)
app.get('/database/metrics/:token', getDatabaseMetrics)
// Health probe: always answers with the body 'ok'.
app.get('/healthz', (_, res: Response) => res.send('ok'))

// express error capture middleware
Expand Down

0 comments on commit 74518b9

Please sign in to comment.