Skip to content

Commit

Permalink
Merge pull request #247 from lidofinance/develop
Browse files Browse the repository at this point in the history
develop -> main
  • Loading branch information
Amuhar committed Feb 5, 2024
2 parents e34ba26 + e5c5fc1 commit 495ab7d
Show file tree
Hide file tree
Showing 43 changed files with 597 additions and 212 deletions.
15 changes: 12 additions & 3 deletions README.md
Expand Up @@ -32,12 +32,21 @@ For running locally in container run

1. `docker-compose -f docker-compose.yml build`
2. `docker-compose -f docker-compose.yml up`
3. `docker-compose -f docker-compose.metrics.yml up`

To configure Grafana, go to `http://localhost:8000/dashboards` and import the dashboards from the `./grafana` folder.

For running KAPI, one can also use the image from this page https://docs.lido.fi/guides/tooling#keys-api. Please always use the SHA256 hash of the Docker image for the latest release: lidofinance/lido-keys-api@<latest-hash>.

## Metrics and alerts

To launch Prometheus, Grafana, and Alertmanager, execute the following command:

```
docker-compose -f docker-compose.metrics.yml up
```

For configuring Grafana, navigate to http://localhost:8000/dashboards. Here, you can import dashboards from the ./grafana folder.

To view the list of alerts in Prometheus, visit http://localhost:9090/alerts. For checking the list of fired alerts in Alertmanager, go to http://localhost:9093/#/alerts.

## E2E tests

`$ yarn test:e2e`
Expand Down
13 changes: 13 additions & 0 deletions alerts/keys-api-outdated-keys.empty-db.rule.yml
@@ -0,0 +1,13 @@
groups:
- name: Keys API. Initial keys update check
rules:
- alert: KeysApiOutdatedKeysEmptyDB
expr: |
(time() - process_start_time_seconds{}) >= 15 * 60 and (lido_keys_api_last_update_timestamp{} == 0)
labels:
severity: critical
service: keys_api
app_team: tooling
annotations:
summary: Keys are outdated
description: 'Initial update took more than {{ $value | humanizeDuration }}'
36 changes: 36 additions & 0 deletions alerts/keys-api-outdated-keys.empty-db.test.yml
@@ -0,0 +1,36 @@
rule_files:
- 'keys-api-outdated-keys.empty-db.rule.yml'

evaluation_interval: 1m

tests:
# Test Case 1: Keys API not updated since process start
- interval: 1m
input_series:
- series: 'process_start_time_seconds{}'
values: '0x15'
- series: 'lido_keys_api_last_update_timestamp{}'
values: '0x15'
alert_rule_test:
- eval_time: 15m
alertname: KeysApiOutdatedKeysEmptyDB
exp_alerts:
- exp_labels:
severity: 'critical'
service: 'keys_api'
app_team: 'tooling'
exp_annotations:
summary: 'Keys are outdated'
description: 'Initial update took more than 15m 0s'

# Test Case 2: Keys API updated after process start
- interval: 1m
input_series:
- series: 'process_start_time_seconds{}'
values: '0x15'
- series: 'lido_keys_api_last_update_timestamp{}'
values: '0x12 0 0 1'
alert_rule_test:
- eval_time: 15m
alertname: KeysApiOutdatedKeysEmptyDB
exp_alerts: []
13 changes: 13 additions & 0 deletions alerts/keys-api-outdated-keys.non-empty-db.rule.yml
@@ -0,0 +1,13 @@
groups:
- name: Keys API. Update on non empty db
rules:
- alert: KeysApiOutdatedKeysNonEmptyDB
expr: |
(time() - lido_keys_api_last_update_timestamp{}) >= 10*60 and lido_keys_api_last_update_timestamp{} > 0
labels:
severity: critical
service: keys_api
app_team: tooling
annotations:
summary: Keys are outdated
description: 'Keys were not updated for more than {{ $value | humanizeDuration }}'
32 changes: 32 additions & 0 deletions alerts/keys-api-outdated-keys.non-empty-db.test.yml
@@ -0,0 +1,32 @@
rule_files:
- 'keys-api-outdated-keys.non-empty-db.rule.yml'

evaluation_interval: 1m

tests:
# Test Case 1: Keys API not updated for more than 10 minutes
- interval: 1m
input_series:
- series: 'lido_keys_api_last_update_timestamp{}'
values: '0x10 60'
alert_rule_test:
- eval_time: 11m
alertname: KeysApiOutdatedKeysNonEmptyDB
exp_alerts:
- exp_labels:
severity: 'critical'
service: 'keys_api'
app_team: 'tooling'
exp_annotations:
summary: 'Keys are outdated'
description: 'Keys were not updated for more than 10m 0s'

# Test Case 2: Keys API updated within the last 10 minutes
- interval: 1m
input_series:
- series: 'lido_keys_api_last_update_timestamp{}'
values: '60x10 61'
alert_rule_test:
- eval_time: 11m
alertname: KeysApiOutdatedKeysNonEmptyDB
exp_alerts: []
13 changes: 0 additions & 13 deletions alerts/keys-api-outdated-keys.rule.yml

This file was deleted.

49 changes: 0 additions & 49 deletions alerts/keys-api-outdated-keys.test.yml

This file was deleted.

4 changes: 2 additions & 2 deletions alerts/keys-api-outdated-validators.rule.yml
Expand Up @@ -2,11 +2,11 @@ groups:
- name: Keys API. Validators
rules:
- alert: KeysApiOutdatedValidators
expr: validators_registry_enabled{} == 1 AND (time() - lido_keys_api_validators_registry_last_update_block_timestamp{} >= 3600)
expr: (time() - lido_keys_api_validators_registry_last_update_block_timestamp{} >= 3600) AND validators_registry_enabled{} == 1
labels:
severity: critical
service: keys_api
app_team: tooling
annotations:
summary: Validators are outdated
description: 'Validators were not updated for more than 60 minutes'
description: 'Validators were not updated for more than {{ $value | humanizeDuration }}'
4 changes: 2 additions & 2 deletions alerts/keys-api-outdated-validators.test.yml
Expand Up @@ -32,7 +32,7 @@ tests:
app_team: tooling
exp_annotations:
summary: Validators are outdated
description: Validators were not updated for more than 60 minutes
description: Validators were not updated for more than 1h 0m 0s
- eval_time: 75m
alertname: KeysApiOutdatedValidators
exp_alerts:
Expand All @@ -42,7 +42,7 @@ tests:
app_team: tooling
exp_annotations:
summary: Validators are outdated
description: Validators were not updated for more than 60 minutes
description: Validators were not updated for more than 1h 0m 0s

# Actual validators list
- interval: 15m
Expand Down
11 changes: 11 additions & 0 deletions docker-compose.metrics.yml
Expand Up @@ -8,6 +8,7 @@ services:
- 9090:9090
volumes:
- ./prometheus/:/etc/prometheus/
- ./alerts/:/etc/prometheus/alerts/
command: --config.file=/etc/prometheus/prometheus.yml --enable-feature=remote-write-receiver

keys_api_grafana:
Expand All @@ -20,3 +21,13 @@ services:
- ./grafana/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml
depends_on:
- keys_api_prometheus

alertmanager:
image: prom/alertmanager:latest
ports:
- 9093:9093
restart: always
volumes:
- ./prometheus/:/etc/alertmanager/
depends_on:
- keys_api_prometheus
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "lido-keys-api",
"version": "1.0.0",
"version": "1.0.1",
"description": "Lido Node Operators keys service",
"author": "Lido team",
"private": true,
Expand Down
32 changes: 32 additions & 0 deletions prometheus/alertmanager.yml
@@ -0,0 +1,32 @@
global:
# The default SMTP From header field.
smtp_from: 'alertmanager@example.com'
# The default SMTP smarthost used for sending emails.
smtp_smarthost: 'localhost:25'
# Time after which an alert is declared resolved if it has not been updated.
resolve_timeout: '5m'

route:
# The route's receiver's name.
receiver: 'team-X-mails'
# Group by these labels.
group_by: ['alertname', 'cluster', 'service']
# Wait for 30 seconds before sending a notification.
group_wait: '30s'
# Wait 5 minutes before sending a notification about new alerts added to a group that has already been notified.
group_interval: '5m'
# Repeat a notification every 2 hours if the alert is still firing.
repeat_interval: '2h'
# Routes for specific alerts.
routes:
- match:
severity: critical
receiver: 'team-Y-mails'
continue: true
receivers:
- name: 'team-X-mails'
email_configs:
- to: 'team-X+alerts@example.com'
- name: 'team-Y-mails'
email_configs:
- to: 'team-Y+alerts@example.com'
9 changes: 9 additions & 0 deletions prometheus/prometheus.yml
Expand Up @@ -6,3 +6,12 @@ scrape_configs:
- targets: ['docker.for.mac.host.internal:3000']
remote_write:
- url: https://localhost/api/v1/write

rule_files:
- 'alerts/keys-api-outdated-keys.empty-db.rule.yml'
- 'alerts/keys-api-outdated-keys.non-empty-db.rule.yml'

alerting:
alertmanagers:
- static_configs:
- targets: ['docker.for.mac.host.internal:9093']
56 changes: 31 additions & 25 deletions src/app/database-testing.module.ts
@@ -1,29 +1,35 @@
import { Module } from '@nestjs/common';
import { DynamicModule, Module } from '@nestjs/common';
import { MikroOrmModule } from '@mikro-orm/nestjs';
import config from 'mikro-orm.config';
import { ConfigModule, ConfigService } from 'common/config';

/**
 * Nest module that registers MikroORM with the base options from
 * `mikro-orm.config`, taking the connection settings from `ConfigService`.
 *
 * Keys listed after `...config` in the factory result replace any same-named
 * key coming from the base config.
 */
@Module({
  imports: [
    ConfigModule,
    MikroOrmModule.forRootAsync({
      inject: [ConfigService],
      useFactory: async (configService: ConfigService) => ({
        ...config,
        // Connection settings come from the environment-backed ConfigService.
        dbName: configService.get('DB_NAME'),
        host: configService.get('DB_HOST'),
        port: configService.get('DB_PORT'),
        user: configService.get('DB_USER'),
        password: configService.get('DB_PASSWORD'),
        // Fixed ORM flags applied by this module.
        autoLoadEntities: false,
        cache: { enabled: false },
        debug: false,
        registerRequestContext: true,
        allowGlobalContext: true,
      }),
    }),
  ],
})
export class DatabaseTestingModule {}
/**
 * Dynamic Nest module that registers MikroORM with the base options from
 * `mikro-orm.config`, optionally adjusted by caller-supplied overrides.
 */
@Module({})
export class DatabaseTestingModule {
  /**
   * Builds the dynamic module definition.
   *
   * Option precedence, lowest to highest: base `config`, then
   * `mikroOrmConfigOverrides`, then the connection settings read from
   * `ConfigService` and the fixed flags set below. Overrides therefore cannot
   * replace the connection settings or those fixed flags.
   *
   * @param mikroOrmConfigOverrides - Partial MikroORM options merged on top of
   *   the base config; defaults to an empty object.
   * @returns A `DynamicModule` importing `ConfigModule` and the configured
   *   MikroORM module.
   */
  static forRoot(mikroOrmConfigOverrides: Partial<typeof config> = {}): DynamicModule {
    const ormModule = MikroOrmModule.forRootAsync({
      inject: [ConfigService],
      useFactory: async (configService: ConfigService) => ({
        ...config,
        ...mikroOrmConfigOverrides,
        // Connection settings come from the environment-backed ConfigService.
        dbName: configService.get('DB_NAME'),
        host: configService.get('DB_HOST'),
        port: configService.get('DB_PORT'),
        user: configService.get('DB_USER'),
        password: configService.get('DB_PASSWORD'),
        // Fixed ORM flags applied by this module.
        autoLoadEntities: false,
        cache: { enabled: false },
        debug: false,
        registerRequestContext: true,
        allowGlobalContext: true,
      }),
    });

    return {
      module: DatabaseTestingModule,
      imports: [ConfigModule, ormModule],
    };
  }
}

0 comments on commit 495ab7d

Please sign in to comment.