Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion base/monitoring/cadvisor/cadvisor.DaemonSet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
serviceAccountName: cadvisor
containers:
- name: cadvisor
image: index.docker.io/sourcegraph/cadvisor:4.4.2@sha256:4c3af0c4fd9ea4425d38f7d1a784833c5fd542542cdbb81292044773e686fa60
image: index.docker.io/sourcegraph/cadvisor:4.5.0@sha256:5117f2bc817c16fb129acb6f9b070af8f1be09d3d9a8f88e3297f7adfff9af0d
args:
# Kubernetes-specific flags below (other flags are baked into the Docker image)
#
Expand Down
2 changes: 1 addition & 1 deletion base/monitoring/grafana/grafana.StatefulSet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
spec:
containers:
- name: grafana
image: index.docker.io/sourcegraph/grafana:4.4.2@sha256:69777c3a895a03eee035c173c91c0f25893285118c06e51a67728ec4259e2296
image: index.docker.io/sourcegraph/grafana:4.5.0@sha256:f70a7f79c5c90cab0d5cfb8f3dbca4dc60ed390b045aff1a86079c87bfe9a8af
terminationMessagePolicy: FallbackToLogsOnError
ports:
- containerPort: 3370
Expand Down
4 changes: 4 additions & 0 deletions base/monitoring/grafana/rbac/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- grafana.ServiceAccount.yaml
2 changes: 1 addition & 1 deletion base/monitoring/jaeger/jaeger.Deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ spec:
spec:
containers:
- name: jaeger
image: index.docker.io/sourcegraph/jaeger-all-in-one:insiders@sha256:462ef3b4a5fa9227f04c2f4bc2968970fad0fcc9efbaf89adaad0ef98a24b53f
image: index.docker.io/sourcegraph/jaeger-all-in-one:4.5.0@sha256:461476b01968324a0d8cb43a0176713e006f99cdb1f2efc3ab2210fd0bb812c2
args: ["--memory.max-traces=20000"]
ports:
- containerPort: 5775
Expand Down
2 changes: 1 addition & 1 deletion base/monitoring/node-exporter/node-exporter.DaemonSet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
spec:
containers:
- name: node-exporter
image: index.docker.io/sourcegraph/node-exporter:4.4.2@sha256:fa8e5700b7762fffe0674e944762f44bb787a7e44d97569fe55348260453bf80
image: index.docker.io/sourcegraph/node-exporter:4.5.0@sha256:fa8e5700b7762fffe0674e944762f44bb787a7e44d97569fe55348260453bf80
imagePullPolicy: IfNotPresent
resources:
limits:
Expand Down
2 changes: 1 addition & 1 deletion base/monitoring/otel-collector/otel-agent.DaemonSet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
spec:
containers:
- name: otel-agent
image: index.docker.io/sourcegraph/opentelemetry-collector:4.4.2@sha256:f0723c96c973258ad3123ddc479261bb8f5827bbac1d091b6a683fde55334413
image: index.docker.io/sourcegraph/opentelemetry-collector:4.5.0@sha256:12f3fc137edea8319ebf574e15e6c27c19fb0b7ca17165973f98c8d8c342ca1d
command:
- "/bin/otelcol-sourcegraph"
- "--config=/etc/otel-agent/config.yaml"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spec:
spec:
containers:
- name: otel-collector
image: index.docker.io/sourcegraph/opentelemetry-collector:4.4.2@sha256:f0723c96c973258ad3123ddc479261bb8f5827bbac1d091b6a683fde55334413
image: index.docker.io/sourcegraph/opentelemetry-collector:4.5.0@sha256:12f3fc137edea8319ebf574e15e6c27c19fb0b7ca17165973f98c8d8c342ca1d
command:
- "/bin/otelcol-sourcegraph"
# To use a custom configuration, edit otel-collector.ConfigMap.yaml
Expand Down
214 changes: 108 additions & 106 deletions base/monitoring/prometheus/prometheus.ConfigMap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ data:
prometheus.yml: |
# Prometheus global config
global:
scrape_interval: 30s
scrape_interval: 30s
evaluation_interval: 30s
# scrape_timeout is set to the global default (10s).

Expand All @@ -19,47 +19,36 @@ data:
alertmanagers:
# bundled alertmanager, started by prom-wrapper
- static_configs:
- targets: ['127.0.0.1:9093']
- targets: ["127.0.0.1:9093"]
path_prefix: /alertmanager
# add more alertmanagers here

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
- '/sg_config_prometheus/*_rules.yml'
- '/sg_prometheus_add_ons/*_rules.yml'
- "/sg_config_prometheus/*_rules.yml"
- "/sg_prometheus_add_ons/*_rules.yml"

# Configure targets to scrape
scrape_configs:

# Scrape prometheus itself for metrics.
- job_name: 'builtin-prometheus'
- job_name: "builtin-prometheus"
static_configs:
- targets: ['127.0.0.1:9092']
- targets: ["127.0.0.1:9092"]

- job_name: 'builtin-alertmanager'
- job_name: "builtin-alertmanager"
metrics_path: /alertmanager/metrics
static_configs:
- targets: ['127.0.0.1:9093']
- targets: ["127.0.0.1:9093"]

- job_name: 'sourcegraph-services'
relabel_configs:
- source_labels: [__address__]
target_label: instance
regex: (.*)\.(.*)
replacement: ${1}_${2}
metric_relabel_configs:
- source_labels: [container_label_io_kubernetes_pod_namespace]
regex: kube-system
action: drop
file_sd_configs:
- files:
- '/sg_prometheus_add_ons/*_targets.yml'

- job_name: 'cadvisor'
##########################################################################################
# cadvisor
##########################################################################################

- job_name: "kubernetes-pods"
dns_sd_configs:
- names:
- 'cadvisor.default.svc.cluster.local'
- 'cadvisor.ns-sourcegraph.svc.cluster.local'
- "cadvisor.default.svc.cluster.local"
- "cadvisor.ns-sourcegraph.svc.cluster.local"
type: A
port: 48080
relabel_configs:
Expand All @@ -73,59 +62,97 @@ data:
- source_labels: [container_label_io_kubernetes_pod_namespace]
regex: kube-system
action: drop
- source_labels: [container_label_io_kubernetes_container_name, container_label_io_kubernetes_pod_name]
- source_labels:
[
container_label_io_kubernetes_container_name,
container_label_io_kubernetes_pod_name,
]
regex: (.+)
action: replace
target_label: name
separator: '-'
# - source_labels: [container_label_io_kubernetes_pod_namespace]
# regex: ^$|ns-sourcegraph # ACTION: replace ns-sourcegraph with your namespace
# action: keep

- job_name: 'sourcegraph-statefulsets'
separator: "-"

##########################################################################################
# sourcegraph-services
##########################################################################################

- job_name: "sourcegraph-services"
relabel_configs:
- source_labels: [__address__]
target_label: instance
regex: (.*)\.(.*)
replacement: ${1}_${2}
metric_relabel_configs:
- source_labels: [container_label_io_kubernetes_pod_namespace]
regex: kube-system
action: drop
file_sd_configs:
- files:
- "/sg_prometheus_add_ons/*_targets.yml"

- job_name: "sourcegraph-statefulsets"
dns_sd_configs:
- names:
- 'symbols.default.svc.cluster.local'
- 'symbols.ns-sourcegraph.svc.cluster.local'
- 'searcher.default.svc.cluster.local'
- 'searcher.ns-sourcegraph.svc.cluster.local'
- 'gitserver.default.svc.cluster.local'
- 'gitserver.ns-sourcegraph.svc.cluster.local'
- 'sourcegraph-frontend.default.svc.cluster.local'
- 'sourcegraph-frontend.ns-sourcegraph.svc.cluster.local'
type: A
port: 6060
- names:
- 'indexed-search.default.svc.cluster.local'
- 'indexed-search.ns-sourcegraph.svc.cluster.local'
type: A
port: 6070
- names:
- 'indexed-search-indexer.default.svc.cluster.local'
- 'indexed-search-indexer.ns-sourcegraph.svc.cluster.local'
type: A
port: 6072
- "symbols.default.svc.cluster.local"
- "symbols.ns-sourcegraph.svc.cluster.local"
- "symbols.$SG_NAMESPACE.svc.cluster.local"
- "searcher.default.svc.cluster.local"
- "searcher.ns-sourcegraph.svc.cluster.local"
- "searcher.$SG_NAMESPACE.svc.cluster.local"
- "gitserver.default.svc.cluster.local"
- "gitserver.ns-sourcegraph.svc.cluster.local"
- "gitserver.$SG_NAMESPACE.svc.cluster.local"
- "sourcegraph-frontend.default.svc.cluster.local"
- "sourcegraph-frontend.ns-sourcegraph.svc.cluster.local"
- "sourcegraph-frontend.$SG_NAMESPACE.svc.cluster.local"
- "indexed-search.default.svc.cluster.local"
- "indexed-search.ns-sourcegraph.svc.cluster.local"
- "indexed-search.$SG_NAMESPACE.svc.cluster.local"
- "indexed-search-indexer.default.svc.cluster.local"
- "indexed-search-indexer.ns-sourcegraph.svc.cluster.local"
- "indexed-search-indexer.$SG_NAMESPACE.svc.cluster.local"
type: SRV
relabel_configs:
- source_labels: [__meta_dns_srv_record_target]
target_label: __address__
regex: (.*)\.
replacement: ${1}:6060
- source_labels: [__meta_dns_srv_record_target]
target_label: __address__
regex: ^(indexed-search.*)\.
replacement: ${1}:6070
- source_labels: [__meta_dns_srv_record_target]
target_label: __address__
regex: (.*)\.(indexed-search-indexer.*)\.
replacement: ${1}.${2}:6072
- source_labels: [__meta_dns_srv_record_port]
target_label: __meta_dns_srv_record_port
replacement: 6060
- source_labels: [__address__]
regex: ^(indexed-search).*$
target_label: __meta_dns_srv_record_port
replacement: 6070
- source_labels: [__meta_dns_name]
target_label: service_name
target_label: job
regex: (.*)\..*\..*\..*\..*
replacement: ${1}
- source_labels: [__meta_dns_srv_record_target]
regex: (.*)\.(.*)\..*\..*\..*\..*\..*
target_label: instance
replacement: ${2}_${1}
metric_relabel_configs:
- source_labels: [container_label_io_kubernetes_pod_namespace]
regex: kube-system
action: drop
- source_labels: [__address__]
target_label: instance
regex: (.*)\:.*
replacement: $1:6060
- source_labels: [__address__]
target_label: instance
regex: (.*)\.(.*)\..*\..*\..*\..*\..*
replacement: ${2}_${1}

# Extra rules
extra_rules.yml: |
groups:
- name: container.rules
rules:
- record: container:process_cpu_seconds_total:ratio_rate5m
expr: sum by (instance) (rate(process_cpu_seconds_total[5m])) / engine_daemon_engine_cpus_cpus
- record: container:process_cpu_seconds_total:sum
expr: sum by (instance) (irate(process_cpu_seconds_total[1m]))
- record: container:process_resident_memory_bytes:max
expr: max by (instance) (process_resident_memory_bytes)
- record: container:process_virtual_memory_bytes:max
expr: max by (instance) (process_virtual_memory_bytes)

# List of static targets
prometheus_targets.yml: |
- labels:
nodename: "sourcegraph-services"
Expand Down Expand Up @@ -202,40 +229,15 @@ data:
job: otel-collector
targets:
- otel-collector:8888

# Add new targets based on replica count of symbols
symbols_targets.yml: |
- labels:
nodename: "sourcegraph-services"
job: symbols
targets:
- symbols-0.symbols:6060

# Add new targets based on replica count of searcher
searcher_targets.yml: |
- labels:
nodename: "sourcegraph-services"
job: searcher
targets:
- searcher-0.searcher:6060

# Add new targets based on replica count of gitserver
gitserver_targets.yml: |
- labels:
nodename: "sourcegraph-services"
job: gitserver
targets:
- gitserver-0.gitserver:6060

# Add new targets based on replica count of indexed-search
indexed-search_targets.yml: |
- labels:
nodename: "sourcegraph-services"
job: zoekt-indexserver
targets:
- indexed-search-0.indexed-search:6072
- labels:
nodename: "sourcegraph-services"
job: zoekt-webserver
targets:
- indexed-search-0.indexed-search:6070
extra_rules.yml: |
groups:
- name: container.rules
rules:
- record: container:process_cpu_seconds_total:ratio_rate5m
expr: sum by (instance) (rate(process_cpu_seconds_total[5m])) / engine_daemon_engine_cpus_cpus
- record: container:process_cpu_seconds_total:sum
expr: sum by (instance) (irate(process_cpu_seconds_total[1m]))
- record: container:process_resident_memory_bytes:max
expr: max by (instance) (process_resident_memory_bytes)
- record: container:process_virtual_memory_bytes:max
expr: max by (instance) (process_virtual_memory_bytes)
5 changes: 2 additions & 3 deletions base/monitoring/prometheus/prometheus.Deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ spec:
spec:
containers:
- name: prometheus
image: index.docker.io/sourcegraph/prometheus:4.4.2@sha256:d833d00a39937cf700f276f816dc789615d6396979418a7d9362386513b1fc9d
image: index.docker.io/sourcegraph/prometheus:4.5.0@sha256:4fe9a5fdee206b1aac9d32afb31ad57e1882394aad9e7e9f719a1b2741afcae5
terminationMessagePolicy: FallbackToLogsOnError
env:
- name: MY_POD_NAMESPACE
- name: SG_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
Expand Down Expand Up @@ -70,7 +70,6 @@ spec:
runAsUser: 100
fsGroup: 100
fsGroupChangePolicy: "OnRootMismatch"
# serviceAccountName: prometheus
volumes:
- name: data
persistentVolumeClaim:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ spec:
- ReadWriteOnce
resources:
requests:
storage: 50Gi
storage: 200Gi
7 changes: 7 additions & 0 deletions base/monitoring/prometheus/rbac/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- prometheus.ConfigMap.yaml
- prometheus.ClusterRole.yaml
- prometheus.ClusterRoleBinding.yaml
- prometheus.ServiceAccount.yaml
Loading