diff --git a/.env.example b/.env.example
index f798bd7f..5fd724fc 100644
--- a/.env.example
+++ b/.env.example
@@ -34,5 +34,13 @@ ENV_DOCKER_USER_GROUP="ggroup"
 ENV_PING_USERNAME=
 ENV_PING_PASSWORD=
 
+# --- HTTP Server
+ENV_HTTP_PORT=8080
+
 # --- SEO: SPA application directory
 ENV_SPA_DIR=
+ENV_SPA_IMAGES_DIR=
+
+# --- Monitoring: Grafana admin password
+#     REQUIRED: docker-compose.yml rejects an unset or empty value for the Grafana services
+GRAFANA_ADMIN_PASSWORD=
diff --git a/.gitignore b/.gitignore
index 17ac5765..c1d9c088 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,8 +10,8 @@ storage/seo/*.*
 !storage/seo/.gitkeep
 
 # --- [Caddy]: mtls
-caddy/mtls/*.*
-!caddy/mtls/.gitkeep
+infra/caddy/mtls/*.*
+!infra/caddy/mtls/.gitkeep
 
 # --- [API]: Bin
 bin/*
diff --git a/Makefile b/Makefile
index 70b54ce6..694ddc59 100644
--- a/Makefile
+++ b/Makefile
@@ -34,14 +34,15 @@ CGO_ENABLED           := 1
 # -------------------------------------------------------------------------------------------------------------------- #
 # -------------------------------------------------------------------------------------------------------------------- #
 
-include ./metal/makefile/helpers.mk
-include ./metal/makefile/env.mk
-include ./metal/makefile/db.mk
-include ./metal/makefile/app.mk
-include ./metal/makefile/logs.mk
-include ./metal/makefile/build.mk
-include ./metal/makefile/infra.mk
-include ./metal/makefile/caddy.mk
+include ./infra/makefile/helpers.mk
+include ./infra/makefile/env.mk
+include ./infra/makefile/db.mk
+include ./infra/makefile/app.mk
+include ./infra/makefile/logs.mk
+include ./infra/makefile/build.mk
+include ./infra/makefile/infra.mk
+include ./infra/makefile/caddy.mk
+include ./infra/makefile/monitor.mk
 
 # -------------------------------------------------------------------------------------------------------------------- #
 # -------------------------------------------------------------------------------------------------------------------- #
@@ -104,6 +105,14 @@ help:
 	@printf "$(BOLD)$(BLUE)Caddy Commands:$(NC)\n"
 	@printf "  $(BOLD)$(GREEN)caddy-gen-cert$(NC)   : Generate the caddy's mtls certificates.\n"
 	@printf "  $(BOLD)$(GREEN)caddy-del-cert$(NC)   : Remove the caddy's mtls certificates.\n"
-	@printf "  $(BOLD)$(GREEN)caddy-validate$(NC)   : Validates caddy's files syntax.\n"
+	@printf "  $(BOLD)$(GREEN)caddy-validate$(NC)   : Validates caddy's files syntax.\n\n"
+
+	@printf "$(BOLD)$(BLUE)Monitoring Commands:$(NC)\n"
+	@printf "  $(BOLD)$(GREEN)monitor-up$(NC)       : Start the monitoring stack (Prometheus, Grafana).\n"
+	@printf "  $(BOLD)$(GREEN)monitor-down$(NC)     : Stop the monitoring stack.\n"
+	@printf "  $(BOLD)$(GREEN)monitor-status$(NC)   : Show status of monitoring services.\n"
+	@printf "  $(BOLD)$(GREEN)monitor-test$(NC)     : Run monitoring stack test suite.\n"
+	@printf "  $(BOLD)$(GREEN)monitor-grafana$(NC)  : Open Grafana dashboards in browser.\n"
+	@printf "  $(BOLD)$(GREEN)monitor-help$(NC)     : Show detailed monitoring commands.\n"
 
 	@printf "$(NC)\n"
diff --git a/docker-compose.yml b/docker-compose.yml
index 513b806a..a23ffc80 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,6 +12,14 @@ volumes:
     caddy_config:
     go_mod_cache:
         driver: local
+    prometheus_data_prod:
+        driver: local
+    prometheus_data_local:
+        driver: local
+    grafana_data_prod:
+        driver: local
+    grafana_data_local:
+        driver: local
 
     # --- DB: Define a named volume at the top level.
     #     Docker will manage its lifecycle.
@@ -30,7 +38,7 @@ services:
         caddy_prod:
             image: api-caddy_prod
             build:
-                context: ./caddy
+                context: ./infra/caddy
                 dockerfile: Dockerfile
                 args:
                     - CADDY_VERSION=2.10.2
@@ -40,16 +48,27 @@ services:
             restart: unless-stopped
             depends_on:
                 - api
+
+            # --- The 443:443/udp is required for HTTP/3
+            #     NOTES:
+            #           - Admin API (2019) is bound to 127.0.0.1 inside the container and is NOT published to host
+            #           - Prometheus scrapes metrics from dedicated endpoint (9180) via Docker internal DNS
             ports:
                 - "80:80"
                 - "443:443"
                 - "443:443/udp" # Required for HTTP/3
+                # NOTE: Admin API (2019) is NOT published to host (bound to 127.0.0.1 inside the container)
+                # Prometheus scrapes Caddy metrics from :9180 via Docker internal DNS
+
+            # --- Dedicated /metrics endpoint for Prometheus (internal network only)
+            expose:
+                - "9180"
             volumes:
                 - caddy_data:/data
                 - caddy_config:/config
-                - ./caddy/Caddyfile.prod:/etc/caddy/Caddyfile
+                - ./infra/caddy/Caddyfile.prod:/etc/caddy/Caddyfile
                 - ${CADDY_LOGS_PATH}:/var/log/caddy
-                - ./caddy/mtls:/etc/caddy/mtls:ro
+                - ./infra/caddy/mtls:/etc/caddy/mtls:ro
             networks:
                 caddy_net:
                   aliases:
@@ -57,7 +76,7 @@ services:
 
         caddy_local:
             build:
-                context: ./caddy
+                context: ./infra/caddy
                 dockerfile: Dockerfile
                 args:
                     - CADDY_VERSION=2.10.2
@@ -68,15 +87,247 @@ services:
             depends_on:
                 - api
             ports:
-                - "8080:80"
+                - "18080:80"
                 - "8443:443"
+                - "127.0.0.1:2019:2019" # Admin API, host-localhost only (debugging). NOTE(review): Caddyfile.local binds admin to 127.0.0.1 inside the container, so this mapping is likely unreachable - confirm
+
+            # --- Dedicated /metrics endpoint for Prometheus (internal network only)
+            expose:
+                - "9180"
+
             volumes:
                 - caddy_data:/data
                 - caddy_config:/config
-                - ./caddy/mtls:/etc/caddy/mtls:ro
-                - ./caddy/Caddyfile.local:/etc/caddy/Caddyfile
+                - ./infra/caddy/mtls:/etc/caddy/mtls:ro
+                - ./infra/caddy/Caddyfile.local:/etc/caddy/Caddyfile
+            networks:
+                - caddy_net
+
+        prometheus:
+            image: prom/prometheus:v3.0.1
+            profiles: ["prod"]
+            container_name: oullin_prometheus
+            restart: unless-stopped
+            command:
+                - '--config.file=/etc/prometheus/prometheus.yml'
+                - '--storage.tsdb.path=/prometheus'
+                - '--storage.tsdb.retention.time=30d'
+                - '--web.console.libraries=/usr/share/prometheus/console_libraries'
+                - '--web.console.templates=/usr/share/prometheus/consoles'
+            ports:
+                - "127.0.0.1:9090:9090"
+            volumes:
+                - ./infra/metrics/prometheus/provisioning/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+                - prometheus_data_prod:/prometheus
+            networks:
+                - caddy_net
+                - oullin_net
+            depends_on:
+                caddy_prod:
+                    condition: service_started
+                postgres_exporter:
+                    condition: service_healthy
+            healthcheck:
+                test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
+                interval: 10s
+                timeout: 5s
+                retries: 5
+                start_period: 10s
+            deploy:
+                resources:
+                    limits:
+                        cpus: '1.0'
+                        memory: 1G
+                    reservations:
+                        cpus: '0.25'
+                        memory: 256M
+
+        prometheus_local:
+            image: prom/prometheus:v3.0.1
+            profiles: ["local"]
+            container_name: oullin_prometheus_local
+            restart: unless-stopped
+            command:
+                - '--config.file=/etc/prometheus/prometheus.yml'
+                - '--storage.tsdb.path=/prometheus'
+                - '--storage.tsdb.retention.time=7d'
+                - '--web.console.libraries=/usr/share/prometheus/console_libraries'
+                - '--web.console.templates=/usr/share/prometheus/consoles'
+            ports:
+                - "9090:9090"
+            volumes:
+                - ./infra/metrics/prometheus/provisioning/prometheus.local.yml:/etc/prometheus/prometheus.yml:ro
+                - prometheus_data_local:/prometheus
+            networks:
+                - caddy_net
+                - oullin_net
+            depends_on:
+                caddy_local:
+                    condition: service_started
+                postgres_exporter_local:
+                    condition: service_healthy
+            healthcheck:
+                test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
+                interval: 10s
+                timeout: 5s
+                retries: 5
+                start_period: 10s
+            deploy:
+                resources:
+                    limits:
+                        cpus: '1.0'
+                        memory: 1G
+                    reservations:
+                        cpus: '0.25'
+                        memory: 256M
+
+        postgres_exporter:
+            image: prometheuscommunity/postgres-exporter:v0.15.0
+            profiles: ["prod"]
+            container_name: oullin_postgres_exporter
+            restart: unless-stopped
+            entrypoint: ["/postgres-exporter-entrypoint.sh"]
+            volumes:
+                - ./infra/metrics/prometheus/scripts/postgres-exporter-entrypoint.sh:/postgres-exporter-entrypoint.sh:ro
+            secrets:
+                - pg_username
+                - pg_password
+                - pg_dbname
+            networks:
+                - oullin_net
+                - caddy_net
+            depends_on:
+                api-db:
+                    condition: service_healthy
+            expose:
+                - "9187"
+            healthcheck:
+                test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9187/"]
+                interval: 10s
+                timeout: 5s
+                retries: 5
+                start_period: 10s
+            deploy:
+                resources:
+                    limits:
+                        cpus: '0.25'
+                        memory: 128M
+                    reservations:
+                        cpus: '0.05'
+                        memory: 32M
+
+        postgres_exporter_local:
+            image: prometheuscommunity/postgres-exporter:v0.15.0
+            profiles: ["local"]
+            container_name: oullin_postgres_exporter_local
+            restart: unless-stopped
+            entrypoint: ["/postgres-exporter-entrypoint.sh"]
+            volumes:
+                - ./infra/metrics/prometheus/scripts/postgres-exporter-entrypoint.sh:/postgres-exporter-entrypoint.sh:ro
+            secrets:
+                - pg_username
+                - pg_password
+                - pg_dbname
+            networks:
+                - oullin_net
+                - caddy_net
+            depends_on:
+                api-db:
+                    condition: service_healthy
+            expose:
+                - "9187"
+            healthcheck:
+                test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9187/"]
+                interval: 10s
+                timeout: 5s
+                retries: 5
+                start_period: 10s
+            deploy:
+                resources:
+                    limits:
+                        cpus: '0.25'
+                        memory: 128M
+                    reservations:
+                        cpus: '0.05'
+                        memory: 32M
+
+        grafana:
+            image: grafana/grafana:11.4.0
+            profiles: ["prod"]
+            container_name: oullin_grafana
+            restart: unless-stopped
+            ports:
+                - "127.0.0.1:3000:3000"
+            environment:
+                - GF_SERVER_ROOT_URL=http://localhost:3000
+                - GF_SECURITY_ADMIN_USER=admin
+                - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set in .env file}
+                - GF_USERS_ALLOW_SIGN_UP=false
+                - GF_AUTH_ANONYMOUS_ENABLED=false
+                - GF_INSTALL_PLUGINS=
+                - GF_DATASOURCE_PROMETHEUS_URL=http://oullin_prometheus:9090
+            volumes:
+                - grafana_data_prod:/var/lib/grafana
+                - ./infra/metrics/grafana/provisioning:/etc/grafana/provisioning:ro
+                - ./infra/metrics/grafana/dashboards:/var/lib/grafana/dashboards:ro
+            networks:
+                - caddy_net
+            depends_on:
+                prometheus:
+                    condition: service_healthy
+            healthcheck:
+                test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/"]
+                interval: 10s
+                timeout: 5s
+                retries: 5
+                start_period: 30s
+            deploy:
+                resources:
+                    limits:
+                        cpus: '0.5'
+                        memory: 512M
+                    reservations:
+                        cpus: '0.1'
+                        memory: 128M
+
+        grafana_local:
+            image: grafana/grafana:11.4.0
+            profiles: ["local"]
+            container_name: oullin_grafana_local
+            restart: unless-stopped
+            ports:
+                - "3000:3000"
+            environment:
+                - GF_SERVER_ROOT_URL=http://localhost:3000
+                - GF_SECURITY_ADMIN_USER=admin
+                - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set in .env file}
+                - GF_USERS_ALLOW_SIGN_UP=false
+                - GF_AUTH_ANONYMOUS_ENABLED=false
+                - GF_INSTALL_PLUGINS=
+                - GF_DATASOURCE_PROMETHEUS_URL=http://oullin_prometheus_local:9090
+            volumes:
+                - grafana_data_local:/var/lib/grafana
+                - ./infra/metrics/grafana/provisioning:/etc/grafana/provisioning:ro
+                - ./infra/metrics/grafana/dashboards:/var/lib/grafana/dashboards:ro
             networks:
                 - caddy_net
+            depends_on:
+                prometheus_local:
+                    condition: service_healthy
+            healthcheck:
+                test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/"]
+                interval: 10s
+                timeout: 5s
+                retries: 5
+                start_period: 30s
+            deploy:
+                resources:
+                    limits:
+                        cpus: '0.5'
+                        memory: 512M
+                    reservations:
+                        cpus: '0.1'
+                        memory: 128M
 
         # A dedicated service for running one-off Go commands
         api-runner:
@@ -86,7 +337,7 @@ services:
             - ./.env
           build:
             context: .
-            dockerfile: ./docker/dockerfile-api
+            dockerfile: ./infra/docker/dockerfile-api
             target: builder
           volumes:
             - .:/app
@@ -128,7 +379,7 @@ services:
                 ENV_HTTP_HOST: 0.0.0.0
             build:
                 context: .
-                dockerfile: ./docker/dockerfile-api
+                dockerfile: ./infra/docker/dockerfile-api
                 args:
                     - APP_VERSION=0.0.0.1
                     - APP_HOST_PORT=${ENV_HTTP_PORT}
diff --git a/go.mod b/go.mod
index 642b8e8b..837d0b83 100644
--- a/go.mod
+++ b/go.mod
@@ -13,6 +13,7 @@ require (
 	github.com/joho/godotenv v1.5.1
 	github.com/klauspost/compress v1.18.0
 	github.com/lib/pq v1.10.9
+	github.com/prometheus/client_golang v1.20.5
 	github.com/rs/cors v1.11.1
 	github.com/testcontainers/testcontainers-go v0.39.0
 	github.com/testcontainers/testcontainers-go/modules/postgres v0.39.0
@@ -29,7 +30,9 @@ require (
 	dario.cat/mergo v1.0.2 // indirect
 	github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
 	github.com/Microsoft/go-winio v0.6.2 // indirect
+	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/containerd/errdefs v1.0.0 // indirect
 	github.com/containerd/errdefs/pkg v0.3.0 // indirect
 	github.com/containerd/log v0.1.0 // indirect
@@ -66,11 +69,15 @@ require (
 	github.com/moby/sys/userns v0.1.0 // indirect
 	github.com/moby/term v0.5.2 // indirect
 	github.com/morikuni/aec v1.0.0 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/opencontainers/go-digest v1.0.0 // indirect
 	github.com/opencontainers/image-spec v1.1.1 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
+	github.com/prometheus/client_model v0.6.1 // indirect
+	github.com/prometheus/common v0.55.0 // indirect
+	github.com/prometheus/procfs v0.15.1 // indirect
 	github.com/shirou/gopsutil/v4 v4.25.9 // indirect
 	github.com/sirupsen/logrus v1.9.3 // indirect
 	github.com/stretchr/testify v1.11.1 // indirect
diff --git a/go.sum b/go.sum
index 81962bc9..f2d5a7aa 100644
--- a/go.sum
+++ b/go.sum
@@ -10,8 +10,12 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
 github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
 github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
 github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
 github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/chai2010/webp v1.4.0 h1:6DA2pkkRUPnbOHvvsmGI3He1hBKf/bkRlniAiSGuEko=
 github.com/chai2010/webp v1.4.0/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU=
 github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
@@ -92,6 +96,8 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
 github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
 github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
@@ -120,6 +126,8 @@ github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
 github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
 github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
 github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
@@ -132,6 +140,14 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
 github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
+github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
+github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
+github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
+github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
 github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
 github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
 github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA=
@@ -185,8 +201,6 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
 go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
 golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
 golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
-golang.org/toolchain v0.0.1-go1.25.3.linux-amd64 h1:OsvRiFtt0A9JsTaoQsnFK4wKOOAY2UtJvkOT+Djl7tQ=
-golang.org/toolchain v0.0.1-go1.25.3.linux-amd64/go.mod h1:c/4eKWFBYMD/i1j7ipNwtrHQP02jj74611NzmDqwkJE=
 golang.org/x/image v0.32.0 h1:6lZQWq75h7L5IWNk0r+SCpUJ6tUVd3v4ZHnbRKLkUDQ=
 golang.org/x/image v0.32.0/go.mod h1:/R37rrQmKXtO6tYXAjtDLwQgFLHmhW+V6ayXlxzP2Pc=
 golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
diff --git a/handler/metrics.go b/handler/metrics.go
new file mode 100644
index 00000000..9cbcbcdd
--- /dev/null
+++ b/handler/metrics.go
@@ -0,0 +1,23 @@
+package handler
+
+import (
+	"net/http"
+
+	"github.com/oullin/pkg/endpoint"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+type MetricsHandler struct{}
+
+func NewMetricsHandler() MetricsHandler {
+	return MetricsHandler{}
+}
+
+// Handle writes the Prometheus metrics exposition to w and always returns nil.
+// No authentication happens here. NOTE(review): exposure is presumably limited by the
+// proxy rules and Docker networks (caddy_net, oullin_net) - confirm before relying on it.
+func (h MetricsHandler) Handle(w http.ResponseWriter, r *http.Request) *endpoint.ApiError {
+	// promhttp.Handler serves the default Prometheus registry and writes the full response itself.
+	promhttp.Handler().ServeHTTP(w, r)
+	return nil
+}
diff --git a/caddy/Caddyfile.local b/infra/caddy/Caddyfile.local
similarity index 74%
rename from caddy/Caddyfile.local
rename to infra/caddy/Caddyfile.local
index d1c84dbc..57e3819c 100644
--- a/caddy/Caddyfile.local
+++ b/infra/caddy/Caddyfile.local
@@ -2,6 +2,15 @@
 # This is the most reliable way to ensure Caddy acts as a simple HTTP proxy locally.
 {
 	auto_https off
+
+	# Enable metrics collection for HTTP handlers
+	servers {
+		metrics
+	}
+
+	# Admin API listens only on localhost within container for security
+	# Prometheus accesses /metrics via the dedicated :9180 listener, not the admin API
+	admin 127.0.0.1:2019
 }
 
 # It tells Caddy to listen on its internal port 80 for any incoming hostname.
@@ -35,8 +44,27 @@
 		respond 204
 	}
 
+	# Block protected paths
+	@protected path /metrics /generate-signature*
+	handle @protected {
+		respond 403
+	}
+
 	# Reverse proxy all incoming requests to the 'api' service.
 	# 	- The service name 'api' is resolved by Docker's internal DNS to the correct container IP on the 'caddy_net' network.
 	# 	- The API container listens on port 8080 (from the ENV_HTTP_PORT).
 	reverse_proxy api:8080
 }
+
+# INTERNAL metrics endpoint for Prometheus scraping (not published to host; Docker network only).
+# Served by Caddy's own `metrics` handler rather than a proxy to the admin API: the admin endpoint
+# on 127.0.0.1:2019 checks the Host header, and a proxied request keeps the scraper's Host value.
+:9180 {
+	handle /metrics {
+		metrics
+	}
+
+	handle {
+		respond 404
+	}
+}
diff --git a/caddy/Caddyfile.prod b/infra/caddy/Caddyfile.prod
similarity index 82%
rename from caddy/Caddyfile.prod
rename to infra/caddy/Caddyfile.prod
index 16f46287..b71aca96 100644
--- a/caddy/Caddyfile.prod
+++ b/infra/caddy/Caddyfile.prod
@@ -1,3 +1,15 @@
+# Global options: Enable the admin API and metrics
+{
+	# Enable metrics collection for HTTP handlers
+	servers {
+		metrics
+	}
+
+	# Admin API listens only on localhost within container for security
+	# Prometheus accesses /metrics via the dedicated :9180 listener, not the admin API
+	admin 127.0.0.1:2019
+}
+
 # Caddy will automatically provision a Let's Encrypt certificate.
 gocanto.dev, www.gocanto.dev {
 	log {
@@ -29,8 +41,8 @@ oullin.io {
 		format json
 	}
 
-	# --- Public listener: block protected path
-	@protected_public path /api/generate-signature*
+	# --- Public listener: block protected paths
+	@protected_public path /api/generate-signature* /api/metrics
 	handle @protected_public {
 		respond 403
 	}
@@ -117,3 +129,16 @@ oullin.io {
 		respond 403
 	}
 }
+
+# INTERNAL metrics endpoint for Prometheus scraping (not published to host; Docker network only).
+# Served by Caddy's own `metrics` handler rather than a proxy to the admin API: the admin endpoint
+# on 127.0.0.1:2019 checks the Host header, and a proxied request keeps the scraper's Host value.
+:9180 {
+	handle /metrics {
+		metrics
+	}
+
+	handle {
+		respond 404
+	}
+}
diff --git a/caddy/Dockerfile b/infra/caddy/Dockerfile
similarity index 93%
rename from caddy/Dockerfile
rename to infra/caddy/Dockerfile
index 6de850e3..69757ec0 100644
--- a/caddy/Dockerfile
+++ b/infra/caddy/Dockerfile
@@ -1,4 +1,4 @@
-# Filename: caddy/Dockerfile
+# Filename: infra/caddy/Dockerfile
 # This Dockerfile builds a Caddy image using a specific, stable version number.
 
 # Define a build argument for the Caddy version with a sensible default.
diff --git a/caddy/mtls/.gitkeep b/infra/caddy/mtls/.gitkeep
similarity index 100%
rename from caddy/mtls/.gitkeep
rename to infra/caddy/mtls/.gitkeep
diff --git a/caddy/readme.md b/infra/caddy/readme.md
similarity index 100%
rename from caddy/readme.md
rename to infra/caddy/readme.md
diff --git a/docker/dockerfile-api b/infra/docker/dockerfile-api
similarity index 100%
rename from docker/dockerfile-api
rename to infra/docker/dockerfile-api
diff --git a/metal/makefile/app.mk b/infra/makefile/app.mk
similarity index 66%
rename from metal/makefile/app.mk
rename to infra/makefile/app.mk
index b29af9bb..ad08b22a 100644
--- a/metal/makefile/app.mk
+++ b/infra/makefile/app.mk
@@ -1,12 +1,44 @@
-.PHONY: fresh destroy audit watch format run-cli test-all run-cli-docker run-metal
+# -------------------------------------------------------------------------------------------------------------------- #
+# Application Management Targets
+# -------------------------------------------------------------------------------------------------------------------- #
 
-DB_SECRET_USERNAME ?= ./database/infra/secrets/pg_username
-DB_SECRET_PASSWORD ?= ./database/infra/secrets/pg_password
-DB_SECRET_DBNAME   ?= ./database/infra/secrets/pg_dbname
+# -------------------------------------------------------------------------------------------------------------------- #
+# Configuration Variables
+# -------------------------------------------------------------------------------------------------------------------- #
+
+ROOT_PATH           := $(shell pwd)
+DB_SECRETS_DIR      := $(ROOT_PATH)/database/infra/secrets
+
+DB_SECRET_USERNAME  ?= $(DB_SECRETS_DIR)/pg_username
+DB_SECRET_PASSWORD  ?= $(DB_SECRETS_DIR)/pg_password
+DB_SECRET_DBNAME    ?= $(DB_SECRETS_DIR)/pg_dbname
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# PHONY Targets
+# -------------------------------------------------------------------------------------------------------------------- #
+
+.PHONY: fresh destroy audit watch format run-cli test-all run-cli-docker run-metal install-air
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Code Quality Commands
+# -------------------------------------------------------------------------------------------------------------------- #
 
 format:
 	gofmt -w -s .
 
+audit:
+	$(call external_deps,'.')
+	$(call external_deps,'./app/...')
+	$(call external_deps,'./database/...')
+	$(call external_deps,'./docs/...')
+
+test-all:
+	go test ./...
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Docker Management Commands
+# -------------------------------------------------------------------------------------------------------------------- #
+
 fresh:
 	docker compose down --volumes --rmi all --remove-orphans
 	docker ps
@@ -22,11 +54,9 @@ destroy:
 	docker ps -aq | xargs --no-run-if-empty docker rm && \
 	docker ps
 
-audit:
-	$(call external_deps,'.')
-	$(call external_deps,'./app/...')
-	$(call external_deps,'./database/...')
-	$(call external_deps,'./docs/...')
+# -------------------------------------------------------------------------------------------------------------------- #
+# Development Tools
+# -------------------------------------------------------------------------------------------------------------------- #
 
 watch:
 	# --- Works with (air).
@@ -39,6 +69,10 @@ install-air:
 	@echo "Installing air ..."
 	@go install github.com/air-verse/air@latest
 
+# -------------------------------------------------------------------------------------------------------------------- #
+# CLI Commands
+# -------------------------------------------------------------------------------------------------------------------- #
+
 run-cli:
 	@missing_values=""; \
 	missing_files=""; \
@@ -115,11 +149,9 @@ run-cli:
 		printf "\n$(RED)❌ CLI exited with status $$status.$(NC)\n"; \
 		exit $$status; \
 	fi
+
 run-cli-docker:
 	make run-cli DB_SECRET_USERNAME=$(DB_SECRET_USERNAME) DB_SECRET_PASSWORD=$(DB_SECRET_PASSWORD) DB_SECRET_DBNAME=$(DB_SECRET_DBNAME)
 
-test-all:
-	go test ./...
-
 run-metal:
 	go run metal/cli/main.go
diff --git a/metal/makefile/build.mk b/infra/makefile/build.mk
similarity index 100%
rename from metal/makefile/build.mk
rename to infra/makefile/build.mk
diff --git a/metal/makefile/caddy.mk b/infra/makefile/caddy.mk
similarity index 91%
rename from metal/makefile/caddy.mk
rename to infra/makefile/caddy.mk
index c2f6e748..8e71e1c0 100644
--- a/metal/makefile/caddy.mk
+++ b/infra/makefile/caddy.mk
@@ -1,8 +1,8 @@
 .PHONY: caddy-gen-certs caddy-del-certs caddy-validate caddy-fresh caddy-restart
 
-CADDY_MTLS_DIR = $(ROOT_PATH)/caddy/mtls
-APP_CADDY_CONFIG_PROD_FILE ?= caddy/Caddyfile.prod
-APP_CADDY_CONFIG_LOCAL_FILE ?= caddy/Caddyfile.local
+CADDY_MTLS_DIR = $(ROOT_PATH)/infra/caddy/mtls
+APP_CADDY_CONFIG_PROD_FILE ?= infra/caddy/Caddyfile.prod
+APP_CADDY_CONFIG_LOCAL_FILE ?= infra/caddy/Caddyfile.local
 
 caddy-restart:
 	docker compose up -d --force-recreate caddy_prod
@@ -66,6 +66,6 @@ caddy-del-certs:
 
 caddy-validate:
 	@docker run --rm \
-	  -v "$(ROOT_PATH)/caddy/Caddyfile.prod:/etc/caddy/Caddyfile:ro" \
-	  -v "$(ROOT_PATH)/caddy/mtls:/etc/caddy/mtls:ro" \
+	  -v "$(ROOT_PATH)/infra/caddy/Caddyfile.prod:/etc/caddy/Caddyfile:ro" \
+	  -v "$(ROOT_PATH)/infra/caddy/mtls:/etc/caddy/mtls:ro" \
 	  caddy:2.10.0 caddy validate --config /etc/caddy/Caddyfile
diff --git a/metal/makefile/db.mk b/infra/makefile/db.mk
similarity index 100%
rename from metal/makefile/db.mk
rename to infra/makefile/db.mk
diff --git a/metal/makefile/env.mk b/infra/makefile/env.mk
similarity index 100%
rename from metal/makefile/env.mk
rename to infra/makefile/env.mk
diff --git a/metal/makefile/helpers.mk b/infra/makefile/helpers.mk
similarity index 100%
rename from metal/makefile/helpers.mk
rename to infra/makefile/helpers.mk
diff --git a/metal/makefile/infra.mk b/infra/makefile/infra.mk
similarity index 100%
rename from metal/makefile/infra.mk
rename to infra/makefile/infra.mk
diff --git a/metal/makefile/logs.mk b/infra/makefile/logs.mk
similarity index 100%
rename from metal/makefile/logs.mk
rename to infra/makefile/logs.mk
diff --git a/infra/makefile/monitor.mk b/infra/makefile/monitor.mk
new file mode 100644
index 00000000..2288b911
--- /dev/null
+++ b/infra/makefile/monitor.mk
@@ -0,0 +1,556 @@
+# -------------------------------------------------------------------------------------------------------------------- #
+# Monitoring Stack Targets
+# -------------------------------------------------------------------------------------------------------------------- #
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Configuration Variables
+# -------------------------------------------------------------------------------------------------------------------- #
+
+ROOT_PATH           := $(shell pwd)
+MONITORING_DIR      := $(ROOT_PATH)/infra/metrics
+BACKUPS_DIR         := $(ROOT_PATH)/storage/monitoring/backups
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Volume Names (defined in docker-compose.yml)
+# -------------------------------------------------------------------------------------------------------------------- #
+
+PROMETHEUS_VOLUME_LOCAL := prometheus_data_local
+PROMETHEUS_VOLUME_PROD  := prometheus_data_prod
+GRAFANA_VOLUME_LOCAL    := grafana_data_local
+GRAFANA_VOLUME_PROD     := grafana_data_prod
+
+# Docker service names (defined in docker-compose.yml)
+PROMETHEUS_SERVICE_LOCAL        := prometheus_local
+PROMETHEUS_SERVICE_PROD         := prometheus
+GRAFANA_SERVICE_LOCAL           := grafana_local
+GRAFANA_SERVICE_PROD            := grafana
+POSTGRES_EXPORTER_SERVICE_LOCAL := postgres_exporter_local
+POSTGRES_EXPORTER_SERVICE_PROD  := postgres_exporter
+
+# Monitoring service URLs and ports
+GRAFANA_HOST        := localhost
+GRAFANA_PORT        := 3000
+GRAFANA_URL         := http://$(GRAFANA_HOST):$(GRAFANA_PORT)
+
+PROMETHEUS_HOST     := localhost
+PROMETHEUS_PORT     := 9090
+PROMETHEUS_URL      := http://$(PROMETHEUS_HOST):$(PROMETHEUS_PORT)
+
+CADDY_ADMIN_HOST    := localhost
+CADDY_ADMIN_PORT    := 2019
+CADDY_ADMIN_URL     := http://$(CADDY_ADMIN_HOST):$(CADDY_ADMIN_PORT)
+
+API_HOST            := localhost
+API_PORT            := 18080
+API_URL             := http://$(API_HOST):$(API_PORT)
+PING_USERNAME       ?= $(ENV_PING_USERNAME)
+PING_PASSWORD       ?= $(ENV_PING_PASSWORD)
+PING_AUTH_FLAG      := $(if $(and $(PING_USERNAME),$(PING_PASSWORD)),-u $(PING_USERNAME):$(PING_PASSWORD),)
+
+# Production API endpoint (behind Caddy)
+API_PROD_HOST       := localhost
+API_PROD_URL        := http://$(API_PROD_HOST)
+
+# Internal service URLs (Docker network)
+PG_EXPORTER_HOST    := postgres_exporter_local
+PG_EXPORTER_PORT    := 9187
+PG_EXPORTER_URL     := http://$(PG_EXPORTER_HOST):$(PG_EXPORTER_PORT)
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# PHONY Targets
+# -------------------------------------------------------------------------------------------------------------------- #
+
+.PHONY: monitor-up monitor-up-prod monitor-down monitor-down-prod monitor-restart monitor-restart-prod \
+	monitor-up-full monitor-up-full-prod monitor-up-logs monitor-up-logs-prod monitor-down-remove monitor-down-remove-prod \
+	monitor-pull monitor-pull-prod monitor-docker-config monitor-docker-config-prod monitor-docker-exec-prometheus monitor-docker-exec-prometheus-prod \
+	monitor-docker-exec-grafana monitor-docker-exec-grafana-prod monitor-docker-ps monitor-docker-inspect monitor-docker-inspect-prod \
+	monitor-docker-logs-prometheus monitor-docker-logs-prometheus-prod monitor-docker-logs-grafana monitor-docker-logs-grafana-prod monitor-docker-logs-db monitor-docker-logs-db-prod \
+	monitor-status monitor-logs monitor-logs-prod \
+	monitor-test monitor-targets monitor-config monitor-config-prod monitor-grafana monitor-prometheus \
+	monitor-caddy-metrics monitor-api-metrics monitor-db-metrics monitor-db-metrics-prod monitor-metrics \
+	monitor-traffic monitor-traffic-heavy monitor-traffic-prod monitor-traffic-heavy-prod \
+	monitor-clean monitor-clean-prod monitor-stats monitor-stats-prod monitor-backup monitor-backup-prod monitor-export-dashboards monitor-help \
+	monitor-volumes-local-check monitor-volumes-prod-check
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Start/Stop Commands
+# -------------------------------------------------------------------------------------------------------------------- #
+
+## Start monitoring stack (local development)
+monitor-up:
+	@printf "$(BOLD)$(CYAN)Starting monitoring stack (local)...$(NC)\n"
+	@docker compose --profile local up -d $(PROMETHEUS_SERVICE_LOCAL) $(GRAFANA_SERVICE_LOCAL) $(POSTGRES_EXPORTER_SERVICE_LOCAL)
+	@sleep 3
+	@printf "$(BOLD)$(GREEN)✓ Monitoring stack started$(NC)\n"
+	@printf "\n$(BOLD)Access points:$(NC)\n"
+	@printf "  $(GREEN)Grafana:$(NC)     $(GRAFANA_URL)\n"
+	@printf "  $(GREEN)Prometheus:$(NC)  $(PROMETHEUS_URL)\n"
+	@printf "  $(GREEN)Caddy Admin:$(NC) $(CADDY_ADMIN_URL)\n\n"
+
+## Start monitoring stack (production)
+monitor-up-prod:
+	@printf "$(BOLD)$(CYAN)Starting monitoring stack (production)...$(NC)\n"
+	@docker compose --profile prod up -d $(PROMETHEUS_SERVICE_PROD) $(GRAFANA_SERVICE_PROD) $(POSTGRES_EXPORTER_SERVICE_PROD)
+	@sleep 3
+	@printf "$(BOLD)$(GREEN)✓ Monitoring stack started$(NC)\n"
+	@printf "\n$(BOLD)Access points (from server):$(NC)\n"
+	@printf "  $(GREEN)Grafana:$(NC)     $(GRAFANA_URL)\n"
+	@printf "  $(GREEN)Prometheus:$(NC)  $(PROMETHEUS_URL)\n"
+	@printf "  $(GREEN)Caddy Admin:$(NC) $(CADDY_ADMIN_URL)\n\n"
+
+## Stop monitoring stack (local)
+monitor-down:
+	@printf "$(BOLD)$(CYAN)Stopping monitoring stack (local)...$(NC)\n"
+	@docker compose --profile local stop $(PROMETHEUS_SERVICE_LOCAL) $(GRAFANA_SERVICE_LOCAL) $(POSTGRES_EXPORTER_SERVICE_LOCAL)
+	@printf "$(BOLD)$(GREEN)✓ Monitoring stack stopped$(NC)\n\n"
+
+## Stop monitoring stack (production)
+monitor-down-prod:
+	@printf "$(BOLD)$(CYAN)Stopping monitoring stack (production)...$(NC)\n"
+	@docker compose --profile prod stop $(PROMETHEUS_SERVICE_PROD) $(GRAFANA_SERVICE_PROD) $(POSTGRES_EXPORTER_SERVICE_PROD)
+	@printf "$(BOLD)$(GREEN)✓ Monitoring stack stopped$(NC)\n\n"
+
+## Restart monitoring stack (local)
+monitor-restart:
+	@printf "$(BOLD)$(CYAN)Restarting monitoring stack (local)...$(NC)\n"
+	@docker compose --profile local restart $(PROMETHEUS_SERVICE_LOCAL) $(GRAFANA_SERVICE_LOCAL) $(POSTGRES_EXPORTER_SERVICE_LOCAL)
+	@printf "$(BOLD)$(GREEN)✓ Monitoring stack restarted$(NC)\n\n"
+
+## Restart monitoring stack (production)
+monitor-restart-prod:
+	@printf "$(BOLD)$(CYAN)Restarting monitoring stack (production)...$(NC)\n"
+	@docker compose --profile prod restart $(PROMETHEUS_SERVICE_PROD) $(GRAFANA_SERVICE_PROD) $(POSTGRES_EXPORTER_SERVICE_PROD)
+	@printf "$(BOLD)$(GREEN)✓ Monitoring stack restarted$(NC)\n\n"
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Docker Compose Commands
+# -------------------------------------------------------------------------------------------------------------------- #
+
+## Start monitoring with full stack (API + DB + monitoring) - local
+monitor-up-full:
+	@printf "$(BOLD)$(CYAN)Starting full stack with monitoring (local)...$(NC)\n"
+	@docker compose --profile local up -d
+	@sleep 3
+	@printf "$(BOLD)$(GREEN)✓ Full stack started$(NC)\n\n"
+
+## Start monitoring with full stack (API + DB + monitoring) - production
+monitor-up-full-prod:
+	@printf "$(BOLD)$(CYAN)Starting full stack with monitoring (production)...$(NC)\n"
+	@docker compose --profile prod up -d
+	@sleep 3
+	@printf "$(BOLD)$(GREEN)✓ Full stack started$(NC)\n\n"
+
+## Start monitoring stack with logs (foreground) - local
+monitor-up-logs:
+	@printf "$(BOLD)$(CYAN)Starting monitoring stack with logs (local)...$(NC)\n"
+	@docker compose --profile local up $(PROMETHEUS_SERVICE_LOCAL) $(GRAFANA_SERVICE_LOCAL) $(POSTGRES_EXPORTER_SERVICE_LOCAL)
+
+## Start monitoring stack with logs (foreground) - production
+monitor-up-logs-prod:
+	@printf "$(BOLD)$(CYAN)Starting monitoring stack with logs (production)...$(NC)\n"
+	@docker compose --profile prod up $(PROMETHEUS_SERVICE_PROD) $(GRAFANA_SERVICE_PROD) $(POSTGRES_EXPORTER_SERVICE_PROD)
+
+## Stop and remove monitoring containers - local
+monitor-down-remove:
+	@printf "$(BOLD)$(CYAN)Stopping and removing monitoring containers (local)...$(NC)\n"
+	@docker compose --profile local down $(PROMETHEUS_SERVICE_LOCAL) $(GRAFANA_SERVICE_LOCAL) $(POSTGRES_EXPORTER_SERVICE_LOCAL)
+	@printf "$(BOLD)$(GREEN)✓ Containers stopped and removed$(NC)\n\n"
+
+## Stop and remove monitoring containers - production
+monitor-down-remove-prod:
+	@printf "$(BOLD)$(CYAN)Stopping and removing monitoring containers (production)...$(NC)\n"
+	@docker compose --profile prod down $(PROMETHEUS_SERVICE_PROD) $(GRAFANA_SERVICE_PROD) $(POSTGRES_EXPORTER_SERVICE_PROD)
+	@printf "$(BOLD)$(GREEN)✓ Containers stopped and removed$(NC)\n\n"
+
+## Pull latest monitoring images (local)
+monitor-pull:
+	@printf "$(BOLD)$(CYAN)Pulling latest monitoring images (local)...$(NC)\n"
+	@docker compose pull $(PROMETHEUS_SERVICE_LOCAL) $(GRAFANA_SERVICE_LOCAL) $(POSTGRES_EXPORTER_SERVICE_LOCAL)
+	@printf "$(BOLD)$(GREEN)✓ Images pulled$(NC)\n\n"
+
+## Pull latest monitoring images (production)
+monitor-pull-prod:
+	@printf "$(BOLD)$(CYAN)Pulling latest monitoring images (production)...$(NC)\n"
+	@docker compose pull $(PROMETHEUS_SERVICE_PROD) $(GRAFANA_SERVICE_PROD) $(POSTGRES_EXPORTER_SERVICE_PROD)
+	@printf "$(BOLD)$(GREEN)✓ Images pulled$(NC)\n\n"
+
+## Show docker compose config for monitoring services (local); prints the full config when grep matches nothing
+monitor-docker-config:
+	@printf "$(BOLD)$(CYAN)Docker Compose Configuration (monitoring - local)$(NC)\n\n"
+	@docker compose --profile local config | grep -A 20 "$(PROMETHEUS_SERVICE_LOCAL)\|$(GRAFANA_SERVICE_LOCAL)\|$(POSTGRES_EXPORTER_SERVICE_LOCAL)" || docker compose --profile local config
+
+## Show docker compose config for monitoring services (production); prints the full config when grep matches nothing
+monitor-docker-config-prod:
+	@printf "$(BOLD)$(CYAN)Docker Compose Configuration (monitoring - production)$(NC)\n\n"
+	@docker compose --profile prod config | grep -A 20 "$(PROMETHEUS_SERVICE_PROD)\|$(GRAFANA_SERVICE_PROD)\|$(POSTGRES_EXPORTER_SERVICE_PROD)" || docker compose --profile prod config
+
+## Open an interactive shell (/bin/sh) in the Prometheus container (local)
+monitor-docker-exec-prometheus:
+	@printf "$(BOLD)$(CYAN)Executing shell in Prometheus container (local)...$(NC)\n"
+	@docker exec -it oullin_prometheus_local /bin/sh
+
+## Open an interactive shell (/bin/sh) in the Prometheus container (production)
+monitor-docker-exec-prometheus-prod:
+	@printf "$(BOLD)$(CYAN)Executing shell in Prometheus container (production)...$(NC)\n"
+	@docker exec -it oullin_prometheus /bin/sh
+
+## Open an interactive shell (/bin/sh) in the Grafana container (local)
+monitor-docker-exec-grafana:
+	@printf "$(BOLD)$(CYAN)Executing shell in Grafana container (local)...$(NC)\n"
+	@docker exec -it oullin_grafana_local /bin/sh
+
+## Open an interactive shell (/bin/sh) in the Grafana container (production)
+monitor-docker-exec-grafana-prod:
+	@printf "$(BOLD)$(CYAN)Executing shell in Grafana container (production)...$(NC)\n"
+	@docker exec -it oullin_grafana /bin/sh
+
+## Show docker ps for monitoring containers
+monitor-docker-ps:
+	@printf "$(BOLD)$(CYAN)Monitoring Containers$(NC)\n\n"
+	@docker ps --filter "name=prometheus" --filter "name=grafana" --filter "name=exporter" --format "table {{.ID}}\t{{.Names}}\t{{.Status}}\t{{.Ports}}"
+	@printf "\n"
+
+## Show docker inspect for monitoring containers (local)
+monitor-docker-inspect:
+	@printf "$(BOLD)$(CYAN)Inspecting Monitoring Containers (local)$(NC)\n\n"
+	@docker inspect oullin_prometheus_local oullin_grafana_local oullin_postgres_exporter_local 2>/dev/null | jq '.[].Name, .[].State, .[].NetworkSettings.Networks' || echo "$(RED)Containers not running$(NC)"
+
+## Show docker inspect for monitoring containers (production)
+monitor-docker-inspect-prod:
+	@printf "$(BOLD)$(CYAN)Inspecting Monitoring Containers (production)$(NC)\n\n"
+	@docker inspect oullin_prometheus oullin_grafana oullin_postgres_exporter 2>/dev/null | jq '.[].Name, .[].State, .[].NetworkSettings.Networks' || echo "$(RED)Containers not running$(NC)"
+
+## View monitoring container logs (docker logs - local)
+monitor-docker-logs-prometheus:
+	@docker logs -f oullin_prometheus_local
+
+monitor-docker-logs-grafana:
+	@docker logs -f oullin_grafana_local
+
+monitor-docker-logs-db:
+	@docker logs -f oullin_postgres_exporter_local
+
+## View monitoring container logs (docker logs - production)
+monitor-docker-logs-prometheus-prod:
+	@docker logs -f oullin_prometheus
+
+monitor-docker-logs-grafana-prod:
+	@docker logs -f oullin_grafana
+
+monitor-docker-logs-db-prod:
+	@docker logs -f oullin_postgres_exporter
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Status & Information Commands
+# -------------------------------------------------------------------------------------------------------------------- #
+
+## Show status of monitoring services
+monitor-status:
+	@printf "$(BOLD)$(CYAN)Monitoring Stack Status$(NC)\n\n"
+	@docker ps --filter "name=prometheus" --filter "name=grafana" --filter "name=exporter" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
+	@printf "\n"
+
+## Show logs from all monitoring services (local)
+monitor-logs:
+	@printf "$(BOLD)$(CYAN)Monitoring Stack Logs (local)$(NC)\n\n"
+	@docker compose logs -f $(PROMETHEUS_SERVICE_LOCAL) $(GRAFANA_SERVICE_LOCAL) $(POSTGRES_EXPORTER_SERVICE_LOCAL)
+
+## Show logs from all monitoring services (production)
+monitor-logs-prod:
+	@printf "$(BOLD)$(CYAN)Monitoring Stack Logs (production)$(NC)\n\n"
+	@docker compose logs -f $(PROMETHEUS_SERVICE_PROD) $(GRAFANA_SERVICE_PROD) $(POSTGRES_EXPORTER_SERVICE_PROD)
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Testing & Verification Commands
+# -------------------------------------------------------------------------------------------------------------------- #
+
+## Run full monitoring stack test suite (local profile only)
+monitor-test:
+	@printf "$(BOLD)$(CYAN)Running monitoring stack tests (local profile)...$(NC)\n"
+	@printf "$(YELLOW)Note: This target is for local development only.$(NC)\n"
+	@printf "$(YELLOW)For production, verify monitoring from the server directly.$(NC)\n\n"
+	@printf "$(BOLD)1. Checking services are running...$(NC)\n"
+	@docker ps --filter "name=$(PROMETHEUS_SERVICE_LOCAL)" --filter "name=$(GRAFANA_SERVICE_LOCAL)" --filter "name=$(POSTGRES_EXPORTER_SERVICE_LOCAL)" --format "  ✓ {{.Names}}: {{.Status}}" || echo "  $(RED)✗ Services not running$(NC)"
+	@printf "\n$(BOLD)2. Testing Prometheus targets...$(NC)\n"
+	@curl -s $(PROMETHEUS_URL)/api/v1/targets | grep -q '"health":"up"' && echo "  $(GREEN)✓ Prometheus targets are UP$(NC)" || echo "  $(RED)✗ Some targets are DOWN$(NC)"
+	@printf "\n$(BOLD)3. Testing Caddy metrics endpoint...$(NC)\n"
+	@curl -s $(CADDY_ADMIN_URL)/metrics | grep -q "caddy_http_requests_total" && echo "  $(GREEN)✓ Caddy metrics accessible$(NC)" || echo "  $(RED)✗ Caddy metrics unavailable$(NC)"
+	@printf "\n$(BOLD)4. Testing API metrics endpoint...$(NC)\n"
+	@curl -s $(API_URL)/metrics | grep -q "go_goroutines" && echo "  $(GREEN)✓ API metrics accessible$(NC)" || echo "  $(RED)✗ API metrics unavailable$(NC)"
+	@printf "\n$(BOLD)5. Testing Grafana...$(NC)\n"
+	@curl -s $(GRAFANA_URL)/api/health | grep -q "ok" && echo "  $(GREEN)✓ Grafana is healthy$(NC)" || echo "  $(RED)✗ Grafana is unhealthy$(NC)"
+	@printf "\n$(BOLD)$(GREEN)Test suite completed!$(NC)\n\n"
+
+## Verify Prometheus targets status
+monitor-targets:
+	@printf "$(BOLD)$(CYAN)Prometheus Targets Status$(NC)\n\n"
+	@curl -s $(PROMETHEUS_URL)/api/v1/targets | jq -r '.data.activeTargets[] | "[\(.health | ascii_upcase)] \(.labels.job) - \(.scrapeUrl)"' || echo "$(RED)Failed to fetch targets. Is Prometheus running?$(NC)"
+	@printf "\n"
+
+## Check Prometheus configuration (local)
+monitor-config:
+	@printf "$(BOLD)$(CYAN)Prometheus Configuration (local)$(NC)\n\n"
+	@docker exec oullin_prometheus_local cat /etc/prometheus/prometheus.yml
+
+## Check Prometheus configuration (production)
+monitor-config-prod:
+	@printf "$(BOLD)$(CYAN)Prometheus Configuration (production)$(NC)\n\n"
+	@docker exec oullin_prometheus cat /etc/prometheus/prometheus.yml
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Metrics Access Commands
+# -------------------------------------------------------------------------------------------------------------------- #
+
+## Open Grafana in browser (tries xdg-open, then open; prints the URL if neither launcher succeeds)
+monitor-grafana:
+	@printf "$(BOLD)$(CYAN)Opening Grafana...$(NC)\n"
+	@printf "URL: $(GREEN)$(GRAFANA_URL)$(NC)\n"
+	@printf "Credentials: admin / (set via GRAFANA_ADMIN_PASSWORD)\n\n"
+	@{ command -v xdg-open > /dev/null 2>&1 && xdg-open $(GRAFANA_URL); } || { command -v open > /dev/null 2>&1 && open $(GRAFANA_URL); } || echo "Please open $(GRAFANA_URL) in your browser"
+
+## Open Prometheus in browser (tries xdg-open, then open; prints the URL if neither launcher succeeds)
+monitor-prometheus:
+	@printf "$(BOLD)$(CYAN)Opening Prometheus...$(NC)\n"
+	@printf "URL: $(GREEN)$(PROMETHEUS_URL)$(NC)\n\n"
+	@{ command -v xdg-open > /dev/null 2>&1 && xdg-open $(PROMETHEUS_URL); } || { command -v open > /dev/null 2>&1 && open $(PROMETHEUS_URL); } || echo "Please open $(PROMETHEUS_URL) in your browser"
+
+## Show Caddy metrics
+monitor-caddy-metrics:
+	@printf "$(BOLD)$(CYAN)Caddy Metrics$(NC)\n\n"
+	@curl -s $(CADDY_ADMIN_URL)/metrics | grep "^caddy_" | head -20
+	@printf "\n$(YELLOW)... (showing first 20 metrics)$(NC)\n"
+	@printf "Full metrics: $(GREEN)$(CADDY_ADMIN_URL)/metrics$(NC)\n\n"
+
+## Show API metrics
+monitor-api-metrics:
+	@printf "$(BOLD)$(CYAN)API Metrics$(NC)\n\n"
+	@curl -s $(API_URL)/metrics | grep "^go_" | head -20
+	@printf "\n$(YELLOW)... (showing first 20 metrics)$(NC)\n"
+	@printf "Full metrics: $(GREEN)$(API_URL)/metrics$(NC)\n\n"
+
+## Show PostgreSQL metrics (local)
+monitor-db-metrics:
+	@printf "$(BOLD)$(CYAN)PostgreSQL Metrics (local)$(NC)\n\n"
+	@docker exec oullin_prometheus_local curl -s $(PG_EXPORTER_URL)/metrics | grep "^pg_" | head -20
+	@printf "\n$(YELLOW)... (showing first 20 metrics)$(NC)\n\n"
+
+## Show PostgreSQL metrics (production)
+monitor-db-metrics-prod:
+	@printf "$(BOLD)$(CYAN)PostgreSQL Metrics (production)$(NC)\n\n"
+	@docker exec oullin_prometheus curl -s http://postgres_exporter:9187/metrics | grep "^pg_" | head -20
+	@printf "\n$(YELLOW)... (showing first 20 metrics)$(NC)\n\n"
+
+## Show all metrics endpoints
+monitor-metrics:
+	@printf "$(BOLD)$(CYAN)Available Metrics Endpoints$(NC)\n\n"
+	@printf "  $(GREEN)Caddy:$(NC)      $(CADDY_ADMIN_URL)/metrics\n"
+	@printf "  $(GREEN)API:$(NC)        $(API_URL)/metrics\n"
+	@printf "  $(GREEN)PostgreSQL:$(NC) $(PG_EXPORTER_URL)/metrics (internal)\n"
+	@printf "  $(GREEN)Prometheus:$(NC) $(PROMETHEUS_URL)/metrics\n\n"
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Traffic Generation & Testing
+# -------------------------------------------------------------------------------------------------------------------- #
+
+## Generate test traffic to populate metrics (local profile)
+monitor-traffic:
+	@if [ -z "$(PING_USERNAME)" ] || [ -z "$(PING_PASSWORD)" ]; then \
+		printf "$(RED)Missing ping credentials. Export ENV_PING_USERNAME/ENV_PING_PASSWORD or pass PING_USERNAME/PING_PASSWORD to make.$(NC)\n"; \
+		exit 1; \
+	fi
+	@printf "$(BOLD)$(CYAN)Generating test traffic (local)...$(NC)\n"
+	@printf "Making 100 requests to /ping endpoint...\n"
+	@for i in $$(seq 1 100); do \
+		curl -s $(PING_AUTH_FLAG) $(API_URL)/ping > /dev/null && printf "." || printf "$(RED)✗$(NC)"; \
+		sleep 0.1; \
+	done
+	@printf "\n$(BOLD)$(GREEN)✓ Test traffic generated$(NC)\n"
+	@printf "\nCheck dashboards at: $(GREEN)$(GRAFANA_URL)$(NC)\n\n"
+
+## Generate heavy test traffic (local profile); sends the ping credentials when PING_USERNAME/PING_PASSWORD are set
+monitor-traffic-heavy:
+	@printf "$(BOLD)$(CYAN)Generating heavy test traffic (local)...$(NC)\n"
+	@printf "Making 500 requests with 5 concurrent connections...\n"
+	@for i in $$(seq 1 100); do \
+		(for j in $$(seq 1 5); do curl -s $(PING_AUTH_FLAG) $(API_URL)/ping > /dev/null & done; wait); \
+		printf "."; \
+		sleep 0.05; \
+	done
+	@printf "\n$(BOLD)$(GREEN)✓ Heavy test traffic generated$(NC)\n\n"
+
+## Generate test traffic to populate metrics (production profile)
+monitor-traffic-prod:
+	@printf "$(BOLD)$(CYAN)Generating test traffic (production)...$(NC)\n"
+	@printf "Making 100 requests to /api/ping endpoint...\n"
+	@for i in $$(seq 1 100); do \
+		curl -s $(API_PROD_URL)/api/ping > /dev/null && printf "." || printf "$(RED)✗$(NC)"; \
+		sleep 0.1; \
+	done
+	@printf "\n$(BOLD)$(GREEN)✓ Test traffic generated$(NC)\n"
+	@printf "\n$(YELLOW)Note: Run this from the production server$(NC)\n"
+	@printf "SSH tunnel for Grafana: $(GREEN)ssh -L 3000:localhost:3000 user@server$(NC)\n\n"
+
+## Generate heavy test traffic (production profile)
+monitor-traffic-heavy-prod:
+	@printf "$(BOLD)$(CYAN)Generating heavy test traffic (production)...$(NC)\n"
+	@printf "Making 500 requests with 5 concurrent connections...\n"
+	@for i in $$(seq 1 100); do \
+		(for j in $$(seq 1 5); do curl -s $(API_PROD_URL)/api/ping > /dev/null & done; wait); \
+		printf "."; \
+		sleep 0.05; \
+	done
+	@printf "\n$(BOLD)$(GREEN)✓ Heavy test traffic generated$(NC)\n"
+	@printf "\n$(YELLOW)Note: Run this from the production server$(NC)\n\n"
+
+# -------------------------------------------------------------------------------------------------------------------- #
+# Utility Commands
+# -------------------------------------------------------------------------------------------------------------------- #
+
+## Clean monitoring data (removes all metrics/dashboard data) - local; waits for Enter before deleting
+monitor-clean: monitor-volumes-local-check
+	@printf "$(BOLD)$(RED)WARNING: This will delete all monitoring data (local)!$(NC)\n"
+	@printf "Press Ctrl+C to cancel, or Enter to continue..."
+	@read -r _confirm
+	@printf "$(BOLD)$(CYAN)Stopping monitoring stack...$(NC)\n"
+	@docker compose --profile local down $(PROMETHEUS_SERVICE_LOCAL) $(GRAFANA_SERVICE_LOCAL)
+	@printf "$(BOLD)$(CYAN)Removing volumes...$(NC)\n"
+	@docker volume rm -f $(PROMETHEUS_VOLUME_LOCAL) $(GRAFANA_VOLUME_LOCAL) || true
+	@printf "$(BOLD)$(GREEN)✓ Monitoring data cleaned$(NC)\n\n"
+
+## Clean monitoring data (removes all metrics/dashboard data) - production; waits for Enter before deleting
+monitor-clean-prod: monitor-volumes-prod-check
+	@printf "$(BOLD)$(RED)WARNING: This will delete all monitoring data (production)!$(NC)\n"
+	@printf "Press Ctrl+C to cancel, or Enter to continue..."
+	@read -r _confirm
+	@printf "$(BOLD)$(CYAN)Stopping monitoring stack...$(NC)\n"
+	@docker compose --profile prod down $(PROMETHEUS_SERVICE_PROD) $(GRAFANA_SERVICE_PROD)
+	@printf "$(BOLD)$(CYAN)Removing volumes...$(NC)\n"
+	@docker volume rm -f $(PROMETHEUS_VOLUME_PROD) $(GRAFANA_VOLUME_PROD) || true
+	@printf "$(BOLD)$(GREEN)✓ Monitoring data cleaned$(NC)\n\n"
+
+## Show monitoring stack resource usage (local)
+monitor-stats:
+	@printf "$(BOLD)$(CYAN)Monitoring Stack Resource Usage (local)$(NC)\n\n"
+	@docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}" \
+		oullin_prometheus_local oullin_grafana_local oullin_postgres_exporter_local 2>/dev/null || \
+		echo "$(RED)No monitoring containers running$(NC)"
+	@printf "\n"
+
+## Show monitoring stack resource usage (production)
+monitor-stats-prod:
+	@printf "$(BOLD)$(CYAN)Monitoring Stack Resource Usage (production)$(NC)\n\n"
+	@docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}" \
+		oullin_prometheus oullin_grafana oullin_postgres_exporter 2>/dev/null || \
+		echo "$(RED)No monitoring containers running$(NC)"
+	@printf "\n"
+
+## Backup Prometheus data (with automatic rotation) - local
+monitor-backup: monitor-volumes-local-check
+	@printf "$(BOLD)$(CYAN)Backing up Prometheus data (local)...$(NC)\n"
+	@mkdir -p $(BACKUPS_DIR)
+	@docker run --rm -v $(PROMETHEUS_VOLUME_LOCAL):/data -v $(BACKUPS_DIR):/backup alpine \
+		tar czf /backup/prometheus-backup-$$(date +%Y%m%d-%H%M%S).tar.gz /data
+	@printf "$(BOLD)$(GREEN)✓ Backup created in $(BACKUPS_DIR)/$(NC)\n"
+	@printf "$(YELLOW)Rotating backups (keeping last 5)...$(NC)\n"
+	@for f in $$(ls -t $(BACKUPS_DIR)/prometheus-backup-*.tar.gz 2>/dev/null | tail -n +6); do rm -f "$$f"; done || true
+	@BACKUP_COUNT=$$(ls -1 $(BACKUPS_DIR)/prometheus-backup-*.tar.gz 2>/dev/null | wc -l); \
+		printf "$(BOLD)$(GREEN)✓ Backup rotation complete ($${BACKUP_COUNT} backups kept)$(NC)\n\n"
+
+monitor-volumes-local-check:
+	@[ -n "$(PROMETHEUS_VOLUME_LOCAL)" ] && [ -n "$(GRAFANA_VOLUME_LOCAL)" ] || \
+		{ printf "$(RED)Unable to resolve monitoring volumes from docker compose config (local profile).$(NC)\n"; exit 1; }
+
+## Backup Prometheus data (with automatic rotation) - production
+monitor-backup-prod: monitor-volumes-prod-check
+	@printf "$(BOLD)$(CYAN)Backing up Prometheus data (production)...$(NC)\n"
+	@mkdir -p $(BACKUPS_DIR)
+	@docker run --rm -v $(PROMETHEUS_VOLUME_PROD):/data -v $(BACKUPS_DIR):/backup alpine \
+		tar czf /backup/prometheus-prod-backup-$$(date +%Y%m%d-%H%M%S).tar.gz /data
+	@printf "$(BOLD)$(GREEN)✓ Backup created in $(BACKUPS_DIR)/$(NC)\n"
+	@printf "$(YELLOW)Rotating backups (keeping last 5)...$(NC)\n"
+	@for f in $$(ls -t $(BACKUPS_DIR)/prometheus-prod-backup-*.tar.gz 2>/dev/null | tail -n +6); do rm -f "$$f"; done || true
+	@BACKUP_COUNT=$$(ls -1 $(BACKUPS_DIR)/prometheus-prod-backup-*.tar.gz 2>/dev/null | wc -l); \
+		printf "$(BOLD)$(GREEN)✓ Backup rotation complete ($${BACKUP_COUNT} backups kept)$(NC)\n\n"
+
+monitor-volumes-prod-check:
+	@[ -n "$(PROMETHEUS_VOLUME_PROD)" ] && [ -n "$(GRAFANA_VOLUME_PROD)" ] || \
+		{ printf "$(RED)Unable to resolve monitoring volumes from docker compose config (production profile).$(NC)\n"; exit 1; }
+
+## Export Grafana dashboards to JSON files
+monitor-export-dashboards:
+	@printf "$(BOLD)$(CYAN)Exporting Grafana dashboards...$(NC)\n"
+	@$(MONITORING_DIR)/grafana/scripts/export-dashboards.sh
+
+## Show monitoring help
+monitor-help:
+	@printf "\n$(BOLD)$(CYAN)Monitoring Stack Commands$(NC)\n\n"
+	@printf "$(BOLD)$(BLUE)Start/Stop:$(NC)\n"
+	@printf "  $(GREEN)monitor-up$(NC)                         - Start monitoring stack (local)\n"
+	@printf "  $(GREEN)monitor-up-prod$(NC)                    - Start monitoring stack (production)\n"
+	@printf "  $(GREEN)monitor-up-full$(NC)                    - Start full stack with monitoring (local)\n"
+	@printf "  $(GREEN)monitor-up-full-prod$(NC)               - Start full stack with monitoring (prod)\n"
+	@printf "  $(GREEN)monitor-up-logs$(NC)                    - Start with logs in foreground (local)\n"
+	@printf "  $(GREEN)monitor-up-logs-prod$(NC)               - Start with logs in foreground (prod)\n"
+	@printf "  $(GREEN)monitor-down$(NC)                       - Stop monitoring stack (local)\n"
+	@printf "  $(GREEN)monitor-down-prod$(NC)                  - Stop monitoring stack (production)\n"
+	@printf "  $(GREEN)monitor-down-remove$(NC)                - Stop and remove containers (local)\n"
+	@printf "  $(GREEN)monitor-down-remove-prod$(NC)           - Stop and remove containers (prod)\n"
+	@printf "  $(GREEN)monitor-restart$(NC)                    - Restart monitoring stack (local)\n"
+	@printf "  $(GREEN)monitor-restart-prod$(NC)               - Restart monitoring stack (prod)\n\n"
+	@printf "$(BOLD)$(BLUE)Docker Commands:$(NC)\n"
+	@printf "  $(GREEN)monitor-docker-ps$(NC)                  - Show running monitoring containers\n"
+	@printf "  $(GREEN)monitor-docker-config$(NC)              - Show docker compose config (local)\n"
+	@printf "  $(GREEN)monitor-docker-config-prod$(NC)         - Show docker compose config (prod)\n"
+	@printf "  $(GREEN)monitor-docker-inspect$(NC)             - Inspect monitoring containers (local)\n"
+	@printf "  $(GREEN)monitor-docker-inspect-prod$(NC)        - Inspect monitoring containers (prod)\n"
+	@printf "  $(GREEN)monitor-docker-exec-prometheus$(NC)     - Shell into Prometheus container (local)\n"
+	@printf "  $(GREEN)monitor-docker-exec-prometheus-prod$(NC)- Shell into Prometheus container (prod)\n"
+	@printf "  $(GREEN)monitor-docker-exec-grafana$(NC)        - Shell into Grafana container (local)\n"
+	@printf "  $(GREEN)monitor-docker-exec-grafana-prod$(NC)   - Shell into Grafana container (prod)\n"
+	@printf "  $(GREEN)monitor-docker-logs-prometheus$(NC)     - Docker logs for Prometheus (local)\n"
+	@printf "  $(GREEN)monitor-docker-logs-prometheus-prod$(NC)- Docker logs for Prometheus (prod)\n"
+	@printf "  $(GREEN)monitor-docker-logs-grafana$(NC)        - Docker logs for Grafana (local)\n"
+	@printf "  $(GREEN)monitor-docker-logs-grafana-prod$(NC)   - Docker logs for Grafana (prod)\n"
+	@printf "  $(GREEN)monitor-docker-logs-db$(NC)             - Docker logs for DB exporter (local)\n"
+	@printf "  $(GREEN)monitor-docker-logs-db-prod$(NC)        - Docker logs for DB exporter (prod)\n"
+	@printf "  $(GREEN)monitor-pull$(NC)                       - Pull latest monitoring images (local)\n"
+	@printf "  $(GREEN)monitor-pull-prod$(NC)                  - Pull latest monitoring images (prod)\n\n"
+	@printf "$(BOLD)$(BLUE)Status & Logs:$(NC)\n"
+	@printf "  $(GREEN)monitor-status$(NC)                     - Show status of monitoring services\n"
+	@printf "  $(GREEN)monitor-logs$(NC)                       - Show logs from all services (local)\n"
+	@printf "  $(GREEN)monitor-logs-prod$(NC)                  - Show logs from all services (prod)\n\n"
+	@printf "$(BOLD)$(BLUE)Testing:$(NC)\n"
+	@printf "  $(GREEN)monitor-test$(NC)                       - Run full test suite (local only)\n"
+	@printf "  $(GREEN)monitor-targets$(NC)                    - Show Prometheus targets status\n"
+	@printf "  $(GREEN)monitor-traffic$(NC)                    - Generate test traffic (local)\n"
+	@printf "  $(GREEN)monitor-traffic-heavy$(NC)              - Generate heavy test traffic (local)\n"
+	@printf "  $(GREEN)monitor-traffic-prod$(NC)               - Generate test traffic (production)\n"
+	@printf "  $(GREEN)monitor-traffic-heavy-prod$(NC)         - Generate heavy test traffic (prod)\n\n"
+	@printf "$(BOLD)$(BLUE)Access:$(NC)\n"
+	@printf "  $(GREEN)monitor-grafana$(NC)                    - Open Grafana in browser\n"
+	@printf "  $(GREEN)monitor-prometheus$(NC)                 - Open Prometheus in browser\n"
+	@printf "  $(GREEN)monitor-metrics$(NC)                    - Show all metrics endpoints\n"
+	@printf "  $(GREEN)monitor-caddy-metrics$(NC)              - Show Caddy metrics\n"
+	@printf "  $(GREEN)monitor-api-metrics$(NC)                - Show API metrics\n"
+	@printf "  $(GREEN)monitor-db-metrics$(NC)                 - Show PostgreSQL metrics (local)\n"
+	@printf "  $(GREEN)monitor-db-metrics-prod$(NC)            - Show PostgreSQL metrics (prod)\n\n"
+	@printf "$(BOLD)$(BLUE)Utilities:$(NC)\n"
+	@printf "  $(GREEN)monitor-stats$(NC)                      - Show resource usage (local)\n"
+	@printf "  $(GREEN)monitor-stats-prod$(NC)                 - Show resource usage (prod)\n"
+	@printf "  $(GREEN)monitor-config$(NC)                     - Show Prometheus config (local)\n"
+	@printf "  $(GREEN)monitor-config-prod$(NC)                - Show Prometheus config (prod)\n"
+	@printf "  $(GREEN)monitor-backup$(NC)                     - Backup Prometheus data (local)\n"
+	@printf "  $(GREEN)monitor-backup-prod$(NC)                - Backup Prometheus data (prod)\n"
+	@printf "  $(GREEN)monitor-export-dashboards$(NC)          - Export Grafana dashboards to JSON\n"
+	@printf "  $(GREEN)monitor-clean$(NC)                      - Clean all monitoring data (local)\n"
+	@printf "  $(GREEN)monitor-clean-prod$(NC)                 - Clean all monitoring data (prod)\n\n"
+	@printf "$(BOLD)Quick Start:$(NC)\n"
+	@printf "  1. $(YELLOW)make monitor-up$(NC)           - Start the stack\n"
+	@printf "  2. $(YELLOW)make monitor-test$(NC)         - Verify everything works\n"
+	@printf "  3. $(YELLOW)make monitor-traffic$(NC)      - Generate some traffic\n"
+	@printf "  4. $(YELLOW)make monitor-grafana$(NC)      - Open dashboards\n\n"
+	@printf "$(BOLD)Docker Compose Examples:$(NC)\n"
+	@printf "  $(YELLOW)docker compose --profile local up -d$(NC)           - Start local stack\n"
+	@printf "  $(YELLOW)docker compose --profile prod up -d$(NC)            - Start prod stack\n"
+	@printf "  $(YELLOW)docker ps --filter name=prometheus$(NC)             - List containers\n"
+	@printf "  $(YELLOW)docker exec -it oullin_prometheus_local /bin/sh$(NC) - Shell access\n\n"
diff --git a/infra/metrics/README.md b/infra/metrics/README.md
new file mode 100644
index 00000000..81112578
--- /dev/null
+++ b/infra/metrics/README.md
@@ -0,0 +1,712 @@
+# Monitoring Stack Documentation
+
+Complete guide for managing and monitoring the Oullin application stack with Prometheus, Grafana, and related tools.
+
+## Table of Contents
+
+1. [Overview](#overview)
+2. [Quick Start](#quick-start)
+3. [Security Model](#security-model)
+4. [Grafana Dashboards](#grafana-dashboards)
+5. [Creating Custom Dashboards](#creating-custom-dashboards)
+6. [Prometheus Queries](#prometheus-queries)
+7. [Troubleshooting](#troubleshooting)
+8. [Maintenance & Backup](#maintenance--backup)
+9. [Resources](#resources)
+
+**For VPS deployment instructions, see [VPS_DEPLOYMENT.md](./VPS_DEPLOYMENT.md)**
+
+---
+
+## Overview
+
+### Stack Components
+
+- **Prometheus**: Metrics collection and time-series storage
+- **Grafana**: Visualization dashboards and alerting
+- **postgres_exporter**: PostgreSQL database metrics
+- **Caddy Admin API**: Reverse proxy metrics
+
+### Pre-configured Dashboards
+
+Three dashboards are automatically provisioned:
+
+1. **Oullin - Overview** (`grafana/dashboards/oullin-overview-oullin-overview.json`)
+   - Caddy request rate
+   - PostgreSQL active connections
+   - HTTP requests by status code
+   - API memory usage and goroutines
+
+2. **PostgreSQL - Database Metrics** (`grafana/dashboards/oullin-postgresql-postgresql-database-metrics.json`)
+   - Active connections
+   - Database size
+   - Transaction rates
+   - Cache hit ratio
+   - Lock statistics
+
+3. **Caddy - Proxy Metrics** (`grafana/dashboards/oullin-caddy-caddy-proxy-metrics.json`)
+   - Total request rate
+   - Response time percentiles
+   - Requests by status code
+   - Traffic rate
+   - Request errors
+
+### Directory Structure
+
+```text
+infra/metrics/
+├── README.md                    # This file
+├── grafana/
+│   ├── dashboards/              # Dashboard JSON files
+│   ├── provisioning/
+│   │   ├── dashboards/          # Dashboard provisioning config
+│   │   └── datasources/         # Data source configuration
+│   └── scripts/
+│       └── export-dashboards.sh
+└── prometheus/
+    ├── provisioning/
+    │   ├── prometheus.yml       # Production Prometheus config
+    │   └── prometheus.local.yml # Local Prometheus config
+    └── scripts/
+        └── postgres-exporter-entrypoint.sh
+```
+
+### Configuration Consistency
+
+The monitoring stack is designed to maintain configuration consistency across local and production environments while respecting environment-specific differences.
+
+#### Shared Configuration Elements
+
+The following configurations are **identical** across both environments:
+
+1. **Grafana Settings:**
+   - Same Grafana version (`grafana/grafana:11.4.0`)
+   - Identical security settings (admin user, sign-up disabled, anonymous disabled)
+   - Same dashboard and datasource provisioning structure
+   - Same volume mount paths
+
+2. **Prometheus Core Settings:**
+   - Same Prometheus version (`prom/prometheus:v3.0.1`)
+   - Identical scrape interval (15s) and evaluation interval (15s)
+   - Same job structure (caddy, postgresql, api, prometheus) with per-environment targets
+   - Same metrics endpoints and paths
+
+3. **Postgres Exporter:**
+   - Same exporter version (`prometheuscommunity/postgres-exporter:v0.15.0`)
+   - Identical port exposure (9187)
+   - Same entrypoint script and secrets handling
+
+#### Environment-Specific Variables
+
+These settings **differ intentionally** based on environment:
+
+| Configuration | Local | Production | Reason |
+|--------------|-------|------------|--------|
+| **Container Names** | `oullin_*_local` | `oullin_*` | Distinguish environments |
+| **Prometheus URL** | `oullin_prometheus_local:9090` | `oullin_prometheus:9090` | Network addressing |
+| **Grafana Port** | `3000:3000` | `127.0.0.1:3000:3000` | Security (prod localhost-only) |
+| **Prometheus Port** | `9090:9090` | `127.0.0.1:9090:9090` | Security (prod localhost-only) |
+| **Data Retention** | 7 days | 30 days | Storage/cost optimization |
+| **Caddy Target** | `caddy_local:9180` | `caddy_prod:9180` | Service dependencies |
+| **PostgreSQL Exporter Target** | `oullin_postgres_exporter_local:9187` | `oullin_postgres_exporter:9187` | Service dependencies |
+| **External Labels** | `monitor: 'oullin-local'`<br>`environment: 'local'` | `monitor: 'oullin-prod'`<br>`environment: 'production'` | Metric identification |
+| **Admin API** | `127.0.0.1:2019:2019` | Not exposed | Debugging access |
+
+#### Environment Variable Usage
+
+The configuration uses environment variables to maintain consistency while adapting to each environment:
+
+**Grafana Datasource** (`grafana/provisioning/datasources/prometheus.yml`):
+```yaml
+url: ${GF_DATASOURCE_PROMETHEUS_URL}
+```
+
+Set via Docker Compose:
+- **Local:** `GF_DATASOURCE_PROMETHEUS_URL=http://oullin_prometheus_local:9090`
+- **Production:** `GF_DATASOURCE_PROMETHEUS_URL=http://oullin_prometheus:9090`
+
+**Required Environment Variables:**
+- `GRAFANA_ADMIN_PASSWORD` - **Required**, no default (set in `.env`)
+- `GF_DATASOURCE_PROMETHEUS_URL` - Set automatically by Docker Compose profile
+
+#### Configuration Files by Environment
+
+**Local Environment:**
+- Prometheus: `prometheus/provisioning/prometheus.local.yml`
+- Profile: `--profile local`
+- Services: `prometheus_local`, `grafana_local`, `caddy_local`, `postgres_exporter_local`
+
+**Production Environment:**
+- Prometheus: `prometheus/provisioning/prometheus.yml`
+- Profile: `--profile prod`
+- Services: `prometheus`, `grafana`, `caddy_prod`, `postgres_exporter`
+
+**Shared Across All Environments:**
+- Grafana datasources: `grafana/provisioning/datasources/prometheus.yml`
+- Grafana dashboards: `grafana/provisioning/dashboards/default.yml`
+- Dashboard JSONs: `grafana/dashboards/*.json`
+- Postgres exporter script: `prometheus/scripts/postgres-exporter-entrypoint.sh`
+
+---
+
+## Quick Start
+
+### Local Development
+
+**Prerequisites:**
+- Docker and Docker Compose installed
+- `.env` file in the repository root with `GRAFANA_ADMIN_PASSWORD` set (required - no default)
+  - Use `make env:init` to copy `.env.example` if you need a starting point
+  - If `.env` already exists, edit it in place instead of appending duplicates
+- Database secrets in `database/infra/secrets/`
+
+**Setup:**
+
+```bash
+# 1. Set Grafana admin password in .env file
+echo "GRAFANA_ADMIN_PASSWORD=$(openssl rand -base64 32)" >> .env
+# (Add or update the key manually if the file already defines it.)
+
+# 2. Start the local monitoring stack
+make monitor-up
+# Or: docker compose --profile local up -d
+
+# 3. Access services
+# Grafana:    http://localhost:3000 (admin / your-password)
+# Prometheus: http://localhost:9090
+# Caddy Admin: http://localhost:2019
+```
+
+**Verification:**
+
+```bash
+# Check all services are running
+docker ps
+
+# Verify Prometheus targets are UP
+make monitor-targets
+# Or: curl http://localhost:9090/api/v1/targets
+
+# Generate test traffic
+make monitor-traffic
+
+# View dashboards
+make monitor-grafana
+```
+
+---
+
+## Security Model
+
+### Critical Security Requirements
+
+⚠️ **IMPORTANT**: The monitoring stack includes several security considerations:
+
+1. **Grafana Admin Password**
+   - No default password allowed
+   - Must set `GRAFANA_ADMIN_PASSWORD` in `.env`
+   - Docker Compose will fail if not set
+   - Generate strong password: `openssl rand -base64 32`
+
+2. **Caddy Admin API**
+   - Exposes powerful administrative endpoints (`/load`, `/config`, `/stop`)
+   - **NO authentication** by default
+   - Production: Only accessible within Docker network; restrict further via firewalls/security groups when possible
+   - If you must expose it, configure Caddy's admin access controls (`admin.identity`, `admin.authorize`, or reverse-proxy ACLs) to require authentication
+   - Never expose to public internet
+
+3. **Service Exposure**
+   - Production: Services bound to `127.0.0.1` only
+   - Access via SSH tunneling from remote
+   - No direct internet exposure
+
+### Production Security Configuration
+
+**Docker Compose Production Services:**
+
+```yaml
+grafana:
+  ports:
+    - "127.0.0.1:3000:3000"  # Localhost only
+
+prometheus:
+  ports:
+    - "127.0.0.1:9090:9090"  # Localhost only
+
+caddy_prod:
+  expose:
+    - "2019"  # Internal network only - NOT exposed to host
+```
+
+**Remote Access:**
+
+```bash
+# SSH tunnel for Grafana and Prometheus
+ssh -L 3000:localhost:3000 -L 9090:localhost:9090 user@your-server
+
+# Access Caddy admin API (debugging only)
+docker exec -it oullin_proxy_prod curl http://localhost:2019/metrics
+```
+
+### Security Checklist
+
+- ✅ `GRAFANA_ADMIN_PASSWORD` set with strong password
+- ✅ Firewall configured (UFW)
+- ✅ Only necessary ports exposed (22, 80, 443)
+- ✅ Monitoring services NOT exposed to internet
+- ✅ Docker secrets for sensitive data
+- ✅ Regular backups scheduled
+- ✅ Log rotation configured
+- ✅ SSH key-based authentication
+
+---
+
+## Grafana Dashboards
+
+### Accessing Dashboards
+
+**Local:** <http://localhost:3000>
+**Production:** SSH tunnel then <http://localhost:3000>
+
+### Dashboard Files
+
+All dashboards are in `infra/metrics/grafana/dashboards/`:
+- `oullin-overview-oullin-overview.json`
+- `oullin-postgresql-postgresql-database-metrics.json`
+- `oullin-caddy-caddy-proxy-metrics.json`
+
+### Exporting Dashboards
+
+Use the built-in export script:
+
+```bash
+make monitor-export-dashboards
+```
+
+This will:
+1. List all dashboards in Grafana
+2. Let you select which to export
+3. Save to `infra/metrics/grafana/dashboards/`
+4. Format properly for provisioning
+
+### Manual Export
+
+1. Open your dashboard in Grafana
+2. Click **"Share"** → **"Export"** tab
+3. Click **"Save to file"** or **"View JSON"**
+4. Save to `infra/metrics/grafana/dashboards/`
+5. Restart Grafana: `make monitor-restart`
+
+### Updating Dashboards Safely
+
+To keep dashboard changes reproducible and under version control:
+
+1. **Start monitoring stack**: `make monitor-up`
+2. **Make changes in Grafana UI**: Navigate to <http://localhost:3000> and edit dashboards
+3. **Export your changes**: Run `./infra/metrics/grafana/scripts/export-dashboards.sh`
+   - Select specific dashboard or `all` to export all dashboards
+   - Exports are saved to `infra/metrics/grafana/dashboards/`
+4. **Review the diff**: `git diff infra/metrics/grafana/dashboards/`
+5. **Commit changes**: Add and commit the exported JSON files
+6. **Verify**: `make monitor-restart` to ensure dashboards reload correctly
+
+**Warning:** Always export after making UI changes—manual edits to JSON files can work but are error-prone.
+
+---
+
+## Creating Custom Dashboards
+
+### Method 1: Create in UI (Recommended)
+
+**Step 1:** Start Grafana
+
+```bash
+make monitor-up
+make monitor-grafana  # Opens http://localhost:3000
+```
+
+**Step 2:** Create dashboard
+
+1. Click **"+"** → **"Dashboard"** → **"Add visualization"**
+2. Select **"Prometheus"** as data source
+3. Write PromQL query
+4. Choose visualization type (Time series, Stat, Gauge, Table)
+5. Configure panel (title, description, units, thresholds)
+6. Add more panels as needed
+7. Save dashboard
+
+**Step 3:** Export
+
+```bash
+make monitor-export-dashboards
+```
+
+### Method 2: Use Community Dashboards
+
+Grafana has thousands of pre-built dashboards at <https://grafana.com/grafana/dashboards/>
+
+**Popular for our stack:**
+- [9628](https://grafana.com/grafana/dashboards/9628) - PostgreSQL Database
+- [455](https://grafana.com/grafana/dashboards/455) - PostgreSQL Stats
+- [10826](https://grafana.com/grafana/dashboards/10826) - Go Metrics
+- [6671](https://grafana.com/grafana/dashboards/6671) - Go Processes
+
+**Import via UI:**
+1. Click **"+"** → **"Import"**
+2. Enter dashboard ID
+3. Select **"Prometheus"** as data source
+4. Click **"Import"**
+
+### Dashboard Best Practices
+
+**Organization:**
+- One dashboard per service
+- Overview dashboard for high-level metrics
+- Detail dashboards for deep dives
+- Use tags for categorization
+
+**Panel Design:**
+- Clear titles
+- Descriptions for complex metrics
+- Consistent colors
+- Appropriate units (bytes, %, req/s)
+- Thresholds for warnings/errors
+
+**Query Performance:**
+- Avoid high-cardinality labels
+- Use recording rules for expensive queries
+- Limit time range
+- Use `rate()` instead of raw counters
+
+---
+
+## Prometheus Queries
+
+### API Metrics
+
+```promql
+# Metrics endpoint scrape rate (requests served by the /metrics handler, not API traffic)
+rate(promhttp_metric_handler_requests_total[5m])
+
+# Memory usage
+go_memstats_alloc_bytes{job="api"}
+
+# Goroutines (check for leaks)
+go_goroutines{job="api"}
+
+# GC duration
+rate(go_gc_duration_seconds_sum[5m])
+
+# Heap allocations
+rate(go_memstats_alloc_bytes_total[5m])
+```
+
+### PostgreSQL Metrics
+
+```promql
+# Active connections
+pg_stat_database_numbackends
+
+# Database size
+pg_database_size_bytes
+
+# Transaction rate
+rate(pg_stat_database_xact_commit[5m])
+
+# Cache hit ratio (should be >90%)
+rate(pg_stat_database_blks_hit[5m]) /
+(rate(pg_stat_database_blks_hit[5m]) + rate(pg_stat_database_blks_read[5m]))
+
+# Rows inserted/updated/deleted
+rate(pg_stat_database_tup_inserted[5m])
+rate(pg_stat_database_tup_updated[5m])
+rate(pg_stat_database_tup_deleted[5m])
+```
+
+### Caddy Metrics
+
+```promql
+# Request rate by status
+sum by(code) (rate(caddy_http_requests_total[5m]))
+
+# Response time percentiles
+histogram_quantile(0.95, rate(caddy_http_request_duration_seconds_bucket[5m]))
+histogram_quantile(0.99, rate(caddy_http_request_duration_seconds_bucket[5m]))
+
+# Error rate
+sum(rate(caddy_http_request_errors_total[5m]))
+
+# Response traffic rate
+rate(caddy_http_response_size_bytes_sum[5m])
+```
+
+---
+
+## Troubleshooting
+
+### Dashboards Don't Load
+
+```bash
+# Check JSON syntax
+jq . < infra/metrics/grafana/dashboards/my-dashboard.json
+
+# Check Grafana logs
+docker logs oullin_grafana_local  # Local
+docker logs oullin_grafana        # Production
+
+# Or view all monitoring logs
+make monitor-logs      # Local
+make monitor-logs-prod # Production
+
+# Verify Prometheus connection
+# Grafana UI → Connections → Data sources → Prometheus → "Save & test"
+
+# Ensure Prometheus is running
+docker ps | grep prometheus
+```
+
+### No Data in Panels
+
+```bash
+# Verify Prometheus is scraping targets
+make monitor-targets
+# Or: curl http://localhost:9090/api/v1/targets
+
+# Test query in Prometheus
+# Open http://localhost:9090
+
+# Wait a few minutes for initial data collection
+```
+
+### Prometheus Not Scraping
+
+```bash
+# Check network connectivity
+docker exec -it oullin_prometheus_local ping caddy_local
+
+# Verify the scrape target exposes metrics (the Prometheus image ships BusyBox wget, not curl)
+docker exec -it oullin_prometheus_local wget -qO- http://caddy_local:9180/metrics
+
+# Check Prometheus config
+docker exec -it oullin_prometheus_local cat /etc/prometheus/prometheus.yml
+```
+
+### Targets Show as DOWN
+
+```bash
+# Check container networking
+docker network ls
+docker network inspect caddy_net
+
+# Check container names match Prometheus config
+docker ps
+
+# Restart services
+make monitor-restart
+# Or: docker compose --profile local restart
+```
+
+### High Memory Usage
+
+```bash
+# Monitor memory
+docker stats
+
+# If Prometheus using too much memory:
+# - Reduce retention time
+# - Decrease scrape frequency
+# - Add metric filters
+```
+
+### Data Not Persisting
+
+```bash
+# Ensure volumes are configured
+docker volume ls
+docker volume inspect prometheus_data_local   # Local
+docker volume inspect prometheus_data_prod    # Production
+docker volume inspect grafana_data_local      # Local
+docker volume inspect grafana_data_prod       # Production
+```
+
+---
+
+## Maintenance & Backup
+
+### Backing Up Data
+
+**Automated backup** (recommended):
+
+```bash
+# Keeps the last 5 backups; schedule it with cron for daily runs (see VPS_DEPLOYMENT.md)
+make monitor-backup       # Local environment
+make monitor-backup-prod  # Production environment
+```
+
+Backups saved to:
+- **Local**: `storage/monitoring/backups/prometheus-backup-YYYYMMDD-HHMMSS.tar.gz`
+- **Production**: `storage/monitoring/backups/prometheus-prod-backup-YYYYMMDD-HHMMSS.tar.gz`
+
+**Manual backup:**
+
+```bash
+# Backup Prometheus data
+docker run --rm -v prometheus_data_local:/data -v $(pwd)/backups:/backup alpine \
+  tar czf /backup/prometheus-backup-$(date +%Y%m%d-%H%M%S).tar.gz /data
+# (Use prometheus_data_prod on production hosts)
+
+# Backup Grafana data
+docker run --rm -v grafana_data_local:/data -v $(pwd)/backups:/backup alpine \
+  tar czf /backup/grafana-backup-$(date +%Y%m%d-%H%M%S).tar.gz /data
+# (Use grafana_data_prod on production hosts)
+```
+
+### Restoring from Backup
+
+```bash
+# Stop services
+make monitor-down
+
+# Restore Prometheus data
+# WARNING: This will DELETE all existing Prometheus data. Validate backups and consider restoring in a test environment first.
+docker run --rm -v prometheus_data_local:/data -v $(pwd)/backups:/backup alpine \
+  sh -c "rm -rf /data/* && tar xzf /backup/prometheus-backup-YYYYMMDD-HHMMSS.tar.gz -C /"
+# (Use prometheus_data_prod on production hosts)
+
+# Restore Grafana data
+# WARNING: This will DELETE all existing Grafana data. Keep a secondary backup if unsure.
+docker run --rm -v grafana_data_local:/data -v $(pwd)/backups:/backup alpine \
+  sh -c "rm -rf /data/* && tar xzf /backup/grafana-backup-YYYYMMDD-HHMMSS.tar.gz -C /"
+# (Use grafana_data_prod on production hosts)
+
+# Restart services
+make monitor-up
+```
+
+### Updating the Stack
+
+**Local environment:**
+```bash
+# Pull latest images
+docker compose --profile local pull
+
+# Restart with new images
+make monitor-restart
+# Or: docker compose --profile local up -d
+```
+
+**Production environment:**
+```bash
+# Pull latest images
+docker compose --profile prod pull
+
+# Restart with new images
+make monitor-restart-prod
+# Or: docker compose --profile prod up -d
+```
+
+### Monitoring Resource Usage
+
+```bash
+# CPU and Memory usage
+docker stats
+
+# Disk usage by container
+docker system df -v
+
+# Container logs size
+sudo du -sh /var/lib/docker/containers/*/*-json.log
+```
+
+### Cleaning Up Old Data
+
+Prometheus automatically handles retention based on `--storage.tsdb.retention.time` (30d prod, 7d local).
+
+Manual cleanup:
+
+```bash
+# Stop Prometheus
+docker compose stop prometheus_local
+
+# Clean data
+docker run --rm -v prometheus_data_local:/data alpine sh -c 'rm -rf /data/*'
+# (Use prometheus_data_prod on production hosts)
+
+# Restart
+docker compose --profile local up -d prometheus_local
+```
+
+---
+
+## Resources
+
+### Official Documentation
+
+- [Prometheus Documentation](https://prometheus.io/docs/)
+- [Grafana Documentation](https://grafana.com/docs/)
+- [Grafana Dashboards](https://grafana.com/grafana/dashboards/)
+- [Caddy Metrics](https://caddyserver.com/docs/metrics)
+- [PostgreSQL Exporter](https://github.com/prometheus-community/postgres_exporter)
+- [PromQL Basics](https://prometheus.io/docs/prometheus/latest/querying/basics/)
+- [Grafonnet Library](https://github.com/grafana/grafonnet-lib)
+
+### Quick Reference Commands
+
+```bash
+# Start monitoring stack
+make monitor-up              # Local
+make monitor-up-prod         # Production
+
+# Access services
+make monitor-grafana         # Open Grafana
+make monitor-prometheus      # Open Prometheus
+
+# Check status
+make monitor-status          # Service health
+make monitor-targets         # Prometheus targets
+
+# Generate traffic
+make monitor-traffic         # Local
+make monitor-traffic-prod    # Production
+
+# View logs
+make monitor-logs            # All services (local)
+make monitor-logs-prod       # All services (production)
+
+# Individual container logs
+docker logs oullin_grafana_local     # Grafana (local)
+docker logs oullin_prometheus_local  # Prometheus (local)
+docker logs oullin_grafana           # Grafana (production)
+docker logs oullin_prometheus        # Prometheus (production)
+
+# Maintenance
+make monitor-backup          # Backup Prometheus data
+make monitor-restart         # Restart services (local)
+make monitor-restart-prod    # Restart services (production)
+make monitor-export-dashboards
+
+# Cleanup
+make monitor-down            # Stop services (local)
+make monitor-down-prod       # Stop services (production)
+make monitor-clean           # Clean up data (local)
+make monitor-clean-prod      # Clean up data (production)
+```
+
+### Production Deployment
+
+For complete VPS deployment instructions including firewall setup, SSL configuration, and production best practices, see [VPS_DEPLOYMENT.md](./VPS_DEPLOYMENT.md).
+
+---
+
+## Next Steps
+
+1. **Set up Alerting**: Configure Prometheus Alertmanager for critical metrics
+2. **Add Custom Metrics**: Instrument your API with custom business metrics
+3. **Create Custom Dashboards**: Build dashboards specific to your use case
+4. **Configure Recording Rules**: Pre-compute expensive queries
+5. **Implement SLOs**: Define and monitor Service Level Objectives
+6. **Export and Share**: Share dashboard configurations with your team
+
+---
+
+For questions or issues, please check the [Troubleshooting](#troubleshooting) section or refer to the official documentation links above.
diff --git a/infra/metrics/VPS_DEPLOYMENT.md b/infra/metrics/VPS_DEPLOYMENT.md
new file mode 100644
index 00000000..11ca467d
--- /dev/null
+++ b/infra/metrics/VPS_DEPLOYMENT.md
@@ -0,0 +1,436 @@
+# VPS Deployment Guide
+
+Complete guide for deploying the Oullin monitoring stack on an Ubuntu VPS (Hostinger or similar).
+
+## Table of Contents
+
+1. [Prerequisites](#prerequisites)
+2. [Initial Server Setup](#initial-server-setup)
+3. [Install Docker and Docker Compose](#install-docker-and-docker-compose)
+4. [Install Make](#install-make)
+5. [Clone Your Repository](#clone-your-repository)
+6. [Configure Environment Variables](#configure-environment-variables)
+7. [Set Up Docker Secrets](#set-up-docker-secrets)
+8. [Configure Firewall](#configure-firewall)
+9. [Deploy the Monitoring Stack](#deploy-the-monitoring-stack)
+10. [Verify Monitoring Stack](#verify-monitoring-stack)
+11. [Access Grafana Remotely](#access-grafana-remotely)
+12. [Production Considerations](#production-considerations)
+13. [Generate Test Traffic](#generate-test-traffic)
+14. [VPS Troubleshooting](#vps-troubleshooting)
+15. [Updating the Stack](#updating-the-stack)
+16. [Installing Fail2ban](#installing-fail2ban)
+
+---
+
+## Prerequisites
+
+- Hostinger VPS with Ubuntu 20.04 or 22.04 (or similar VPS provider)
+- SSH access to your VPS
+- Domain name (optional, but recommended for SSL)
+- At least 2GB RAM and 20GB storage
+
+---
+
+## Initial Server Setup
+
+Connect to your VPS:
+
+```bash
+ssh root@your-vps-ip
+```
+
+Update the system:
+
+```bash
+apt update && apt upgrade -y
+```
+
+Create a non-root user:
+
+```bash
+# Create user
+adduser deployer
+
+# Add to sudo group
+usermod -aG sudo deployer
+
+# Switch to new user
+su - deployer
+```
+
+---
+
+## Install Docker and Docker Compose
+
+Install required packages:
+
+```bash
+sudo apt install -y apt-transport-https ca-certificates curl software-properties-common
+```
+
+Add Docker's official GPG key:
+
+```bash
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
+```
+
+Add Docker repository:
+
+```bash
+echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
+```
+
+Install Docker:
+
+```bash
+sudo apt update
+sudo apt install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin
+```
+
+Add your user to the docker group:
+
+```bash
+sudo usermod -aG docker ${USER}
+```
+
+Log out and back in, then verify:
+
+```bash
+docker --version
+docker compose version
+```
+
+---
+
+## Install Make
+
+```bash
+sudo apt install -y make
+```
+
+---
+
+## Clone Your Repository
+
+```bash
+cd ~
+git clone https://github.com/yourusername/your-repo.git
+cd your-repo
+```
+
+---
+
+## Configure Environment Variables
+
+Create your `.env` file with production settings:
+
+```bash
+cat > .env << 'EOF'
+# Database Configuration
+POSTGRES_USER=your_db_user
+POSTGRES_PASSWORD=your_strong_db_password
+POSTGRES_DB=your_database_name
+
+# Grafana Configuration (REQUIRED - no default)
+GRAFANA_ADMIN_PASSWORD=your_very_strong_grafana_password
+
+# Production Domain (optional, for SSL)
+DOMAIN=your-domain.com
+
+# Environment
+ENVIRONMENT=production
+EOF
+```
+
+**Security Notes:**
+- Use strong, unique passwords
+- Never commit `.env` to version control
+- Consider using a password manager
+
+---
+
+## Set Up Docker Secrets
+
+Avoid piping credentials through `echo` because the literal values end up in your shell history. Use one of the safer patterns below.
+
+### Option 1: Read secrets from secure input
+
+```bash
+# Prompt won't echo characters and won't touch shell history (-r keeps backslashes literal)
+read -r -s -p "Enter database password: " DB_PASSWORD && echo
+
+printf '%s' "$DB_PASSWORD" | docker secret create pg_password - 2>/dev/null || \
+  { mkdir -p secrets && printf '%s' "$DB_PASSWORD" > secrets/pg_password; }
+
+unset DB_PASSWORD
+```
+
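+Repeat the same pattern for usernames or other sensitive values you do not want recorded in your shell history. Note that the fallback branch writes the value to a file under `secrets/`, so restrict its permissions afterwards (`chmod 600 secrets/*`).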
+
+### Option 2: Write files directly
+
+```bash
+mkdir -p secrets
+printf "your_db_user" > secrets/pg_username
+printf "your_strong_db_password" > secrets/pg_password
+printf "your_database_name" > secrets/pg_dbname
+chmod 600 secrets/*
+```
+
+Store these files somewhere secure (e.g., `pass`, `1Password CLI`, `sops`) and only copy them onto the server when needed.
+
+---
+
+## Configure Firewall
+
+Set up UFW:
+
+```bash
+# Allow SSH (IMPORTANT: do this BEFORE enabling UFW, or you may lock yourself out)
+sudo ufw allow 22/tcp
+
+# Allow HTTP and HTTPS (for Caddy)
+sudo ufw allow 80/tcp
+sudo ufw allow 443/tcp
+
+# Enable UFW once the allow rules above are in place
+sudo ufw --force enable
+
+# Verify rules
+sudo ufw status
+```
+
+**Do NOT expose Prometheus (9090), Grafana (3000), or postgres_exporter (9187) ports!**
+
+---
+
+## Deploy the Monitoring Stack
+
+```bash
+# Start with production profile
+make monitor-up-prod
+# Or: docker compose --profile prod up -d
+```
+
+Verify services:
+
+```bash
+docker compose ps
+```
+
+Expected containers:
+- `oullin_prometheus`
+- `oullin_grafana`
+- `oullin_postgres_exporter`
+- `oullin_proxy_prod`
+- `oullin_db`
+
+---
+
+## Verify Monitoring Stack
+
+Check Prometheus targets:
+
+```bash
+curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[] | {job: .labels.job, health: .health}'
+```
+
+All should show `"health": "up"`.
+
+---
+
+## Access Grafana Remotely
+
+From your local machine:
+
+```bash
+ssh -L 3000:localhost:3000 deployer@your-vps-ip
+```
+
+Then open `http://localhost:3000` in your browser.
+
+**Login:**
+- Username: `admin`
+- Password: Value from `GRAFANA_ADMIN_PASSWORD`
+
+---
+
+## Production Considerations
+
+### Enable Automatic Backups
+
+Schedule daily backups:
+
+```bash
+crontab -e
+```
+
+Add:
+
+```cron
+# NOTE: Update the /home/deployer/... paths to match your user and repository; the log file must be writable by this user
+# Run daily at 2 AM
+0 2 * * * cd /home/deployer/your-repo && make monitor-backup-prod >> /home/deployer/prometheus-backup.log 2>&1
+```
+
+### Monitor Disk Space
+
+```bash
+# Check disk usage
+df -h
+
+# Check Prometheus data size
+docker exec oullin_prometheus du -sh /prometheus
+```
+
+### Configure Log Rotation
+
+```bash
+sudo tee /etc/docker/daemon.json > /dev/null << 'EOF'
+{
+  "log-driver": "json-file",
+  "log-opts": {
+    "max-size": "10m",
+    "max-file": "3"
+  }
+}
+EOF
+
+sudo systemctl restart docker
+make monitor-restart-prod
+```
+
+### Enable SSL/TLS (Optional)
+
+If you have a domain, configure Caddy for automatic HTTPS.
+
+Edit `infra/caddy/Caddyfile.prod`:
+
+```caddyfile
+your-domain.com {
+    reverse_proxy api:8080
+
+    log {
+        output file /var/log/caddy/access.log
+    }
+}
+
+# Admin API (internal only)
+127.0.0.1:2019 {
+    admin {
+        metrics
+    }
+}
+```
+
+Caddy will automatically obtain Let's Encrypt certificates.
+
+---
+
+## Generate Test Traffic
+
+```bash
+make monitor-traffic-prod
+```
+
+Wait a few minutes for data to appear in Grafana.
+
+---
+
+## VPS Troubleshooting
+
+### Services won't start
+
+```bash
+# View logs from monitoring services
+make monitor-logs         # Local: all services
+make monitor-logs-prod    # Production: all services
+
+# Or view individual container logs
+docker logs oullin_grafana
+docker logs oullin_prometheus
+
+# Check Docker daemon
+sudo systemctl status docker
+```
+
+### Can't connect via SSH tunnel
+
+```bash
+# Verify Grafana is listening
+docker exec oullin_grafana netstat -tlnp | grep 3000
+
+# Check if port is already in use locally
+lsof -i :3000
+```
+
+### Prometheus targets are down
+
+```bash
+# Check DNS resolution
+docker exec oullin_prometheus nslookup oullin_proxy_prod
+docker exec oullin_prometheus nslookup oullin_postgres_exporter
+
+# Verify network
+docker network inspect caddy_net oullin_net
+```
+
+### Out of disk space
+
+```bash
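+# Clean up Docker (WARNING: --volumes removes unused volumes; run only while the stack is up so its data volumes stay in use)
+docker system prune -a --volumes
+
+# Rotate backups (keeps last 5)
+make monitor-backup-prod
+
+# Last resort: drop the Prometheus WAL (WARNING: it holds the MOST RECENT samples, not old data; restart Prometheus afterwards)
+docker exec oullin_prometheus sh -c 'rm -rf /prometheus/wal/*'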
+```
+
+---
+
+## Updating the Stack
+
+```bash
+cd ~/your-repo
+git pull origin main
+
+make monitor-down-prod
+make monitor-up-prod
+```
+
+---
+
+## Installing Fail2ban
+
+```bash
+sudo apt install -y fail2ban
+sudo systemctl start fail2ban
+sudo systemctl enable fail2ban
+sudo fail2ban-client status sshd
+```
+
+---
+
+## Production Checklist
+
+- ✅ `GRAFANA_ADMIN_PASSWORD` set in `.env`
+- ✅ Firewall configured (UFW)
+- ✅ Services bound to localhost
+- ✅ SSH tunneling configured
+- ✅ Backups scheduled (cron)
+- ✅ Log rotation configured
+- ✅ SSL/TLS enabled (if domain)
+- ✅ Fail2ban installed
+- ✅ All Prometheus targets UP
+- ✅ Dashboards accessible
+- ✅ Retention policies set
+- ✅ Volumes backed up regularly
+
+---
+
+## Additional Resources
+
+For monitoring-specific documentation, see [README.md](./README.md).
diff --git a/infra/metrics/grafana/dashboards/oullin-caddy-caddy-proxy-metrics.json b/infra/metrics/grafana/dashboards/oullin-caddy-caddy-proxy-metrics.json
new file mode 100644
index 00000000..47c068c4
--- /dev/null
+++ b/infra/metrics/grafana/dashboards/oullin-caddy-caddy-proxy-metrics.json
@@ -0,0 +1,482 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "reqps"
+        }
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "sum(rate(caddy_http_request_duration_seconds_count[5m]))",
+          "legendFormat": "Requests/s",
+          "refId": "A"
+        }
+      ],
+      "title": "Total Request Rate",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        }
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 3,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["mean"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.95, rate(caddy_http_request_duration_seconds_bucket[5m]))",
+          "legendFormat": "p95",
+          "refId": "A"
+        }
+      ],
+      "title": "Response Time (p95)",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "normal"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "reqps"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 6
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "sum by(code) (rate(caddy_http_request_duration_seconds_count[5m]))",
+          "legendFormat": "{{code}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Requests by Status Code",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "s"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 6
+      },
+      "id": 5,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.50, rate(caddy_http_request_duration_seconds_bucket[5m]))",
+          "legendFormat": "p50",
+          "refId": "A"
+        },
+        {
+          "expr": "histogram_quantile(0.95, rate(caddy_http_request_duration_seconds_bucket[5m]))",
+          "legendFormat": "p95",
+          "refId": "B"
+        },
+        {
+          "expr": "histogram_quantile(0.99, rate(caddy_http_request_duration_seconds_bucket[5m]))",
+          "legendFormat": "p99",
+          "refId": "C"
+        }
+      ],
+      "title": "Request Duration Percentiles",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "Bps"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 14
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "rate(caddy_http_response_size_bytes_sum[5m])",
+          "legendFormat": "Response",
+          "refId": "A"
+        }
+      ],
+      "title": "Response Traffic Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 14
+      },
+      "id": 7,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "sum(rate(caddy_http_request_errors_total[5m])) or vector(0)",
+          "legendFormat": "Errors/s",
+          "refId": "A"
+        }
+      ],
+      "title": "Request Errors",
+      "type": "timeseries"
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": ["oullin", "caddy", "proxy"],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-6h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Caddy - Proxy Metrics",
+  "uid": "oullin-caddy",
+  "version": 1
+}
diff --git a/infra/metrics/grafana/dashboards/oullin-overview-oullin-overview.json b/infra/metrics/grafana/dashboards/oullin-overview-oullin-overview.json
new file mode 100644
index 00000000..1a2e4d5e
--- /dev/null
+++ b/infra/metrics/grafana/dashboards/oullin-overview-oullin-overview.json
@@ -0,0 +1,395 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "sum(rate(caddy_http_request_duration_seconds_count[5m]))",
+          "legendFormat": "Caddy Requests/s",
+          "refId": "A"
+        }
+      ],
+      "title": "Caddy Request Rate",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 50
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "pg_stat_database_numbackends{datname=~\".*\"}",
+          "legendFormat": "DB Connections",
+          "refId": "A"
+        }
+      ],
+      "title": "PostgreSQL Active Connections",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "reqps"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 8
+      },
+      "id": 3,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "sum by(code) (rate(caddy_http_request_duration_seconds_count[5m]))",
+          "legendFormat": "{{code}}",
+          "refId": "A"
+        }
+      ],
+      "title": "HTTP Requests by Status Code",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "bytes"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "go_memstats_alloc_bytes{job=\"api\"}",
+          "legendFormat": "API Memory Usage",
+          "refId": "A"
+        }
+      ],
+      "title": "API Memory Usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 16
+      },
+      "id": 5,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "go_goroutines{job=\"api\"}",
+          "legendFormat": "Goroutines",
+          "refId": "A"
+        }
+      ],
+      "title": "API Goroutines",
+      "type": "timeseries"
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": ["oullin", "overview"],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-6h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Oullin - Overview",
+  "uid": "oullin-overview",
+  "version": 1
+}
diff --git a/infra/metrics/grafana/dashboards/oullin-postgresql-postgresql-database-metrics.json b/infra/metrics/grafana/dashboards/oullin-postgresql-postgresql-database-metrics.json
new file mode 100644
index 00000000..abfc3662
--- /dev/null
+++ b/infra/metrics/grafana/dashboards/oullin-postgresql-postgresql-database-metrics.json
@@ -0,0 +1,600 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 50
+              },
+              {
+                "color": "red",
+                "value": 100
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 6,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "pg_stat_database_numbackends",
+          "legendFormat": "{{datname}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Active Connections",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "bytes"
+        }
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 6,
+        "x": 6,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "pg_database_size_bytes",
+          "legendFormat": "{{datname}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Database Size",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 6,
+        "x": 12,
+        "y": 0
+      },
+      "id": 3,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "rate(pg_stat_database_xact_commit[5m])",
+          "legendFormat": "Commits/s",
+          "refId": "A"
+        }
+      ],
+      "title": "Transaction Rate",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 1
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 6,
+        "x": 18,
+        "y": 0
+      },
+      "id": 4,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "pg_stat_database_conflicts",
+          "legendFormat": "Conflicts",
+          "refId": "A"
+        }
+      ],
+      "title": "Conflicts",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 6
+      },
+      "id": 5,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "rate(pg_stat_database_tup_inserted[5m])",
+          "legendFormat": "Inserts - {{datname}}",
+          "refId": "A"
+        },
+        {
+          "expr": "rate(pg_stat_database_tup_updated[5m])",
+          "legendFormat": "Updates - {{datname}}",
+          "refId": "B"
+        },
+        {
+          "expr": "rate(pg_stat_database_tup_deleted[5m])",
+          "legendFormat": "Deletes - {{datname}}",
+          "refId": "C"
+        }
+      ],
+      "title": "Database Operations",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 6
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "pg_stat_database_numbackends",
+          "legendFormat": "{{datname}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Active Connections Over Time",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "percentunit"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 14
+      },
+      "id": 7,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "rate(pg_stat_database_blks_hit[5m]) / (rate(pg_stat_database_blks_hit[5m]) + rate(pg_stat_database_blks_read[5m]))",
+          "legendFormat": "{{datname}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Cache Hit Ratio",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 14
+      },
+      "id": 8,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "expr": "pg_locks_count",
+          "legendFormat": "{{mode}} - {{datname}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Database Locks",
+      "type": "timeseries"
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": ["oullin", "postgresql", "database"],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-6h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "PostgreSQL - Database Metrics",
+  "uid": "oullin-postgresql",
+  "version": 1
+}
diff --git a/infra/metrics/grafana/provisioning/dashboards/default.yml b/infra/metrics/grafana/provisioning/dashboards/default.yml
new file mode 100644
index 00000000..45fb2660
--- /dev/null
+++ b/infra/metrics/grafana/provisioning/dashboards/default.yml
@@ -0,0 +1,13 @@
+apiVersion: 1
+
+providers:
+  - name: 'Oullin Dashboards'
+    orgId: 1
+    folder: ''
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    options:
+      path: /var/lib/grafana/dashboards
+      foldersFromFilesStructure: true
diff --git a/infra/metrics/grafana/provisioning/datasources/prometheus.yml b/infra/metrics/grafana/provisioning/datasources/prometheus.yml
new file mode 100644
index 00000000..c9be740e
--- /dev/null
+++ b/infra/metrics/grafana/provisioning/datasources/prometheus.yml
@@ -0,0 +1,14 @@
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    uid: prometheus
+    type: prometheus
+    access: proxy
+    url: ${GF_DATASOURCE_PROMETHEUS_URL}
+    isDefault: true
+    editable: true
+    allowUiUpdates: true
+    jsonData:
+      timeInterval: 15s
+      queryTimeout: 60s
diff --git a/infra/metrics/grafana/scripts/export-dashboards.sh b/infra/metrics/grafana/scripts/export-dashboards.sh
new file mode 100755
index 00000000..43e53a28
--- /dev/null
+++ b/infra/metrics/grafana/scripts/export-dashboards.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# Helper script to export Grafana dashboards
+
+set -e
+
+GRAFANA_URL="${GRAFANA_URL:-http://localhost:3000}"
+GRAFANA_USER="${GRAFANA_USER:-admin}"
+GRAFANA_PASSWORD="${GRAFANA_PASSWORD:-admin}"
+OUTPUT_DIR="./infra/metrics/grafana/dashboards"
+
+echo "================================"
+echo "Grafana Dashboard Export Tool"
+echo "================================"
+echo ""
+
+# Check if Grafana is running
+if ! curl -s "$GRAFANA_URL/api/health" > /dev/null 2>&1; then
+    echo "Error: Grafana is not accessible at $GRAFANA_URL"
+    echo "Please start Grafana with: make monitor-up"
+    exit 1
+fi
+
+# List all dashboards
+echo "Fetching dashboard list..."
+DASHBOARDS=$(curl -s -u "$GRAFANA_USER:$GRAFANA_PASSWORD" \
+    "$GRAFANA_URL/api/search?type=dash-db" | jq -r '.[] | "\(.uid) \(.title)"')
+
+if [ -z "$DASHBOARDS" ]; then
+    echo "No dashboards found in Grafana"
+    exit 0
+fi
+
+echo ""
+echo "Available dashboards:"
+echo "---------------------"
+echo "$DASHBOARDS" | nl
+echo ""
+
+# Ask user which dashboard to export
+read -r -p "Enter dashboard number to export (or 'all' for all dashboards): " SELECTION
+
+# Validate selection
+if [ "$SELECTION" != "all" ]; then
+    # Check if selection is a valid number
+    if ! [[ "$SELECTION" =~ ^[0-9]+$ ]]; then
+        echo "Error: Please enter a valid number or 'all'"
+        exit 1
+    fi
+
+    # Check if selection is within valid range
+    DASHBOARD_COUNT=$(echo "$DASHBOARDS" | wc -l)
+    if [ "$SELECTION" -lt 1 ] || [ "$SELECTION" -gt "$DASHBOARD_COUNT" ]; then
+        echo "Error: Selection out of range (1-$DASHBOARD_COUNT)"
+        exit 1
+    fi
+fi
+
+if [ "$SELECTION" = "all" ]; then
+    # Export all dashboards
+    echo ""
+    echo "Exporting all dashboards..."
+
+    EXPORT_COUNT=0
+    FAIL_COUNT=0
+
+    while IFS= read -r line; do
+        UID=$(echo "$line" | awk '{print $1}')
+        TITLE=$(echo "$line" | cut -d' ' -f2-)
+        FILENAME="${UID}-$(echo "$TITLE" | tr '[:upper:]' '[:lower:]' | tr ' ' '-' | tr -cd '[:alnum:]-').json"
+
+        echo -n "Exporting: $TITLE -> $FILENAME ... "
+
+        # Temporarily disable errexit for this operation
+        set +e
+        if curl -s -u "$GRAFANA_USER:$GRAFANA_PASSWORD" \
+            "$GRAFANA_URL/api/dashboards/uid/$UID" | \
+            jq 'del(.meta) | .dashboard.id = null | .overwrite = true' > \
+            "$OUTPUT_DIR/$FILENAME" 2>/dev/null; then
+
+            # Verify the file is valid JSON and not empty
+            if [ -s "$OUTPUT_DIR/$FILENAME" ] && jq empty "$OUTPUT_DIR/$FILENAME" 2>/dev/null; then
+                echo "✓ Success"
+                ((EXPORT_COUNT++))
+            else
+                echo "✗ Failed (invalid JSON)"
+                rm -f "$OUTPUT_DIR/$FILENAME"
+                ((FAIL_COUNT++))
+            fi
+        else
+            echo "✗ Failed (export error)"
+            rm -f "$OUTPUT_DIR/$FILENAME"
+            ((FAIL_COUNT++))
+        fi
+        set -e
+    done <<< "$DASHBOARDS"
+
+    echo ""
+    echo "Export summary: $EXPORT_COUNT succeeded, $FAIL_COUNT failed"
+
+    if [ $FAIL_COUNT -gt 0 ]; then
+        exit 1
+    fi
+
+else
+    # Export single dashboard
+    SELECTED_LINE=$(echo "$DASHBOARDS" | sed -n "${SELECTION}p")
+
+    if [ -z "$SELECTED_LINE" ]; then
+        echo "Error: Invalid selection"
+        exit 1
+    fi
+
+    UID=$(echo "$SELECTED_LINE" | awk '{print $1}')
+    TITLE=$(echo "$SELECTED_LINE" | cut -d' ' -f2-)
+    FILENAME="${UID}-$(echo "$TITLE" | tr '[:upper:]' '[:lower:]' | tr ' ' '-' | tr -cd '[:alnum:]-').json"
+
+    echo ""
+    echo "Exporting: $TITLE"
+
+    # Temporarily disable errexit for this operation
+    set +e
+    if curl -s -u "$GRAFANA_USER:$GRAFANA_PASSWORD" \
+        "$GRAFANA_URL/api/dashboards/uid/$UID" | \
+        jq 'del(.meta) | .dashboard.id = null | .overwrite = true' > \
+        "$OUTPUT_DIR/$FILENAME" 2>/dev/null; then
+
+        # Verify the file is valid JSON and not empty
+        if [ -s "$OUTPUT_DIR/$FILENAME" ] && jq empty "$OUTPUT_DIR/$FILENAME" 2>/dev/null; then
+            echo "✓ Saved to: $OUTPUT_DIR/$FILENAME"
+        else
+            echo "✗ Error: Export produced invalid JSON"
+            rm -f "$OUTPUT_DIR/$FILENAME"
+            exit 1
+        fi
+    else
+        echo "✗ Error: Failed to export dashboard"
+        rm -f "$OUTPUT_DIR/$FILENAME"
+        exit 1
+    fi
+    set -e
+fi
+
+echo ""
+echo "Export complete!"
+echo ""
+echo "To reload dashboards:"
+echo "  make monitor-restart"
diff --git a/infra/metrics/prometheus/provisioning/prometheus.local.yml b/infra/metrics/prometheus/provisioning/prometheus.local.yml
new file mode 100644
index 00000000..4c661cbb
--- /dev/null
+++ b/infra/metrics/prometheus/provisioning/prometheus.local.yml
@@ -0,0 +1,41 @@
+# Prometheus configuration for local development/testing
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+  external_labels:
+    monitor: 'oullin-local'
+    environment: 'local'
+
+scrape_configs:
+  # Caddy metrics endpoint (dedicated /metrics endpoint, not admin API)
+  - job_name: 'caddy'
+    static_configs:
+      - targets: ['caddy_local:9180']
+        labels:
+          service: 'caddy'
+          environment: 'local'
+
+  # PostgreSQL database metrics via postgres_exporter (local)
+  - job_name: 'postgresql'
+    static_configs:
+      - targets: ['oullin_postgres_exporter_local:9187']
+        labels:
+          service: 'postgresql'
+          environment: 'local'
+
+  # API metrics endpoint (local)
+  - job_name: 'api'
+    metrics_path: '/metrics'
+    static_configs:
+      - targets: ['api:8080']
+        labels:
+          service: 'api'
+          environment: 'local'
+
+  # Prometheus self-monitoring
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
+        labels:
+          service: 'prometheus'
+          environment: 'local'
diff --git a/infra/metrics/prometheus/provisioning/prometheus.yml b/infra/metrics/prometheus/provisioning/prometheus.yml
new file mode 100644
index 00000000..18ef3a2c
--- /dev/null
+++ b/infra/metrics/prometheus/provisioning/prometheus.yml
@@ -0,0 +1,41 @@
+# Prometheus configuration for monitoring Caddy, API, and PostgreSQL
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+  external_labels:
+    monitor: 'oullin-prod'
+    environment: 'production'
+
+scrape_configs:
+  # Caddy metrics endpoint (dedicated /metrics endpoint, not admin API)
+  - job_name: 'caddy'
+    static_configs:
+      - targets: ['caddy_prod:9180']
+        labels:
+          service: 'caddy'
+          environment: 'production'
+
+  # PostgreSQL database metrics via postgres_exporter
+  - job_name: 'postgresql'
+    static_configs:
+      - targets: ['oullin_postgres_exporter:9187']
+        labels:
+          service: 'postgresql'
+          environment: 'production'
+
+  # API metrics endpoint
+  - job_name: 'api'
+    metrics_path: '/metrics'
+    static_configs:
+      - targets: ['api:8080']
+        labels:
+          service: 'api'
+          environment: 'production'
+
+  # Prometheus self-monitoring
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
+        labels:
+          service: 'prometheus'
+          environment: 'production'
diff --git a/infra/metrics/prometheus/scripts/postgres-exporter-entrypoint.sh b/infra/metrics/prometheus/scripts/postgres-exporter-entrypoint.sh
new file mode 100755
index 00000000..55f48fce
--- /dev/null
+++ b/infra/metrics/prometheus/scripts/postgres-exporter-entrypoint.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+set -e
+
+# urlencode STRING: print STRING with every byte percent-encoded as %XX, using only od and tr
+# (assumes od separates hex bytes with single spaces). Needed when credentials contain @, :, /, ?, =
+urlencode() {
+  string="$1"
+  printf '%s' "$string" | od -An -tx1 | tr ' ' % | tr -d '\n'
+}
+
+# Read each Docker secret separately: with set -e, a missing secret stops the script at the failing cat
+PG_USER=$(cat /run/secrets/pg_username)
+PG_PASSWORD=$(cat /run/secrets/pg_password)
+PG_DBNAME=$(cat /run/secrets/pg_dbname)
+
+# Build the connection URI for api-db:5432 from the percent-encoded user, password and database name
+export DATA_SOURCE_NAME="postgresql://$(urlencode "$PG_USER"):$(urlencode "$PG_PASSWORD")@api-db:5432/$(urlencode "$PG_DBNAME")?sslmode=require"
+
+# Replace this shell with postgres_exporter, forwarding any arguments passed to the script
+exec /bin/postgres_exporter "$@"
diff --git a/metal/kernel/app.go b/metal/kernel/app.go
index f4066948..c4f10fb4 100644
--- a/metal/kernel/app.go
+++ b/metal/kernel/app.go
@@ -87,6 +87,7 @@ func (a *App) Boot() {
 
 	modem.KeepAlive()
 	modem.KeepAliveDB()
+	modem.Metrics()
 	modem.Profile()
 	modem.Experience()
 	modem.Projects()
diff --git a/metal/router/router.go b/metal/router/router.go
index 0c68015b..02dab599 100644
--- a/metal/router/router.go
+++ b/metal/router/router.go
@@ -92,6 +92,17 @@ func (r *Router) KeepAliveDB() {
 	r.Mux.HandleFunc("GET /ping-db", apiHandler)
 }
 
+// Metrics registers the "GET /metrics" route on r.Mux, served by handler.NewMetricsHandler().
+func (r *Router) Metrics() {
+	metricsHandler := handler.NewMetricsHandler()
+
+	// Not public: Caddy blocks it (see @protected matcher in Caddyfile); Prometheus scrapes api:8080/metrics over internal DNS.
+	r.Mux.HandleFunc("GET /metrics", func(w http.ResponseWriter, req *http.Request) {
+		// NOTE(review): the error returned by Handle is discarded here — confirm that is intended.
+		_ = metricsHandler.Handle(w, req)
+	})
+}
+
 func (r *Router) Profile() {
 	maker := handler.NewProfileHandler
 
diff --git a/storage/monitoring/backups/.gitkeep b/storage/monitoring/backups/.gitkeep
new file mode 100644
index 00000000..5aab5f49
--- /dev/null
+++ b/storage/monitoring/backups/.gitkeep
@@ -0,0 +1 @@
+# Prometheus backups stored here