Secure metrics endpoint with oauth-proxy
The Prometheus server can't scrape ES metrics directly, because ES
serves certs signed by a self-signed CA that Prometheus doesn't recognize.

Instead, a proxy is placed in front of the metrics endpoint. It presents
certs signed by the service-serving CA, whose bundle Prometheus can read
through serving-certs-ca-bundle.
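A minimal sketch of the trust path this commit sets up (not part of the commit itself): a client that trusts only the service-serving CA bundle can complete a TLS handshake with the proxy, where a direct connection to ES would fail. The CA path and service URL below are assumptions for illustration.

package main

import (
	"crypto/tls"
	"crypto/x509"
	"fmt"
	"io/ioutil"
	"net/http"
)

func main() {
	// Assumed mount path of the serving-certs-ca-bundle ConfigMap on the
	// Prometheus side; the bundle holds the service-serving CA certificate.
	caPEM, err := ioutil.ReadFile("/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt")
	if err != nil {
		panic(err)
	}
	pool := x509.NewCertPool()
	if !pool.AppendCertsFromPEM(caPEM) {
		panic("no CA certificates found in bundle")
	}
	client := &http.Client{Transport: &http.Transport{
		TLSClientConfig: &tls.Config{RootCAs: pool},
	}}
	// The proxy presents a cert signed by the service-serving CA, so the
	// handshake verifies; ES itself would present a cert from an unknown CA.
	// (The request still needs a bearer token to be authorized.)
	resp, err := client.Get("https://elasticsearch-metrics.openshift-logging.svc:60000/_prometheus/metrics")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}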
Josef Karasek committed Feb 22, 2019
1 parent 2263e57 commit a363882
Showing 11 changed files with 195 additions and 27 deletions.
2 changes: 0 additions & 2 deletions hack/test-e2e.sh
@@ -20,8 +20,6 @@ popd
# update the manifest with the image built by ci
sed -i "s,quay.io/openshift/origin-elasticsearch-operator:latest,${IMAGE_ELASTICSEARCH_OPERATOR}," ${manifest}

sudo sysctl -w vm.max_map_count=262144 ||:

if oc get project openshift-logging > /dev/null 2>&1 ; then
echo using existing project openshift-logging
else
24 changes: 24 additions & 0 deletions manifests/02-role.yaml
@@ -40,3 +40,27 @@ rules:
- servicemonitors
verbs:
- "*"

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: oauth-proxy
rules:
- apiGroups: ["authentication.k8s.io"]
resources:
- tokenreviews
verbs: ["create"]
- apiGroups: ["authorization.k8s.io"]
resources:
- subjectaccessreviews
verbs: ["create"]

---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: elasticsearch-metrics
rules:
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
28 changes: 28 additions & 0 deletions manifests/03-role-bindings.yaml
@@ -11,3 +11,31 @@ roleRef:
kind: ClusterRole
name: elasticsearch-operator
apiGroup: rbac.authorization.k8s.io

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: oauth-proxy
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: oauth-proxy
subjects:
- kind: ServiceAccount
name: elasticsearch
namespace: openshift-logging

---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: elasticsearch-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: elasticsearch-metrics
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: openshift-monitoring
2 changes: 2 additions & 0 deletions manifests/05-deployment.yaml
@@ -28,3 +28,5 @@ spec:
value: ""
- name: OPERATOR_NAME
value: "elasticsearch-operator"
- name: PROXY_IMAGE
value: "quay.io/openshift/origin-oauth-proxy:v4.0.0"
8 changes: 6 additions & 2 deletions pkg/k8shandler/deployment.go
@@ -128,7 +128,7 @@ func (node *deploymentNode) isUpdateNeeded(cfg *desiredNodeState) bool {
for _, container := range node.resource.Spec.Template.Spec.Containers {
if container.Name == "elasticsearch" {
// Check image of Elasticsearch container
if container.Image != cfg.Image {
if container.Image != cfg.ESImage {
logrus.Debugf("Resource '%s' has different container image than desired", node.resource.Name)
return true
}
@@ -185,6 +185,10 @@ func (node *deploymentNode) constructNodeResource(cfg *desiredNodeState, owner m

progressDeadlineSeconds := int32(1800)
deployment.ObjectMeta.Labels = cfg.getLabels()
podTemplate, err := cfg.constructPodTemplateSpec()
if err != nil {
return nil, err
}
deployment.Spec = apps.DeploymentSpec{
Replicas: &replicas,
Selector: &metav1.LabelSelector{
@@ -194,7 +198,7 @@
Type: "Recreate",
},
ProgressDeadlineSeconds: &progressDeadlineSeconds,
Template: cfg.constructPodTemplateSpec(),
Template: podTemplate,
Paused: cfg.Paused,
}

80 changes: 73 additions & 7 deletions pkg/k8shandler/desirednodestate.go
@@ -2,12 +2,14 @@ package k8shandler

import (
"fmt"
"os"
"sort"
"strconv"
"strings"
"time"

v1alpha1 "github.com/openshift/elasticsearch-operator/pkg/apis/elasticsearch/v1alpha1"
"github.com/openshift/elasticsearch-operator/pkg/utils"
"github.com/operator-framework/operator-sdk/pkg/sdk"
"github.com/sirupsen/logrus"
apps "k8s.io/api/apps/v1"
@@ -23,6 +25,7 @@ const (
elasticsearchConfigPath = "/usr/share/java/elasticsearch/config"
elasticsearchDefaultImage = "quay.io/openshift/origin-logging-elasticsearch5"
heapDumpLocation = "/elasticsearch/persistent/heapdump.hprof"
proxyImageEnv = "PROXY_IMAGE"
)

type nodeState struct {
@@ -36,7 +39,8 @@ type desiredNodeState struct {
DeployName string
Roles []v1alpha1.ElasticsearchNodeRole
ESNodeSpec v1alpha1.ElasticsearchNode
Image string
ESImage string
ProxyImage string
SecretName string
NodeNum int32
ReplicaNum int32
@@ -80,7 +84,13 @@ func constructNodeSpec(dpl *v1alpha1.Elasticsearch, esNode v1alpha1.Elasticsearc
nodeCfg.EnvVars = nodeCfg.getEnvVars()

nodeCfg.ESNodeSpec.Resources = getResourceRequirements(dpl.Spec.Spec.Resources, esNode.Resources)
nodeCfg.Image = getImage(dpl.Spec.Spec.Image)
nodeCfg.ESImage = dpl.Spec.Spec.Image
// proxyImage isn't part of the CRD, because we may not need the proxy forever
value, ok := os.LookupEnv(proxyImageEnv)
if !ok {
return nodeCfg, fmt.Errorf("proxy image not specified, use the %s environment variable", proxyImageEnv)
}
nodeCfg.ProxyImage = value
return nodeCfg, nil
}

@@ -457,7 +467,7 @@ func (cfg *desiredNodeState) getESContainer() v1.Container {
probe := getReadinessProbe()
return v1.Container{
Name: "elasticsearch",
Image: cfg.Image,
Image: cfg.ESImage,
ImagePullPolicy: "IfNotPresent",
Env: cfg.getEnvVars(),
Ports: []v1.ContainerPort{
@@ -478,6 +488,49 @@
}
}

func (cfg *desiredNodeState) getProxyContainer() (v1.Container, error) {
proxyCookieSecret, err := utils.RandStringBase64(16)
if err != nil {
return v1.Container{}, err
}
container := v1.Container{
Name: "proxy",
Image: cfg.ProxyImage,
ImagePullPolicy: "IfNotPresent",
Ports: []v1.ContainerPort{
v1.ContainerPort{
Name: "metrics",
ContainerPort: 60000,
Protocol: v1.ProtocolTCP,
},
},
VolumeMounts: []v1.VolumeMount{
v1.VolumeMount{
Name: fmt.Sprintf("%s-%s", cfg.ClusterName, "metrics"),
MountPath: "/etc/proxy/secrets",
},
v1.VolumeMount{
Name: "certificates",
MountPath: "/etc/proxy/elasticsearch",
},
},
Args: []string{
"--https-address=:60000",
"--provider=openshift",
"--upstream=https://127.0.0.1:9200",
"--tls-cert=/etc/proxy/secrets/tls.crt",
"--tls-key=/etc/proxy/secrets/tls.key",
"--upstream-ca=/etc/proxy/elasticsearch/admin-ca",
"--openshift-service-account=elasticsearch",
`-openshift-sar={"resource": "namespaces", "verb": "get"}`,
`-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get"}}`,
"--pass-user-bearer-token",
fmt.Sprintf("--cookie-secret=%s", proxyCookieSecret),
},
}
return container, nil
}

func (cfg *desiredNodeState) getVolumeMounts() []v1.VolumeMount {
return []v1.VolumeMount{
v1.VolumeMount{
@@ -579,6 +632,14 @@ func (cfg *desiredNodeState) getVolumes() []v1.Volume {
},
},
},
v1.Volume{
Name: fmt.Sprintf("%s-%s", cfg.ClusterName, "metrics"),
VolumeSource: v1.VolumeSource{
Secret: &v1.SecretVolumeSource{
SecretName: fmt.Sprintf("%s-%s", cfg.ClusterName, "metrics"),
},
},
},
}

if !cfg.isNodePureMaster() {
@@ -640,9 +701,14 @@ func (actualState *actualNodeState) isStatusUpdateNeeded(nodesInStatus v1alpha1.
return true
}

func (cfg *desiredNodeState) constructPodTemplateSpec() v1.PodTemplateSpec {
func (cfg *desiredNodeState) constructPodTemplateSpec() (v1.PodTemplateSpec, error) {
affinity := cfg.getAffinity()

proxyPodSpec, err := cfg.getProxyContainer()
if err != nil {
return v1.PodTemplateSpec{}, err
}

template := v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: cfg.getLabels(),
@@ -651,15 +717,15 @@ func (cfg *desiredNodeState) constructPodTemplateSpec() v1.PodTemplateSpec {
Affinity: &affinity,
Containers: []v1.Container{
cfg.getESContainer(),
proxyPodSpec,
},
Volumes: cfg.getVolumes(),
// ImagePullSecrets: TemplateImagePullSecrets(imagePullSecrets),
Volumes: cfg.getVolumes(),
ServiceAccountName: cfg.ServiceAccountName,
},
}
nodeSelector, ok := cfg.getSelector()
if ok {
template.Spec.NodeSelector = nodeSelector
}
return template
return template, nil
}
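utils.RandStringBase64, called by getProxyContainer above, comes from the new pkg/utils import and isn't shown in this diff. A plausible implementation (an assumption, not the committed code):

package utils

import (
	"crypto/rand"
	"encoding/base64"
)

// RandStringBase64 returns n random bytes, base64-encoded, suitable for
// use as the proxy's cookie secret.
func RandStringBase64(n int) (string, error) {
	b := make([]byte, n)
	if _, err := rand.Read(b); err != nil {
		return "", err
	}
	return base64.StdEncoding.EncodeToString(b), nil
}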
8 changes: 6 additions & 2 deletions pkg/k8shandler/service_monitor.go
@@ -40,10 +40,11 @@ func createServiceMonitor(serviceMonitorName, clusterName, namespace string, lab
}
tlsConfig := monitoringv1.TLSConfig{
CAFile: prometheusCAFile,
ServerName: clusterName,
ServerName: fmt.Sprintf("%s-%s.%s.svc", clusterName, "metrics", namespace),
// ServerName can be e.g. elasticsearch-metrics.openshift-logging.svc
}
endpoint := monitoringv1.Endpoint{
Port: "restapi",
Port: fmt.Sprintf("%s-%s", clusterName, "metrics"),
Path: "/_prometheus/metrics",
Scheme: "https",
BearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
Expand All @@ -53,6 +54,9 @@ func createServiceMonitor(serviceMonitorName, clusterName, namespace string, lab
JobLabel: "monitor-elasticsearch",
Endpoints: []monitoringv1.Endpoint{endpoint},
Selector: labelSelector,
NamespaceSelector: monitoringv1.NamespaceSelector{
MatchNames: []string{namespace},
},
}
return svcMonitor
}
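For context on the ServerName change above (not part of the commit): service serving certificates carry a DNS SAN of the form <service>.<namespace>.svc, so TLS verification against the bare cluster name would fail. An illustrative check, with an assumed path to the serving cert:

package main

import (
	"crypto/x509"
	"encoding/pem"
	"fmt"
	"io/ioutil"
)

func main() {
	// Assumed location of the serving cert inside the proxy container.
	pemBytes, err := ioutil.ReadFile("/etc/proxy/secrets/tls.crt")
	if err != nil {
		panic(err)
	}
	block, _ := pem.Decode(pemBytes)
	cert, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		panic(err)
	}
	// nil: <service>.<namespace>.svc is a SAN on service serving certs.
	fmt.Println(cert.VerifyHostname("elasticsearch-metrics.openshift-logging.svc"))
	// error: the bare cluster name is not a SAN, hence the ServerName change.
	fmt.Println(cert.VerifyHostname("elasticsearch"))
}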
35 changes: 30 additions & 5 deletions pkg/k8shandler/services.go
@@ -16,24 +16,48 @@ import (
func CreateOrUpdateServices(dpl *v1alpha1.Elasticsearch) error {
elasticsearchClusterSvcName := fmt.Sprintf("%s-%s", dpl.Name, "cluster")
elasticsearchRestSvcName := dpl.Name
metricsSvcName := fmt.Sprintf("%s-%s", dpl.Name, "metrics")
owner := asOwner(dpl)

labelsWithDefault := appendDefaultLabel(dpl.Name, dpl.Labels)

err := createOrUpdateService(elasticsearchClusterSvcName, dpl.Namespace, dpl.Name, "cluster", 9300, selectorForES("es-node-master", dpl.Name), labelsWithDefault, true, owner)
err := createOrUpdateService(elasticsearchClusterSvcName, dpl.Namespace, dpl.Name,
"cluster", 9300, selectorForES("es-node-master", dpl.Name), map[string]string{},
labelsWithDefault, true, owner)

if err != nil {
return fmt.Errorf("Failure creating service %v", err)
}

err = createOrUpdateService(elasticsearchRestSvcName, dpl.Namespace, dpl.Name,
"restapi", 9200, selectorForES("es-node-client", dpl.Name), map[string]string{},
labelsWithDefault, false, owner)

if err != nil {
return fmt.Errorf("Failure creating service %v", err)
}

err = createOrUpdateService(elasticsearchRestSvcName, dpl.Namespace, dpl.Name, "restapi", 9200, selectorForES("es-node-client", dpl.Name), labelsWithDefault, false, owner)
annotations := map[string]string{
"service.alpha.openshift.io/serving-cert-secret-name": metricsSvcName,
}

err = createOrUpdateService(metricsSvcName, dpl.Namespace, metricsSvcName,
"metrics", 60000, selectorForES("es-node-client", dpl.Name), annotations,
labelsWithDefault, false, owner)

if err != nil {
return fmt.Errorf("Failure creating service %v", err)
}

return nil
}

func createOrUpdateService(serviceName, namespace, clusterName, targetPortName string, port int32, selector, labels map[string]string, publishNotReady bool, owner metav1.OwnerReference) error {
elasticsearchSvc := createService(serviceName, namespace, clusterName, targetPortName, port, selector, labels, publishNotReady)
func createOrUpdateService(serviceName, namespace, clusterName, targetPortName string, port int32, selector, annotations, labels map[string]string, publishNotReady bool, owner metav1.OwnerReference) error {

elasticsearchSvc := createService(serviceName, namespace, clusterName,
targetPortName, port, selector, annotations,
labels, publishNotReady)

addOwnerRefToObject(elasticsearchSvc, owner)
err := sdk.Create(elasticsearchSvc)
if err != nil && !errors.IsAlreadyExists(err) {
@@ -52,8 +76,9 @@ func createOrUpdateService(serviceName, namespace, clusterName, targetPortName s
return nil
}

func createService(serviceName, namespace, clusterName, targetPortName string, port int32, selector, labels map[string]string, publishNotReady bool) *v1.Service {
func createService(serviceName, namespace, clusterName, targetPortName string, port int32, selector, annotations, labels map[string]string, publishNotReady bool) *v1.Service {
svc := service(serviceName, namespace)
svc.Annotations = annotations
svc.Labels = labels
svc.Spec = v1.ServiceSpec{
Selector: selector,
7 changes: 6 additions & 1 deletion pkg/k8shandler/statefulset.go
@@ -61,13 +61,18 @@ func (node *statefulSetNode) constructNodeResource(cfg *desiredNodeState, owner
//statefulSet(cfg.DeployName, node.resource.ObjectMeta.Namespace)
statefulSet.ObjectMeta.Labels = cfg.getLabels()

podTemplate, err := cfg.constructPodTemplateSpec()
if err != nil {
return nil, err
}

statefulSet.Spec = apps.StatefulSetSpec{
Replicas: &replicas,
ServiceName: cfg.DeployName,
Selector: &metav1.LabelSelector{
MatchLabels: cfg.getLabels(),
},
Template: cfg.constructPodTemplateSpec(),
Template: podTemplate,
}

pvc, ok, err := cfg.generateMasterPVC()
8 changes: 0 additions & 8 deletions pkg/k8shandler/util.go
@@ -93,14 +93,6 @@ func getReadinessProbe() v1.Probe {
}
}

func getImage(commonImage string) string {
image := commonImage
if image == "" {
image = elasticsearchDefaultImage
}
return image
}

func getResourceRequirements(commonResRequirements, nodeResRequirements v1.ResourceRequirements) v1.ResourceRequirements {
limitCPU := nodeResRequirements.Limits.Cpu()
if limitCPU.IsZero() {
