Skip to content

Commit

Permalink
Add support for reliable acks based on configuration (#167)
Browse files Browse the repository at this point in the history
Implement reliable acks in fleet telemetry which
will allow to signal back to the vehicle that
the request was processed successfully
  • Loading branch information
agbpatro committed Apr 23, 2024
1 parent 3f39c7c commit be065fb
Show file tree
Hide file tree
Showing 21 changed files with 412 additions and 193 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,10 @@ The following [dispatchers](./telemetry/producer.go#L10-L19) are supported
* ZMQ: Configure with the config.json file. See implementation here: [config/config.go](./config/config.go)
* Logger: This is a simple STDOUT logger that serializes the protos to json.

>NOTE: To add a new dispatcher, please provide integration tests and updated documentation. To serialize dispatcher data as json instead of protobufs, add a config `transmit_decoded_records` and set value to `true` as shown [here](config/test_configs_test.go#L104)
>NOTE: To add a new dispatcher, please provide integration tests and updated documentation. To serialize dispatcher data as json instead of protobufs, add a config `transmit_decoded_records` and set value to `true` as shown [here](config/test_configs_test.go#L186)
## Reliable Acks
Fleet telemetry allows you to send ack messages back to the vehicle. This is useful for applications that need to ensure the data was received and processed. To enable this feature, set `reliable_ack_sources` to one of configured dispatchers (`kafka`,`kinesis`,`pubsub`,`zmq`) in the config file. You can only set reliable acks to one dispatcher per recordType. See [here](./test/integration/config.json#L8) for sample config.

## Metrics
Configure and use Prometheus or a StatsD-interface supporting data store for metrics. The integration test runs fleet telemetry with [grafana](https://grafana.com/docs/grafana/latest/datasources/google-cloud-monitoring/), which is compatible with prometheus. It also has an example dashboard which tracks important metrics related to the hosted server. Sample screenshot for the [sample dashboard](./test/integration/grafana/provisioning/dashboards/dashboard.json):-
Expand Down
61 changes: 48 additions & 13 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,8 @@ type Config struct {
// RateLimit is a configuration for the ratelimit
RateLimit *RateLimit `json:"rate_limit,omitempty"`

// ReliableAck if true, the server will send an ack back to the client only when the message has been stored in a datastore
ReliableAck bool `json:"reliable_ack,omitempty"`

// ReliableAckWorkers is the number of workers that will handle the acknowledgment
ReliableAckWorkers int `json:"reliable_ack_workers,omitempty"`
// ReliableAckSources is a mapping of record types to a dispatcher that will be used for reliable ack
ReliableAckSources map[string]telemetry.Dispatcher `json:"reliable_ack_sources,omitempty"`

// Kafka is a configuration for the standard librdkafka configuration properties
// seen here: https://raw.githubusercontent.com/confluentinc/librdkafka/master/CONFIGURATION.md
Expand Down Expand Up @@ -245,12 +242,13 @@ func (c *Config) prometheusEnabled() bool {
return false
}

func (c *Config) ReliableAcksDisabled() bool {
return c.ReliableAck == false && c.ReliableAckWorkers == 0
}

// ConfigureProducers validates and establishes connections to the producers (kafka/pubsub/logger)
func (c *Config) ConfigureProducers(airbrakeHandler *airbrake.AirbrakeHandler, logger *logrus.Logger) (map[string][]telemetry.Producer, error) {
reliableAckSources, err := c.configureReliableAckSources()
if err != nil {
return nil, err
}

producers := make(map[telemetry.Dispatcher]telemetry.Producer)
producers[telemetry.Logger] = simple.NewProtoLogger(logger)

Expand All @@ -266,7 +264,7 @@ func (c *Config) ConfigureProducers(airbrakeHandler *airbrake.AirbrakeHandler, l
return nil, errors.New("Expected Kafka to be configured")
}
convertKafkaConfig(c.Kafka)
kafkaProducer, err := kafka.NewProducer(c.Kafka, c.Namespace, c.prometheusEnabled(), c.MetricCollector, airbrakeHandler, logger)
kafkaProducer, err := kafka.NewProducer(c.Kafka, c.Namespace, c.prometheusEnabled(), c.MetricCollector, airbrakeHandler, c.AckChan, reliableAckSources[telemetry.Kafka], logger)
if err != nil {
return nil, err
}
Expand All @@ -277,7 +275,7 @@ func (c *Config) ConfigureProducers(airbrakeHandler *airbrake.AirbrakeHandler, l
if c.Pubsub == nil {
return nil, errors.New("Expected Pubsub to be configured")
}
googleProducer, err := googlepubsub.NewProducer(context.Background(), c.prometheusEnabled(), c.Pubsub.ProjectID, c.Namespace, c.MetricCollector, airbrakeHandler, logger)
googleProducer, err := googlepubsub.NewProducer(context.Background(), c.prometheusEnabled(), c.Pubsub.ProjectID, c.Namespace, c.MetricCollector, airbrakeHandler, c.AckChan, reliableAckSources[telemetry.Pubsub], logger)
if err != nil {
return nil, err
}
Expand All @@ -293,7 +291,7 @@ func (c *Config) ConfigureProducers(airbrakeHandler *airbrake.AirbrakeHandler, l
maxRetries = *c.Kinesis.MaxRetries
}
streamMapping := c.CreateKinesisStreamMapping(recordNames)
kinesis, err := kinesis.NewProducer(maxRetries, streamMapping, c.Kinesis.OverrideHost, c.prometheusEnabled(), c.MetricCollector, airbrakeHandler, logger)
kinesis, err := kinesis.NewProducer(maxRetries, streamMapping, c.Kinesis.OverrideHost, c.prometheusEnabled(), c.MetricCollector, airbrakeHandler, c.AckChan, reliableAckSources[telemetry.Kinesis], logger)
if err != nil {
return nil, err
}
Expand All @@ -304,7 +302,7 @@ func (c *Config) ConfigureProducers(airbrakeHandler *airbrake.AirbrakeHandler, l
if c.ZMQ == nil {
return nil, errors.New("Expected ZMQ to be configured")
}
zmqProducer, err := zmq.NewProducer(context.Background(), c.ZMQ, c.MetricCollector, c.Namespace, airbrakeHandler, logger)
zmqProducer, err := zmq.NewProducer(context.Background(), c.ZMQ, c.MetricCollector, c.Namespace, airbrakeHandler, c.AckChan, reliableAckSources[telemetry.ZMQ], logger)
if err != nil {
return nil, err
}
Expand All @@ -327,6 +325,43 @@ func (c *Config) ConfigureProducers(airbrakeHandler *airbrake.AirbrakeHandler, l
return dispatchProducerRules, nil
}

func (c *Config) configureReliableAckSources() (map[telemetry.Dispatcher]map[string]interface{}, error) {
reliableAckSources := make(map[telemetry.Dispatcher]map[string]interface{}, 0)
for txType, dispatchRule := range c.ReliableAckSources {
if dispatchRule == telemetry.Logger {
return nil, fmt.Errorf("logger cannot be configured as reliable ack for record: %s", txType)
}
dispatchers, ok := c.Records[txType]
if !ok {
return nil, fmt.Errorf("%s cannot be configured as reliable ack for record: %s since no record mapping exists", dispatchRule, txType)
}
dispatchRuleFound := false
validDispatchers := parseValidDispatchers(dispatchers)
for _, dispatcher := range validDispatchers {
if dispatcher == dispatchRule {
dispatchRuleFound = true
reliableAckSources[dispatchRule] = map[string]interface{}{txType: true}
break
}
}
if !dispatchRuleFound {
return nil, fmt.Errorf("%s cannot be configured as reliable ack for record: %s. Valid datastores configured %v", dispatchRule, txType, validDispatchers)
}
}
return reliableAckSources, nil
}

// parseValidDispatchers removes no-op dispatcher from the input i.e. Logger
func parseValidDispatchers(input []telemetry.Dispatcher) []telemetry.Dispatcher {
var result []telemetry.Dispatcher
for _, v := range input {
if v != telemetry.Logger {
result = append(result, v)
}
}
return result
}

// convertKafkaConfig will prioritize int over float
// see: https://github.com/confluentinc/confluent-kafka-go/blob/cde2827bc49655eca0f9ce3fc1cda13cb6cdabc9/kafka/config.go#L108-L125
func convertKafkaConfig(input *confluent.ConfigMap) {
Expand Down
6 changes: 0 additions & 6 deletions config/config_initializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package config

import (
"encoding/json"
"errors"
"flag"
"log"
"os"
Expand Down Expand Up @@ -54,11 +53,6 @@ func loadApplicationConfig(configFilePath string) (*Config, error) {
return nil, err
}
config.MetricCollector = metrics.NewCollector(config.Monitoring, logger)

// TODO disble this check when reliable acks are properly supported
if !config.ReliableAcksDisabled() {
return nil, errors.New("reliable acks not support yet. Unset `reliable_ack` and `reliable_ack_workers` in the config file")
}
config.AckChan = make(chan *telemetry.Record)
return config, err
}
Expand Down
12 changes: 3 additions & 9 deletions config/config_initializer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ var _ = Describe("Test application config initialization", func() {
Namespace: "tesla_telemetry",
TLS: &TLS{CAFile: "tesla.ca", ServerCert: "your_own_cert.crt", ServerKey: "your_own_key.key"},
RateLimit: &RateLimit{Enabled: true, MessageLimit: 1000, MessageInterval: 30},
ReliableAck: false,
ReliableAckWorkers: 0,
ReliableAckSources: map[string]telemetry.Dispatcher{"V": telemetry.Kafka},
Kafka: &confluent.ConfigMap{
"bootstrap.servers": "some.broker1:9093,some.broker1:9093",
"ssl.ca.location": "kafka.ca",
Expand All @@ -36,7 +35,7 @@ var _ = Describe("Test application config initialization", func() {
MetricCollector: prometheus.NewCollector(),
LogLevel: "info",
JSONLogEnable: true,
Records: map[string][]telemetry.Dispatcher{"FS": {"kafka"}},
Records: map[string][]telemetry.Dispatcher{"V": {"kafka"}},
}

loadedConfig, err := loadTestApplicationConfig(TestConfig)
Expand All @@ -62,7 +61,7 @@ var _ = Describe("Test application config initialization", func() {
"queue.buffering.max.messages": float64(1000000),
},
MetricCollector: noop.NewCollector(),
Records: map[string][]telemetry.Dispatcher{"FS": {"kafka"}},
Records: map[string][]telemetry.Dispatcher{"V": {"kafka"}},
}

loadedConfig, err := loadTestApplicationConfig(TestSmallConfig)
Expand All @@ -73,11 +72,6 @@ var _ = Describe("Test application config initialization", func() {
Expect(loadedConfig).To(Equal(expectedConfig))
})

It("fails when reliable acks are set", func() {
_, err := loadTestApplicationConfig(TestReliableAckConfig)
Expect(err).Should(MatchError("reliable acks not support yet. Unset `reliable_ack` and `reliable_ack_workers` in the config file"))
})

It("returns an error if config is not appropriate", func() {
_, err := loadTestApplicationConfig(BadTopicConfig)
Expect(err).To(MatchError("invalid character '}' looking for beginning of object key string"))
Expand Down
45 changes: 31 additions & 14 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,12 @@ var _ = Describe("Test full application config", func() {
BeforeEach(func() {
log, _ = logrus.NoOpLogger()
config = &Config{
Host: "127.0.0.1",
Port: 443,
StatusPort: 8080,
Namespace: "tesla_telemetry",
TLS: &TLS{CAFile: "tesla.ca", ServerCert: "your_own_cert.crt", ServerKey: "your_own_key.key"},
RateLimit: &RateLimit{Enabled: true, MessageLimit: 1000, MessageInterval: 30},
ReliableAck: true,
ReliableAckWorkers: 15,
Host: "127.0.0.1",
Port: 443,
StatusPort: 8080,
Namespace: "tesla_telemetry",
TLS: &TLS{CAFile: "tesla.ca", ServerCert: "your_own_cert.crt", ServerKey: "your_own_key.key"},
RateLimit: &RateLimit{Enabled: true, MessageLimit: 1000, MessageInterval: 30},
Kafka: &confluent.ConfigMap{
"bootstrap.servers": "some.broker:9093",
"ssl.ca.location": "kafka.ca",
Expand All @@ -44,7 +42,7 @@ var _ = Describe("Test full application config", func() {
Monitoring: &metrics.MonitoringConfig{PrometheusMetricsPort: 9090, ProfilerPort: 4269, ProfilingPath: "/tmp/fleet-telemetry/profile/"},
LogLevel: "info",
JSONLogEnable: true,
Records: map[string][]telemetry.Dispatcher{"FS": {"kafka"}},
Records: map[string][]telemetry.Dispatcher{"V": {"kafka"}},
}
})

Expand Down Expand Up @@ -137,7 +135,7 @@ var _ = Describe("Test full application config", func() {

producers, err = config.ConfigureProducers(airbrake.NewAirbrakeHandler(nil), log)
Expect(err).NotTo(HaveOccurred())
Expect(producers["FS"]).To(HaveLen(1))
Expect(producers["V"]).To(HaveLen(1))

value, err := config.Kafka.Get("queue.buffering.max.messages", 10)
Expect(err).NotTo(HaveOccurred())
Expand Down Expand Up @@ -168,10 +166,29 @@ var _ = Describe("Test full application config", func() {
})
})

Context("configure reliable acks", func() {

DescribeTable("fails",
func(configInput string, errMessage string) {

config, err := loadTestApplicationConfig(configInput)
Expect(err).NotTo(HaveOccurred())

producers, err = config.ConfigureProducers(airbrake.NewAirbrakeHandler(nil), log)
Expect(err).To(MatchError(errMessage))
Expect(producers).To(BeNil())
},
Entry("when reliable ack is mapped incorrectly", TestBadReliableAckConfig, "pubsub cannot be configured as reliable ack for record: V. Valid datastores configured [kafka]"),
Entry("when logger is configured as reliable ack", TestLoggerAsReliableAckConfig, "logger cannot be configured as reliable ack for record: V"),
Entry("when reliable ack is configured for unmapped txtype", TestUnusedTxTypeAsReliableAckConfig, "kafka cannot be configured as reliable ack for record: error since no record mapping exists"),
)

})

Context("configure kinesis", func() {
It("returns an error if kinesis isn't included", func() {
log, _ := logrus.NoOpLogger()
config.Records = map[string][]telemetry.Dispatcher{"FS": {"kinesis"}}
config.Records = map[string][]telemetry.Dispatcher{"V": {"kinesis"}}

var err error
producers, err = config.ConfigureProducers(airbrake.NewAirbrakeHandler(nil), log)
Expand Down Expand Up @@ -218,7 +235,7 @@ var _ = Describe("Test full application config", func() {
var err error
producers, err = pubsubConfig.ConfigureProducers(airbrake.NewAirbrakeHandler(nil), log)
Expect(err).NotTo(HaveOccurred())
Expect(producers["FS"]).NotTo(BeNil())
Expect(producers["V"]).NotTo(BeNil())
})
})

Expand All @@ -233,7 +250,7 @@ var _ = Describe("Test full application config", func() {

It("returns an error if zmq isn't included", func() {
log, _ := logrus.NoOpLogger()
config.Records = map[string][]telemetry.Dispatcher{"FS": {"zmq"}}
config.Records = map[string][]telemetry.Dispatcher{"V": {"zmq"}}
var err error
producers, err = config.ConfigureProducers(airbrake.NewAirbrakeHandler(nil), log)
Expect(err).To(MatchError("Expected ZMQ to be configured"))
Expand All @@ -249,7 +266,7 @@ var _ = Describe("Test full application config", func() {
var err error
producers, err = zmqConfig.ConfigureProducers(airbrake.NewAirbrakeHandler(nil), log)
Expect(err).NotTo(HaveOccurred())
Expect(producers["FS"]).NotTo(BeNil())
Expect(producers["V"]).NotTo(BeNil())
})
})

Expand Down
Loading

0 comments on commit be065fb

Please sign in to comment.