forked from FundingCircle/jackdaw
-
Notifications
You must be signed in to change notification settings - Fork 0
/
kafka.clj
227 lines (202 loc) · 9.34 KB
/
kafka.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
(ns jackdaw.test.transports.kafka
(:require
[clojure.tools.logging :as log]
[clojure.stacktrace :as stacktrace]
[manifold.stream :as s]
[manifold.deferred :as d]
[jackdaw.client :as kafka]
[jackdaw.data :as jd]
[jackdaw.test.journal :as j]
[jackdaw.test.transports :as t :refer [deftransport]]
[jackdaw.test.serde :refer [apply-serializers apply-deserializers
serde-map
byte-array-serde]])
(:import
org.apache.kafka.common.header.Header
org.apache.kafka.clients.consumer.Consumer
org.apache.kafka.streams.KafkaStreams$StateListener
org.apache.kafka.clients.consumer.ConsumerRecord
org.apache.kafka.clients.producer.Producer
org.apache.kafka.clients.producer.ProducerRecord))
;; Emit compiler warnings wherever Java interop would fall back to runtime
;; reflection — this namespace is interop-heavy, so missing type hints
;; should be caught at compile time.
(set! *warn-on-reflection* true)
(defn subscribe
  "Subscribes the given Kafka `consumer` to the topics described by
  `topic-config` (a sequence of topic-metadata maps) and returns the
  result of `kafka/subscribe`."
  [consumer topic-config]
  (kafka/subscribe consumer topic-config))
(defn load-assignments
  "Forces a zero-timeout poll (which triggers partition assignment for the
  consumer's group) and returns the consumer's current assignment set."
  [^Consumer consumer]
  (.poll consumer 0)
  (.assignment consumer))
(defn seek-to-end
  "Seeks `consumer` to the end of the given `topic-partitions` — or, when
  none are supplied, of all its currently assigned partitions — and returns
  the updated consumer."
  [^Consumer consumer & topic-partitions]
  (let [partitions (or topic-partitions (load-assignments consumer))]
    (.seekToEnd consumer partitions)
    ;; seekToEnd is lazy; asking for the position forces the seek to
    ;; happen now.
    (run! (fn [partition] (.position consumer partition)) partitions)
    consumer))
(defn poller
  "Returns a single-poll function over a Kafka consumer. Each call polls
  once (1000 ms timeout) and pushes every record received onto the supplied
  `messages` stream; any throwable is logged and put on the stream as
  `{:error e}` instead of propagating."
  [messages]
  (fn [^Consumer consumer]
    (try
      (when-let [records (.poll consumer 1000)]
        (s/put-all! messages records))
      (catch Throwable t
        (log/error (Throwable->map t))
        (s/put! messages {:error t})))))
(defn subscription
  "Subscribes a fresh byte-array consumer to `topic-collection` and seeks to
  the end of all partitions. This is usually what you want in a testing
  context: the test being run now should ignore all the garbage created by
  previous tests."
  [kafka-config topic-collection]
  (let [raw-consumer (kafka/consumer kafka-config byte-array-serde)]
    (seek-to-end (subscribe raw-consumer topic-collection))))
(defn mk-consumer-record
  "Clojurizes a ConsumerRecord into a plain map (nil in, nil out).
  Headers are folded into a map of header key -> header value."
  [^ConsumerRecord rec]
  (when rec
    (let [headers (into {}
                        (map (fn [^Header h] [(.key h) (.value h)]))
                        (.headers rec))]
      {:checksum            (.checksum rec)
       :key                 (.key rec)
       :offset              (.offset rec)
       :partition           (.partition rec)
       :serializedKeySize   (.serializedKeySize rec)
       :serializedValueSize (.serializedValueSize rec)
       :timestamp           (.timestamp rec)
       :topic               (.topic rec)
       :value               (.value rec)
       :headers             headers})))
(defn ^ProducerRecord mk-producer-record
  "Builds a Kafka ProducerRecord (for use with `send!`) from a topic-metadata
  map's `:topic-name` plus optional partition, timestamp, key and value."
  ([topic v]
   (ProducerRecord. ^String (:topic-name topic) v))
  ([topic k v]
   (ProducerRecord. ^String (:topic-name topic) k v))
  ([topic part k v]
   (ProducerRecord. ^String (:topic-name topic) ^Integer (int part) k v))
  ([topic part ts k v]
   (ProducerRecord. ^String (:topic-name topic) ^Integer (int part) ^Long ts k v)))
(defn consumer
  "Creates an asynchronous Kafka consumer of all topics defined in the
  supplied `topic-metadata`.

  Puts all messages on the `:messages` stream of the returned map. It is
  the responsibility of the caller to arrange for that stream to be read
  by some other process.

  Returns a map of
    :process   the manifold loop driving the polling
    :started?  a promise delivered once the loop's first iteration runs
    :messages  the stream of clojurized, deserialized records
    :continue? an atom; reset to false to stop the loop

  Must be closed with `close-consumer` when no longer required."
  [kafka-config topic-metadata deserializers]
  (let [continue? (atom true)
        ;; Each raw ConsumerRecord is clojurized, run through the per-topic
        ;; deserializers, and its :topic rewritten from the kafka topic name
        ;; back to the topic-metadata key.
        messages (s/stream 1 (comp
                              (map #'mk-consumer-record)
                              (map #(apply-deserializers deserializers %))
                              (map #(assoc % :topic (j/reverse-lookup topic-metadata (:topic %))))))
        started? (promise)
        poll (poller messages)]
    {:process (d/loop [consumer (subscription kafka-config
                                              (vals topic-metadata))]
                (d/chain (d/future consumer)
                         (fn [c]
                           ;; Signal readiness exactly once, on the first pass
                           ;; through the loop.
                           (when-not (realized? started?)
                             (deliver started? true)
                             (log/infof "started kafka consumer: %s"
                                        (select-keys kafka-config ["bootstrap.servers" "group.id"])))
                           (if @continue?
                             (do (poll consumer)
                                 (d/recur consumer))
                             ;; Shutdown path: close the message stream, then
                             ;; the underlying Kafka consumer.
                             (do
                               (s/close! messages)
                               (.close ^Consumer consumer)
                               (log/infof "stopped kafka consumer: %s"
                                          (select-keys kafka-config ["bootstrap.servers" "group.id"])))))))
     :started? started?
     :messages messages
     :continue? continue?}))
(defn close-consumer
  "Signals the consumer loop to stop and blocks until it has shut down,
  returning the loop's final result."
  [{:keys [continue? process]}]
  (reset! continue? false)
  @process)
(defn set-headers
  "Adds each [key value] pair in `headers` to the record's Headers and
  returns the (mutated) record."
  [^ProducerRecord record headers]
  (let [record-headers (.headers record)]
    (run! (fn [[k v]] (.add record-headers k v)) headers))
  record)
(defn build-record
  "Builds a Kafka ProducerRecord from the message map `m` (partition
  defaulting to 0), applies any `:headers`, and assocs the record onto the
  map under `:producer-record`."
  [m]
  (let [{:keys [topic timestamp key value headers]} m
        record (mk-producer-record topic
                                   (:partition m (int 0))
                                   timestamp
                                   key
                                   value)]
    (set-headers record headers)
    (assoc m :producer-record record)))
(defn deliver-ack
  "Returns a producer send-callback that delivers the outcome of the write
  to the `ack` promise (when one was supplied). On failure the promise is
  delivered `{:error ex}`; on success, a trimmed-down map of the record
  metadata. The default command-handler waits on this promise before moving
  on to the next command, so the test response may indicate the
  success/failure of each write command."
  [ack]
  (fn [rec-meta ex]
    (when (some? ack)
      (deliver ack
               (if ex
                 {:error ex}
                 (select-keys (jd/datafy rec-meta)
                              [:topic-name :offset :partition
                               :serialized-key-size
                               :serialized-value-size]))))))
(defn producer
  "Creates an asynchronous kafka producer to be used by a test-machine for
  injecting test messages.

  Returns a map of
    :producer  the underlying jackdaw producer
    :messages  a manifold stream; each message put on it is serialized,
               built into a ProducerRecord, and written to kafka
    :process   a deferred for the send loop; realized once the stream is
               closed and the producer shut down

  Messages that fail serialization are not sent; their `ack` promise is
  delivered with an error instead."
  ([kafka-config topic-config serializers]
   (let [producer (kafka/producer kafka-config byte-array-serde)
         messages (s/stream 1 (map (fn [x]
                                     (try
                                       (-> (apply-serializers serializers x)
                                           (build-record))
                                       (catch Exception e
                                         (let [trace (with-out-str
                                                       (stacktrace/print-cause-trace e))]
                                           (log/error e trace))
                                         ;; Keep the message on the stream so the
                                         ;; send loop below can ack the failure.
                                         (assoc x :serialization-error e))))))
         _ (log/infof "started kafka producer: %s"
                      (select-keys kafka-config ["bootstrap.servers" "group.id"]))
         process (d/loop [message (s/take! messages)]
                   (d/chain (d/future message)
                            (fn [{:keys [producer-record ack serialization-error] :as m}]
                              (cond
                                ;; Serialization failed upstream: report via the
                                ;; ack promise and move on to the next message.
                                serialization-error (do (deliver ack {:error :serialization-error
                                                                      :message (.getMessage ^Exception serialization-error)})
                                                        (d/recur (s/take! messages)))
                                producer-record (do (kafka/send! producer producer-record (deliver-ack ack))
                                                    (d/recur (s/take! messages)))
                                ;; A nil take means the stream was closed: shut
                                ;; the producer down and let the loop end.
                                :else (do
                                        (.close ^Producer producer)
                                        ;; Fix: the format string was missing its %s, so
                                        ;; the config was silently dropped from this log
                                        ;; line (cf. the consumer's matching message).
                                        (log/infof "stopped kafka producer: %s"
                                                   (select-keys kafka-config ["bootstrap.servers" "group.id"])))))))]
     {:producer producer
      :messages messages
      :process process})))
;; Kafka transport: wires a real consumer/producer pair into the
;; test-machine. The producer is only created after the consumer loop has
;; signalled that it is started; the exit-hooks tear both down in order
;; (close the producer's message stream first, then stop the consumer loop
;; and wait for both processes to finish).
(deftransport :kafka
  [{:keys [config topics]}]
  (let [serdes (serde-map topics)
        test-consumer (consumer config topics (get serdes :deserializers))
        ;; Blocks on the :started? promise so no writes can happen before
        ;; the consumer is polling.
        test-producer (when @(:started? test-consumer)
                        (producer config topics (get serdes :serializers)))]
    {:consumer test-consumer
     :producer test-producer
     :serdes serdes
     :topics topics
     :exit-hooks [(fn []
                    (s/close! (:messages test-producer)))
                  (fn []
                    (reset! (:continue? test-consumer) false)
                    @(:process test-consumer)
                    @(:process test-producer))]}))