/
main.go
322 lines (276 loc) · 9.58 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
// Given a slice of FQDNs to resolve and the expected minimum number of
// addresses for each, throw errors if the resolution fails or the count is 0.
// Log a target-specific alarm if the number of addresses is smaller than
// expected.
// If an nslookup fails completely, error the lambda and log an event.
// If it succeeds but the count is wrong, log an alarm event.
// Turn on debug logging with "DEBUG" = "TRUE".
// Turn on random errors with "RANDOM_FAILURES" = "TRUE".
// Override the lookup request with LOOKUPS = [valid JSON array of jobs].
// ex. job = `[{"Target": "www.google.com", "ExpectedResponses": 1}]`
// Event reserved metadata
// imprivata_event_type: TEST_RESULT
// devops maintains the test AND the resource under test
// imprivata_event_audience: DEVOPS
// imprivata_event_severity:
// - 1: wake someone up
// - 2: get this in front of someone next business day
// - 3: informational: lives and dies in a log archive
// - 4: debug/diag: debug logging or the output of a diagnostic request
package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"log"
"math/rand"
"net"
"os"
"strings"
"time"
"github.com/aws/aws-lambda-go/events"
runtime "github.com/aws/aws-lambda-go/lambda"
"github.com/aws/aws-lambda-go/lambdacontext"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/lambda"
)
// client is a package-level AWS Lambda service client, built from a new SDK
// session when the package is initialized.
// NOTE(review): no code visible in this file references client — confirm it
// is still needed.
// NOTE(review): session.New appears to be deprecated in aws-sdk-go in favor
// of session.NewSession — confirm against the SDK docs before changing.
var client = lambda.New(session.New())
// lookupRequest is one lookup job. The LOOKUPS environment variable is decoded
// into a slice of these by lookups().
type lookupRequest struct {
// Target is the host name handed to net.LookupHost.
Target string `json:"target"`
// ExpectedResponses is the minimum number of addresses the lookup must
// return; fewer than this raises the too-few-addresses alarm.
ExpectedResponses int `json:"expectedResponses"`
}
// lookupResult holds the outcome of one successful net.LookupHost call. It is
// built in executeLookups and dumped as JSON when debug mode is on.
type lookupResult struct {
// NumberOfAddresses is len(Responses).
NumberOfAddresses int `json:"numberOfAddresses"`
// Responses are the addresses returned by the lookup.
Responses []string `json:"responses"`
}
// lookups validates the LOOKUPS environment variable and decodes it into the
// list of lookup jobs. LOOKUPS must hold a JSON array of lookupRequest
// objects, for example:
//
//	[{"target": "www.google.com", "expectedResponses": 1}]
//
// It returns the jobs, a human-readable status message, and an error.
//
// When LOOKUPS is unset or empty, the "unset" alarm is emitted; when it cannot
// be parsed as JSON, the "bad JSON" alarm is emitted. In both failure cases the
// built-in default job is returned alongside a non-nil error.
func lookups() ([]lookupRequest, string, error) {
	const defaultJob = `[{"Target": "www.google.com", "ExpectedResponses": 1}]`

	// fallback decodes the built-in default job that accompanies an error.
	fallback := func() []lookupRequest {
		var d []lookupRequest
		// defaultJob is a constant, well-formed JSON array, so decoding it
		// cannot fail at runtime; the error is deliberately discarded.
		_ = json.Unmarshal([]byte(defaultJob), &d)
		return d
	}

	lv := os.Getenv("LOOKUPS")
	// os.Getenv returns "" for both an empty and an unset variable.
	if lv == "" {
		// Emit the alarm dedicated to the unset case rather than the
		// "unable to parse JSON" alarm, which would mislead the reader.
		alarmUnsetLookupVar()
		msg := "required LOOKUPS var is unset"
		return fallback(), msg, errors.New(msg)
	}

	var r []lookupRequest
	if err := json.Unmarshal([]byte(lv), &r); err != nil {
		msg, _ := alarmBadLookupVar()
		return fallback(), msg, err
	}
	return r, "Got valid env LOOKUPS variable setting", nil
}
// called when the value of LOOKUPS can't be parsed as JSON
// audience is DEVOPS becuase they maintain the lambda config (terraform)
// Severity is 2 because it's a blindspot on a stable test
// not an actual problem with the test subject
func alarmBadLookupVar() (string, error) {
alarmData := make(map[string]string)
//set audience
alarmData["imprivata_event_audience"] = "DEVOPS"
// alarm message
msg := fmt.Sprintf("Bad LOOKUPS value. Unable to parse JSON")
alarmData["imprivata_event_message"] = msg
emitStructuredEvent(alarmData, 2)
return msg, errors.New(msg)
}
// called when the required var LOOKUPS is not set
// audience is DEVOPS becuase they maintain the lambda config (terraform)
// Severity is 2 because it's a blindspot on a stable test
// not an actual problem with the test subject
func alarmUnsetLookupVar() (string, error) {
alarmData := make(map[string]string)
//set audience
alarmData["imprivata_event_audience"] = "DEVOPS"
// alarm detail
msg := fmt.Sprintf("Bad LOOKUPS value. The required varaible is unset")
alarmData["imprivata_event_message"] = msg
emitStructuredEvent(alarmData, 2)
return msg, errors.New(msg)
}
// alarmHostLookupFailed emits the alarm event for a single host whose lookup
// failed; h is the host name that could not be resolved.
//
// Audience is DEVOPS because they maintain the lambda config (terraform).
// Severity is 2 because it is a blind spot on a stable test, not an actual
// problem with the test subject.
//
// NOTE: this is a generic lookup failure and it could be a real problem, but
// it has been used for a long time without any failures. Other generic tests
// (tcp connection test from pingdom, etc.) would be better suited than a
// custom lambda.
//
// It returns the alarm message and an error carrying the same message.
func alarmHostLookupFailed(h string) (string, error) {
	text := fmt.Sprintf("Lookup failed for: %s", h)
	event := map[string]string{
		"imprivata_event_audience": "DEVOPS",
		"imprivata_event_message":  text,
	}
	emitStructuredEvent(event, 2)
	return text, errors.New(text)
}
// alarmTooFewAddresses emits the alarm event used when a lookup works but
// returns fewer addresses than required.
//
// addr is the looked-up host, expected the minimum number of addresses
// required, and actual the number of addresses returned.
//
// Audience is DEVOPS because they maintain the DNS config under test. Severity
// is 1 because it breaks customers with FQDN based ACLs, causing intermittent
// cloud connection failures.
//
// It returns the alarm message and an error carrying the same message.
func alarmTooFewAddresses(addr string, expected int, actual int) (string, error) {
	// Report the real counts; the message previously always said
	// "expected 4. got 1" regardless of the arguments.
	msg := fmt.Sprintf("Too few addresses for %s. expected %d. got %d", addr, expected, actual)
	alarmData := map[string]string{
		"imprivata_event_audience": "DEVOPS",
		"imprivata_event_message":  msg,
	}
	emitStructuredEvent(alarmData, 1)
	return msg, errors.New(msg)
}
// goodLookupResult logs a successful lookup test as a severity-3 event.
// t is the target host, e the expected address count and a the actual
// address count.
func goodLookupResult(t string, e int, a int) {
	emitStructuredEvent(map[string]string{
		"target":               t,
		"expectedAddressCount": fmt.Sprintf("%d", e),
		"actualAddressCount":   fmt.Sprintf("%d", a),
	}, 3)
}
// failRandomly randomly selects one of the valid outcomes and logs
// accordingly. This is critical for generating all the possible output data
// in order to tune the metrics log matching.
//
// req is only used when the "real_execution" outcome is drawn, in which case
// the lookups are actually executed. Every simulated failure returns the
// message and error produced by the matching alarm function, so a simulated
// outcome is reported the same way as the real one.
func failRandomly(req []lookupRequest) (string, error) {
	failures := []string{
		"lookup_error",
		"too_few_addresses",
		"real_execution",
		"unset_lookup_var",
		"bad_json_lookup_var",
	}

	// A locally seeded generator replaces the deprecated global rand.Seed,
	// and nanosecond resolution avoids identical draws within one second.
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	switch failures[rng.Intn(len(failures))] {
	case "unset_lookup_var":
		return alarmUnsetLookupVar()
	case "bad_json_lookup_var":
		return alarmBadLookupVar()
	case "lookup_error":
		return alarmHostLookupFailed("fake_random_failure")
	case "too_few_addresses":
		// Return the alarm's own message and error (previously a
		// hand-copied message was returned with a nil error).
		return alarmTooFewAddresses("fake_random_failure", 4, 1)
	default:
		// "real_execution"
		return executeLookups(req)
	}
}
// emitStructuredEvent stamps the reserved event metadata onto eventData and
// writes the result to the log as a single JSON object.
//
// The caller's map is modified in place: the type is set to TEST_RESULT, the
// audience to DEVOPS (overwriting any audience already present), and the
// severity to the decimal form of severity. eventData must be non-nil.
func emitStructuredEvent(eventData map[string]string, severity int) {
	eventData["imprivata_event_severity"] = fmt.Sprintf("%d", severity)
	eventData["imprivata_event_audience"] = "DEVOPS"
	eventData["imprivata_event_type"] = "TEST_RESULT"

	// The marshal error is discarded, as before.
	encoded, _ := json.Marshal(eventData)
	log.Println(string(encoded))
}
// executeLookups resolves every target in req with net.LookupHost and emits
// one structured event per outcome: a lookup-failure alarm, a too-few-addresses
// alarm, or a success summary.
//
// All targets are always checked; a failure on one target never prevents the
// remaining targets from being tested, so no severity-1 condition is missed.
//
// It returns a status message and an error:
//   - ("job status: success", nil) when every target passed;
//   - the message and error of the first too-few-addresses alarm, if any;
//   - otherwise a generic message and the error of the most recent failed
//     lookup.
func executeLookups(req []lookupRequest) (string, error) {
	var (
		testOut    = "job status: success"
		testErrors error
		// tooFewSeen records that a severity-1 result has been captured;
		// it takes precedence over lookup failures in the return value.
		tooFewSeen bool
	)

	for _, target := range req {
		// Log the target before the attempt so a failed lookup can be
		// correlated with it. Marshalling a map[string]string is not
		// expected to fail, so the error is discarded.
		logMsg, _ := json.Marshal(map[string]string{"target": target.Target})
		log.Println(string(logMsg))

		addresses, err := net.LookupHost(target.Target)
		if err != nil {
			alarmHostLookupFailed(target.Target)
			if !tooFewSeen {
				testOut = "One or more lookups failed. see logs for details"
				testErrors = err
			}
			continue
		}

		res := lookupResult{Responses: addresses, NumberOfAddresses: len(addresses)}

		// Bad result: raise the alarm, remember it, and keep checking the
		// remaining targets instead of returning immediately.
		if res.NumberOfAddresses < target.ExpectedResponses {
			msg, alarmErr := alarmTooFewAddresses(target.Target, target.ExpectedResponses, res.NumberOfAddresses)
			if !tooFewSeen {
				testOut, testErrors, tooFewSeen = msg, alarmErr, true
			}
			continue
		}

		// In debug mode, dump the full result.
		if debugMode() {
			jsonString, _ := json.Marshal(res)
			log.Println(string(jsonString))
		}

		goodLookupResult(target.Target, target.ExpectedResponses, res.NumberOfAddresses)
	}
	return testOut, testErrors
}
// environmentMap reads all environment variables into a map keyed by variable
// name, which makes them easy to access and to dump to a JSON log.
//
// Each os.Environ entry is split on its first "=" only, so values that
// themselves contain "=" are preserved intact. An entry without any "=" is
// stored with an empty value instead of causing an index-out-of-range panic.
func environmentMap() map[string]string {
	environ := os.Environ()
	items := make(map[string]string, len(environ))
	for _, item := range environ {
		parts := strings.SplitN(item, "=", 2)
		if len(parts) < 2 {
			items[parts[0]] = ""
			continue
		}
		items[parts[0]] = parts[1]
	}
	return items
}
// debugLogging dumps context data when debug logging is enabled: the
// triggering event, every environment variable, and selected attributes of
// the invocation context (request ID, function name, deadline).
//
// ctx is the invocation context and event the CloudWatch event that
// triggered the function. Everything is written to the log; nothing is
// returned.
func debugLogging(ctx context.Context, event events.CloudWatchEvent) {
	// Log the event, or the reason it could not be encoded.
	if eventJSON, err := json.Marshal(event); err != nil {
		log.Printf("EVENT: unable to marshal event: %v", err)
	} else {
		log.Printf("EVENT: %s", eventJSON)
	}

	// Log the environment variables.
	// NOTE(review): this writes every environment variable to the log, which
	// may include credentials — confirm this is acceptable for the log
	// destination.
	emitStructuredEvent(environmentMap(), 4)

	ctxData := make(map[string]string, 3)

	// The Lambda metadata is only present when the context was created by
	// the Lambda runtime; guard against its absence rather than dereferencing
	// a nil pointer.
	requestID := ""
	if lc, ok := lambdacontext.FromContext(ctx); ok && lc != nil {
		requestID = lc.AwsRequestID
	}
	ctxData["REQUEST ID"] = requestID

	// Package-level variable exported by lambdacontext.
	ctxData["FUNCTION NAME"] = lambdacontext.FunctionName

	// When ctx carries no deadline this logs the zero time.
	deadline, _ := ctx.Deadline()
	ctxData["DEADLINE"] = deadline.String()

	// Log the collected context attributes.
	emitStructuredEvent(ctxData, 4)
}
// debugMode reports whether debug mode is on, i.e. whether the DEBUG
// environment variable equals "true" ignoring case.
func debugMode() bool {
	return strings.EqualFold(os.Getenv("DEBUG"), "true")
}
// failMode reports whether random-failure mode is on, i.e. whether the
// RANDOM_FAILURES environment variable equals "true" ignoring case.
func failMode() bool {
	return strings.EqualFold(os.Getenv("RANDOM_FAILURES"), "true")
}
// logVersion writes the function version to the log on execution, as a
// severity-3 structured event.
func logVersion() {
	const version = "0.1.9"
	emitStructuredEvent(map[string]string{"version": version}, 3)
}
// handleRequest is the custom handler and the entry point for the function.
//
// On every invocation it logs the version and, in debug mode, the context
// info. It then validates the LOOKUPS data: a validation failure is returned
// straight away. Otherwise it either fails randomly (when running in
// fail-mode) or executes the configured lookups, returning that call's
// status message and error.
func handleRequest(ctx context.Context, event events.CloudWatchEvent) (string, error) {
	logVersion()

	if debugMode() {
		debugLogging(ctx, event)
	}

	requests, status, err := lookups()
	switch {
	case err != nil:
		// Invalid or missing LOOKUPS: report it and stop.
		return status, err
	case failMode():
		return failRandomly(requests)
	default:
		return executeLookups(requests)
	}
}
// main registers handleRequest as the function handler by passing it to
// Start from the aws-lambda-go lambda package (imported here as runtime).
func main() {
runtime.Start(handleRequest)
}