-
Notifications
You must be signed in to change notification settings - Fork 33
/
credentials_external_account.R
394 lines (354 loc) · 14.5 KB
/
credentials_external_account.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
#' Get a token for an external account
#'
#' @description
#' `r lifecycle::badge('experimental')`
#' Workload identity federation is a new (as of April 2021) keyless
#' authentication mechanism that allows applications running on a non-Google
#' Cloud platform, such as AWS, to access Google Cloud resources without using a
#' conventional service account token. This eliminates the dilemma of how to
#' safely manage service account credential files.
#'
#' Unlike service accounts, the configuration file for workload identity
#' federation contains no secrets. Instead, it holds non-sensitive metadata.
#' The external application obtains the needed sensitive data "on-the-fly" from
#' the running instance. The combined data is then used to obtain a so-called
#' subject token from the external identity provider, such as AWS. This is then
#' sent to Google's Security Token Service API, in exchange for a very
#' short-lived federated access token. Finally, the federated access token is
#' sent to Google's Service Account Credentials API, in exchange for a
#' short-lived GCP access token. This access token allows the external
#' application to impersonate a service account and inherit the permissions of
#' the service account to access GCP resources.
#'
#' This feature is still experimental in gargle and **currently only supports
#' AWS**. It also requires installation of the suggested packages
#' \pkg{aws.signature} and \pkg{aws.ec2metadata}. Workload identity federation
#' **can** be used with other platforms, such as Microsoft Azure or any
#' identity provider that supports OpenID Connect. If you would like gargle to
#' support this token flow for additional platforms, please [open an issue on
#' GitHub](https://github.com/r-lib/gargle/issues) and describe your use case.
#'
#' @inheritParams token_fetch
#' @param path JSON containing the workload identity configuration for the
#'   external account, in one of the forms supported for the `txt` argument of
#'   [jsonlite::fromJSON()] (probably, a file path, although it could be a JSON
#'   string). The instructions for generating this configuration are given at
#'   [Configuring workload identity federation](https://cloud.google.com/iam/docs/configuring-workload-identity-federation).
#'
#'   Note that external account tokens are a natural fit for use as Application
#'   Default Credentials, so consider storing the configuration file in one of
#'   the standard locations consulted for ADC, instead of providing `path`
#'   explicitly. See [credentials_app_default()] for more.
#'
#' @seealso There is substantial setup necessary, both on the GCP and AWS side,
#'   to use this authentication method. These two links provide, respectively,
#'   a high-level overview and step-by-step instructions.
#'   * <https://cloud.google.com/blog/products/identity-security/enable-keyless-access-to-gcp-with-workload-identity-federation/>
#'   * <https://cloud.google.com/iam/docs/configuring-workload-identity-federation>
#' @return A [WifToken()] or `NULL`.
#' @family credential functions
#' @export
#' @examples
#' \dontrun{
#' credentials_external_account()
#' }
credentials_external_account <- function(
    scopes = "https://www.googleapis.com/auth/cloud-platform",
    path = "",
    ...) {
  gargle_debug("trying {.fun credentials_external_account}")

  # Bail out quietly unless we appear to be on EC2 and scopes were requested.
  # The EC2 probe runs first so that any debug output it emits is not skipped.
  if (!detect_aws_ec2() || is.null(scopes)) {
    return(NULL)
  }

  token <- oauth_external_token(
    path = path,
    scopes = normalize_scopes(add_email_scope(scopes))
  )

  # A usable token carries a non-empty access token; anything else (including
  # a NULL token) is reported as "no credentials".
  access_token <- token$credentials$access_token
  has_access_token <- !is.null(access_token) && nzchar(access_token)
  if (!has_access_token) {
    return(NULL)
  }

  # NB: the local must stay named `token`; the message interpolates it.
  gargle_debug("service account email: {.email {token_email(token)}}")
  token
}
#' Generate OAuth token for an external account
#'
#' Reads the external account configuration and, if it describes an external
#' account, constructs a [WifToken()] from it. Returns `NULL` when the JSON has
#' a different `type`.
#'
#' @inheritParams credentials_external_account
#'
#' @keywords internal
#' @export
oauth_external_token <- function(
    path = "",
    scopes = "https://www.googleapis.com/auth/cloud-platform") {
  config <- jsonlite::fromJSON(path, simplifyVector = FALSE)

  is_external_account <- identical(config[["type"]], "external_account")
  if (!is_external_account) {
    gargle_debug("JSON does not appear to represent an external account")
    return(NULL)
  }

  # `as_header = TRUE` is the most pragmatic way to get super$sign() to work;
  # a custom sign method would need httr's unexported request() or
  # build_request().
  WifToken$new(
    params = c(
      list(scopes = scopes),
      config,
      as_header = TRUE
    )
  )
}
#' Token for use with workload identity federation
#'
#' Not intended for direct use. See [credentials_external_account()] instead.
#'
#' @keywords internal
#' @export
WifToken <- R6::R6Class(
  "WifToken",
  inherit = httr::Token2.0,
  public = list(
    #' @description Get a token via workload identity federation
    #' @param params A list of parameters for `init_oauth_external_account()`.
    #' @return A WifToken.
    initialize = function(params = list()) {
      gargle_debug("WifToken initialize")
      # TODO: any desired validity checks on contents of params
      # NOTE: scopes are deliberately NOT collapsed into one space-delimited
      # string here. The final token exchange with
      # https://cloud.google.com/iam/docs/reference/credentials/rest/v1/projects.serviceAccounts/generateAccessToken
      # takes scopes as an **array**.
      params[["scope"]] <- params[["scopes"]]
      self$params <- params
      self$init_credentials()
    },

    #' @description Enact the actual token exchange for workload identity
    #'   federation.
    init_credentials = function() {
      gargle_debug("WifToken init_credentials")
      creds <- init_oauth_external_account(params = self$params)
      # The serviceAccounts.generateAccessToken method of Google's Service
      # Account Credentials API answers in camelCase, e.g.
      #   "accessToken":"ya29.c.KsY..."
      #   "expireTime":"2021-06-01T18:01:06Z"
      # whereas the usual token shape is snake_case, e.g.
      #   "access_token": "ya29.a0A..."
      #   "expires_in": 3599
      # so rewrite the field names to snake_case before storing them.
      names(creds) <- gsub(
        "([a-z0-9])([A-Z])", "\\1_\\L\\2",
        names(creds),
        perl = TRUE
      )
      self$credentials <- creds
      self
    },

    #' @description Refreshes the token, which means re-doing the entire token
    #'   flow in this case.
    refresh = function() {
      gargle_debug("WifToken refresh")
      # This is not a true refresh, but httr currently more or less requires
      # the method to exist. As a consequence, some uses of $refresh() are not
      # really appropriate for a WifToken. Example: token_userinfo(x) on a
      # WifToken lacking the appropriate scope fails with 401, and httr tries
      # to "fix" that by refreshing, which cannot help. That particular
      # phenomenon is now prevented in token_userinfo().
      self$init_credentials()
    },

    #' @description Format a [WifToken()].
    #' @param ... Not used.
    format = function(...) {
      # NB: the local must stay named `x`; the glue template refers to it.
      x <- list(
        scopes = commapse(base_scope(self$params$scope)),
        credentials = commapse(names(self$credentials))
      )
      heading <- cli::cli_format_method(
        cli::cli_h1("<WifToken (via {.pkg gargle})>")
      )
      c(heading, glue("{fr(names(x))}: {fl(x)}"))
    },

    #' @description Print a [WifToken()].
    #' @param ... Not used.
    print = function(...) {
      # The parent class has its own print method, so providing only a format
      # method would not be enough.
      cli::cat_line(self$format())
    },

    #' @description Placeholder implementation of required method. Returns `TRUE`.
    can_refresh = function() {
      # TODO: see the remarks in $refresh() about the ambivalent notion of
      # "refresh" in this flow
      TRUE
    },

    # TODO: are cache and load_from_cache really required?
    # alternatively, what if calling them threw an error?
    #' @description Placeholder implementation of required method. Returns self.
    cache = function() {
      self
    },

    #' @description Placeholder implementation of required method. Returns self.
    load_from_cache = function() {
      self
    },

    # TODO: are these really required?
    #' @description Placeholder implementation of required method.
    validate = function() NULL,

    #' @description Placeholder implementation of required method.
    revoke = function() NULL
  )
)
# Report whether the current session appears to run on an AWS EC2 instance.
# The check is delegated to aws.ec2metadata; when that package is not
# installed, a debug message is emitted and FALSE is returned.
#
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/identify_ec2_instances.html
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html
detect_aws_ec2 <- function() {
  if (!is_installed("aws.ec2metadata")) {
    gargle_debug("
{.pkg aws.ec2metadata} not installed; can't detect whether running on \\
EC2 instance")
    return(FALSE)
  }
  aws.ec2metadata::is_ec2()
}
# Run the complete workload identity federation exchange described by
# `params` (the parsed external account configuration plus `scope`):
#   1. build the AWS subject token,
#   2. serialize it and trade it for a federated access token,
#   3. trade that for the impersonated service account's access token.
# Aborts when the credential source is not AWS (`environment_id` "aws1").
# Returns whatever fetch_wif_access_token() returns.
init_oauth_external_account <- function(params) {
  source_info <- params$credential_source

  is_aws <- identical(source_info$environment_id, "aws1")
  if (!is_aws) {
    gargle_abort("
{.pkg gargle}'s workload identity federation flow only supports AWS at \\
this time.")
  }

  raw_subject_token <- aws_subject_token(
    credential_source = source_info,
    audience = params$audience
  )
  federated_access_token <- fetch_federated_access_token(
    params = params,
    subject_token = serialize_subject_token(raw_subject_token)
  )

  fetch_wif_access_token(
    federated_access_token,
    impersonation_url = params[["service_account_impersonation_url"]],
    scope = params[["scope"]]
  )
}
# For AWS, the subject token isn't really a token, but rather the instructions
# necessary to get a token:
#
# From https://cloud.google.com/iam/docs/access-resources-aws#exchange-token
#
# "The GetCallerIdentity token contains the information that you would normally
# include in a request to the AWS GetCallerIdentity() method, as well as the
# signature that you would normally generate for the request."
#
# Also scroll down here, to see the AWS-specific content
# https://cloud.google.com/iam/docs/reference/sts/rest/v1/TopLevel/token
#
# Arguments:
#   credential_source  The `credential_source` element of the external account
#                      configuration. Its `regional_cred_verification_url` is
#                      used as a glue template into which `region` is
#                      interpolated.
#   audience           Sent (and signed) as the `x-goog-cloud-target-resource`
#                      header.
#
# Returns a list with elements `url`, `method` and `headers`, where `headers`
# is a named list holding the Authorization header plus the signed headers.
#
# Aborts if aws.ec2metadata or aws.signature is not installed.
aws_subject_token <- function(credential_source, audience) {
  if (!is_installed(c("aws.ec2metadata", "aws.signature"))) {
    gargle_abort("
Packages {.pkg aws.ec2metadata} and {.pkg aws.signature} must be \\
installed in order to use workload identity federation on AWS.")
  }

  region <- aws.ec2metadata::instance_document()$region
  regional_cred_verification_url <- glue(
    credential_source[["regional_cred_verification_url"]],
    region = region
  )
  parsed_url <- httr::parse_url(regional_cred_verification_url)

  # Read the clock exactly once. The same timestamp must appear in the
  # x-amz-date header and be used by signature_v4_auth() when it derives the
  # signature; letting signature_v4_auth() fall back to its own `datetime`
  # default would mean a second clock read, and the two values could straddle
  # a second boundary and disagree.
  request_time <- format(Sys.time(), "%Y%m%dT%H%M%SZ", tz = "UTC")

  headers_orig <- list(
    host = parsed_url$hostname,
    # for some reason, x-amz-date is not included as a signed header unless it
    # is provided explicitly; in contrast, the session token IS automatically
    # included if it exists, which it should
    `x-amz-date` = request_time,
    `x-goog-cloud-target-resource` = audience
  )

  verb <- "POST"
  # https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html
  signed <- aws.signature::signature_v4_auth(
    datetime = request_time,
    region = region,
    service = "sts",
    verb = verb,
    action = "/",
    query_args = parsed_url$query,
    canonical_headers = headers_orig,
    request_body = ""
  )

  # unfortunately, the headers actually used to make the canonical request are
  # not returned in the signed object, so we dig them out of the canonical
  # request
  req_parts <- strsplit(signed[["CanonicalRequest"]], split = "\n")[[1]]
  header_value <- function(header_name) {
    pattern <- paste0("^", header_name, ":")
    matched <- grep(pattern, req_parts, value = TRUE)
    sub(pattern, "", matched)
  }
  headers <- list(
    host = header_value("host"),
    `x-amz-date` = header_value("x-amz-date"),
    `x-amz-security-token` = header_value("x-amz-security-token"),
    `x-goog-cloud-target-resource` = header_value("x-goog-cloud-target-resource")
  )
  # A header that was not part of the canonical request (e.g. there is no
  # session token) comes back as character(0). Omit it rather than forwarding
  # a header that has no value.
  headers <- headers[lengths(headers) > 0]

  list(
    url = regional_cred_verification_url,
    method = verb,
    headers = c(
      Authorization = signed$SignatureHeader,
      headers
    )
  )
}
# Turn the subject token produced by aws_subject_token() into the string form
# sent to the GCP STS endpoint: a JSON document, URL-encoded.
#
# `x` is a list with a named-list element `headers`; the other elements are
# serialized as they are.
serialize_subject_token <- function(x) {
  # The GCP STS endpoint expects the headers to be formatted as:
  # [
  #   {key: 'Authorization', value: '...'},
  #   {key: 'x-amz-date', value: '...'},
  #   ...
  # ]
  # even though the headers were formatted differently, i.e. in the usual way,
  # when we generated the V4 signature.
  # imap() comes from a purrr compat file, so it must be given an actual
  # function rather than a formula.
  as_key_value <- function(val, nm) list(key = nm, value = val)
  x$headers <- unname(imap(x$headers, as_key_value))

  # The GCP STS endpoint expects the prepared request to be serialized as a
  # JSON string, which is then URL-encoded.
  request_json <- jsonlite::toJSON(x, auto_unbox = TRUE)
  utils::URLencode(request_json, reserved = TRUE)
}
# Exchange a serialized subject token for a federated access token by POSTing
# a token-exchange request to `params$token_url`. Returns the processed
# response.
#
# https://datatracker.ietf.org/doc/html/rfc8693
# https://cloud.google.com/iam/docs/reference/sts/rest/v1/TopLevel/token
# https://cloud.google.com/iam/docs/reference/credentials/rest/v1/projects.serviceAccounts/generateAccessToken#authorization-scopes
fetch_federated_access_token <- function(params, subject_token) {
  exchange_body <- list(
    audience = params[["audience"]],
    grantType = "urn:ietf:params:oauth:grant-type:token-exchange",
    requestedTokenType = "urn:ietf:params:oauth:token-type:access_token",
    # This request must carry one of these scopes:
    #   https://www.googleapis.com/auth/cloud-platform
    #   https://www.googleapis.com/auth/iam
    # The iam scope is hard-wired because it is the narrower of the two
    # (least privilege principle).
    scope = "https://www.googleapis.com/auth/iam",
    subjectTokenType = params[["subject_token_type"]],
    subjectToken = subject_token
  )
  req <- list(
    method = "POST",
    url = params$token_url,
    body = exchange_body
  )

  # rfc 8693 says to encode as "application/x-www-form-urlencoded"
  response_process(request_make(req, encode = "form"))
}
# Exchange a federated access token for a service account access token by
# POSTing to the service account impersonation URL, authorized with the
# federated token as a Bearer header. Returns the processed response.
#
# https://cloud.google.com/iam/docs/creating-short-lived-service-account-credentials#sa-credentials-oauth
fetch_wif_access_token <- function(
    federated_access_token,
    impersonation_url,
    scope = "https://www.googleapis.com/auth/cloud-platform") {
  bearer <- paste("Bearer", federated_access_token$access_token)
  auth_header <- httr::add_headers(Authorization = bearer)

  req <- list(
    method = "POST",
    url = impersonation_url,
    # https://cloud.google.com/iam/docs/reference/credentials/rest/v1/projects.serviceAccounts/generateAccessToken
    # takes scope as an **array**, not a space delimited string
    body = list(scope = scope),
    token = auth_header
  )

  response_process(request_make(req))
}