Skip to content

Commit

Permalink
Add support for ABFS OAuth authentication
Browse files Browse the repository at this point in the history
  • Loading branch information
jirassimok authored and losipiuk committed Sep 17, 2020
1 parent 53c9d38 commit cc30d16
Show file tree
Hide file tree
Showing 9 changed files with 302 additions and 1 deletion.
13 changes: 13 additions & 0 deletions .github/workflows/ci.yml
Expand Up @@ -142,6 +142,19 @@ jobs:
source presto-product-tests/conf/product-tests-${{ matrix.config }}.sh &&
presto-hive-hadoop2/bin/run_hive_abfs_access_key_tests.sh
fi
# CI step: runs presto-hive-hadoop2/bin/run_hive_abfs_oauth_tests.sh with the Azure ABFS OAuth secrets exported as environment variables.
- name: Run Hive Azure ABFS OAuth Tests
if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
env:
ABFS_CONTAINER: ${{ secrets.AZURE_ABFS_CONTAINER }}
ABFS_ACCOUNT: ${{ secrets.AZURE_ABFS_ACCOUNT }}
ABFS_OAUTH_ENDPOINT: ${{ secrets.AZURE_ABFS_OAUTH_ENDPOINT }}
ABFS_OAUTH_CLIENTID: ${{ secrets.AZURE_ABFS_OAUTH_CLIENTID }}
ABFS_OAUTH_SECRET: ${{ secrets.AZURE_ABFS_OAUTH_SECRET }}
# The test script is only invoked when ABFS_CONTAINER is non-empty; otherwise the step is a no-op.
# NOTE(review): only ABFS_CONTAINER is checked here; the other four variables are assumed to be
# non-empty whenever it is - confirm, or extend the guard.
run: |
if [ -n "$ABFS_CONTAINER" ]; then
source presto-product-tests/conf/product-tests-${{ matrix.config }}.sh &&
presto-hive-hadoop2/bin/run_hive_abfs_oauth_tests.sh
fi
- name: Run Hive Azure WASB Tests
if: matrix.config != 'config-empty' # Hive 1.x does not support Azure storage
env:
Expand Down
50 changes: 50 additions & 0 deletions presto-hive-hadoop2/bin/run_hive_abfs_oauth_tests.sh
@@ -0,0 +1,50 @@
#!/usr/bin/env bash
#
# Runs the presto-hive-hadoop2 ABFS OAuth tests (Maven profile
# test-hive-hadoop2-abfs-oauth) against the Hadoop docker containers.
#
# Required environment variables:
#   ABFS_ACCOUNT, ABFS_CONTAINER,
#   ABFS_OAUTH_ENDPOINT, ABFS_OAUTH_CLIENTID, ABFS_OAUTH_SECRET
#
# When the Maven run is reached, the script exits with Maven's exit code
# after cleaning up the containers.
#
# NOTE(review): xtrace (-x) is enabled, so the expanded mvnw command line,
# including the OAuth secret, is written to the log - confirm that the
# environment running this script masks it.
set -euxo pipefail

. "${BASH_SOURCE%/*}/common.sh"

# With `set -e`, each `test -v` aborts the script when the variable is unset.
# A variable that is set but empty still passes this check.
test -v ABFS_ACCOUNT
test -v ABFS_CONTAINER
test -v ABFS_OAUTH_ENDPOINT
test -v ABFS_OAUTH_CLIENTID
test -v ABFS_OAUTH_SECRET

# Per-run directory name: timestamp plus the first 6 characters of a SHA-1 of a fresh UUID.
test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)"

cleanup_hadoop_docker_containers
start_hadoop_docker_containers

# insert Azure credentials
deploy_core_site_xml core-site.xml.abfs-oauth-template \
    ABFS_ACCOUNT ABFS_CONTAINER \
    ABFS_OAUTH_ENDPOINT ABFS_OAUTH_CLIENTID ABFS_OAUTH_SECRET

# restart hive-server2 to apply changes in core-site.xml
exec_in_hadoop_master_container supervisorctl restart hive-server2
retry check_hadoop

create_test_tables \
    "abfs://$ABFS_CONTAINER@$ABFS_ACCOUNT.dfs.core.windows.net/$test_directory"

stop_unnecessary_hadoop_services

# Quoted so that a project path containing spaces or glob characters is not split.
pushd "$PROJECT_ROOT"

# Temporarily disable errexit so that a failing test run still reaches the
# container cleanup below; the exit code is captured and re-raised at the end.
set +e
./mvnw -B -pl presto-hive-hadoop2 test -P test-hive-hadoop2-abfs-oauth \
    -DHADOOP_USER_NAME=hive \
    -Dhive.hadoop2.metastoreHost=localhost \
    -Dhive.hadoop2.metastorePort=9083 \
    -Dhive.hadoop2.databaseName=default \
    -Dtest.hive.azure.abfs.container="$ABFS_CONTAINER" \
    -Dtest.hive.azure.abfs.storage-account="$ABFS_ACCOUNT" \
    -Dtest.hive.azure.abfs.test-directory="$test_directory" \
    -Dtest.hive.azure.abfs.oauth.endpoint="$ABFS_OAUTH_ENDPOINT" \
    -Dtest.hive.azure.abfs.oauth.client-id="$ABFS_OAUTH_CLIENTID" \
    -Dtest.hive.azure.abfs.oauth.secret="$ABFS_OAUTH_SECRET"
EXIT_CODE=$?
set -e
popd

cleanup_hadoop_docker_containers

exit "${EXIT_CODE}"
60 changes: 60 additions & 0 deletions presto-hive-hadoop2/conf/files/core-site.xml.abfs-oauth-template
@@ -0,0 +1,60 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Hadoop core-site.xml template for the ABFS OAuth tests.
Values of the form %NAME% are placeholders; presumably they are replaced with the
corresponding environment variables by deploy_core_site_xml (see common.sh) - confirm.
-->
<configuration>
<!-- Hive impersonation -->
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>*</value>
</property>

<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>

<!-- Default file system: HDFS on the hadoop-master host -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:9000</value>
</property>


<!-- Azure ABFS: authenticate with OAuth instead of an account key -->
<property>
<name>fs.azure.account.auth.type</name>
<value>OAuth</value>
</property>

<!-- Token provider class used to obtain OAuth tokens from the client ID and secret below -->
<property>
<name>fs.azure.account.oauth.provider.type</name>
<value>org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider</value>
</property>

<!-- OAuth client ID (placeholder) -->
<property>
<name>fs.azure.account.oauth2.client.id</name>
<value>%ABFS_OAUTH_CLIENTID%</value>
</property>

<!-- OAuth client secret (placeholder) -->
<property>
<name>fs.azure.account.oauth2.client.secret</name>
<value>%ABFS_OAUTH_SECRET%</value>
</property>

<!-- OAuth client endpoint (placeholder) -->
<property>
<name>fs.azure.account.oauth2.client.endpoint</name>
<value>%ABFS_OAUTH_ENDPOINT%</value>
</property>
</configuration>
17 changes: 17 additions & 0 deletions presto-hive-hadoop2/pom.xml
Expand Up @@ -128,6 +128,7 @@
<exclude>**/TestHiveFileSystemS3SelectPushdown.java</exclude>
<exclude>**/TestHiveFileSystemWasb.java</exclude>
<exclude>**/TestHiveFileSystemAbfsAccessKey.java</exclude>
<exclude>**/TestHiveFileSystemAbfsOAuth.java</exclude>
<exclude>**/TestHiveFileSystemAdl.java</exclude>
<exclude>**/TestHiveAzure.java</exclude>
</excludes>
Expand Down Expand Up @@ -201,6 +202,22 @@
</plugins>
</build>
</profile>
<!-- Profile that restricts surefire to TestHiveFileSystemAbfsOAuth; selected with -P test-hive-hadoop2-abfs-oauth (as done by bin/run_hive_abfs_oauth_tests.sh). -->
<profile>
<id>test-hive-hadoop2-abfs-oauth</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<includes>
<include>**/TestHiveFileSystemAbfsOAuth.java</include>
</includes>
</configuration>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>test-hive-hadoop2-adl</id>
<build>
Expand Down
@@ -0,0 +1,64 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.prestosql.plugin.hive;

import io.prestosql.plugin.hive.azure.HiveAzureConfig;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Parameters;

/**
 * Runs the tests inherited from {@link AbstractTestHiveFileSystemAbfs} with a
 * {@link HiveAzureConfig} that carries ABFS OAuth client settings (endpoint,
 * client ID and secret) supplied as TestNG parameters.
 */
public class TestHiveFileSystemAbfsOAuth
        extends AbstractTestHiveFileSystemAbfs
{
    private String endpoint;
    private String clientId;
    private String secret;

    /**
     * Stores the OAuth settings and then delegates to the base-class setup.
     *
     * <p>Each OAuth value is passed through {@code checkParameter} (defined outside
     * this class) before being stored. The fields are assigned before calling
     * {@code super.setup}, presumably because the base setup uses
     * {@link #getConfig()} - confirm against the base class.
     *
     * @param host metastore host
     * @param port metastore port
     * @param databaseName database name
     * @param container ABFS container
     * @param account ABFS storage account
     * @param testDirectory test directory
     * @param clientEndpoint OAuth client endpoint
     * @param clientId OAuth client ID
     * @param clientSecret OAuth client secret
     */
    @Parameters({
            "hive.hadoop2.metastoreHost",
            "hive.hadoop2.metastorePort",
            "hive.hadoop2.databaseName",
            "test.hive.azure.abfs.container",
            "test.hive.azure.abfs.storage-account",
            "test.hive.azure.abfs.test-directory",
            "test.hive.azure.abfs.oauth.endpoint",
            "test.hive.azure.abfs.oauth.client-id",
            "test.hive.azure.abfs.oauth.secret"
    })
    @BeforeClass
    public void setup(
            String host,
            int port,
            String databaseName,
            String container,
            String account,
            String testDirectory,
            String clientEndpoint,
            String clientId,
            String clientSecret)
    {
        endpoint = checkParameter(clientEndpoint, "endpoint");
        // The parameter shadows the field, so the qualifier is required here.
        this.clientId = checkParameter(clientId, "client ID");
        secret = checkParameter(clientSecret, "secret");
        super.setup(host, port, databaseName, container, account, testDirectory);
    }

    /**
     * Builds the Azure configuration holding the three OAuth values captured in
     * {@link #setup}.
     *
     * @return a new {@link HiveAzureConfig} with endpoint, client ID and secret set
     */
    @Override
    protected HiveAzureConfig getConfig()
    {
        HiveAzureConfig azureConfig = new HiveAzureConfig();
        azureConfig.setAbfsOAuthClientEndpoint(endpoint);
        azureConfig.setAbfsOAuthClientId(clientId);
        azureConfig.setAbfsOAuthClientSecret(secret);
        return azureConfig;
    }
}
Expand Up @@ -27,6 +27,9 @@ public class HiveAzureConfig
private String adlClientId;
private String adlCredential;
private String adlRefreshUrl;
private String abfsOAuthClientEndpoint;
private String abfsOAuthClientId;
private String abfsOAuthClientSecret;

public Optional<String> getWasbStorageAccount()
{
Expand Down Expand Up @@ -118,4 +121,43 @@ public HiveAzureConfig setAdlRefreshUrl(String adlRefreshUrl)
this.adlRefreshUrl = adlRefreshUrl;
return this;
}

/**
 * Sets the ABFS OAuth client endpoint, bound to the
 * {@code hive.azure.abfs.oauth.endpoint} configuration property.
 *
 * @param endpoint the endpoint value to store
 * @return this config, to allow chained calls
 */
@ConfigSecuritySensitive
@Config("hive.azure.abfs.oauth.endpoint")
public HiveAzureConfig setAbfsOAuthClientEndpoint(String endpoint)
{
    this.abfsOAuthClientEndpoint = endpoint;
    return this;
}

/**
 * Returns the ABFS OAuth client endpoint.
 *
 * @return the stored endpoint, or an empty {@link Optional} when it is {@code null}
 */
public Optional<String> getAbfsOAuthClientEndpoint()
{
    return Optional.ofNullable(this.abfsOAuthClientEndpoint);
}

/**
 * Sets the ABFS OAuth client ID, bound to the
 * {@code hive.azure.abfs.oauth.client-id} configuration property.
 *
 * @param id the client ID to store
 * @return this config, to allow chained calls
 */
@ConfigSecuritySensitive
@Config("hive.azure.abfs.oauth.client-id")
public HiveAzureConfig setAbfsOAuthClientId(String id)
{
    this.abfsOAuthClientId = id;
    return this;
}

/**
 * Returns the ABFS OAuth client ID.
 *
 * @return the stored client ID, or an empty {@link Optional} when it is {@code null}
 */
public Optional<String> getAbfsOAuthClientId()
{
    return Optional.ofNullable(this.abfsOAuthClientId);
}

/**
 * Sets the ABFS OAuth client secret, bound to the
 * {@code hive.azure.abfs.oauth.secret} configuration property.
 *
 * @param secret the client secret to store
 * @return this config, to allow chained calls
 */
@ConfigSecuritySensitive
@Config("hive.azure.abfs.oauth.secret")
public HiveAzureConfig setAbfsOAuthClientSecret(String secret)
{
    this.abfsOAuthClientSecret = secret;
    return this;
}

/**
 * Returns the ABFS OAuth client secret.
 *
 * @return the stored secret, or an empty {@link Optional} when it is {@code null}
 */
public Optional<String> getAbfsOAuthClientSecret()
{
    return Optional.ofNullable(this.abfsOAuthClientSecret);
}
}
Expand Up @@ -35,6 +35,9 @@ public class PrestoAzureConfigurationInitializer
private final Optional<String> adlRefreshUrl;
private final Optional<String> abfsAccessKey;
private final Optional<String> abfsStorageAccount;
private final Optional<String> abfsOAuthClientEndpoint;
private final Optional<String> abfsOAuthClientId;
private final Optional<String> abfsOAuthClientSecret;

@Inject
public PrestoAzureConfigurationInitializer(HiveAzureConfig config)
Expand Down Expand Up @@ -63,6 +66,19 @@ public PrestoAzureConfigurationInitializer(HiveAzureConfig config)
adlClientId.isPresent() && adlCredential.isPresent() && adlRefreshUrl.isPresent(),
"If any of ADL client ID, credential, and refresh URL are set, all must be set");
}

this.abfsOAuthClientEndpoint = dropEmpty(config.getAbfsOAuthClientEndpoint());
this.abfsOAuthClientId = dropEmpty(config.getAbfsOAuthClientId());
this.abfsOAuthClientSecret = dropEmpty(config.getAbfsOAuthClientSecret());
if (abfsOAuthClientEndpoint.isPresent() || abfsOAuthClientSecret.isPresent() || abfsOAuthClientId.isPresent()) {
checkArgument(
abfsOAuthClientEndpoint.isPresent() && abfsOAuthClientId.isPresent() && abfsOAuthClientSecret.isPresent(),
"If any of ABFS OAuth2 Client endpoint, ID, and secret are set, all must be set.");
}

checkArgument(
!(abfsAccessKey.isPresent() && abfsOAuthClientSecret.isPresent()),
"Multiple ABFS authentication methods configured: access key and OAuth2");
}

@Override
Expand All @@ -76,6 +92,13 @@ public void initializeConfiguration(Configuration config)
config.set(format("fs.azure.account.key.%s.dfs.core.windows.net", abfsStorageAccount.get()), abfsAccessKey.get());
config.set("fs.abfs.impl", AzureBlobFileSystem.class.getName());
}
if (abfsOAuthClientEndpoint.isPresent() && abfsOAuthClientId.isPresent() && abfsOAuthClientSecret.isPresent()) {
config.set("fs.azure.account.auth.type", "OAuth");
config.set("fs.azure.account.oauth.provider.type", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider");
config.set("fs.azure.account.oauth2.client.endpoint", abfsOAuthClientEndpoint.get());
config.set("fs.azure.account.oauth2.client.id", abfsOAuthClientId.get());
config.set("fs.azure.account.oauth2.client.secret", abfsOAuthClientSecret.get());
}

if (adlClientId.isPresent() && adlCredential.isPresent() && adlRefreshUrl.isPresent()) {
config.set("fs.adl.oauth2.access.token.provider.type", "ClientCredential");
Expand Down
Expand Up @@ -48,6 +48,9 @@ public void testExplicitPropertyMappings()
.put("hive.azure.adl-client-id", "adlclientid")
.put("hive.azure.adl-credential", "adlcredential")
.put("hive.azure.adl-refresh-url", "adlrefreshurl")
.put("hive.azure.abfs.oauth.endpoint", "abfsoauthendpoint")
.put("hive.azure.abfs.oauth.client-id", "abfsoauthclientid")
.put("hive.azure.abfs.oauth.secret", "abfsoauthsecret")
.build();

HiveAzureConfig expected = new HiveAzureConfig()
Expand All @@ -57,7 +60,10 @@ public void testExplicitPropertyMappings()
.setAbfsAccessKey("abfssecret")
.setAdlClientId("adlclientid")
.setAdlCredential("adlcredential")
.setAdlRefreshUrl("adlrefreshurl");
.setAdlRefreshUrl("adlrefreshurl")
.setAbfsOAuthClientEndpoint("abfsoauthendpoint")
.setAbfsOAuthClientId("abfsoauthclientid")
.setAbfsOAuthClientSecret("abfsoauthsecret");

assertFullMapping(properties, expected);
}
Expand Down
Expand Up @@ -48,6 +48,26 @@ public void testAbfsAccessKey()
HiveAzureConfig::setAbfsStorageAccount);
}

/**
 * Passes the three ABFS OAuth setters (endpoint, client ID, secret) to
 * {@code testPropertyGroup} as one property group.
 * NOTE(review): presumably this verifies that the three properties must be set
 * all together or not at all - confirm against testPropertyGroup.
 */
@Test
public void testAbfsOAuth()
{
testPropertyGroup(
HiveAzureConfig::setAbfsOAuthClientEndpoint,
HiveAzureConfig::setAbfsOAuthClientId,
HiveAzureConfig::setAbfsOAuthClientSecret);
}

/**
 * Expects {@code testProperties} to throw when the ABFS access-key setters
 * (access key, storage account) and all three ABFS OAuth setters are applied
 * together.
 */
@Test
public void testExclusiveProperties()
{
assertThrows(() -> testProperties(
HiveAzureConfig::setAbfsAccessKey,
HiveAzureConfig::setAbfsStorageAccount,
HiveAzureConfig::setAbfsOAuthClientEndpoint,
HiveAzureConfig::setAbfsOAuthClientId,
HiveAzureConfig::setAbfsOAuthClientSecret));
}

@SafeVarargs
private static void testPropertyGroup(BiConsumer<HiveAzureConfig, String>... setters)
{
Expand All @@ -65,6 +85,12 @@ private static void testPropertyGroup(Set<BiConsumer<HiveAzureConfig, String>> s
}
}

/**
 * Varargs convenience overload: collects the given setters into a set and
 * delegates to {@code testProperties(Set)}.
 *
 * @param setters the config setters to pass on; {@link Set#of(Object[])} rejects
 *        duplicate or {@code null} elements
 */
@SafeVarargs
private static void testProperties(BiConsumer<HiveAzureConfig, String>... setters)
{
    Set<BiConsumer<HiveAzureConfig, String>> setterSet = Set.of(setters);
    testProperties(setterSet);
}

private static void testProperties(Set<BiConsumer<HiveAzureConfig, String>> setters)
{
var config = new HiveAzureConfig();
Expand Down

0 comments on commit cc30d16

Please sign in to comment.