Add Iceberg connector smoke tests on Google Cloud
alexjo2144 authored and ebyhr committed Sep 2, 2022
1 parent f027041 commit 30ee010
Showing 5 changed files with 318 additions and 5 deletions.
10 changes: 7 additions & 3 deletions .github/workflows/ci.yml
@@ -527,15 +527,19 @@ jobs:
if: matrix.modules == 'plugin/trino-bigquery' && env.BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY != ''
run: |
$MAVEN test ${MAVEN_TEST} -pl :trino-bigquery -Pcloud-tests-case-insensitive-mapping -Dbigquery.credentials-key="${BIGQUERY_CASE_INSENSITIVE_CREDENTIALS_KEY}"
- name: Iceberg Glue Catalog Tests
- name: Iceberg Cloud Tests
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESSKEY }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRETKEY }}
AWS_REGION: us-east-2
S3_BUCKET: presto-ci-test
if: contains(matrix.modules, 'plugin/trino-iceberg') && (env.AWS_ACCESS_KEY_ID != '' || env.AWS_SECRET_ACCESS_KEY != '')
GCP_CREDENTIALS_KEY: ${{ secrets.GCP_CREDENTIALS_KEY }}
if: contains(matrix.modules, 'plugin/trino-iceberg') && (env.AWS_ACCESS_KEY_ID != '' || env.AWS_SECRET_ACCESS_KEY != '' || env.GCP_CREDENTIALS_KEY != '')
run: |
$MAVEN test ${MAVEN_TEST} -pl :trino-iceberg -P test-glue-catalog -Ds3.bucket=${S3_BUCKET}
$MAVEN test ${MAVEN_TEST} -pl :trino-iceberg -P cloud-tests \
-Ds3.bucket=${S3_BUCKET} \
-Dtesting.gcp-storage-bucket="trino-ci-test-us-east" \
-Dtesting.gcp-credentials-key="${GCP_CREDENTIALS_KEY}"
- name: Sanitize artifact name
if: always()
run: |
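The renamed "Iceberg Cloud Tests" step above hands the GCS bucket name and the GCP_CREDENTIALS_KEY secret to the Maven build as the testing.gcp-storage-bucket and testing.gcp-credentials-key system properties. The key is expected to be a base64-encoded service-account JSON document; as a rough illustration (the class and method names here are made up, not part of the commit), decoding such a key into a temporary file mirrors what the new smoke test below does at startup:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Base64;

public final class GcpCredentialsKeyExample
{
    private GcpCredentialsKeyExample() {}

    // Decode a base64-encoded service-account JSON key (the format expected in
    // the testing.gcp-credentials-key property) and write it to a temporary
    // file that is deleted when the JVM exits.
    public static Path writeKeyToTempFile(String base64EncodedKey)
            throws IOException
    {
        byte[] keyJson = Base64.getDecoder().decode(base64EncodedKey);
        Path keyFile = Files.createTempFile("gcp-credentials", ".json");
        keyFile.toFile().deleteOnExit();
        Files.write(keyFile, keyJson);
        return keyFile;
    }
}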
10 changes: 9 additions & 1 deletion plugin/trino-iceberg/pom.xml
@@ -196,6 +196,12 @@
</dependency>

<!-- used by tests but also needed transitively -->
<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-hadoop-toolkit</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>concurrent</artifactId>
@@ -412,6 +418,7 @@
<exclude>**/TestSharedGlueMetastore.java</exclude>
<exclude>**/TestIcebergGlueCatalogAccessOperations.java</exclude>
<exclude>**/TestIcebergGlueCatalogMaterializedViewTest.java</exclude>
<exclude>**/TestIcebergGcsConnectorSmokeTest.java</exclude>
</excludes>
</configuration>
</plugin>
@@ -420,7 +427,7 @@
</profile>

<profile>
<id>test-glue-catalog</id>
<id>cloud-tests</id>
<build>
<plugins>
<plugin>
@@ -433,6 +440,7 @@
<include>**/TestSharedGlueMetastore.java</include>
<include>**/TestIcebergGlueCatalogAccessOperations.java</include>
<include>**/TestIcebergGlueCatalogMaterializedViewTest.java</include>
<include>**/TestIcebergGcsConnectorSmokeTest.java</include>
</includes>
</configuration>
</plugin>
@@ -100,7 +100,7 @@ public void testHiddenPathColumn()
}

// Repeat test with invocationCount for better test coverage, since the tested aspect is inherently non-deterministic.
@Test(timeOut = 60_000, invocationCount = 4)
@Test(timeOut = 120_000, invocationCount = 4)
public void testDeleteRowsConcurrently()
throws Exception
{
170 changes: 170 additions & 0 deletions plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergGcsConnectorSmokeTest.java
@@ -0,0 +1,170 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.iceberg;

import com.google.common.collect.ImmutableMap;
import com.google.common.io.Resources;
import io.airlift.log.Logger;
import io.trino.hadoop.ConfigurationInstantiator;
import io.trino.plugin.hive.containers.HiveHadoop;
import io.trino.plugin.hive.gcs.GoogleGcsConfigurationInitializer;
import io.trino.plugin.hive.gcs.HiveGcsConfig;
import io.trino.testing.QueryRunner;
import io.trino.testing.TestingConnectorBehavior;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.testng.annotations.AfterClass;
import org.testng.annotations.Parameters;
import org.testng.annotations.Test;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.PosixFilePermissions;
import java.util.Base64;

import static io.trino.plugin.hive.containers.HiveHadoop.HIVE3_IMAGE;
import static io.trino.testing.sql.TestTable.randomTableSuffix;
import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;
import static org.apache.iceberg.FileFormat.ORC;

public class TestIcebergGcsConnectorSmokeTest
extends BaseIcebergConnectorSmokeTest
{
private static final Logger LOG = Logger.get(TestIcebergGcsConnectorSmokeTest.class);

private static final FileAttribute<?> READ_ONLY_PERMISSIONS = PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--"));
private final String schema;
private final String gcpStorageBucket;
private final Path gcpCredentialsFile;
private final HiveHadoop hiveHadoop;
private final FileSystem fileSystem;

@Parameters({"testing.gcp-storage-bucket", "testing.gcp-credentials-key"})
public TestIcebergGcsConnectorSmokeTest(String gcpStorageBucket, String gcpCredentialKey)
{
super(ORC);
this.schema = "test_iceberg_gcs_connector_smoke_test_" + randomTableSuffix();
this.gcpStorageBucket = requireNonNull(gcpStorageBucket, "gcpStorageBucket is null");

requireNonNull(gcpCredentialKey, "gcpCredentialKey is null");
InputStream jsonKey = new ByteArrayInputStream(Base64.getDecoder().decode(gcpCredentialKey));
try {
this.gcpCredentialsFile = Files.createTempFile("gcp-credentials", ".json", READ_ONLY_PERMISSIONS);
gcpCredentialsFile.toFile().deleteOnExit();
Files.write(gcpCredentialsFile, jsonKey.readAllBytes());

String gcpSpecificCoreSiteXmlContent = Resources.toString(Resources.getResource("hdp3.1-core-site.xml.gcs-template"), UTF_8)
.replace("%GCP_CREDENTIALS_FILE_PATH%", "/etc/hadoop/conf/gcp-credentials.json");

Path hadoopCoreSiteXmlTempFile = Files.createTempFile("core-site", ".xml", READ_ONLY_PERMISSIONS);
hadoopCoreSiteXmlTempFile.toFile().deleteOnExit();
Files.writeString(hadoopCoreSiteXmlTempFile, gcpSpecificCoreSiteXmlContent);

this.hiveHadoop = closeAfterClass(HiveHadoop.builder()
.withImage(HIVE3_IMAGE)
.withFilesToMount(ImmutableMap.of(
"/etc/hadoop/conf/core-site.xml", hadoopCoreSiteXmlTempFile.normalize().toAbsolutePath().toString(),
"/etc/hadoop/conf/gcp-credentials.json", gcpCredentialsFile.toAbsolutePath().toString()))
.build());
this.hiveHadoop.start();

HiveGcsConfig gcsConfig = new HiveGcsConfig().setJsonKeyFilePath(gcpCredentialsFile.toAbsolutePath().toString());
Configuration configuration = ConfigurationInstantiator.newEmptyConfiguration();
new GoogleGcsConfigurationInitializer(gcsConfig).initializeConfiguration(configuration);

this.fileSystem = FileSystem.newInstance(new URI(schemaUrl()), configuration);
}
catch (IOException e) {
throw new UncheckedIOException(e);
}
catch (URISyntaxException e) {
throw new RuntimeException(e);
}
}

@AfterClass(alwaysRun = true)
public void removeTestData()
{
if (fileSystem != null) {
try {
fileSystem.delete(new org.apache.hadoop.fs.Path(schemaUrl()), true);
}
catch (IOException e) {
// The GCS bucket should be configured to expire objects automatically. Clean up issues do not need to fail the test.
LOG.warn(e, "Failed to clean up GCS test directory: %s", schemaUrl());
}
}
}

@Override
protected boolean hasBehavior(TestingConnectorBehavior connectorBehavior)
{
if (connectorBehavior == TestingConnectorBehavior.SUPPORTS_RENAME_SCHEMA) {
// GCS tests use the Hive Metastore catalog which does not support renaming schemas
return false;
}

return super.hasBehavior(connectorBehavior);
}

@Override
protected QueryRunner createQueryRunner()
throws Exception
{
return IcebergQueryRunner.builder()
.setIcebergProperties(ImmutableMap.<String, String>builder()
.put("iceberg.catalog.type", "hive_metastore")
.put("hive.gcs.json-key-file-path", gcpCredentialsFile.toAbsolutePath().toString())
.put("hive.metastore.uri", "thrift://" + hiveHadoop.getHiveMetastoreEndpoint())
.put("iceberg.file-format", format.name())
.buildOrThrow())
.setSchemaInitializer(
SchemaInitializer.builder()
.withClonedTpchTables(REQUIRED_TPCH_TABLES)
.withSchemaName(schema)
.withSchemaProperties(ImmutableMap.of("location", "'" + schemaUrl() + "'"))
.build())
.build();
}

@Override
protected String createSchemaSql(String schema)
{
return format("CREATE SCHEMA %1$s WITH (location = '%2$s%1$s')", schema, schemaUrl());
}

private String schemaUrl()
{
return format("gs://%s/%s/", gcpStorageBucket, schema);
}

@Test
@Override
public void testRenameSchema()
{
String schemaName = getSession().getSchema().orElseThrow();
assertQueryFails(
format("ALTER SCHEMA %s RENAME TO %s", schemaName, schemaName + randomTableSuffix()),
"Hive metastore does not support renaming schemas");
}
}
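The two constructor arguments are supplied through TestNG's @Parameters binding: the CI job sets -Dtesting.gcp-storage-bucket and -Dtesting.gcp-credentials-key, and the Surefire TestNG provider makes system properties available as test parameters. As a hypothetical sketch (the bucket name and key value are placeholders), the same binding can be driven programmatically through the TestNG API:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.testng.TestNG;
import org.testng.xml.XmlClass;
import org.testng.xml.XmlSuite;
import org.testng.xml.XmlTest;

public final class RunGcsSmokeTest
{
    private RunGcsSmokeTest() {}

    public static void main(String[] args)
    {
        XmlSuite suite = new XmlSuite();
        suite.setName("iceberg-gcs-smoke");

        XmlTest test = new XmlTest(suite);
        test.setName("gcs");
        // Placeholder values; a real run needs an existing GCS bucket and a
        // base64-encoded service-account JSON key with access to it.
        Map<String, String> parameters = new HashMap<>();
        parameters.put("testing.gcp-storage-bucket", "my-test-bucket");
        parameters.put("testing.gcp-credentials-key", "<base64-encoded-json-key>");
        test.setParameters(parameters);
        test.setXmlClasses(new ArrayList<>(List.of(new XmlClass("io.trino.plugin.iceberg.TestIcebergGcsConnectorSmokeTest"))));

        TestNG testng = new TestNG();
        testng.setXmlSuites(new ArrayList<>(List.of(suite)));
        testng.run();
    }
}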
131 changes: 131 additions & 0 deletions plugin/trino-iceberg/src/test/resources/hdp3.1-core-site.xml.gcs-template
@@ -0,0 +1,131 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:9000</value>
</property>

<!-- Google Cloud Storage properties -->
<property>
<name>fs.gs.impl</name>
<value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
<description>The FileSystem for gs: (GCS) uris.</description>
</property>

<property>
<name>fs.AbstractFileSystem.gs.impl</name>
<value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
<description>
The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.
</description>
</property>

<property>
<name>google.cloud.auth.service.account.enable</name>
<value>true</value>
<description>
Whether to use a service account for GCS authorization. If an email and
keyfile are provided (see google.cloud.auth.service.account.email and
google.cloud.auth.service.account.keyfile), then that service account
will be used. Otherwise the connector will look to see if it is running on
a GCE VM with some level of GCS access in its service account scope, and
use that service account.
</description>
</property>

<property>
<name>google.cloud.auth.service.account.json.keyfile</name>
<value>%GCP_CREDENTIALS_FILE_PATH%</value>
<description>
The JSON key file of the service account used for GCS
access when google.cloud.auth.service.account.enable is true.
</description>
</property>

<!-- OOZIE proxy user setting -->
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>

<!-- HTTPFS proxy user setting -->
<property>
<name>hadoop.proxyuser.httpfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.groups</name>
<value>*</value>
</property>

<!-- Llama proxy user setting -->
<property>
<name>hadoop.proxyuser.llama.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.llama.groups</name>
<value>*</value>
</property>

<!-- Hue proxy user setting -->
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>

<!-- Mapred proxy user setting -->
<property>
<name>hadoop.proxyuser.mapred.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.mapred.groups</name>
<value>*</value>
</property>

<!-- Hive impersonation -->
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>*</value>
</property>

<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>

<!-- Hdfs impersonation -->
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>

<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>

</configuration>
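For comparison, the GCS-related entries in this template map directly onto a programmatic Hadoop Configuration. Below is a minimal sketch under the assumption that the gcs-connector and Hadoop client libraries are on the classpath; the smoke test itself builds an equivalent configuration through HiveGcsConfig and GoogleGcsConfigurationInitializer rather than setting the keys by hand, and the bucket and key-file path are placeholders:

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public final class GcsConfigurationExample
{
    private GcsConfigurationExample() {}

    public static FileSystem newGcsFileSystem(String bucket, String jsonKeyFilePath)
            throws IOException
    {
        // Start from an empty configuration and set the same keys as the
        // core-site template above.
        Configuration configuration = new Configuration(false);
        configuration.set("fs.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem");
        configuration.set("fs.AbstractFileSystem.gs.impl", "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS");
        configuration.setBoolean("google.cloud.auth.service.account.enable", true);
        configuration.set("google.cloud.auth.service.account.json.keyfile", jsonKeyFilePath);
        return FileSystem.newInstance(URI.create("gs://" + bucket + "/"), configuration);
    }
}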
