Skip to content

Commit

Permalink
[native] Add support for bucketed (but not partitioned) tables
Browse files Browse the repository at this point in the history
  • Loading branch information
aditi-pandit committed Jun 6, 2024
1 parent d899bc0 commit 635335b
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1180,10 +1180,6 @@ HivePrestoToVeloxConnector::toVeloxInsertTableHandle(
bool isPartitioned{false};
const auto inputColumns = toHiveColumns(
hiveOutputTableHandle->inputColumns, typeParser, isPartitioned);
VELOX_USER_CHECK(
hiveOutputTableHandle->bucketProperty == nullptr || isPartitioned,
"Bucketed table must be partitioned: {}",
toJsonString(*hiveOutputTableHandle));
return std::make_unique<velox::connector::hive::HiveInsertTableHandle>(
inputColumns,
toLocationHandle(hiveOutputTableHandle->locationHandle),
Expand All @@ -1208,10 +1204,6 @@ HivePrestoToVeloxConnector::toVeloxInsertTableHandle(
bool isPartitioned{false};
const auto inputColumns = toHiveColumns(
hiveInsertTableHandle->inputColumns, typeParser, isPartitioned);
VELOX_USER_CHECK(
hiveInsertTableHandle->bucketProperty == nullptr || isPartitioned,
"Bucketed table must be partitioned: {}",
toJsonString(*hiveInsertTableHandle));

const auto table = hiveInsertTableHandle->pageSinkMetadata.table;
VELOX_USER_CHECK_NOT_NULL(table, "Table must not be null for insert query");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.createLineitem;
import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.createNation;
import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.createOrders;
import static org.testng.Assert.assertFalse;

public abstract class AbstractTestNativeCtasQueries
extends AbstractTestQueryFramework
Expand Down Expand Up @@ -104,4 +105,42 @@ public void testCreateTableAsSelect()
assertQuery("SELECT * from analyze_test", "SELECT orderstatus FROM orders");
assertUpdate("DROP TABLE analyze_test");
}

@Test
public void testCreateTableAsSelectBucketedTable()
{
assertFalse(getQueryRunner().tableExists(getSession(), "nation_bucketed_ctas"));
assertUpdate(
"CREATE TABLE nation_bucketed_ctas(nationkey, name, comment, regionkey) " +
"WITH ( " +
" bucket_count = 10, bucketed_by = ARRAY['nationkey'], " +
" sorted_by = ARRAY['nationkey'] " +
") " +
"AS " +
"SELECT nationkey, name, comment, regionkey FROM nation",
"SELECT count(*) FROM nation");
assertTableColumnNames("nation_bucketed_ctas", "nationkey", "name", "comment", "regionkey");

assertQuery("SELECT * FROM nation_bucketed_ctas");
assertQuery("SELECT nationkey, sum(regionkey) FROM nation_bucketed_ctas GROUP BY nationkey");
// Check the number of distinct files for orders_bucketed_ctas is the same as the number of buckets.
assertQuery("SELECT count(distinct(\"$path\")) FROM nation_bucketed_ctas", "SELECT CAST(10 AS BIGINT)");
assertUpdate("DROP TABLE nation_bucketed_ctas");
assertFalse(getQueryRunner().tableExists(getSession(), "nation_bucketed_ctas"));

assertFalse(getQueryRunner().tableExists(getSession(), "empty_bucketed_table"));
assertUpdate("" +
"CREATE TABLE empty_bucketed_table " +
"WITH (" +
" bucketed_by = ARRAY[ 'orderkey' ], " +
" bucket_count = 10 " +
") " +
"AS " +
"SELECT orderkey, linenumber, quantity " +
"FROM lineitem " +
"WHERE orderkey < 0", 0);
assertQuery("SELECT count(*) FROM empty_bucketed_table", "SELECT CAST(0 AS BIGINT)");
assertUpdate("DROP TABLE empty_bucketed_table");
assertFalse(getQueryRunner().tableExists(getSession(), "empty_bucketed_table"));
}
}

0 comments on commit 635335b

Please sign in to comment.