Skip to content

Commit

Permalink
Add DECIMAL to VARCHAR partition/table coercion for hive
Browse files Browse the repository at this point in the history
Add conversion support for the case where a partition has `decimal` type
for a column and the table has `varchar` type for the same column.

The implementation is consistent with Hive except for truncation: Hive
truncates values that do not fit the target type, whereas this
implementation fails with an error instead.
  • Loading branch information
homar authored and findepi committed Sep 21, 2022
1 parent 7dfd4f7 commit 8fe8a91
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 5 deletions.
Expand Up @@ -78,6 +78,7 @@
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToDecimalCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToDoubleCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToRealCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToVarcharCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDoubleToDecimalCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createRealToDecimalCoercer;
import static io.trino.plugin.hive.util.HiveBucketing.getHiveBucket;
Expand Down Expand Up @@ -337,6 +338,9 @@ private static Optional<Function<Block, Block>> createCoercer(TypeManager typeMa
if (fromType instanceof DecimalType && toType == REAL) {
return Optional.of(createDecimalToRealCoercer((DecimalType) fromType));
}
if (fromType instanceof DecimalType && toType instanceof VarcharType) {
return Optional.of(createDecimalToVarcharCoercer((DecimalType) fromType, (VarcharType) toType));
}
if (fromType == DOUBLE && toType instanceof DecimalType) {
return Optional.of(createDoubleToDecimalCoercer((DecimalType) toType));
}
Expand Down
Expand Up @@ -14,15 +14,19 @@

package io.trino.plugin.hive.coercions;

import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.DecimalType;
import io.trino.spi.type.Decimals;
import io.trino.spi.type.DoubleType;
import io.trino.spi.type.Int128;
import io.trino.spi.type.RealType;
import io.trino.spi.type.VarcharType;

import java.util.function.Function;

import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS;
import static io.trino.spi.type.DecimalConversions.doubleToLongDecimal;
import static io.trino.spi.type.DecimalConversions.doubleToShortDecimal;
import static io.trino.spi.type.DecimalConversions.longDecimalToDouble;
Expand All @@ -38,6 +42,8 @@
import static io.trino.spi.type.Decimals.longTenToNth;
import static io.trino.spi.type.DoubleType.DOUBLE;
import static io.trino.spi.type.RealType.REAL;
import static java.lang.Math.min;
import static java.lang.String.format;

public final class DecimalCoercers
{
Expand Down Expand Up @@ -228,6 +234,60 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos
}
}

/**
 * Creates a block coercer that renders decimal values as varchar values.
 *
 * @param fromType the decimal type of the source blocks
 * @param toType the varchar type of the produced blocks
 * @return a coercer specialized for the short or long representation of {@code fromType}
 */
public static Function<Block, Block> createDecimalToVarcharCoercer(DecimalType fromType, VarcharType toType)
{
    // Short decimals are read with getLong and long decimals with getObject, hence two coercers
    return fromType.isShort()
            ? new ShortDecimalToVarcharCoercer(fromType, toType)
            : new LongDecimalToVarcharCoercer(fromType, toType);
}

/**
 * Coerces short decimal values (read with {@code getLong}) to their varchar representation.
 * <p>
 * A value whose textual representation is longer than the target varchar length is rejected
 * with a {@link TrinoException} rather than truncated.
 */
private static class ShortDecimalToVarcharCoercer
        extends TypeCoercer<DecimalType, VarcharType>
{
    // Maximum allowed length of the produced string; Integer.MAX_VALUE when the target type reports no length
    private final int lengthLimit;

    protected ShortDecimalToVarcharCoercer(DecimalType fromType, VarcharType toType)
    {
        super(fromType, toType);
        this.lengthLimit = toType.getLength().orElse(Integer.MAX_VALUE);
    }

    /**
     * Writes the decimal at {@code position} of {@code block} to {@code blockBuilder} as a string.
     *
     * @throws TrinoException with {@code INVALID_ARGUMENTS} when the string exceeds the varchar length
     */
    @Override
    protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position)
    {
        String stringValue = Decimals.toString(fromType.getLong(block, position), fromType.getScale());
        // Hive truncates digits (also before the decimal point), which can be perceived as a bug,
        // so fail instead of silently producing a different number
        if (stringValue.length() > lengthLimit) {
            throw new TrinoException(INVALID_ARGUMENTS, format("Decimal value %s representation exceeds varchar(%s) bounds", stringValue, lengthLimit));
        }
        // The guard above guarantees the value fits, so it is written unmodified
        toType.writeString(blockBuilder, stringValue);
    }
}

/**
 * Coerces long decimal values (read as {@link Int128} objects) to their varchar representation.
 * <p>
 * A value whose textual representation is longer than the target varchar length is rejected
 * with a {@link TrinoException} rather than truncated.
 */
private static class LongDecimalToVarcharCoercer
        extends TypeCoercer<DecimalType, VarcharType>
{
    // Maximum allowed length of the produced string; Integer.MAX_VALUE when the target type reports no length
    private final int lengthLimit;

    protected LongDecimalToVarcharCoercer(DecimalType fromType, VarcharType toType)
    {
        super(fromType, toType);
        this.lengthLimit = toType.getLength().orElse(Integer.MAX_VALUE);
    }

    /**
     * Writes the decimal at {@code position} of {@code block} to {@code blockBuilder} as a string.
     *
     * @throws TrinoException with {@code INVALID_ARGUMENTS} when the string exceeds the varchar length
     */
    @Override
    protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position)
    {
        String stringValue = Decimals.toString((Int128) fromType.getObject(block, position), fromType.getScale());
        // Hive truncates digits (also before the decimal point), which can be perceived as a bug,
        // so fail instead of silently producing a different number
        if (stringValue.length() > lengthLimit) {
            throw new TrinoException(INVALID_ARGUMENTS, format("Decimal value %s representation exceeds varchar(%s) bounds", stringValue, lengthLimit));
        }
        // The guard above guarantees the value fits, so it is written unmodified
        toType.writeString(blockBuilder, stringValue);
    }
}

public static Function<Block, Block> createDoubleToDecimalCoercer(DecimalType toType)
{
if (toType.isShort()) {
Expand Down
Expand Up @@ -61,7 +61,7 @@ private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType)
toHiveType.equals(HIVE_LONG);
}
if (toType instanceof VarcharType) {
return fromHiveType.equals(HIVE_BYTE) || fromHiveType.equals(HIVE_SHORT) || fromHiveType.equals(HIVE_INT) || fromHiveType.equals(HIVE_LONG);
return fromHiveType.equals(HIVE_BYTE) || fromHiveType.equals(HIVE_SHORT) || fromHiveType.equals(HIVE_INT) || fromHiveType.equals(HIVE_LONG) || fromType instanceof DecimalType;
}
if (fromHiveType.equals(HIVE_BYTE)) {
return toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG);
Expand Down
Expand Up @@ -127,10 +127,14 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui
" longdecimal_to_longdecimal DECIMAL(20,12)," +
//" float_to_decimal " + floatType + "," + // this coercion is not permitted in Hive 3. TODO test this on Hive < 3.
//" double_to_decimal DOUBLE," + // this coercion is not permitted in Hive 3. TODO test this on Hive < 3.
" decimal_to_float DECIMAL(10,5)," +
" decimal_to_double DECIMAL(10,5)," +
" varchar_to_bigger_varchar VARCHAR(3)," +
" varchar_to_smaller_varchar VARCHAR(3)" +
" decimal_to_float DECIMAL(10,5)," +
" decimal_to_double DECIMAL(10,5)," +
" short_decimal_to_varchar DECIMAL(10,5)," +
" long_decimal_to_varchar DECIMAL(20,12)," +
" short_decimal_to_bounded_varchar DECIMAL(10,5)," +
" long_decimal_to_bounded_varchar DECIMAL(20,12)," +
" varchar_to_bigger_varchar VARCHAR(3)," +
" varchar_to_smaller_varchar VARCHAR(3)" +
") " +
"PARTITIONED BY (id BIGINT) " +
rowFormat.map(s -> format("ROW FORMAT %s ", s)).orElse("") +
Expand Down Expand Up @@ -306,6 +310,10 @@ private void doTestHiveCoercion(HiveTableDefinition tableDefinition)
// "double_to_decimal",
"decimal_to_float",
"decimal_to_double",
"short_decimal_to_varchar",
"long_decimal_to_varchar",
"short_decimal_to_bounded_varchar",
"long_decimal_to_bounded_varchar",
"varchar_to_bigger_varchar",
"varchar_to_smaller_varchar",
"id");
Expand Down Expand Up @@ -349,6 +357,10 @@ protected void insertTableRows(String tableName, String floatToDoubleType)
//" DOUBLE '12345.12345', " +
" DECIMAL '12345.12345', " +
" DECIMAL '12345.12345', " +
" DECIMAL '12345.12345', " +
" DECIMAL '12345678.123456123456', " +
" DECIMAL '12345.12345', " +
" DECIMAL '12345678.123456123456', " +
" 'abc', " +
" 'abc', " +
" 1), " +
Expand All @@ -373,6 +385,10 @@ protected void insertTableRows(String tableName, String floatToDoubleType)
//" DOUBLE '-12345.12345', " +
" DECIMAL '-12345.12345', " +
" DECIMAL '-12345.12345', " +
" DECIMAL '-12345.12345', " +
" DECIMAL '-12345678.123456123456', " +
" DECIMAL '-12345.12345', " +
" DECIMAL '-12345678.123456123456', " +
" '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " +
" '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " +
" 1)",
Expand Down Expand Up @@ -480,6 +496,18 @@ protected Map<String, List<Object>> expectedValuesForEngineProvider(Engine engin
.put("decimal_to_double", Arrays.asList(
12345.12345,
-12345.12345))
.put("short_decimal_to_varchar", Arrays.asList(
"12345.12345",
"-12345.12345"))
.put("long_decimal_to_varchar", Arrays.asList(
"12345678.123456123456",
"-12345678.123456123456"))
.put("short_decimal_to_bounded_varchar", Arrays.asList(
"12345.12345",
"12345.12345"))
.put("long_decimal_to_bounded_varchar", Arrays.asList(
"12345678.123456123456",
"-12345678.123456123456"))
.put("varchar_to_bigger_varchar", Arrays.asList(
"abc",
"\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0"))
Expand Down Expand Up @@ -637,6 +665,10 @@ private void assertProperAlteredTableSchema(String tableName)
//row("double_to_decimal", "decimal(10,5)"),
row("decimal_to_float", floatType),
row("decimal_to_double", "double"),
row("short_decimal_to_varchar", "varchar"),
row("long_decimal_to_varchar", "varchar"),
row("short_decimal_to_bounded_varchar", "varchar(30)"),
row("long_decimal_to_bounded_varchar", "varchar(30)"),
row("varchar_to_bigger_varchar", "varchar(4)"),
row("varchar_to_smaller_varchar", "varchar(2)"),
row("id", "bigint"));
Expand Down Expand Up @@ -677,6 +709,10 @@ private void assertColumnTypes(
//.put("double_to_decimal", DECIMAL)
.put("decimal_to_float", floatType)
.put("decimal_to_double", DOUBLE)
.put("short_decimal_to_varchar", VARCHAR)
.put("long_decimal_to_varchar", VARCHAR)
.put("short_decimal_to_bounded_varchar", VARCHAR)
.put("long_decimal_to_bounded_varchar", VARCHAR)
.put("varchar_to_bigger_varchar", VARCHAR)
.put("varchar_to_smaller_varchar", VARCHAR)
.put("id", BIGINT)
Expand Down Expand Up @@ -710,6 +746,10 @@ private static void alterTableColumnTypes(String tableName)
//onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN double_to_decimal double_to_decimal DECIMAL(10,5)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN decimal_to_float decimal_to_float %s", tableName, floatType));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN decimal_to_double decimal_to_double double", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN short_decimal_to_varchar short_decimal_to_varchar string", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN long_decimal_to_varchar long_decimal_to_varchar string", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN short_decimal_to_bounded_varchar short_decimal_to_bounded_varchar varchar(30)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN long_decimal_to_bounded_varchar long_decimal_to_bounded_varchar varchar(30)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_bigger_varchar varchar_to_bigger_varchar varchar(4)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_smaller_varchar varchar_to_smaller_varchar varchar(2)", tableName));
}
Expand Down

0 comments on commit 8fe8a91

Please sign in to comment.