Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 6 additions & 13 deletions pkg/sql/plan/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,20 +350,13 @@ func UpdateStatsInfo(info *InfoFromZoneMap, tableDef *plan.TableDef, s *pb.Stats
s.MinValMap[colName] = float64(ByteSliceToUint64(info.ColumnZMs[i].GetMinBuf()))
s.MaxValMap[colName] = float64(ByteSliceToUint64(info.ColumnZMs[i].GetMaxBuf()))
case types.T_decimal64:
// Fix: Use Decimal64ToFloat64 with proper scale to handle negative values correctly
// Direct cast to float64 treats negative values (stored as two's complement) as large positive numbers
scale := coldef.Typ.Scale
minDec := types.DecodeDecimal64(info.ColumnZMs[i].GetMinBuf())
maxDec := types.DecodeDecimal64(info.ColumnZMs[i].GetMaxBuf())
s.MinValMap[colName] = types.Decimal64ToFloat64(minDec, scale)
s.MaxValMap[colName] = types.Decimal64ToFloat64(maxDec, scale)
s.MinValMap[colName] = float64(types.DecodeDecimal64(info.ColumnZMs[i].GetMinBuf()))
s.MaxValMap[colName] = float64(types.DecodeDecimal64(info.ColumnZMs[i].GetMaxBuf()))
case types.T_decimal128:
// Fix: Use actual scale from column definition instead of hardcoded 0
scale := coldef.Typ.Scale
minDec := types.DecodeDecimal128(info.ColumnZMs[i].GetMinBuf())
maxDec := types.DecodeDecimal128(info.ColumnZMs[i].GetMaxBuf())
s.MinValMap[colName] = types.Decimal128ToFloat64(minDec, scale)
s.MaxValMap[colName] = types.Decimal128ToFloat64(maxDec, scale)
val := types.DecodeDecimal128(info.ColumnZMs[i].GetMinBuf())
s.MinValMap[colName] = float64(types.Decimal128ToFloat64(val, 0))
val = types.DecodeDecimal128(info.ColumnZMs[i].GetMaxBuf())
s.MaxValMap[colName] = float64(types.Decimal128ToFloat64(val, 0))
}

if info.ShuffleRanges[i] != nil {
Expand Down
246 changes: 0 additions & 246 deletions pkg/sql/plan/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,255 +18,9 @@ import (
"testing"

"github.com/matrixorigin/matrixone/pkg/catalog"
"github.com/matrixorigin/matrixone/pkg/container/types"
"github.com/matrixorigin/matrixone/pkg/pb/plan"
pb "github.com/matrixorigin/matrixone/pkg/pb/statsinfo"
index2 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/index"
"github.com/stretchr/testify/require"
)

// TestUpdateStatsInfo_Decimal64_NegativeValues runs UpdateStatsInfo over a
// DECIMAL(10, 2) zone map whose bounds are -123.45 and 456.78, then checks that
// the min/max written into the stats maps are correctly signed float64 values.
func TestUpdateStatsInfo_Decimal64_NegativeValues(t *testing.T) {
	const colName = "balance"
	const scale int32 = 2

	// Lower bound of the zone map: -123.45.
	lowDec, err := types.Decimal64FromFloat64(-123.45, 10, scale)
	require.NoError(t, err)

	// Upper bound of the zone map: 456.78.
	highDec, err := types.Decimal64FromFloat64(456.78, 10, scale)
	require.NoError(t, err)

	// Feed both encoded bounds into a fresh decimal64 zone map.
	zm := index2.NewZM(types.T_decimal64, scale)
	index2.UpdateZM(zm, types.EncodeDecimal64(&lowDec))
	index2.UpdateZM(zm, types.EncodeDecimal64(&highDec))

	// Destination stats object with every map pre-allocated.
	statsInfo := &pb.StatsInfo{
		MinValMap:       make(map[string]float64),
		MaxValMap:       make(map[string]float64),
		NdvMap:          make(map[string]float64),
		DataTypeMap:     make(map[string]uint64),
		NullCntMap:      make(map[string]uint64),
		SizeMap:         make(map[string]uint64),
		ShuffleRangeMap: make(map[string]*pb.ShuffleRange),
	}

	// Zone-map summary describing the single decimal64 column.
	info := &InfoFromZoneMap{
		ColumnZMs:     []index2.ZM{zm},
		DataTypes:     []types.Type{types.New(types.T_decimal64, 10, scale)},
		ColumnNDVs:    []float64{2},
		NullCnts:      []int64{0},
		ColumnSize:    []int64{8},
		ShuffleRanges: []*pb.ShuffleRange{nil},
	}

	// Table definition: the decimal64 column followed by the row-id column.
	balanceCol := &plan.ColDef{
		Name: colName,
		Typ: plan.Type{
			Id:    int32(types.T_decimal64),
			Scale: scale,
			Width: 10,
		},
	}
	rowIDCol := &plan.ColDef{Name: catalog.Row_ID}
	tableDef := &plan.TableDef{
		Name: "test_table",
		Cols: []*plan.ColDef{balanceCol, rowIDCol},
	}

	UpdateStatsInfo(info, tableDef, statsInfo)

	gotMin, gotMax := statsInfo.MinValMap[colName], statsInfo.MaxValMap[colName]

	// Ordering must survive the conversion to float64.
	require.Less(t, gotMin, gotMax, "Min value should be less than max value")

	// Both bounds should match the inputs up to floating point tolerance.
	require.InDelta(t, -123.45, gotMin, 0.01, "Min value should be approximately -123.45")
	require.InDelta(t, 456.78, gotMax, 0.01, "Max value should be approximately 456.78")

	// Guard against the negative bound being reported as a wildly wrong
	// magnitude or with the wrong sign.
	require.Greater(t, gotMin, -1000.0, "Min value should not be an extremely large number")
	require.Less(t, gotMin, 0.0, "Min value should be negative")
}

// TestUpdateStatsInfo_Decimal128_NegativeValues runs UpdateStatsInfo over a
// DECIMAL(20, 4) zone map whose bounds are -9876543210.1234 and
// 1234567890.5678, then checks that the min/max written into the stats maps
// reflect the column scale and keep the sign of the lower bound.
func TestUpdateStatsInfo_Decimal128_NegativeValues(t *testing.T) {
	const colName = "amount"
	const scale int32 = 4

	// Lower bound of the zone map: -9876543210.1234.
	lowDec, err := types.Decimal128FromFloat64(-9876543210.1234, 20, scale)
	require.NoError(t, err)

	// Upper bound of the zone map: 1234567890.5678.
	highDec, err := types.Decimal128FromFloat64(1234567890.5678, 20, scale)
	require.NoError(t, err)

	// Feed both encoded bounds into a fresh decimal128 zone map.
	zm := index2.NewZM(types.T_decimal128, scale)
	index2.UpdateZM(zm, types.EncodeDecimal128(&lowDec))
	index2.UpdateZM(zm, types.EncodeDecimal128(&highDec))

	// Destination stats object with every map pre-allocated.
	statsInfo := &pb.StatsInfo{
		MinValMap:       make(map[string]float64),
		MaxValMap:       make(map[string]float64),
		NdvMap:          make(map[string]float64),
		DataTypeMap:     make(map[string]uint64),
		NullCntMap:      make(map[string]uint64),
		SizeMap:         make(map[string]uint64),
		ShuffleRangeMap: make(map[string]*pb.ShuffleRange),
	}

	// Zone-map summary describing the single decimal128 column.
	info := &InfoFromZoneMap{
		ColumnZMs:     []index2.ZM{zm},
		DataTypes:     []types.Type{types.New(types.T_decimal128, 20, scale)},
		ColumnNDVs:    []float64{2},
		NullCnts:      []int64{0},
		ColumnSize:    []int64{16},
		ShuffleRanges: []*pb.ShuffleRange{nil},
	}

	// Table definition: the decimal128 column followed by the row-id column.
	amountCol := &plan.ColDef{
		Name: colName,
		Typ: plan.Type{
			Id:    int32(types.T_decimal128),
			Scale: scale,
			Width: 20,
		},
	}
	rowIDCol := &plan.ColDef{Name: catalog.Row_ID}
	tableDef := &plan.TableDef{
		Name: "test_table",
		Cols: []*plan.ColDef{amountCol, rowIDCol},
	}

	UpdateStatsInfo(info, tableDef, statsInfo)

	gotMin, gotMax := statsInfo.MinValMap[colName], statsInfo.MaxValMap[colName]

	// Ordering must survive the conversion to float64.
	require.Less(t, gotMin, gotMax, "Min value should be less than max value")

	// Both bounds should match the inputs up to floating point tolerance.
	require.InDelta(t, -9876543210.1234, gotMin, 0.01, "Min value should be approximately -9876543210.1234")
	require.InDelta(t, 1234567890.5678, gotMax, 0.01, "Max value should be approximately 1234567890.5678")

	// The lower bound must keep its sign.
	require.Less(t, gotMin, 0.0, "Min value should be negative")
}

// TestUpdateStatsInfo_Decimal_DifferentScales is a table-driven check that
// UpdateStatsInfo reports decimal64 min/max values matching the original
// floats for several column scales (0, 2, 4 and 6), each as its own subtest.
func TestUpdateStatsInfo_Decimal_DifferentScales(t *testing.T) {
	const colName = "value"

	// Each row: subtest name, column scale, lower bound, upper bound.
	cases := []struct {
		label string
		scale int32
		low   float64
		high  float64
	}{
		{"scale_0", 0, -100.0, 200.0},
		{"scale_2", 2, -99.99, 199.99},
		{"scale_4", 4, -1234.5678, 5678.1234},
		{"scale_6", 6, -0.123456, 0.987654},
	}

	for idx := range cases {
		c := cases[idx]
		t.Run(c.label, func(t *testing.T) {
			// Encode both bounds as decimal64 at this case's scale.
			lowDec, err := types.Decimal64FromFloat64(c.low, 18, c.scale)
			require.NoError(t, err)

			highDec, err := types.Decimal64FromFloat64(c.high, 18, c.scale)
			require.NoError(t, err)

			// Feed both encoded bounds into a fresh decimal64 zone map.
			zm := index2.NewZM(types.T_decimal64, c.scale)
			index2.UpdateZM(zm, types.EncodeDecimal64(&lowDec))
			index2.UpdateZM(zm, types.EncodeDecimal64(&highDec))

			// Destination stats object with every map pre-allocated.
			statsInfo := &pb.StatsInfo{
				MinValMap:       make(map[string]float64),
				MaxValMap:       make(map[string]float64),
				NdvMap:          make(map[string]float64),
				DataTypeMap:     make(map[string]uint64),
				NullCntMap:      make(map[string]uint64),
				SizeMap:         make(map[string]uint64),
				ShuffleRangeMap: make(map[string]*pb.ShuffleRange),
			}

			// Zone-map summary describing the single decimal64 column.
			info := &InfoFromZoneMap{
				ColumnZMs:     []index2.ZM{zm},
				DataTypes:     []types.Type{types.New(types.T_decimal64, 18, c.scale)},
				ColumnNDVs:    []float64{2},
				NullCnts:      []int64{0},
				ColumnSize:    []int64{8},
				ShuffleRanges: []*pb.ShuffleRange{nil},
			}

			// Table definition: the decimal column followed by the row-id column.
			valueCol := &plan.ColDef{
				Name: colName,
				Typ: plan.Type{
					Id:    int32(types.T_decimal64),
					Scale: c.scale,
					Width: 18,
				},
			}
			rowIDCol := &plan.ColDef{Name: catalog.Row_ID}
			tableDef := &plan.TableDef{
				Name: "test_table",
				Cols: []*plan.ColDef{valueCol, rowIDCol},
			}

			UpdateStatsInfo(info, tableDef, statsInfo)

			gotMin, gotMax := statsInfo.MinValMap[colName], statsInfo.MaxValMap[colName]

			// Ordering and approximate values must both be preserved.
			require.Less(t, gotMin, gotMax, "Min value should be less than max value")
			require.InDelta(t, c.low, gotMin, 0.01, "Min value mismatch")
			require.InDelta(t, c.high, gotMax, 0.01, "Max value mismatch")
		})
	}
}

func makeQueryWithScan(tableType string, rowsize float64, blockNum int32) *plan.Query {
n := &plan.Node{
NodeType: plan.Node_TABLE_SCAN,
Expand Down
12 changes: 2 additions & 10 deletions pkg/vm/engine/disttae/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -609,14 +609,6 @@ func getMinMaxValueByFloat64(typ types.Type, buf []byte) float64 {
return float64(types.DecodeTimestamp(buf))
case types.T_datetime:
return float64(types.DecodeDatetime(buf))
case types.T_decimal64:
// Fix: Use Decimal64ToFloat64 to handle negative values correctly
dec := types.DecodeDecimal64(buf)
return types.Decimal64ToFloat64(dec, typ.Scale)
case types.T_decimal128:
// Fix: Use Decimal128ToFloat64 to handle negative values correctly
dec := types.DecodeDecimal128(buf)
return types.Decimal128ToFloat64(dec, typ.Scale)
//case types.T_char, types.T_varchar, types.T_text:
//return float64(plan2.ByteSliceToUint64(buf)), true
default:
Expand Down Expand Up @@ -671,7 +663,7 @@ func updateInfoFromZoneMap(
meta.BlockHeader().BFExtent().Length() + objColMeta.Location().Length())
if info.ColumnNDVs[idx] > 100 || info.ColumnNDVs[idx] > 0.1*float64(meta.BlockHeader().Rows()) {
switch info.DataTypes[idx].Oid {
case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_time, types.T_timestamp, types.T_date, types.T_datetime, types.T_decimal64, types.T_decimal128:
case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_time, types.T_timestamp, types.T_date, types.T_datetime:
info.ShuffleRanges[idx] = plan2.NewShuffleRange(false)
if info.ColumnZMs[idx].IsInited() {
minvalue := getMinMaxValueByFloat64(info.DataTypes[idx], info.ColumnZMs[idx].GetMinBuf())
Expand Down Expand Up @@ -717,7 +709,7 @@ func updateInfoFromZoneMap(
info.ColumnSize[idx] += int64(objColMeta.Location().Length())
if info.ShuffleRanges[idx] != nil {
switch info.DataTypes[idx].Oid {
case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_time, types.T_timestamp, types.T_date, types.T_datetime, types.T_decimal64, types.T_decimal128:
case types.T_int64, types.T_int32, types.T_int16, types.T_uint64, types.T_uint32, types.T_uint16, types.T_time, types.T_timestamp, types.T_date, types.T_datetime:
minvalue := getMinMaxValueByFloat64(info.DataTypes[idx], zm.GetMinBuf())
maxvalue := getMinMaxValueByFloat64(info.DataTypes[idx], zm.GetMaxBuf())
info.ShuffleRanges[idx].Update(minvalue, maxvalue, int64(meta.BlockHeader().Rows()), int64(objColMeta.NullCnt()))
Expand Down
Loading
Loading