Skip to content

Commit

Permalink
fixed join vs distinct facets; fixed join vs implicit groupby
Browse files Browse the repository at this point in the history
  • Loading branch information
glookka committed May 28, 2024
1 parent 0e85b9d commit 997bab9
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 11 deletions.
13 changes: 7 additions & 6 deletions src/joinsorter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,8 @@ JoinSorter_c::JoinSorter_c ( const CSphIndex * pIndex, const CSphIndex * pJoined
bHaveAggregates |= tSorterSchema.GetAttr(i).m_eAggrFunc!=SPH_AGGR_NONE;

CSphVector<std::pair<int,bool>> dRightFilters = FetchJoinRightTableFilters ( m_tQuery.m_dFilters, tSorterSchema, m_tQuery.m_sJoinIdx.cstr() );
m_bFinalCalcOnly = !bJoinedGroupSort && !bHaveAggregates && !dRightFilters.GetLength() && !NeedToMoveMixedJoinFilters ( m_tQuery, tSorterSchema );
bool bDisableByImplicitGrouping = HasImplicitGrouping(m_tQuery) && m_tQuery.m_eJoinType!=JoinType_e::LEFT;
m_bFinalCalcOnly = !bJoinedGroupSort && !bHaveAggregates && !dRightFilters.GetLength() && !NeedToMoveMixedJoinFilters ( m_tQuery, tSorterSchema ) && !pSorter->IsPrecalc() && !bDisableByImplicitGrouping;
m_bErrorFlag = !SetupJoinQuery ( m_pSorter->GetSchema()->GetDynamicSize(), m_sErrorMessage );
}

Expand Down Expand Up @@ -1306,7 +1307,7 @@ void JoinSorter_c::SetupJoinSelectList()
class JoinMultiSorter_c : public JoinSorter_c
{
public:
JoinMultiSorter_c ( const CSphIndex * pIndex, const CSphIndex * pJoinedIndex, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> dSorters, bool bJoinedGroupSort );
JoinMultiSorter_c ( const CSphIndex * pIndex, const CSphIndex * pJoinedIndex, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> dSorters );

bool Push ( const CSphMatch & tEntry ) override;
bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override;
Expand All @@ -1321,8 +1322,8 @@ class JoinMultiSorter_c : public JoinSorter_c
};


JoinMultiSorter_c::JoinMultiSorter_c ( const CSphIndex * pIndex, const CSphIndex * pJoinedIndex, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> dSorters, bool bJoinedGroupSort )
: JoinSorter_c ( pIndex, pJoinedIndex, dQueries, dSorters[0], bJoinedGroupSort )
JoinMultiSorter_c::JoinMultiSorter_c ( const CSphIndex * pIndex, const CSphIndex * pJoinedIndex, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> dSorters )
: JoinSorter_c ( pIndex, pJoinedIndex, dQueries, dSorters[0], true )
{
m_dSorters.Resize ( dSorters.GetLength() );
memcpy ( m_dSorters.Begin(), dSorters.Begin(), dSorters.GetLengthBytes() );
Expand Down Expand Up @@ -1476,7 +1477,7 @@ ISphMatchSorter * CreateJoinSorter ( const CSphIndex * pIndex, const CSphIndex *
}


bool CreateJoinMultiSorter ( const CSphIndex * pIndex, const CSphIndex * pJoinedIndex, const SphQueueSettings_t & tSettings, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> & dSorters, bool bJoinedGroupSort, CSphString & sError )
bool CreateJoinMultiSorter ( const CSphIndex * pIndex, const CSphIndex * pJoinedIndex, const SphQueueSettings_t & tSettings, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> & dSorters, CSphString & sError )
{
if ( !tSettings.m_pJoinArgs )
return true;
Expand All @@ -1486,7 +1487,7 @@ bool CreateJoinMultiSorter ( const CSphIndex * pIndex, const CSphIndex * pJoined

// the idea is that 1st sorter does the join AND it also pushes joined matches to all other sorters
// to avoid double push to 1..N sorters they are wrapped in a class that prevents pushing matches
std::unique_ptr<JoinMultiSorter_c> pJoinSorter = std::make_unique<JoinMultiSorter_c> ( pIndex, pJoinedIndex, dQueries, dSorters, bJoinedGroupSort );
std::unique_ptr<JoinMultiSorter_c> pJoinSorter = std::make_unique<JoinMultiSorter_c> ( pIndex, pJoinedIndex, dQueries, dSorters );
if ( pJoinSorter->GetErrorFlag() )
{
sError = pJoinSorter->GetErrorMessage();
Expand Down
2 changes: 1 addition & 1 deletion src/joinsorter.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ bool NeedToMoveMixedJoinFilters ( const CSphQuery & tQuery, const ISphSchema
std::unique_ptr<ISphFilter> CreateJoinNullFilter ( const CSphFilterSettings & tSettings, const CSphAttrLocator & tNullMapLocator );

ISphMatchSorter * CreateJoinSorter ( const CSphIndex * pIndex, const CSphIndex * pJoinedIndex, const SphQueueSettings_t & tSettings, const CSphQuery & tQuery, ISphMatchSorter * pSorter, bool bJoinedGroupSort, CSphString & sError );
bool CreateJoinMultiSorter ( const CSphIndex * pIndex, const CSphIndex * pJoinedIndex, const SphQueueSettings_t & tSettings, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> & dSorters, bool bJoinedGroupSort, CSphString & sError );
bool CreateJoinMultiSorter ( const CSphIndex * pIndex, const CSphIndex * pJoinedIndex, const SphQueueSettings_t & tSettings, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> & dSorters, CSphString & sError );
9 changes: 6 additions & 3 deletions src/queuecreator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,9 @@ bool QueueCreator_c::SetupGroupbySettings ( bool bHasImplicitGrouping )
return Err ( "group-by attribute '%s' not found", m_tQuery.m_sGroupBy.cstr() );

const CSphColumnInfo & tGroupByAttr = tSchema.GetAttr(iGroupBy);
if ( tGroupByAttr.m_pExpr && tGroupByAttr.m_pExpr->UsesDocstore() )
return Err ( "unable to group by stored field '%s'", m_tQuery.m_sGroupBy.cstr() );

ESphAttr eType = tGroupByAttr.m_eAttrType;
CSphAttrLocator tLoc = tGroupByAttr.m_tLocator;
m_bJoinedGroupSort |= IsJoinAttr ( tGroupByAttr.m_sName );
Expand Down Expand Up @@ -2231,21 +2234,21 @@ int QueueCreator_c::AdjustMaxMatches ( int iMaxMatches ) const
// Decides whether the shortcut this function is named after may be used for the current query.
// All visible conditions must hold: PredictAggregates() reports no aggregates, the group sorter
// settings are both implicit and distinct, and the query has no filters, an empty m_sQuery and
// an empty m_sKNNAttr.
bool QueueCreator_c::CanCalcFastCountDistinct() const
{
bool bHasAggregates = PredictAggregates();
// NOTE(review): two consecutive returns appear here because this text is a rendered diff with
// the +/- markers stripped. Presumably the first return is the removed line and the second is
// the added one, which additionally rejects JoinType_e::INNER - confirm against the repository.
// Read literally as C++, only the first return would ever execute.
return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.IsEmpty() && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty();
return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.IsEmpty() && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty() && m_tQuery.m_eJoinType!=JoinType_e::INNER;
}


// Decides whether the shortcut this function is named after may be used for the current query.
// All visible conditions must hold: PredictAggregates() reports no aggregates, the group sorter
// settings are implicit but NOT distinct, the query carries exactly one filter, and both
// m_sQuery and m_sKNNAttr are empty.
bool QueueCreator_c::CanCalcFastCountFilter() const
{
bool bHasAggregates = PredictAggregates();
// NOTE(review): two consecutive returns appear here because this text is a rendered diff with
// the +/- markers stripped. Presumably the first return is the removed line and the second is
// the added one, which additionally rejects JoinType_e::INNER - confirm against the repository.
// Read literally as C++, only the first return would ever execute.
return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && !m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.GetLength()==1 && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty();
return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && !m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.GetLength()==1 && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty() && m_tQuery.m_eJoinType!=JoinType_e::INNER;
}


// Decides whether the shortcut this function is named after may be used for the current query.
// All visible conditions must hold: PredictAggregates() reports no aggregates, the group sorter
// settings are implicit but NOT distinct, and the query has no filters, an empty m_sQuery and
// an empty m_sKNNAttr.
bool QueueCreator_c::CanCalcFastCount() const
{
bool bHasAggregates = PredictAggregates();
// NOTE(review): two consecutive returns appear here because this text is a rendered diff with
// the +/- markers stripped. Presumably the first return is the removed line and the second is
// the added one, which additionally rejects JoinType_e::INNER - confirm against the repository.
// Read literally as C++, only the first return would ever execute.
return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && !m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.IsEmpty() && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty();
return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && !m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.IsEmpty() && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty() && m_tQuery.m_eJoinType!=JoinType_e::INNER;
}


Expand Down
2 changes: 1 addition & 1 deletion src/searchd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5507,7 +5507,7 @@ int SearchHandler_c::CreateMultiQueryOrFacetSorters ( const CSphIndex * pIndex,
return 0;
}

if ( m_bFacetQueue && !CreateJoinMultiSorter ( pIndex, dJoinedIndexes[0], tQueueSettings, m_dNQueries, dSorters, tQueueRes.m_bJoinedGroupSort, dErrors[0] ) )
if ( m_bFacetQueue && !CreateJoinMultiSorter ( pIndex, dJoinedIndexes[0], tQueueSettings, m_dNQueries, dSorters, dErrors[0] ) )
{
dSorters.Apply ( [] ( ISphMatchSorter *& pSorter ) { SafeDelete (pSorter); } );
return 0;
Expand Down
Binary file modified test/test_278/model.bin
Binary file not shown.
37 changes: 37 additions & 0 deletions test/test_278/test.xml
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,47 @@ select * from join1 inner join join2 on join1.string_id = join2.id where id=1 or
<!-- left join with a single right filter -->
select * from join1 left join join2 on join1.id = join2.id where join2.name = 'name1' order by id asc;

<!-- multiple facet -->
select join1.j.table, j.table from join2 join join1 on join1.id = join2.id facet join1.j.table distinct facet j.table;;

<!-- precalc (fast count) sorter -->
select id, count(*) from join1 left join join2 on join1.id=join2.id;

<!-- count(*) sorter vs left join -->
select id, * from join1 left join join2 on join1.id=join2.id;
select id, count(*) from join1 left join join2 on join1.id=join2.id;

<!-- count(*) sorter vs inner join -->
select id, * from join1 join join2 on join1.id=join2.id;
select id, count(*) from join1 join join2 on join1.id=join2.id;

<!-- cleanup -->
drop table join1;
drop table join2;

<!-- populate tables -->
CREATE TABLE customers (name text, email string attribute, address text );

INSERT INTO customers (id, name, email, address) VALUES
(1, 'Alice Johnson', 'alice@example.com', '123 Maple St'),
(2, 'Bob Smith', 'bob@example.com', '456 Oak St'),
(3, 'Carol White', 'carol@example.com', '789 Pine St');

CREATE TABLE orders (customer_id bigint, product text, quantity integer, order_date string, tags multi, details json );

INSERT INTO orders (id, customer_id, product, quantity, order_date, tags, details) VALUES
(1, 1, 'Laptop', 1, '2023-01-01', (101, 102), '{"price":1200, "warranty":"2 years"}'),
(2, 2, 'Phone', 2, '2023-01-02', (103), '{"price":800, "warranty":"1 year"}'),
(3, 1, 'Tablet', 1, '2023-01-03', (101, 104), '{"price":450, "warranty":"1 year"}'),
(4, 3, 'Monitor', 1, '2023-01-04', (105), '{"price":300, "warranty":"1 year"}');

<!-- group by fulltext (error) -->
SELECT name, COUNT(*) FROM customers INNER JOIN orders ON customers.id = orders.customer_id GROUP BY name;

<!-- cleanup -->
DROP TABLE customers;
DROP TABLE orders;

</sphinxql>
</queries>

Expand Down

0 comments on commit 997bab9

Please sign in to comment.