From 3b65e3884b552ea754b67f492fc1b0696f8aeea7 Mon Sep 17 00:00:00 2001 From: wbond Date: Wed, 14 May 2014 15:46:27 -0400 Subject: [PATCH] Fixed selecting DISTINCT rows from a table when ordering by a column that is not part of the SELECT list where the join creates at least one extra row. While previously the test suite combined with Rails 4.0.0 looked like it worked, the constructed SQL would return incorrect values for FinderTest#test_find_with_order_on_included_associations_with_construct_finder_sql_for_association_limiting_and_is_distinct. By using a GROUP BY and MAX()/MIN() in the ORDER BY with no DISTINCT call, the order of the returned rows was not necessarily correct. With Rails 4.0.1, a related bug was fixed that introduces a new method, columns_for_distinct. Unfortunately by adding the columns from the ORDER BY clause to the SELECT list, the DISTINCT nature of the queries was altered. This combined with a test limiting the results to only 2 records, exposed the bug. From testing various SQL constructs to solve the issue, I ended up determining that to properly return a single value from the related table used in the ORDER BY, a query with a subquery utilizing ROW_NUMBER() and DENSE_RANK() was needed. The ROW_NUMBER() was partitioned over the originally requested columns, and then the outer query would filter the results to just the first row of each partition. DENSE_RANK() was used to generate a proper ordering of the partitions in relation to each other. In the process I also determined that extra code, plus an extra test, was necessary to ensure that when such a construct was generated using both a limit and offset, that the results would be correct also. See https://github.com/rails-sqlserver/activerecord-sqlserver-adapter/issues/306 for further discussion, plus example queries and results. --- .../sqlserver/schema_statements.rb | 15 +- lib/arel/visitors/sqlserver.rb | 146 ++++++++++++++++++ test/cases/finder_test_sqlserver.rb | 32 ++-- 3 files changed, 165 insertions(+), 28 deletions(-) diff --git a/lib/active_record/connection_adapters/sqlserver/schema_statements.rb b/lib/active_record/connection_adapters/sqlserver/schema_statements.rb index c3216ebd1..cab53c5c4 100644 --- a/lib/active_record/connection_adapters/sqlserver/schema_statements.rb +++ b/lib/active_record/connection_adapters/sqlserver/schema_statements.rb @@ -44,17 +44,12 @@ def columns(table_name, _name = nil) end # like postgres, sqlserver requires the ORDER BY columns in the select list for distinct queries, and - # requires that the ORDER BY include the distinct column. - # this method is idental to the postgres method + # requires that the ORDER BY include the distinct column. Unfortunately, sqlserver does not support + # DISTINCT ON () like Posgres, or FIRST_VALUE() like Oracle (at least before SQL Server 2012). Because + # of these facts, we don't actually add any extra columns for distinct, but instead have to create + # a subquery with ROW_NUMBER() and DENSE_RANK() in our monkey-patches to Arel. def columns_for_distinct(columns, orders) #:nodoc: - order_columns = orders.map do |s| - # Convert Arel node to string - s = s.to_sql unless s.is_a?(String) - # Remove any ASC/DESC modifiers - s.gsub(/\s+(ASC|DESC)\s*(NULLS\s+(FIRST|LAST)\s*)?/i, '') - end.reject(&:blank?).map.with_index { |column, i| "#{column} AS alias_#{i}" } - - [super, *order_columns].join(', ') + columns end def rename_table(table_name, new_name) diff --git a/lib/arel/visitors/sqlserver.rb b/lib/arel/visitors/sqlserver.rb index b75a5549b..f800ab0a1 100644 --- a/lib/arel/visitors/sqlserver.rb +++ b/lib/arel/visitors/sqlserver.rb @@ -7,6 +7,8 @@ class SQLServer < Arel::Visitors::ToSql def visit_Arel_Nodes_SelectStatement(o, a) if complex_count_sql?(o) visit_Arel_Nodes_SelectStatementForComplexCount(o, a) + elsif distinct_non_present_orders?(o, a) + visit_Arel_Nodes_SelectStatementDistinctNonPresentOrders(o, a) elsif o.offset visit_Arel_Nodes_SelectStatementWithOffset(o, a) else @@ -47,6 +49,76 @@ def visit_Arel_Nodes_Bin(o, a) # SQLServer ToSql/Visitor (Additions) + # This constructs a query using DENSE_RANK() and ROW_NUMBER() to allow + # ordering a DISTINCT set of data by columns in another table that are + # not part of what we actually want to be DISTINCT. Without this, it is + # possible for the DISTINCT qualifier combined with TOP to return fewer + # rows than were requested. + def visit_Arel_Nodes_SelectStatementDistinctNonPresentOrders(o, a) + core = o.cores.first + projections = core.projections + groups = core.groups + orders = o.orders.uniq + + select_frags = projections.map do |x| + frag = projection_to_sql_remove_distinct(x, core, a) + # Remove the table specifier + frag.gsub!(/^[^\.]*\./, '') + # If there is an alias, remove everything but + frag.gsub(/^.*\sAS\s+/i, '') + end + + if o.offset + select_frags << 'ROW_NUMBER() OVER (ORDER BY __order) AS __offset' + else + select_frags << '__order' + end + + projection_list = projections.map { |x| projection_to_sql_remove_distinct(x, core, a) }.join(', ') + + sql = [ + ('SELECT'), + (visit(core.set_quantifier, a) if core.set_quantifier and !o.offset), + (visit(o.limit, a) if o.limit and !o.offset), + (select_frags.join(', ')), + ('FROM ('), + ('SELECT'), + ( + [ + (projection_list), + (', DENSE_RANK() OVER ('), + ("ORDER BY #{orders.map { |x| visit(x, a) }.join(', ')}" if !orders.empty?), + (') AS __order'), + (', ROW_NUMBER() OVER ('), + ("PARTITION BY #{projection_list}" if !orders.empty?), + (" ORDER BY #{orders.map { |x| visit(x, a) }.join(', ')}" if !orders.empty?), + (') AS __joined_row_num') + ].join('') + ), + (source_with_lock_for_select_statement(o, a)), + ("WHERE #{core.wheres.map { |x| visit(x, a) }.join ' AND ' }" unless core.wheres.empty?), + ("GROUP BY #{groups.map { |x| visit(x, a) }.join ', ' }" unless groups.empty?), + (visit(core.having, a) if core.having), + (') AS __sq'), + ('WHERE __joined_row_num = 1'), + ('ORDER BY __order' unless o.offset) + ].compact.join(' ') + + if o.offset + sql = [ + ('SELECT'), + (visit(core.set_quantifier, a) if core.set_quantifier), + (visit(o.limit, a) if o.limit), + ('*'), + ('FROM (' + sql + ') AS __osq'), + ("WHERE __offset > #{visit(o.offset.expr, a)}"), + ('ORDER BY __offset') + ].join(' ') + end + + sql + end + def visit_Arel_Nodes_SelectStatementWithOutOffset(o, a, windowed = false) find_and_fix_uncorrelated_joins_in_select_statement(o) core = o.cores.first @@ -123,6 +195,18 @@ def visit_Arel_Nodes_SelectStatementForComplexCount(o, a) # SQLServer Helpers + def projection_to_sql_remove_distinct(x, core, a) + frag = Arel.sql(visit(x, a)) + # In Rails 4.0.0, DISTINCT was in a projection, whereas with 4.0.1 + # it is now stored in the set_quantifier. This moves it to the correct + # place so the code works on both 4.0.0 and 4.0.1. + if frag =~ /^\s*DISTINCT\s+/i + core.set_quantifier = Arel::Nodes::Distinct.new() + frag.gsub!(/\s*DISTINCT\s+/, '') + end + frag + end + def source_with_lock_for_select_statement(o, a) core = o.cores.first source = "FROM #{visit(core.source, a).strip}" if core.source @@ -166,6 +250,68 @@ def single_distinct_select_statement?(o) p1.respond_to?(:include?) && p1.include?('DISTINCT')) end + # Determine if the SELECT statement is asking for DISTINCT results, + # but is using columns not part of the SELECT list in the ORDER BY. + # This is necessary because SQL Server requires all ORDER BY entries + # be in the SELECT list with DISTINCT. However, these ordering columns + # can cause duplicate rows, which affect when using a limit. + def distinct_non_present_orders?(o, a) + projections = o.cores.first.projections + + sq = o.cores.first.set_quantifier + p1 = projections.first + + found_distinct = sq and sq.class.to_s =~ /Distinct/ + if (p1.respond_to?(:distinct) && p1.distinct) || (p1.respond_to?(:include?) && p1.include?('DISTINCT')) + found_distinct = true + end + + return false if !found_distinct or o.orders.uniq.empty? + + tables_all_columns = [] + expressions = projections.map do |p| + visit(p, a).split(',').map do |x| + x.strip! + # Rails 4.0.0 included DISTINCT in the first projection + x.gsub!(/\s*DISTINCT\s+/, '') + # Aliased column names + x.gsub!(/\s+AS\s+\w+/i, '') + # Identifier quoting + x.gsub!(/\[|\]/, '') + star_match = /^(\w+)\.\*$/.match(x) + if star_match + tables_all_columns << star_match[1] + end + x.strip.downcase + end.join(', ') + end + + # Make sure each order by is in the select list, otherwise there needs + # to be a subquery with row_numbe() + o.orders.uniq.each do |order| + order = visit(order, a) + order.strip! + + order.gsub!(/\s+(asc|desc)/i, '') + # Identifier quoting + order.gsub!(/\[|\]/, '') + + order.strip! + order.downcase! + + # If we selected all columns from a table, the order is ok + table_match = /^(\w+)\.\w+$/.match(order) + next if table_match and tables_all_columns.include?(table_match[1]) + + next if expressions.include?(order) + + return true + end + + # We didn't find anything in the order by no being selected + false + end + def windowed_single_distinct_select_statement?(o) o.limit && o.offset && diff --git a/test/cases/finder_test_sqlserver.rb b/test/cases/finder_test_sqlserver.rb index 8606e24e1..beb0162be 100644 --- a/test/cases/finder_test_sqlserver.rb +++ b/test/cases/finder_test_sqlserver.rb @@ -10,34 +10,30 @@ class FinderTestSqlserver < ActiveRecord::TestCase end class FinderTest < ActiveRecord::TestCase - fixtures :authors, :author_addresses, :posts + fixtures :authors, :author_addresses, :posts, :categorizations COERCED_TESTS = [ :test_exists_does_not_select_columns_without_alias, :test_string_sanitation, - :test_take_and_first_and_last_with_integer_should_use_sql_limit, - :test_find_with_order_on_included_associations_with_construct_finder_sql_for_association_limiting_and_is_distinct - + :test_take_and_first_and_last_with_integer_should_use_sql_limit ] include SqlserverCoercedTest - # TODO This test passes in rails 4.0.0 but not 4.0.1-2 - def test_coerced_find_with_order_on_included_associations_with_construct_finder_sql_for_association_limiting_and_is_distinct - p = Post.all.merge!(includes: { authors: :author_address }, - order: 'author_addresses.id DESC ', - limit: 2) - # ar_version = Gem.loaded_specs['activerecord'].version.version - # arel_to_png(p, "#{ar_version}") - count = p.to_a.size - #puts "*****#{ActiveRecord::SQLCounter.log_all.join("\n\n")}" - assert_equal 2, count - - assert_equal 3, Post.all.merge!(includes: { author: :author_address, authors: :author_address}, - order: 'author_addresses_authors.id DESC ', limit: 3).to_a.size + def test_find_with_order_on_included_associations_with_construct_finder_sql_for_association_limiting_and_is_distinct_and_offset + # Based on test_find_with_order_on_included_associations_with_construct_finder_sql_for_association_limiting_and_is_distinct + # and issue https://github.com/rails-sqlserver/activerecord-sqlserver-adapter/issues/306 + assert_equal( + posts(:welcome, :thinking), + Post.all.merge!( + :includes => { authors: :author_address }, + :order => ['authors.name DESC'], + :limit => 2, + :offset => 1 + ).to_a + ) end - def test_coerced_exists_does_not_select_columns_without_alias assert_sql(/SELECT TOP \(1\) 1 AS one FROM \[topics\]/i) do Topic.exists?