forked from pat/thinking-sphinx
/
attribute.rb
340 lines (296 loc) · 10.1 KB
/
attribute.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
module ThinkingSphinx
# Attributes - eternally useful when it comes to filtering, sorting or
# grouping. This class isn't really useful to you unless you're hacking
# around with the internals of Thinking Sphinx - but hey, don't let that
# stop you.
#
# One key thing to remember - if you're using the attribute manually to
# generate SQL statements, you'll need to set the base model, and all the
# associations. Which can get messy. Use Index.link!, it really helps.
#
class Attribute < ThinkingSphinx::Property
attr_accessor :query_source
# To create a new attribute, you'll need to pass in either a single Column
# or an array of them, and some (optional) options.
#
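# Valid options are:
# - :as => :alias_name
# - :type => :attribute_type
# - :source => :field, :query, :ranged_query
# - :crc => true (string values are run through the adapter's crc function,
#   so the attribute is stored as an integer - or as an MVA for many values)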
#
# Alias is only required in three circumstances: when there's
# another attribute or field with the same name, when the column name is
# 'id', or when there's more than one column.
#
# Type is not required, unless you want to force a column to be a certain
# type (but keep in mind the value will not be CASTed in the SQL
# statements). The only time you really need to use this is when the type
# can't be figured out by the column - ie: when not actually using a
# database column as your source.
#
# Source is only used for multi-value attributes (MVA). By default this will
# use a left-join and a group_concat to obtain the values. For better performance
# during indexing it can be beneficial to let Sphinx use a separate query to retrieve
# all document,value-pairs.
# Either :query or :ranged_query will enable this feature, where :ranged_query will cause
# the query to be executed incrementally.
#
# Example usage:
#
# Attribute.new(
# Column.new(:created_at)
# )
#
# Attribute.new(
# Column.new(:posts, :id),
# :as => :post_ids
# )
#
# Attribute.new(
# Column.new(:posts, :id),
# :as => :post_ids,
# :source => :ranged_query
# )
#
# Attribute.new(
# [Column.new(:pages, :id), Column.new(:articles, :id)],
# :as => :content_ids
# )
#
# Attribute.new(
# Column.new("NOW()"),
# :as => :indexed_at,
# :type => :datetime
# )
#
# If you're creating attributes for latitude and longitude, don't forget
# that Sphinx expects these values to be in radians.
#
def initialize(source, columns, options = {})
  # Bare super forwards source, columns and options to the parent class.
  super

  @query_source = options[:source]
  @crc          = options[:crc]
  @type         = options[:type]

  # Naming a :source implies a multi-value attribute, unless a type was given.
  @type = :multi unless @type || @query_source.nil?

  # A CRC'd string is no longer a string: it becomes an integer, or an MVA
  # when there can be many values.
  @type = (is_many? ? :multi : :integer) if @type == :string && @crc

  # Register this attribute with the source it belongs to.
  source.attributes << self
end
# Builds the part of the SELECT clause that belongs to this attribute. The
# model and associations need to be set before calling this.
#
# Each column is prefixed, then string columns have their nulls converted and
# datetime columns are cast through the adapter. The joined columns are then
# optionally CRC'd, concatenated and group-concatenated before being aliased
# to the attribute's unique name.
#
# Returns nil when the attribute is not included as an association.
#
def to_select_sql
  return nil unless include_as_association?

  # Integers, datetimes and CRCs are comma-separated; anything else uses a space.
  separator = (all_ints? || all_datetimes? || @crc) ? ',' : ' '

  parts = @columns.collect do |column|
    prefixed = column_with_prefix(column)
    kind     = type

    if kind == :string
      adapter.convert_nulls(prefixed)
    elsif kind == :datetime
      adapter.cast_to_datetime(prefixed)
    else
      prefixed
    end
  end

  sql = parts.join(', ')
  sql = adapter.crc(sql)                          if @crc
  sql = adapter.concatenate(sql, separator)       if concat_ws?
  sql = adapter.group_concatenate(sql, separator) if is_many?

  "#{sql} AS #{quote_column(unique_name)}"
end
# Maps the attribute's type onto the matching sql_attr_* configuration
# symbol. Returns nil for a type that has no mapping.
def type_to_config
  case type
  when :multi    then :sql_attr_multi
  when :datetime then :sql_attr_timestamp
  when :string   then :sql_attr_str2ordinal
  when :float    then :sql_attr_float
  when :boolean  then :sql_attr_bool
  when :integer  then :sql_attr_uint
  end
end
# False only for a :multi attribute whose query_source is :query or
# :ranged_query; true in every other case.
def include_as_association?
  return true unless type == :multi

  ![:query, :ranged_query].include?(query_source)
end
# Returns the value to write into the configuration for this attribute.
#
# Anything that isn't a multi-value attribute is simply its unique name.
# A multi-value attribute is written as "uint NAME from ORIGIN", where ORIGIN
# is "field" when the attribute is included as an association, and otherwise
# the source value with every run of whitespace squeezed to one space.
#
def config_value(offset = nil, delta = false)
  return unique_name unless type == :multi

  origin = if include_as_association?
    "field"
  else
    source_value(offset, delta).gsub(/\s+/m, " ").strip
  end

  "uint #{unique_name} from #{origin}"
end
# Returns the type of the attribute, memoised in @type. If that's not already
# set, it is derived as follows - the first matching rule wins:
#
# - :datetime when is_many_datetimes? holds;
# - :multi when is_many? or is_many_ints? holds;
# - :string when there's more than one association in total;
# - otherwise whatever translated_type_from_database maps the column to
#   (which raises if the column's type has no Sphinx equivalent).
#
# Deriving the type also settles @crc: a :string result with @crc set is
# reported as :integer instead, and in every other case @crc is reset to false
# unless the result is :multi over string columns with @crc already set.
#
def type
@type ||= begin
# The datetime check comes first, so many-valued datetimes never become :multi.
base_type = case
when is_many_datetimes?
:datetime
when is_many?, is_many_ints?
:multi
when @associations.values.flatten.length > 1
:string
else
translated_type_from_database
end
if base_type == :string && @crc
# A CRC'd string is stored as an integer; @crc is left as it was.
base_type = :integer
else
# CRC only stays enabled for multi-value attributes over string columns.
@crc = false unless base_type == :multi && is_many_strings? && @crc
end
base_type
end
end
# True only for integer, datetime and boolean attributes for which
# is_string? is false.
def updatable?
  case type
  when :integer, :datetime, :boolean then !is_string?
  else false
  end
end
# Reads the attribute's current value straight from the given instance: each
# method in the first column's __stack is sent in turn, starting from the
# instance, and the column's __name is then sent to the object that yields.
def live_value(instance)
  column = @columns.first
  target = column.__stack.inject(instance) { |object, method| object.send(method) }
  target.send(column.__name)
end
# Asks all_of_type? whether every column is an :integer column.
def all_ints?
  all_of_type? :integer
end
# Asks all_of_type? whether every column is a :datetime, :date or
# :timestamp column.
def all_datetimes?
  all_of_type? :datetime, :date, :timestamp
end
# Asks all_of_type? whether every column is a :string or :text column.
def all_strings?
  all_of_type? :string, :text
end
private
# Builds the "from" part of a multi-value attribute's configuration.
#
# For string-based attributes this is the dasherized query source followed by
# the first column's name. Otherwise it is "query; SQL" or
# "ranged-query; SQL; RANGE SQL", with the delta restriction appended to the
# SQL when delta is truthy.
def source_value(offset, delta)
  if is_string?
    return "#{query_source.to_s.dasherize}; #{columns.first.__name}"
  end

  sql    = query(offset)
  ranged = (query_source == :ranged_query)

  unless ranged
    sql += "WHERE #{query_delta.strip}" if delta
    return "query; #{sql}"
  end

  # Ranged queries always carry the $start/$end clause, so the delta
  # restriction is chained on with AND rather than opening a new WHERE.
  sql += query_clause
  sql += " AND #{query_delta.strip}" if delta
  "ranged-query; #{sql}; #{range_query}"
end
# Builds the SELECT statement for a query-sourced multi-value attribute. It
# selects the base association's foreign key, combined with
# ThinkingSphinx.unique_id_expression(offset), as "id", and the end
# association's primary key under the attribute's unique name.
#
# Raises a RuntimeError ("Could not determine SQL for MVA") when the
# association holding the attribute's column cannot be determined.
def query(offset)
  # Resolve the end association first: the base association is found by
  # walking up from it, so asking for the base while the end is missing fails
  # with an unrelated NoMethodError before the message below can be raised.
  end_assoc = end_association_for_mva
  raise "Could not determine SQL for MVA" if end_assoc.nil?

  base_assoc = base_association_for_mva
  raise "Could not determine SQL for MVA" if base_assoc.nil?

  <<-SQL
SELECT #{foreign_key_for_mva base_assoc}
#{ThinkingSphinx.unique_id_expression(offset)} AS #{quote_column('id')},
#{primary_key_for_mva(end_assoc)} AS #{quote_column(unique_name)}
FROM #{quote_table_name base_assoc.table} #{association_joins}
  SQL
end
# WHERE clause bounding the base association's foreign key between the
# $start and $end placeholders.
def query_clause
  key   = foreign_key_for_mva(base_association_for_mva)
  lower = "#{key} >= $start"
  upper = "#{key} <= $end"
  "WHERE #{lower} AND #{upper}"
end
# SQL condition restricting the base association's foreign key to the model
# rows selected by the index's delta object clause. The result spans three
# lines and ends with a newline.
def query_delta
  key          = foreign_key_for_mva(base_association_for_mva)
  primary_key  = quote_column(model.primary_key)
  table        = model.quoted_table_name
  delta_clause = @source.index.delta_object.clause(model, true)

  "#{key} IN (SELECT #{primary_key}\n" \
  "FROM #{table}\n" \
  "WHERE #{delta_clause})\n"
end
# SQL selecting the smallest and largest foreign key value in the base
# association's table.
def range_query
  base  = base_association_for_mva
  key   = foreign_key_for_mva(base)
  table = quote_table_name(base.table)
  "SELECT MIN(#{key}), MAX(#{key}) FROM #{table}"
end
# Table-qualified, quoted key column for the given association: the primary
# key taken from its reflection, or the first column's name when that is
# missing.
def primary_key_for_mva(assoc)
  table = assoc.table
  key   = assoc.primary_key_from_reflection || columns.first.__name
  quote_with_table(table, key)
end
# Table-qualified, quoted foreign key column for the given association, using
# the primary_key_name reported by its reflection.
def foreign_key_for_mva(assoc)
  table = assoc.table
  key   = assoc.reflection.primary_key_name
  quote_with_table(table, key)
end
# The first of the first column's associations that actually has that
# column. The result is memoised once one has been found.
def end_association_for_mva
  @association_for_mva ||= begin
    column = columns.first
    associations[column].detect { |assoc| assoc.has_column?(column.__name) }
  end
end
# The root of the association chain the attribute's column hangs off: starting
# from the end association, parents are followed until one has no parent.
# Memoised once found.
#
# Returns nil when there is no end association, rather than failing on
# nil.parent, so that callers can detect the missing association themselves.
def base_association_for_mva
  @first_association_for_mva ||= begin
    assoc = end_association_for_mva
    assoc = assoc.parent until assoc.nil? || assoc.parent.nil?
    assoc
  end
end
# SQL for every join between the base association (exclusive) and the end
# association (inclusive), separated by spaces. Empty when the two are the
# same association.
def association_joins
  joins = []
  base  = base_association_for_mva
  assoc = end_association_for_mva

  # The chain is walked from the end association up to the base, but each
  # join is prepended so that the SQL lists parents before their children -
  # a join's ON clause can only refer to tables that were joined before it.
  until assoc == base
    joins.unshift(assoc.to_sql)
    assoc = assoc.parent
  end

  joins.join(' ')
end
# Truthy when concat_ws? holds and every column is an integer column.
def is_many_ints?
  combined = concat_ws?
  combined && all_ints?
end
# Truthy when is_many? holds and every column is a datetime column.
def is_many_datetimes?
  many = is_many?
  many && all_datetimes?
end
# Truthy when is_many? holds and every column is a string column.
def is_many_strings?
  many = is_many?
  many && all_strings?
end
# Looks up the database type of the attribute's column. The class inspected
# is the one behind the first association when there is one, otherwise the
# model itself. Returns the type of the first of that class's columns whose
# name matches one of the attribute's columns, or nil when none matches.
def type_from_database
  first_assoc = @associations.values.flatten.first
  klass       = first_assoc ? first_assoc.reflection.klass : @model

  # Built once up front instead of once per database column inspected.
  names  = @columns.collect { |c| c.__name.to_s }
  column = klass.columns.detect { |col| names.include?(col.name) }

  column.nil? ? nil : column.type
end
# Translates the column's database type into an attribute type: datetime,
# string, float, boolean and integer pass through unchanged, decimal becomes
# :float, and timestamp and date become :datetime.
#
# Raises a RuntimeError explaining how to cast the column explicitly when the
# database type is anything else.
def translated_type_from_database
  db_type = type_from_database

  return db_type   if [:datetime, :string, :float, :boolean, :integer].include?(db_type)
  return :float    if db_type == :decimal
  return :datetime if [:timestamp, :date].include?(db_type)

  raise <<-MESSAGE
Cannot automatically map attribute #{unique_name} in #{@model.name} to an
equivalent Sphinx type (integer, float, boolean, datetime, string as ordinal).
You could try to explicitly convert the column's value in your define_index
block:
has "CAST(column AS INT)", :type => :integer, :as => :column
  MESSAGE
end
# True when every attribute column exists with one of the given database
# types in every class it can come from: the classes behind the column's
# associations, or the model itself when the column has no associations.
def all_of_type?(*column_types)
  @columns.all? do |col|
    linked  = @associations[col]
    klasses = linked.empty? ? [@model] : linked.collect { |assoc| assoc.reflection.klass }
    name    = col.__name.to_s

    klasses.all? do |klass|
      # The block parameter gets its own name so it no longer shadows the
      # local that holds the matching column.
      match = klass.columns.detect { |db_column| db_column.name == name }
      !match.nil? && column_types.include?(match.type)
    end
  end
end
end
end