Skip to content
Browse files

simplify model, enrich Dataset with segment representation and parent…

… associations
  • Loading branch information...
1 parent dc3d9e7 commit e36d2acde9977dc855efda2f2e0dd814adc85786 @mccraigmccraig committed Mar 26, 2012
Showing with 331 additions and 164 deletions.
  1. +1 −0 lib/mdquery.rb
  2. +122 −27 lib/mdquery/dataset.rb
  3. +10 −27 lib/mdquery/model.rb
  4. +175 −42 spec/mdquery/dataset_spec.rb
  5. +23 −68 spec/mdquery/model_spec.rb
View
1 lib/mdquery.rb
@@ -1,3 +1,4 @@
+$: << File.expand_path('..', __FILE__) if !$:.include?(File.expand_path('..', __FILE__))
require 'mdquery/dsl'
# a DSL for specifying analytic queries
View
149 lib/mdquery/dataset.rb
@@ -4,70 +4,172 @@ module MDQuery
module Dataset
class DimensionValue
- attr_reader :segment_key
+ # DimensionSegment this value belongs to
+ attr_reader :dimension_segment
+
+ # the value
attr_reader :value
+
+ # Optional label for the value
attr_reader :label
- def initialize(attrs)
- MDQuery::Util.assign_attributes(self, attrs, [:segment_key, :value, :label])
+ def initialize(dimension_segment, value, label)
+ @dimension_segment = dimension_segment
+ @value = value
+ @label = label
validate
end
def validate
- raise "no segment_key!" if !segment_key
+ raise "no dimension_segment!" if !dimension_segment
raise "no value!" if !value
end
end
- class Dimension
+ class DimensionSegment
+ # Dimension this Segment belongs to
+ attr_reader :dimension
+
+ # key of segment, unique within Dimension
attr_reader :key
- attr_reader :label
+
+ # ordered list of DimensionValues in segment
+ attr_reader :dimension_values
+
+ # ordered list of all values in segment
attr_reader :values
- attr_reader :label_index
- attr_reader :value_list
- def initialize(attrs)
- MDQuery::Util.assign_attributes(self, attrs, [:key, :label, :values])
+ def initialize(model, dimension)
+ @dimension = dimension
+ @key = model.key
+
+ @values = model.get_values(dimension.dataset.model.source)
+
+ label_index = model.labels(@values)
+ @dimension_values = @values.map{|v| DimensionValue.new(self, v, label_index[v]) }
+ @dimension_value_index = @dimension_values.reduce({}){|dvi,dv| dvi[dv.value] = dv ; dvi}
+
validate
- @value_list = values.map(&:value)
- @label_index = values.reduce({}){|li,dv| li[dv.value] = dv.label ; li}
end
def validate
+ raise "no dimension!" if !dimension
raise "no key!" if !key
- raise "no values!" if !values || values.empty?
+ raise "no values!" if !values
+ end
+
+ def dimension_value_for(value)
+ @dimension_value_index[value]
end
def label_for(value)
- label_index[value]
+ (dv = dimension_value_for(value)) && dv.label
end
+ end
- def values_for_segment(segment_key)
- values.select{|v| v.segment_key == segment_key}
+ class Dimension
+ attr_reader :dataset
+
+ attr_reader :key
+ attr_reader :label
+
+ attr_reader :segments
+
+ # an ordered list of values for the dimension
+ attr_reader :values
+
+ def initialize(model, dataset)
+ @dataset = dataset
+ @key = model.key
+ @label = model.label
+
+ @segments = model.segment_models.map{|sm| DimensionSegment.new(sm, self) }
+ @segment_index = @segments.reduce({}){|si, s| si[s.key] = s ; si}
+
+ @values = segments.map(&:values).reduce(&:+)
+ @dimension_value_index = segments.map(&:dimension_values).reduce(&:+).reduce({}){|dvi,dv| dvi[dv.value] = dv ; dvi}
+
+ validate
end
+ def validate
+ raise "no dataset!" if !dataset
+ raise "no key!" if !key
+ raise "no segments!" if !segments || segments.empty?
+ end
+
+ # lookup a segment by +key+
+ def segment(key)
+ @segment_index[key]
+ end
+
+ # return an ordered list of values for 0 or more segments.
+ # * +segment_keys+ a list of segment keys. if empty, the method returns +values+,
+ # otherwise it returns the concatenation of +values+ for each identified segment
def values_for_segments(segment_keys)
if segment_keys && !segment_keys.empty?
- segment_keys.map{|sk| values_for_segment(sk)}.reduce(&:concat)
+ segment_keys.map{|sk| segment(sk)}.map(&:values).reduce(&:+)
else
values
end
end
+
+ # the DimensionValue describing +value+ or nil
+ def dimension_value_for(value)
+ @dimension_value_index[value]
+ end
+
+ # the label for the +value+ or nil
+ def label_for(value)
+ (dv = dimension_value_for(value)) && dv.label
+ end
+ end
+
+ class Measure
+ attr_reader :dataset
+ attr_reader :key
+ attr_reader :definition
+
+ def initialize(model, dataset)
+ @dataset = dataset
+ @key = model.key
+ @definition = model.definition
+ validate
+ end
+
+ def validate
+ raise "no dataset" if !dataset
+ raise "no key!" if !key
+ raise "no definition!" if !definition || definition=~/^\s*$/
+ end
end
class Dataset
attr_reader :model
+
+ attr_reader :data
attr_reader :dimensions
attr_reader :measures
- attr_reader :data
attr_reader :indexed_data
- def initialize(attrs)
- MDQuery::Util.assign_attributes(self, attrs, [:model, :dimensions, :measures, :data])
+ def initialize(model, data)
+ @model = model
+ @data = data
+
+ @measures = model.measure_models.map{|mm| Measure.new(mm, self) }.reduce({}){|mi,m| mi[m.key] = m ; mi}
+ @dimensions = model.dimension_models.map{|dm| Dimension.new(dm, self) }.reduce({}){|di,d| di[d.key] = d ; di}
+
validate
index
end
+ def validate
+ raise "no model!" if !model
+ raise "no data!" if !data
+ raise "no dimensions!" if !dimensions || dimensions.empty?
+ raise "no measures!" if !measures || measures.empty?
+ end
+
# retrieve a datapoint given a hash of {dimension_key=>dimension_values}
def datapoint(dimension_values, measure)
d = @indexed_data[dimension_values]
@@ -76,13 +178,6 @@ def datapoint(dimension_values, measure)
private
- def validate
- raise "no model!" if !model
- raise "no dimensions!" if !dimensions || dimensions.empty?
- raise "no measures!" if !measures || measures.empty?
- raise "no data!" if !data
- end
-
def index_key(point)
Hash[dimensions.keys.map{|k| [k, point[k]]}]
end
View
37 lib/mdquery/model.rb
@@ -99,19 +99,13 @@ def get_values(scope)
end
end
- # map of values to names
+ # map of values to labels
def labels(values)
values.reduce({}) do |labels,value|
labels[value] = (label_proc || DEFAULT_LABEL_PROC).call(value)
labels
end
end
-
- def dimension_values(scope)
- get_values(scope).map{|v| MDQuery::Dataset::DimensionValue.new(:segment_key=>key,
- :value=>v,
- :label=>(label_proc || DEFAULT_LABEL_PROC).call(v))}
- end
end
class DimensionModel
@@ -140,19 +134,6 @@ def inspect
def index_list(prefixes=nil)
(0...segment_models.length).reduce([]){|l, i| l + (prefixes||[[]]).map{|prefix| prefix.clone << i}}
end
-
- def dimension_values(scope)
- segment_models.map do |s|
- s.dimension_values(scope)
- end.reduce(&:concat)
- end
-
- def dimension(scope)
- MDQuery::Dataset::Dimension.new(:key=>key,
- :label=>label,
- :values=>dimension_values(scope))
- end
-
end
class MeasureModel
@@ -237,6 +218,7 @@ def with_regions(&proc)
end
end
+ # construct a query for a region
def construct_query(scope, region_segment_models, measure_models)
narrowed_scope = region_segment_models.reduce(scope){|scope, ds| ds.do_narrow(scope)}
@@ -251,6 +233,7 @@ def construct_query(scope, region_segment_models, measure_models)
narrowed_scope.select(select_string).group(group_string)
end
+ # extract data points from a list of ActiveRecord models
def extract(rows, region_segment_models, measure_models)
rows.map do |row|
dimension_values = region_segment_models.map do |ds|
@@ -264,8 +247,8 @@ def extract(rows, region_segment_models, measure_models)
end
end
-
- def analyse
+ # run the queries defined by the DatasetModel
+ def do_queries
data = []
with_regions do |region_segment_models|
@@ -274,12 +257,12 @@ def analyse
data += points
end
- ds = dimension_models.reduce({}){|h,dm| h[dm.key] = dm.dimension(source) ; h}
+ data
+ end
- MDQuery::Dataset::Dataset.new(:model=>self,
- :dimensions=>ds,
- :measures=>measure_models.map(&:key),
- :data=>data)
+ # run the queries and put the results in a Dataset
+ def analyse
+ MDQuery::Dataset::Dataset.new(self, do_queries)
end
end
end
View
217 spec/mdquery/dataset_spec.rb
@@ -2,68 +2,201 @@
require 'mdquery/dataset'
module MDQuery::Dataset
+
+ describe DimensionSegment do
+
+ def build
+ @model = Object.new
+ mock(@model).key{:foo_segment}
+
+ @dimension = Object.new
+ @source = Object.new
+ mock(@dimension).dataset.mock!.model.mock!.source{@source}
+
+ @values = [:foo, :bar, :baz]
+ mock(@model).get_values(@source){@values}
+ @labels = {:foo=>"foo", :bar=>"BAR", :baz=>"blah"}
+ mock(@model).labels(@values){@labels}
+
+ DimensionSegment.new(@model, @dimension)
+ end
+
+ it "should constract a DimensionSegment from a DimensionSegmentModel" do
+ ds = build
+ ds.dimension.should == @dimension
+ ds.key.should == :foo_segment
+ ds.dimension_values.map(&:dimension_segment).should == [ds, ds, ds]
+ ds.dimension_values.map(&:value).should == [:foo, :bar, :baz]
+ ds.dimension_values.map(&:label).should == ["foo", "BAR", "blah"]
+ ds.values.should == [:foo, :bar, :baz]
+ end
+
+ describe "dimension_value_for" do
+ it "should retrieve a DimensionValue from the segment by value" do
+ ds = build
+ dv0 = ds.dimension_value_for(:foo)
+ dv0.dimension_segment.should == ds
+ dv0.value.should == :foo
+ dv0.label.should == "foo"
+ end
+ end
+
+ describe "label_for" do
+ it "should get a label for a value from the segment" do
+ ds = build
+ ds.label_for(:foo).should == "foo"
+ ds.label_for(:bar).should == "BAR"
+ end
+ end
+ end
+
describe Dimension do
- it "label_for should lookup labels for values" do
- d = Dimension.new(:key=>:foo, :label=>:foo_label,
- :values=>[DimensionValue.new(:segment_key=>:bar, :value=>:barbar, :label=>"BARBAR"),
- DimensionValue.new(:segment_key=>:bar, :value=>:barbarbar, :label=>"BARBARBAR"),
- DimensionValue.new(:segment_key=>:baz, :value=>:bazbaz, :label=>"BAZBAZ")])
- d.label_for(:barbar).should == "BARBAR"
- d.label_for(:barbarbar).should == "BARBARBAR"
- d.label_for(:bazbaz).should == "BAZBAZ"
+ def build
+ @model = Object.new
+ mock(@model).key{:foodim}
+ mock(@model).label{"Dimension Foo"}
+
+ @sm0 = Object.new
+ @segment0 = Object.new
+ stub(@segment0).key{:segment0_key}
+ stub(@segment0).values{[:foo, :bar]}
+ @s0v0 = Object.new
+ stub(@s0v0).label{"Foo"}
+ stub(@s0v0).value{:foo}
+ @s0v1 = Object.new
+ mock(@s0v1).value{:bar}
+ stub(@segment0).dimension_values{[@s0v0, @s0v1]}
+
+ mock(DimensionSegment).new(@sm0, anything){@segment0}
+
+ @sm1 = Object.new
+ @segment1 = Object.new
+ stub(@segment1).key{:segment1_key}
+ stub(@segment1).values{[:baz, :waz]}
+ @s1v0 = Object.new
+ stub(@s1v0).value{:baz}
+ @s1v1 = Object.new
+ stub(@s1v1).value{:waz}
+ stub(@s1v1).label{"WAZ"}
+ stub(@segment1).dimension_values{[@s1v0, @s1v1]}
+
+ mock(DimensionSegment).new(@sm1, anything){@segment1}
+
+ mock(@model).segment_models{[@sm0,@sm1]}
+
+ @dataset = Object.new
+
+ Dimension.new(@model, @dataset)
end
- it "values_for_segment should extract values belonging to a segment" do
- vs = [DimensionValue.new(:segment_key=>:bar, :value=>:barbar, :label=>"BARBAR"),
- DimensionValue.new(:segment_key=>:bar, :value=>:barbarbar, :label=>"BARBARBAR"),
- DimensionValue.new(:segment_key=>:baz, :value=>:bazbaz, :label=>"BAZBAZ")]
+ it "should construct a Dimension from a DimensionModel" do
+ d = build
+ d.dataset.should == @dataset
+ d.key.should == :foodim
+ d.label.should == "Dimension Foo"
+ end
+
+ describe "segment" do
+ it "should retrieve a segment by key" do
+ d = build
+ d.segment(:segment0_key).should == @segment0
+ d.segment(:segment1_key).should == @segment1
+ end
+ end
+
+ describe "values_for_segments" do
+ it "values_for_segment should extract values belonging to a segment" do
+ d = build
+ d.values_for_segments([:segment1_key, :segment0_key]).should == [:baz, :waz, :foo, :bar]
+ end
+ end
- d = Dimension.new(:key=>:foo, :label=>:foo_label, :values=>vs)
- d.values_for_segment(:bar).should == vs[0..1]
+ describe "dimension_value_for" do
+ it "should retrieve the DimensionValue for a segment value" do
+ d = build
+ d.dimension_value_for(:foo).should == @s0v0
+ d.dimension_value_for(:waz).should == @s1v1
+ end
end
- it "values_for_segments should extract values for given segments in given order" do
- vs = [DimensionValue.new(:segment_key=>:bar, :value=>:barbar, :label=>"BARBAR"),
- DimensionValue.new(:segment_key=>:bar, :value=>:barbarbar, :label=>"BARBARBAR"),
- DimensionValue.new(:segment_key=>:baz, :value=>:bazbaz, :label=>"BAZBAZ"),
- DimensionValue.new(:segment_key=>:foo, :value=>:foofoo, :label=>"FOOFOO"),
- DimensionValue.new(:segment_key=>:foo, :value=>:foofoofoo, :label=>"FOOFOOFOO")]
+ describe "label_for" do
+ it "should retrieve the label for a segment value" do
+ d = build
+ d.label_for(:foo).should == "Foo"
+ d.label_for(:waz).should == "WAZ"
+ d.label_for(:blah).should == nil
+ end
+ end
+ end
- d = Dimension.new(:key=>:woot, :label=>:woot_label, :values=>vs)
- d.values_for_segments([:foo, :bar]).should == vs[3..4] + vs[0..1]
+ describe Measure do
+ def build
+ @dataset = Object.new
+ @model = Object.new
+ mock(@model).key{:count}
+ mock(@model).definition{"count(*)"}
+ Measure.new(@model, @dataset)
+ end
+
+ it "should construct a Measure from a MeasureModel" do
+ m = build
+ m.dataset.should == @dataset
+ m.key.should == :count
+ m.definition.should == "count(*)"
end
end
describe Dataset do
- it "should index the dataset" do
- model = Object.new
- foo_dim = Object.new
- bar_dim = Object.new
- dimensions = {:foo=>foo_dim, :bar=>bar_dim}
- count_measure = Object.new
+ def build
+ @data = [{:foo=>10, :bar=>10, :count=>100, :sum=>200}, {:foo=>5, :bar=>4, :count=>10, :sum=>20}]
- ds = Dataset.new(:model => model,
- :dimensions => dimensions,
- :measures => [count_measure],
- :data => [{:foo=>10, :bar=>10, :count=>100}, {:foo=>5, :bar=>4, :count=>10}])
+ @model = Object.new
- ds.model.should == model
- ds.dimensions.should == dimensions
- ds.measures.should == [count_measure]
+ @mm0 = Object.new
+ @mm1 = Object.new
+ stub(@model).measure_models{[@mm0, @mm1]}
- ds.indexed_data.should == {{:foo=>10, :bar=>10}=>{:count=>100}, {:foo=>5, :bar=>4}=>{:count=>10}}
+ @m0 = Object.new
+ stub(@m0).key{:count}
+ @m1 = Object.new
+ stub(@m1).key{:sum}
+ mock(Measure).new(@mm0, anything){@m0}
+ mock(Measure).new(@mm1, anything){@m1}
+
+ @dm0 = Object.new
+ @dm1 = Object.new
+ stub(@model).dimension_models{[@dm0, @dm1]}
+
+ @d0 = Object.new
+ stub(@d0).key{:foo}
+ @d1 = Object.new
+ stub(@d1).key{:bar}
+
+ mock(Dimension).new(@dm0, anything){@d0}
+ mock(Dimension).new(@dm1, anything){@d1}
+
+ Dataset.new(@model, @data)
end
- it "should retrieve datapoints from the index" do
- ds = Dataset.new(:model => Object.new,
- :dimensions => {:foo=>Object.new, :bar=>Object.new},
- :measures => [Object.new],
- :data => [{:foo=>10, :bar=>10, :count=>100}, {:foo=>5, :bar=>4, :count=>10}])
+ it "should construct a Dataset from a DatasetModel" do
+ ds = build
+ ds.data.should == @data
+ ds.dimensions.should == {:foo=>@d0, :bar=>@d1}
+ ds.measures.should == {:count=>@m0, :sum=>@m1}
+ ds.indexed_data.should == {{:foo=>10, :bar=>10}=>{:count=>100, :sum=>200}, {:foo=>5, :bar=>4}=>{:count=>10, :sum=>20}}
+ end
+ it "should index the dataset" do
+ ds = build
+ ds.indexed_data.should == {{:foo=>10, :bar=>10}=>{:count=>100, :sum=>200}, {:foo=>5, :bar=>4}=>{:count=>10, :sum=>20}}
+ end
+
+ it "should retrieve datapoints from the index" do
+ ds = build
ds.datapoint({:foo=>10, :bar=>10}, :count).should == 100
- ds.datapoint({:foo=>5, :bar=>4}, :count).should == 10
+ ds.datapoint({:foo=>5, :bar=>4}, :sum).should == 20
end
end
end
View
91 spec/mdquery/model_spec.rb
@@ -186,20 +186,6 @@ def create(attrs={})
dsm.labels([:foo]).should == {:foo=>"FOO"}
end
end
-
- describe "dimension_values" do
- it "should return a list of DimensionValue objects for each value of get_values(scope)" do
- scope = Object.new
-
- dsm = create
- mock(dsm).get_values(scope){[1,2,3]}
-
- dvs = dsm.dimension_values(scope)
- dvs.map(&:segment_key).should == [:foo, :foo, :foo]
- dvs.map(&:value).should == [1,2,3]
- dvs.map(&:label).should == ["1", "2", "3"]
- end
- end
end
describe DimensionModel do
@@ -214,48 +200,6 @@ def create(attrs={})
dm.index_list([[0],[1]]).should == [[0,0],[1,0],[0,1],[1,1]]
end
end
-
- describe "dimension_values" do
- it "should concatenate values from each of the segment_models" do
- scope = Object.new
- sm1 = Object.new
- sm1dv1 = Object.new
- sm1dv2 = Object.new
- mock(sm1).dimension_values(scope){[sm1dv1, sm1dv2]}
- sm2 = Object.new
- sm2dv1 = Object.new
- sm2dv2 = Object.new
- mock(sm2).dimension_values(scope){[sm2dv1,sm2dv2]}
-
- dm = DimensionModel.new(:key=>:foo, :segment_models=>[sm1, sm2])
-
- dm.dimension_values(scope).should == [sm1dv1,sm1dv2,sm2dv1,sm2dv2]
- end
- end
-
- describe "dimension" do
- it "should return a Dataset::Dimension" do
- scope = Object.new
- sm1 = Object.new
- sm1dv1 = MDQuery::Dataset::DimensionValue.new(:segment_key=>:s1key, :value=>10, :label=>"ten")
- sm1dv2 = MDQuery::Dataset::DimensionValue.new(:segment_key=>:s1key, :value=>20, :label=>"twenty")
- mock(sm1).dimension_values(scope){[sm1dv1, sm1dv2]}
- sm2 = Object.new
- sm2dv1 = MDQuery::Dataset::DimensionValue.new(:segment_key=>:s2key, :value=>1, :label=>"one")
- sm2dv2 = MDQuery::Dataset::DimensionValue.new(:segment_key=>:s2key, :value=>2, :label=>"two")
- mock(sm2).dimension_values(scope){[sm2dv1,sm2dv2]}
-
- dm = DimensionModel.new(:key=>:foo, :segment_models=>[sm1, sm2], :label=>"FOO")
-
- d = dm.dimension(scope)
- d.key.should == :foo
- d.label.should == "FOO"
- d.values.should == [sm1dv1,sm1dv2,sm2dv1,sm2dv2]
- d.values_for_segment(:s1key).should == [sm1dv1, sm1dv2]
- d.values_for_segment(:s2key).should == [sm2dv1, sm2dv2]
- d.values_for_segments([:s2key, :s1key]).should == [sm2dv1, sm2dv2, sm1dv1, sm1dv2]
- end
- end
end
describe MeasureModel do
@@ -475,7 +419,8 @@ def create(attrs={})
end
end
- describe "analyse" do
+
+ describe "do_queries" do
it "should do a query for each region and put the results in a dataset" do
dim0sm0 = Object.new
dim0 = DimensionModel.new(:key=>:foo, :segment_models=>[dim0sm0])
@@ -498,19 +443,29 @@ def create(attrs={})
mock(dm).extract(records, [dim0sm0, dim1sm0], [mm1]){[{:foo=>"foo1", :bar=>"bar1", :count=>10},
{:foo=>"foo2", :bar=>"bar2", :count=>20}]}
- ddim0 = Object.new
- mock(dim0).dimension(dm.source){ddim0}
- ddim1 = Object.new
- mock(dim1).dimension(dm.source){ddim1}
+ data = dm.do_queries
+ data.should == [{:foo=>"foo1", :bar=>"bar1", :count=>10},
+ {:foo=>"foo2", :bar=>"bar2", :count=>20}]
+ end
+ end
+
+ describe "analyse" do
+ it "should do_queries and use the result to constract a Dataset" do
+ dim0sm0 = Object.new
+ dim0 = DimensionModel.new(:key=>:foo, :segment_models=>[dim0sm0])
+ mm1 = MeasureModel.new(:key=>:count, :definition=>"count(*)", :cast=>:int)
+
+ dm = DatasetModel.new(:source=>Object.new,
+ :dimension_models=>[dim0],
+ :measure_models => [mm1])
+
+ dataset = Object.new
+ mock(dm).do_queries{dataset}
+
+ mock(MDQuery::Dataset::Dataset).new(dm, dataset)
- dataset = dm.analyse
+ dm.analyse
- dataset.model.should == dm
- dataset.dimensions.should == {:foo=>ddim0, :bar=>ddim1}
- dataset.measures.should == [:count]
- dataset.data.should == [{:foo=>"foo1", :bar=>"bar1", :count=>10},
- {:foo=>"foo2", :bar=>"bar2", :count=>20}]
- dataset.datapoint({:foo=>"foo1", :bar=>"bar1"}, :count).should == 10
end
end
end

0 comments on commit e36d2ac

Please sign in to comment.
Something went wrong with that request. Please try again.