Skip to content

Commit

Permalink
Support for process objectId as date in aggr. algorithm.
Browse files Browse the repository at this point in the history
  • Loading branch information
michaljurecko committed Jul 12, 2017
1 parent 86071d7 commit ad8e23c
Show file tree
Hide file tree
Showing 19 changed files with 594 additions and 59 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:

env:
- MONGODB_VERSION=unstable
- MONGODB_VERSION=3.5.6 COVERALLS="-service=travis-ci" BUILD=true
- MONGODB_VERSION=3.5.9 COVERALLS="-service=travis-ci" BUILD=true
- MONGODB_VERSION=3.4
- MONGODB_VERSION=3.2
- MONGODB_VERSION=3.0
Expand Down
2 changes: 1 addition & 1 deletion _contrib/docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: '2'

services:
test_server:
image: mongo:3.5.7
image: mongo:3.5.9
command: mongod --bind_ip 0.0.0.0 --noprealloc --smallfiles --port 27017 --nojournal --oplogSize 16 --noauth
ports:
- "5555:27017"
Expand Down
10 changes: 8 additions & 2 deletions analysis/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,18 @@ const NameSeparator = "."
// ArrayItemMark represents array item in full field name
const ArrayItemMark = "[]"

// AggregationMinVersion is minimal MongoDB version that allows analysis using aggregation framework
var AggregationMinVersion = []int{3, 5, 6}
// AggregationMinVersion is minimal MongoDB version that allows analysis using aggregation framework
var AggregationMinVersion = []int{3, 5, 9}

// AggregationMinVersionStr (string) is minimal MongoDB version that allows analysis using aggregation framework
var AggregationMinVersionStr = "3.5.9"

// RandomSampleMinVersion is minimal MongoDB version that allows analysis using random samples
var RandomSampleMinVersion = []int{3, 2, 0}

// RandomSampleMinVersionStr (string) is minimal MongoDB version that allows analysis using random samples
var RandomSampleMinVersionStr = "3.2.0"

// Analysis consists of the options, target collection and the four contiguous stages.
type Analysis struct {
options *Options // common options
Expand Down
1 change: 1 addition & 0 deletions analysis/stages/03group/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ func NormalizeType(t *analysis.Type, location *time.Location) {
t.ValueHistogram.Start = int64(helpers.ToDouble(t.ValueHistogram.Start))
t.ValueHistogram.End = int64(helpers.ToDouble(t.ValueHistogram.End))
}
case "objectId":
case "date":
{
t.ValueHistogram.Start = helpers.SafeToDate(t.ValueHistogram.Start).In(location)
Expand Down
11 changes: 9 additions & 2 deletions analysis/stages/03group/groupInDB/group_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,24 @@ func GroupStats(p *expr.Pipeline, groupOptions *group.Options) {
)

// ValuesHistogram
var dateTypeCondition interface{}
if groupOptions.ProcessObjectIdAsDate {
dateTypeCondition = expr.In(expr.Field(analysis.BsonId, analysis.BsonFieldType), []interface{}{"objectId", "date"})
} else {
dateTypeCondition = expr.Eq(expr.Field(analysis.BsonId, analysis.BsonFieldType), "date")
}

sw.AddBranch(
expr.Eq(statType, valueHistogram),
bson.M{
analysis.BsonValueHistogram: bson.M{
analysis.BsonHistogramStart: expr.Cond(
expr.Eq(expr.Field(analysis.BsonId, analysis.BsonFieldType), "date"),
dateTypeCondition,
expr.TimestampToDate(expr.Field(analysis.BsonHistogramStart)),
expr.Field(analysis.BsonHistogramStart),
),
analysis.BsonHistogramEnd: expr.Cond(
expr.Eq(expr.Field(analysis.BsonId, analysis.BsonFieldType), "date"),
dateTypeCondition,
expr.TimestampToDate(expr.Field(analysis.BsonHistogramEnd)),
expr.Field(analysis.BsonHistogramEnd),
),
Expand Down
41 changes: 28 additions & 13 deletions analysis/stages/03group/groupInDB/group_values.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
// ...
// ]
func GroupValues(p *expr.Pipeline, options *group.Options) {
filterOutUnnecessaryFields(p, options)
prepareFields(p, options)

fields := bson.M{
analysis.BsonId: bson.M{
Expand Down Expand Up @@ -83,7 +83,10 @@ func GroupValues(p *expr.Pipeline, options *group.Options) {
p.AddStage("group", fields)
}

// Deletes fields that will no longer be needed according to group.Options.
// Prepare fields for grouping:
// - converts the values to the desired format
// - deletes fields that will no longer be needed according to group.Options.
//
// Example results:
// [
// {
Expand All @@ -94,7 +97,7 @@ func GroupValues(p *expr.Pipeline, options *group.Options) {
// }
// ...
// ]
func filterOutUnnecessaryFields(p *expr.Pipeline, options *group.Options) {
func prepareFields(p *expr.Pipeline, options *group.Options) {
var valueProject interface{}
if options.StoreMinMaxAvgValue ||
options.StoreWeekdayHistogram ||
Expand All @@ -104,18 +107,30 @@ func filterOutUnnecessaryFields(p *expr.Pipeline, options *group.Options) {
options.StoreBottomNValues > 0 ||
options.ValueHistogramMaxRes > 0 {

valueProject = expr.Cond(
expr.In(expr.Field(expand.BsonFieldType), group.StoreValueTypes),
typeSw := expr.Switch()
typeSw.SetDefault(expr.Field(expand.BsonValue))

// Convert boolean to 1 or 0 values for average calculation
typeSw.AddBranch(
expr.Eq(expr.Field(expand.BsonFieldType), "bool"),
expr.Cond(
// If bool, convert bool value to int for AVG calculation
expr.Eq(expr.Field(expand.BsonFieldType), "bool"),
expr.Cond(
expr.Eq(expr.Field(expand.BsonValue), true),
1,
0,
),
expr.Field(expand.BsonValue),
expr.Eq(expr.Field(expand.BsonValue), true),
1,
0,
),
)

// Convert objectId to date
if options.ProcessObjectIdAsDate {
typeSw.AddBranch(
expr.Eq(expr.Field(expand.BsonFieldType), "objectId"),
expr.ObjectIdToDate(expr.Field(expand.BsonValue)),
)
}

valueProject = expr.Cond(
expr.In(expr.Field(expand.BsonFieldType), group.StoreValueTypes),
typeSw.Bson(),
expr.Var("REMOVE"), // remove if no longer needed
)
} else {
Expand Down
51 changes: 30 additions & 21 deletions analysis/stages/03group/groupInDB/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,29 @@ import (
"gopkg.in/mgo.v2/bson"
)

func generateHistogram(p *expr.Pipeline, histogramType string, minField string, maxField string, valueField string, valueType string, resolution uint) {
func generateHistogram(p *expr.Pipeline, histogramType string, minField string, maxField string, valueField string, valueType string, resolution uint, processObjectIdAsDate bool) {
nameField := analysis.BsonId + "." + group.BsonFieldName
typeField := analysis.BsonId + "." + analysis.BsonFieldType

var dateTypeCondition interface{}
// Allows objectId to be processed as a date,
// value is converted in "prepareFields" function
if processObjectIdAsDate {
dateTypeCondition = expr.In(valueType, []interface{}{"objectId", "date"})
} else {
dateTypeCondition = expr.Eq(valueType, "date")
}

// Convert min/max date values to timestamp
p.AddStage("project", bson.M{
analysis.BsonId: 1,
minField: expr.Cond(
expr.Eq(valueType, "date"),
dateTypeCondition,
expr.DateToTimestamp(expr.Field(minField)),
expr.Field(minField),
),
maxField: expr.Cond(
expr.Eq(valueType, "date"),
dateTypeCondition,
expr.DateToTimestamp(expr.Field(maxField)),
expr.Field(maxField),
),
Expand All @@ -29,7 +38,7 @@ func generateHistogram(p *expr.Pipeline, histogramType string, minField string,

// range = max - min
p.AddStage("addFields", bson.M{
analysis.BsonHistogramRange: expr.Subtract("$"+maxField, "$"+minField),
analysis.BsonHistogramRange: expr.Subtract(expr.Field(maxField), expr.Field(minField)),
})

// Skip if range == 0
Expand All @@ -38,7 +47,7 @@ func generateHistogram(p *expr.Pipeline, histogramType string, minField string,
})

// Calculate histogram constants (step, range, ...)
calcHistogramConstants(p, minField, maxField, valueType, resolution)
calcHistogramConstants(p, minField, maxField, valueType, resolution, dateTypeCondition)

// Map values to interval
p.AddStage("project", bson.M{
Expand All @@ -48,7 +57,7 @@ func generateHistogram(p *expr.Pipeline, histogramType string, minField string,
analysis.BsonHistogramRange: expr.Field(histogramConstants, analysis.BsonHistogramRange),
analysis.BsonHistogramStep: expr.Field(histogramConstants, analysis.BsonHistogramStep),
analysis.BsonHistogramNumOfSteps: expr.Field(histogramConstants, analysis.BsonHistogramNumOfSteps),
bsonAllValues: mapValueToInterval(valueField, valueType),
bsonAllValues: mapValueToInterval(valueField, valueType, dateTypeCondition),
})

p.AddStage("unwind", expr.Field(bsonAllValues))
Expand Down Expand Up @@ -92,28 +101,28 @@ func generateHistogram(p *expr.Pipeline, histogramType string, minField string,
}

// Calculate histogram constants (step, range, ...) and store them to CONSTANTS field.
func calcHistogramConstants(p *expr.Pipeline, minField string, maxField string, valueType string, resolution uint) {
func calcHistogramConstants(p *expr.Pipeline, minField string, maxField string, valueType string, resolution uint, dateTypeCondition interface{}) {
// Corresponding GO code:
// density := (_resolution - 1) / _range
// shift := math.Pow10(int(math.Floor(math.Log10(density))))
// normDensity := density / shift
// divisor := calcDivisorFromNormDensity(normDensity)
// histogram := group.Histogram{}
// histogram.Step = 1 / (divisor * shift)
//if t == "int" || t == "int" || t == "long" {
// if t == "int" || t == "int" || t == "long" {
// // Decimal steps do not make sense for numbers
// histogram.Step = math.Ceil(histogram.Step)
//} else if t == "date" {
// } else if t == "date" {
// // 1,2,5,10,15,30,60 seconds/minutes ... , 1-24 hours ..., x days
// histogram.Step = ceilDateStep(histogram.Step)
//}
//start := helpers.FloorWithStep(min, histogram.Step)
//end := helpers.CeilWithStep(max, histogram.Step) + histogram.Step
// }
// start := helpers.FloorWithStep(min, histogram.Step)
// end := helpers.CeilWithStep(max, histogram.Step) + histogram.Step
//
//histogram.Start = fromDoubleTo(t, start, groupOptions)
//histogram.End = fromDoubleTo(t, end, groupOptions)
//histogram.Range = end - start
//histogram.NumberOfSteps = uint(histogram.Range / histogram.Step)
// histogram.Start = fromDoubleTo(t, start, groupOptions)
// histogram.End = fromDoubleTo(t, end, groupOptions)
// histogram.Range = end - start
// histogram.NumberOfSteps = uint(histogram.Range / histogram.Step)
p.AddStage("addFields", bson.M{
histogramConstants: expr.Let(
bson.M{histogramDensity: expr.Divide(resolution-1, expr.Field(analysis.BsonHistogramRange))},
Expand All @@ -126,7 +135,7 @@ func calcHistogramConstants(p *expr.Pipeline, minField string, maxField string,
expr.Let(
bson.M{analysis.BsonHistogramStep: expr.Divide(1, expr.Multiply(expr.Var(histogramDivisor), expr.Var(histogramShift)))},
expr.Let(
bson.M{analysis.BsonHistogramStep: ceilStep(expr.Var(analysis.BsonHistogramStep), valueType)},
bson.M{analysis.BsonHistogramStep: ceilStep(expr.Var(analysis.BsonHistogramStep), valueType, dateTypeCondition)},
expr.Let(
bson.M{
analysis.BsonHistogramStart: expr.FloorWithStep(expr.Field(minField), expr.Var(analysis.BsonHistogramStep)),
Expand All @@ -153,7 +162,7 @@ func calcHistogramConstants(p *expr.Pipeline, minField string, maxField string,
}

// Map value to interval according histogram CONSTANTS
func mapValueToInterval(valueField string, valueType string) bson.M {
func mapValueToInterval(valueField string, valueType string, dateTypeCondition interface{}) bson.M {
if valueType == "int" {
return expr.Map(
expr.Field(bsonAllValues),
Expand All @@ -168,7 +177,7 @@ func mapValueToInterval(valueField string, valueType string) bson.M {
}

return expr.Cond(
expr.Eq(valueType, "date"),
dateTypeCondition,
// Date
expr.Map(
expr.Field(bsonAllValues),
Expand All @@ -194,7 +203,7 @@ func mapValueToInterval(valueField string, valueType string) bson.M {
)
}

func ceilStep(step interface{}, t interface{}) bson.M {
func ceilStep(step interface{}, t interface{}, dateTypeCondition interface{}) bson.M {
//if t == "int" || t == "int" || t == "long" {
// // Decimal steps do not make sense for numbers
// histogram.Step = math.Ceil(histogram.Step)
Expand All @@ -219,7 +228,7 @@ func ceilStep(step interface{}, t interface{}) bson.M {

// date
sw.AddBranch(
expr.Eq(t, "date"),
dateTypeCondition,
expr.CeilDateSeconds(step),
)

Expand Down
18 changes: 14 additions & 4 deletions analysis/stages/03group/groupInDB/histogram_date_hour.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,25 @@ import (
)

// DateHourHistogram returns hour histogram histogram calculation pipeline.
func DateHourHistogram(location *time.Location) *expr.Pipeline {
func DateHourHistogram(location *time.Location, options *group.Options) *expr.Pipeline {
p := expr.NewPipeline()

nameField := expr.Field(analysis.BsonId, group.BsonFieldName)
typeField := expr.Field(analysis.BsonId, analysis.BsonFieldType)

p.AddStage("match", bson.M{
(analysis.BsonId + "." + analysis.BsonFieldType): "date",
})
// Allows objectId to be processed as a date,
// value is converted in "prepareFields" function
if options.ProcessObjectIdAsDate {
p.AddStage("match", bson.M{
(analysis.BsonId + "." + analysis.BsonFieldType): bson.M{
"$in": []interface{}{"objectId", "date"},
},
})
} else {
p.AddStage("match", bson.M{
(analysis.BsonId + "." + analysis.BsonFieldType): "date",
})
}

p.AddStage("project", bson.M{
analysis.BsonId: 1,
Expand Down
18 changes: 14 additions & 4 deletions analysis/stages/03group/groupInDB/histogram_date_weekday.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,25 @@ import (
)

// DateWeekdayHistogram returns weekday histogram calculation pipeline.
func DateWeekdayHistogram(location *time.Location) *expr.Pipeline {
func DateWeekdayHistogram(location *time.Location, options *group.Options) *expr.Pipeline {
p := expr.NewPipeline()

nameField := expr.Field(analysis.BsonId, group.BsonFieldName)
typeField := expr.Field(analysis.BsonId, analysis.BsonFieldType)

p.AddStage("match", bson.M{
(analysis.BsonId + "." + analysis.BsonFieldType): "date",
})
// Allows objectId to be processed as a date,
// value is converted in "prepareFields" function
if options.ProcessObjectIdAsDate {
p.AddStage("match", bson.M{
(analysis.BsonId + "." + analysis.BsonFieldType): bson.M{
"$in": []interface{}{"objectId", "date"},
},
})
} else {
p.AddStage("match", bson.M{
(analysis.BsonId + "." + analysis.BsonFieldType): "date",
})
}

p.AddStage("project", bson.M{
analysis.BsonId: 1,
Expand Down
2 changes: 1 addition & 1 deletion analysis/stages/03group/groupInDB/histogram_lengths.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func LengthsHistogram(options *group.Options) *expr.Pipeline {
fieldType: bson.M{"$in": group.LengthHistogramTypes},
})

generateHistogram(p, analysis.BsonLengthHistogram, analysis.BsonMinLength, analysis.BsonMaxLength, expand.BsonLength, "int", options.LengthHistogramMaxRes)
generateHistogram(p, analysis.BsonLengthHistogram, analysis.BsonMinLength, analysis.BsonMaxLength, expand.BsonLength, "int", options.LengthHistogramMaxRes, false)

return p
}
11 changes: 9 additions & 2 deletions analysis/stages/03group/groupInDB/histogram_values.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,19 @@ import (
func ValuesHistogram(options *group.Options) *expr.Pipeline {
p := expr.NewPipeline()

allowedTypes := group.ValueHistogramTypes[:]
// Allows objectId to be processed as a date,
// value is converted in "prepareFields" function
if options.ProcessObjectIdAsDate {
allowedTypes = append(allowedTypes, "objectId")
}

typeField := analysis.BsonId + "." + analysis.BsonFieldType
p.AddStage("match", bson.M{
typeField: bson.M{"$in": group.ValueHistogramTypes},
typeField: bson.M{"$in": allowedTypes},
})

generateHistogram(p, analysis.BsonValueHistogram, analysis.BsonMinValue, analysis.BsonMaxValue, expand.BsonValue, expr.Field(typeField), options.ValueHistogramMaxRes)
generateHistogram(p, analysis.BsonValueHistogram, analysis.BsonMinValue, analysis.BsonMaxValue, expand.BsonValue, expr.Field(typeField), options.ValueHistogramMaxRes, options.ProcessObjectIdAsDate)

return p
}
4 changes: 2 additions & 2 deletions analysis/stages/03group/groupInDB/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ func ComputeStats(p *expr.Pipeline, groupOptions *group.Options, analysisOptions
}

if groupOptions.StoreWeekdayHistogram {
f.AddField(weekdayHistogram, DateWeekdayHistogram(analysisOptions.Location))
f.AddField(weekdayHistogram, DateWeekdayHistogram(analysisOptions.Location, groupOptions))
}

if groupOptions.StoreHourHistogram {
f.AddField(hourHistogram, DateHourHistogram(analysisOptions.Location))
f.AddField(hourHistogram, DateHourHistogram(analysisOptions.Location, groupOptions))
}

p.AddStage("facet", f.GetMap())
Expand Down
Loading

0 comments on commit ad8e23c

Please sign in to comment.