Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TOOLS-3411 Fix issue where sort order of views was not always preserved on dump #618

Merged
merged 5 commits into from Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion common/db/namespaces.go
Expand Up @@ -21,7 +21,7 @@ import (
type CollectionInfo struct {
Name string `bson:"name"`
Type string `bson:"type"`
Options bson.M `bson:"options"`
Options bson.D `bson:"options"`
Info bson.M `bson:"info"`
}

Expand Down
6 changes: 3 additions & 3 deletions common/intents/intent.go
Expand Up @@ -59,7 +59,7 @@ type Intent struct {
MetadataLocation string

// Collection options
Options bson.M
Options bson.D

// UUID (for MongoDB 3.6+) as a big-endian hex string
UUID string
Expand Down Expand Up @@ -170,8 +170,8 @@ func (it *Intent) HasSimpleCollation() bool {
if it == nil || it.Options == nil {
return true
}
collation, ok := it.Options["collation"].(bson.D)
if !ok {
collation, err := bsonutil.FindSubdocumentByKey("collation", &it.Options)
if err != nil {
return true
}
localeValue, _ := bsonutil.FindValueByKey("locale", &collation)
Expand Down
4 changes: 2 additions & 2 deletions common/intents/intent_prioritizer_test.go
Expand Up @@ -124,13 +124,13 @@ func TestBySizeAndView(t *testing.T) {
intents := []*Intent{
{C: "non-view2", Size: 32},
{C: "view", Size: 0,
Options: bson.M{"viewOn": true},
Options: bson.D{{Key: "viewOn", Value: true}},
Type: "view",
},
{C: "non-view1", Size: 1024},
{C: "non-view3", Size: 2},
{C: "view", Size: 0,
Options: bson.M{"viewOn": true},
Options: bson.D{{Key: "viewOn", Value: true}},
Type: "view",
},
}
Expand Down
2 changes: 1 addition & 1 deletion mongodump/metadata_dump.go
Expand Up @@ -19,7 +19,7 @@ import (

// Metadata holds information about a collection's options and indexes.
type Metadata struct {
Options bson.M `bson:"options,omitempty"`
Options bson.D `bson:"options,omitempty"`
Indexes []bson.D `bson:"indexes"`
UUID string `bson:"uuid,omitempty"`
CollectionName string `bson:"collectionName"`
Expand Down
16 changes: 11 additions & 5 deletions mongodump/mongodump.go
Expand Up @@ -13,13 +13,15 @@ import (

"github.com/mongodb/mongo-tools/common/archive"
"github.com/mongodb/mongo-tools/common/auth"
"github.com/mongodb/mongo-tools/common/bsonutil"
"github.com/mongodb/mongo-tools/common/db"
"github.com/mongodb/mongo-tools/common/failpoint"
"github.com/mongodb/mongo-tools/common/intents"
"github.com/mongodb/mongo-tools/common/log"
"github.com/mongodb/mongo-tools/common/options"
"github.com/mongodb/mongo-tools/common/progress"
"github.com/mongodb/mongo-tools/common/util"
"github.com/pkg/errors"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
"go.mongodb.org/mongo-driver/mongo"
Expand Down Expand Up @@ -585,9 +587,13 @@ func (dump *MongoDump) DumpIntent(intent *intents.Intent, buffer resettableOutpu
switch {
case len(dump.query) > 0:
if intent.IsTimeseries() {
metaKey, ok := intent.Options["timeseries"].(bson.M)["metaField"].(string)
if !ok {
return fmt.Errorf("could not determine the metaField for %s", intent.Namespace())
timeseriesOptions, err := bsonutil.FindSubdocumentByKey("timeseries", &intent.Options)
if err != nil {
return errors.Wrapf(err, "could not find timeseries options for %s", intent.Namespace())
}
metaKey, err := bsonutil.FindStringValueByKey("metaField", &timeseriesOptions)
if err != nil {
return errors.Wrapf(err, "could not determine the metaField for %s", intent.Namespace())
}
for i, predicate := range dump.query {
splitPredicateKey := strings.SplitN(predicate.Key, ".", 2)
Expand All @@ -608,8 +614,8 @@ func (dump *MongoDump) DumpIntent(intent *intents.Intent, buffer resettableOutpu
// special collection, the oplog, and the user is not asking to force table scans.
case dump.storageEngine == storageEngineMMAPV1 && !dump.InputOptions.TableScan &&
!isView && !intent.IsSpecialCollection() && !intent.IsOplog():
autoIndexId, found := intent.Options["autoIndexId"]
if !found || autoIndexId == true {
autoIndexId, err := bsonutil.FindValueByKey("autoIndexId", &intent.Options)
if err != nil || autoIndexId == true {
findQuery.Hint = bson.D{{"_id", 1}}
}
}
Expand Down
106 changes: 105 additions & 1 deletion mongodump/mongodump_test.go
Expand Up @@ -12,7 +12,7 @@ import (
"crypto/sha1"
"encoding/base64"
"fmt"
"github.com/stretchr/testify/require"
"io"
"io/ioutil"
"math/rand"
"os"
Expand All @@ -22,6 +22,9 @@ import (
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/mongodb/mongo-tools/common/archive"
"github.com/mongodb/mongo-tools/common/bsonutil"
"github.com/mongodb/mongo-tools/common/db"
Expand Down Expand Up @@ -109,6 +112,16 @@ func countNonIndexBSONFiles(dir string) (int, error) {
return len(files), nil
}

func assertBSONEqual(t *testing.T, expected, actual any) {
expectedJSON, err := bson.MarshalExtJSONIndent(expected, false, false, "", " ")
require.NoError(t, err)

actualJSON, err := bson.MarshalExtJSONIndent(actual, false, false, "", " ")
require.NoError(t, err)

assert.Equal(t, string(expectedJSON), string(actualJSON))
}

func listNonIndexBSONFiles(dir string) ([]string, error) {
var files []string
matchingFiles, err := getMatchingFiles(dir, ".*\\.bson")
Expand Down Expand Up @@ -2091,3 +2104,94 @@ func TestMongoDumpColumnstoreIndexes(t *testing.T) {
require.Equal(t, count, 1)
}
}

func TestOptionsOrderIsPreserved(t *testing.T) {
testtype.SkipUnlessTestType(t, testtype.IntegrationTestType)
log.SetWriter(io.Discard)

sessionProvider, _, err := testutil.GetBareSessionProvider()
require.NoError(t, err)

collName := "students"
viewName := "studentsView"

pipeline := bson.A{
bson.D{{Key: "$group", Value: bson.D{
{Key: "_id", Value: bson.D{
{Key: "year", Value: "$year"},
{Key: "name", Value: "$name"},
}},
{Key: "highest", Value: bson.D{
{Key: "$max", Value: "$score"},
}},
}}},
bson.D{{Key: "$project", Value: bson.D{
{Key: "_id", Value: 0},
}}},
bson.D{{Key: "$sort", Value: bson.D{
{Key: "year", Value: 1},
{Key: "sID", Value: -1},
{Key: "name", Value: 1},
{Key: "score", Value: 1},
}}},
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that the original bug of reordering is probabilistic, can we shuffle the pipeline in this test to test different orderings?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can make the test reliably fail when there is an issue by just running the body of the test in a loop, which might be a bit easier than trying to shuffle the bson? What do you think about that?

createViewCmd := bson.D{
{"create", viewName},
{"viewOn", collName},
{"pipeline", pipeline},
}

var result bson.D
err = sessionProvider.Run(createViewCmd, &result, testDB)
require.NoError(t, err)

// The check should be run a few times due to the probablistic nature
// of TOOLS-3411
for i := 0; i < 10; i++ {
dumpAndCheckPipelineOrder(t, collName, pipeline)
}

tearDownMongoDumpTestData()
}

func dumpAndCheckPipelineOrder(t *testing.T, collName string, pipeline bson.A) {
md := simpleMongoDumpInstance()

md.ToolOptions.Namespace.DB = testDB
md.OutputOptions.Out = "dump"

require.NoError(t, md.Init())
require.NoError(t, md.Dump())

path, err := os.Getwd()
require.NoError(t, err)

dumpDir := util.ToUniversalPath(filepath.Join(path, "dump"))
dumpDBDir := util.ToUniversalPath(filepath.Join(dumpDir, testDB))
require.True(t, fileDirExists(dumpDir))
require.True(t, fileDirExists(dumpDBDir))

metaFiles, err := getMatchingFiles(dumpDBDir, "studentsView\\.metadata\\.json")
require.NoError(t, err)
require.Equal(t, len(metaFiles), 1)

metaFile, err := os.Open(util.ToUniversalPath(filepath.Join(dumpDBDir, metaFiles[0])))

require.NoError(t, err)
contents, err := io.ReadAll(metaFile)

var bsonResult bson.D
err = bson.UnmarshalExtJSON(contents, true, &bsonResult)
require.NoError(t, err)
options, err := bsonutil.FindSubdocumentByKey("options", &bsonResult)
require.NoError(t, err)

assertBSONEqual(t, options, bson.D{
{Key: "viewOn", Value: collName},
{Key: "pipeline", Value: pipeline},
})

os.RemoveAll(dumpDir)
metaFile.Close()
}
5 changes: 3 additions & 2 deletions mongodump/prepare.go
Expand Up @@ -18,6 +18,7 @@ import (
"strings"

"github.com/mongodb/mongo-tools/common/archive"
"github.com/mongodb/mongo-tools/common/bsonutil"
"github.com/mongodb/mongo-tools/common/db"
"github.com/mongodb/mongo-tools/common/dumprestore"
"github.com/mongodb/mongo-tools/common/intents"
Expand Down Expand Up @@ -354,8 +355,8 @@ func (dump *MongoDump) NewIntentFromOptions(dbName string, ci *db.CollectionInfo
}

if dump.OutputOptions.ViewsAsCollections && ci.IsView() {
delete(intent.Options, "viewOn")
delete(intent.Options, "pipeline")
bsonutil.RemoveKey("viewOn", &intent.Options)
bsonutil.RemoveKey("pipeline", &intent.Options)
}
//Set the MetadataFile path.
if dump.OutputOptions.Archive != "" {
Expand Down
4 changes: 2 additions & 2 deletions mongoexport/mongoexport.go
Expand Up @@ -353,8 +353,8 @@ func (exp *MongoExport) getCursor() (*mongo.Cursor, error) {
!exp.collInfo.IsView() && !exp.collInfo.IsSystemCollection() {

// Don't hint autoIndexId:false collections
autoIndexId, found := exp.collInfo.Options["autoIndexId"]
if !found || autoIndexId == true {
autoIndexId, err := bsonutil.FindValueByKey("autoIndexId", &exp.collInfo.Options)
if err != nil || autoIndexId == true {
findOpts.SetHint(bson.D{{"_id", 1}})
}
}
Expand Down
10 changes: 6 additions & 4 deletions mongorestore/restore.go
Expand Up @@ -195,13 +195,14 @@ func (restore *MongoRestore) PopulateMetadataForIntents() error {
return fmt.Errorf("error parsing metadata from %v: %v", intent.MetadataLocation, err)
}
if metadata != nil {
intent.Options = metadata.Options.Map()
intent.Options = metadata.Options

for _, indexDefinition := range metadata.Indexes {
restore.indexCatalog.AddIndex(intent.DB, intent.C, indexDefinition)
}

if _, ok := intent.Options["timeseries"]; ok {
_, err := bsonutil.FindValueByKey("timeseries", &intent.Options)
if err == nil {
intent.Type = "timeseries"
}

Expand Down Expand Up @@ -323,13 +324,14 @@ func (restore *MongoRestore) RestoreIntent(intent *intents.Intent) Result {

// first create the collection with options from the metadata file
uuid := intent.UUID
options := bsonutil.MtoD(intent.Options)
options := intent.Options
if len(options) == 0 {
logMessageSuffix = "with no metadata"
}

var isClustered bool
if v := intent.Options["clusteredIndex"]; v != nil {
clusteredIndex, err := bsonutil.FindValueByKey("clusteredIndex", &options)
if err == nil && clusteredIndex != nil {
isClustered = true
}

Expand Down