REP-6766 Compare documents using a batch cursor #148
Changes from all commits
f1674d2
ae55af2
fd568f0
e6f34e2
e7841f4
dc91802
77de934
945ada3
1e8c16f
dfcdc05
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"iter"
 	"time"

 	"github.com/10gen/migration-verifier/chanutil"
@@ -12,6 +13,7 @@ import (
 	"github.com/10gen/migration-verifier/internal/retry"
 	"github.com/10gen/migration-verifier/internal/types"
 	"github.com/10gen/migration-verifier/internal/util"
+	"github.com/10gen/migration-verifier/mmongo/cursor"
 	"github.com/10gen/migration-verifier/option"
 	"github.com/pkg/errors"
 	"go.mongodb.org/mongo-driver/v2/bson"
@@ -30,6 +32,11 @@ const (
 	docKeyInHashedCompare = "k"
 )

+type seqWithTs struct {
+	seq iter.Seq2[bson.Raw, error]
+	ts  bson.Timestamp
+}
+
 type docWithTs struct {
 	doc bson.Raw
 	ts  bson.Timestamp
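A note on the new seqWithTs payload: iter.Seq2 is the Go 1.23 standard-library iterator shape, a function that pushes (document, error) pairs into a yield callback. Each channel send now hands the compare worker an iterator over a whole cursor batch, plus that batch's cluster time, instead of one document per send. Below is a minimal, self-contained sketch of the consumption pattern; docsAsSeq and the main harness are illustrative stand-ins, not code from this PR.

package main

import (
    "fmt"
    "iter"

    "go.mongodb.org/mongo-driver/v2/bson"
)

// docsAsSeq wraps an in-memory slice of raw documents as an iterator,
// standing in for what a batch cursor would hand to the compare worker.
func docsAsSeq(docs []bson.Raw) iter.Seq2[bson.Raw, error] {
    return func(yield func(bson.Raw, error) bool) {
        for _, d := range docs {
            if !yield(d, nil) {
                return // consumer broke out of its range loop early
            }
        }
    }
}

func main() {
    raw, _ := bson.Marshal(bson.D{{"_id", 1}, {"x", "hello"}})

    for doc, err := range docsAsSeq([]bson.Raw{raw}) {
        if err != nil {
            fmt.Println("batch error:", err) // the real code wraps and returns this
            break
        }
        fmt.Println("doc ID:", doc.Lookup("_id"))
    }
}

Returning or breaking out of such a range loop makes yield return false, which is how the comparison loops added further down can bail out on the first error without draining the rest of the batch.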
@@ -45,7 +52,7 @@ func (verifier *Verifier) FetchAndCompareDocuments(
 	types.ByteCount,
 	error,
 ) {
-	var srcChannel, dstChannel <-chan docWithTs
+	var srcChannel, dstChannel <-chan seqWithTs
 	var readSrcCallback, readDstCallback func(context.Context, *retry.FuncInfo) error

 	results := []VerificationResult{}
@@ -100,7 +107,7 @@ func (verifier *Verifier) compareDocsFromChannels(
 	workerNum int,
 	fi *retry.FuncInfo,
 	task *VerificationTask,
-	srcChannel, dstChannel <-chan docWithTs,
+	srcChannel, dstChannel <-chan seqWithTs,
 ) (
 	[]VerificationResult,
 	types.DocumentCount,
@@ -204,7 +211,7 @@ func (verifier *Verifier) compareDocsFromChannels(
 	for !srcClosed || !dstClosed {
 		simpleTimerReset(readTimer, readTimeout)

-		var srcDocWithTs, dstDocWithTs docWithTs
+		var srcDocsWithTs, dstDocsWithTs seqWithTs

 		eg, egCtx := contextplus.ErrGroup(ctx)
@@ -219,21 +226,13 @@ func (verifier *Verifier) compareDocsFromChannels(
 				"failed to read from source after %s",
 				readTimeout,
 			)
-		case srcDocWithTs, alive = <-srcChannel:
+		case srcDocsWithTs, alive = <-srcChannel:
 			if !alive {
 				srcClosed = true
 				break
 			}

 			fi.NoteSuccess("received document from source")
-
-			srcDocCount++
-			srcByteCount += types.ByteCount(len(srcDocWithTs.doc))
-			verifier.workerTracker.SetSrcCounts(
-				workerNum,
-				srcDocCount,
-				srcByteCount,
-			)
 		}

 		return nil
@@ -251,7 +250,7 @@ func (verifier *Verifier) compareDocsFromChannels(
 				"failed to read from destination after %s",
 				readTimeout,
 			)
-		case dstDocWithTs, alive = <-dstChannel:
+		case dstDocsWithTs, alive = <-dstChannel:
 			if !alive {
 				dstClosed = true
 				break
@@ -271,32 +270,72 @@ func (verifier *Verifier) compareDocsFromChannels(
 			)
 		}

-		if srcDocWithTs.doc != nil {
-			err := handleNewDoc(srcDocWithTs, true)
-
-			if err != nil {
-				return nil, 0, 0, errors.Wrapf(
-					err,
-					"comparer thread failed to handle %#q's source doc (task: %s) with ID %v",
-					namespace,
-					task.PrimaryKey,
-					srcDocWithTs.doc.Lookup("_id"),
-				)
-			}
-		}
-
-		if dstDocWithTs.doc != nil {
-			err := handleNewDoc(dstDocWithTs, false)
-
-			if err != nil {
-				return nil, 0, 0, errors.Wrapf(
-					err,
-					"comparer thread failed to handle %#q's destination doc (task: %s) with ID %v",
-					namespace,
-					task.PrimaryKey,
-					dstDocWithTs.doc.Lookup("_id"),
-				)
-			}
-		}
+		if srcDocsWithTs.seq != nil {
+			for doc, err := range srcDocsWithTs.seq {
+				if err != nil {
+					return nil, 0, 0, errors.Wrapf(
+						err,
+						"reading batch of docs from source (task: %s)",
+						task.PrimaryKey,
+					)
+				}
+
+				srcDocCount++
+				srcByteCount += types.ByteCount(len(doc))
+				verifier.workerTracker.SetSrcCounts(
+					workerNum,
+					srcDocCount,
+					srcByteCount,
+				)
+
+				err := handleNewDoc(
+					docWithTs{
+						doc: doc,
+						ts:  srcDocsWithTs.ts,
+					},
+					true,
+				)
+
+				if err != nil {
+					return nil, 0, 0, errors.Wrapf(
+						err,
+						"comparer thread failed to handle %#q's source doc (task: %s) with ID %v",
+						namespace,
+						task.PrimaryKey,
+						doc.Lookup("_id"),
+					)
+				}
+			}
+		}
+
+		if dstDocsWithTs.seq != nil {
+			for doc, err := range dstDocsWithTs.seq {
+				if err != nil {
+					return nil, 0, 0, errors.Wrapf(
+						err,
+						"reading batch of docs from destination (task: %s)",
+						task.PrimaryKey,
+					)
+				}
+
+				err := handleNewDoc(
+					docWithTs{
+						doc: doc,
+						ts:  dstDocsWithTs.ts,
+					},
+					false,
+				)
+
+				if err != nil {
+					return nil, 0, 0, errors.Wrapf(
+						err,
+						"comparer thread failed to handle %#q's destination doc (task: %s) with ID %v",
+						namespace,
+						task.PrimaryKey,
+						doc.Lookup("_id"),
+					)
+				}
+			}
+		}
@@ -427,13 +466,13 @@ func simpleTimerReset(t *time.Timer, dur time.Duration) {
 func (verifier *Verifier) getFetcherChannelsAndCallbacks(
 	task *VerificationTask,
 ) (
-	<-chan docWithTs,
-	<-chan docWithTs,
+	<-chan seqWithTs,
+	<-chan seqWithTs,
 	func(context.Context, *retry.FuncInfo) error,
 	func(context.Context, *retry.FuncInfo) error,
 ) {
-	srcChannel := make(chan docWithTs)
-	dstChannel := make(chan docWithTs)
+	srcChannel := make(chan seqWithTs)
+	dstChannel := make(chan seqWithTs)

 	readSrcCallback := func(ctx context.Context, state *retry.FuncInfo) error {
 		// We open a session here so that we can read the session’s cluster
@@ -510,38 +549,44 @@ func (verifier *Verifier) getFetcherChannelsAndCallbacks(
 }

 func iterateCursorToChannel(
-	sctx context.Context,
+	ctx context.Context,
 	state *retry.FuncInfo,
-	cursor *mongo.Cursor,
-	writer chan<- docWithTs,
+	myCursor *cursor.BatchCursor,
+	writer chan<- seqWithTs,
 ) error {
 	defer close(writer)

-	sess := mongo.SessionFromContext(sctx)
-
-	for cursor.Next(sctx) {
+	for {
+		seq := myCursor.GetCurrentBatchIterator()
+
 		state.NoteSuccess("received a document")

-		clusterTime, err := util.GetClusterTimeFromSession(sess)
+		ct, err := myCursor.GetClusterTime()
 		if err != nil {
-			return errors.Wrap(err, "reading cluster time from session")
+			return errors.Wrap(err, "reading cluster time from batch")
 		}

 		err = chanutil.WriteWithDoneCheck(
-			sctx,
+			ctx,
 			writer,
-			docWithTs{
-				doc: slices.Clone(cursor.Current),
-				ts:  clusterTime,
+			seqWithTs{
+				seq: seq,
+				ts:  ct,
 			},
 		)

 		if err != nil {
-			return errors.Wrapf(err, "sending document to compare thread")
+			return errors.Wrapf(err, "sending iterator to compare thread")
 		}
-	}

-	return errors.Wrap(cursor.Err(), "failed to iterate cursor")
+		if myCursor.IsFinished() {
+			return nil
+		}
+
+		if err := myCursor.GetNext(ctx); err != nil {
+			return errors.Wrap(err, "failed to iterate cursor")
+		}
+	}
 }

 func getMapKey(docKeyValues []bson.RawValue) string {
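The BatchCursor methods used above (GetCurrentBatchIterator, GetClusterTime, IsFinished, GetNext) come from the new mmongo/cursor package, whose source is not part of this diff. As a rough mental model only, and not the package's actual implementation, exposing one firstBatch/nextBatch array as an iter.Seq2 could look like the hypothetical batchSeq below.

package main

import (
    "fmt"
    "iter"

    "go.mongodb.org/mongo-driver/v2/bson"
)

// batchSeq (hypothetical) turns one cursor batch, i.e. the firstBatch/nextBatch
// array of a find/getMore reply, into an iterator that yields each document or
// a decoding error.
func batchSeq(batch bson.Raw) iter.Seq2[bson.Raw, error] {
    return func(yield func(bson.Raw, error) bool) {
        vals, err := batch.Values()
        if err != nil {
            yield(nil, err)
            return
        }

        for _, v := range vals {
            doc, ok := v.DocumentOK()
            if !ok {
                if !yield(nil, fmt.Errorf("unexpected BSON type %v in batch", v.Type)) {
                    return
                }
                continue
            }

            if !yield(doc, nil) {
                return // consumer stopped early
            }
        }
    }
}

func main() {
    // Build a fake two-document batch. A BSON array shares the document byte
    // layout (keys "0", "1", ...), so its bytes can be read as a bson.Raw.
    wrapper, _ := bson.Marshal(bson.D{{"batch", bson.A{
        bson.D{{"_id", 1}},
        bson.D{{"_id", 2}},
    }}})
    batch := bson.Raw(bson.Raw(wrapper).Lookup("batch").Value)

    for doc, err := range batchSeq(batch) {
        if err != nil {
            fmt.Println("error:", err)
            break
        }
        fmt.Println(doc.Lookup("_id"))
    }
}

Whatever the real implementation looks like, the effect on this file is that the fetcher goroutine performs one channel send per server reply and the compare worker decodes documents while ranging, rather than paying a per-document channel send and a slices.Clone as the old code did.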
@@ -555,8 +600,13 @@ func getMapKey(docKeyValues []bson.RawValue) string {
 	return keyBuffer.String()
 }

-func (verifier *Verifier) getDocumentsCursor(ctx context.Context, collection *mongo.Collection, clusterInfo *util.ClusterInfo,
-	startAtTs *bson.Timestamp, task *VerificationTask) (*mongo.Cursor, error) {
+func (verifier *Verifier) getDocumentsCursor(
+	ctx context.Context,
+	collection *mongo.Collection,
+	clusterInfo *util.ClusterInfo,
+	startAtTs *bson.Timestamp,
+	task *VerificationTask,
+) (*cursor.BatchCursor, error) {
 	var findOptions bson.D
 	runCommandOptions := options.RunCmd()
 	var andPredicates bson.A
@@ -673,7 +723,16 @@ func (verifier *Verifier) getDocumentsCursor(ctx context.Context, collection *mongo.Collection, clusterInfo *util.ClusterInfo,
 		}
 	}

-	return collection.Database().RunCommandCursor(ctx, cmd, runCommandOptions)
+	c, err := cursor.New(
+		collection.Database(),
+		collection.Database().RunCommand(ctx, cmd, runCommandOptions),
+	)
+
+	if err == nil {
+		c.SetSession(mongo.SessionFromContext(ctx))
+	}
Comment on lines +731 to +733

Collaborator:
Is there a reason why

Collaborator (author):
I had it that way for a bit, but then we’d need 2 constructors (one with a session & the other without).

Collaborator (author):
I’m starting to wonder, though, if it should always require a session. The driver does some funny-business under the hood to ensure that cursors always send the same LSID as the initial request.
+
+	return c, err
 }

 func transformPipelineForToHashedIndexKey(
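On the session question in the review thread above: a session-taking constructor (or caller-side wrapper) would look roughly like the sketch below. Everything in it is hypothetical; newCursorWithSession is not part of this PR, and the parameter types are only inferred from the cursor.New and SetSession call sites visible in the diff.

package verifier // illustrative placement; not part of this PR

import (
    "github.com/10gen/migration-verifier/mmongo/cursor"
    "go.mongodb.org/mongo-driver/v2/mongo"
)

// newCursorWithSession shows the alternative the thread discusses: take the
// session up front instead of calling SetSession afterward. A nil session
// simply skips the attachment.
func newCursorWithSession(
    db *mongo.Database,
    res *mongo.SingleResult,
    sess *mongo.Session,
) (*cursor.BatchCursor, error) {
    c, err := cursor.New(db, res)
    if err != nil {
        return nil, err
    }

    if sess != nil {
        c.SetSession(sess)
    }

    return c, nil
}

With something like this, getDocumentsCursor could pass mongo.SessionFromContext(ctx) directly and drop its err == nil branch; whether that is worth a second constructor is exactly the trade-off the thread is weighing.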