Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

SERVER-5477 DocumentSourceGroup now uses the shardKey to check if it should regroup on the router. #294

Closed
wants to merge 1 commit into from

2 participants

Samuel García Martínez alerner
Samuel García Martínez

SERVER-5477 DocumentSourceGroup now uses the shardKey to check if it should regroup on the router.

SERVER-5477 DocumentSourceGroup now uses the shardKey to check if
it should regroup on the router.
9155866
alerner

Thank you for your contribution and sorry for the late feedback.

The router (mongos) and the aggregation framework aspects that concern
it are undergoing a few changes so the code region this patches
touches is a bit dynamic. In particular we're reviewing the
dependencies/interactions between these two modules. We'd prefer to
change code once that revision is complete.

alerner alerner closed this September 17, 2012
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Showing 1 unique commit by 1 author.

Aug 29, 2012
SERVER-5477 DocumentSourceGroup now uses the shardKey to check if
it should regroup on the router.
9155866
This page is out of date. Refresh to see the latest.
8  src/mongo/db/commands/pipeline_command.cpp
@@ -229,7 +229,7 @@ namespace mongo {
229 229
 
230 230
         /* on the shard servers, create the local pipeline */
231 231
         intrusive_ptr<ExpressionContext> pShardCtx(
232  
-            ExpressionContext::create(&InterruptStatusMongod::status));
  232
+            ExpressionContext::create((BSONObj*)NULL, &InterruptStatusMongod::status));
233 233
         intrusive_ptr<Pipeline> pShardPipeline(
234 234
             Pipeline::parseCommand(errmsg, shardBson, pShardCtx));
235 235
         if (!pShardPipeline.get()) {
@@ -272,8 +272,10 @@ namespace mongo {
272 272
                               BSONObjBuilder &result, bool fromRepl) {
273 273
         // PRINT(cmdObj); // uncomment when debugging
274 274
 
  275
+        string ns(parseNs(db, cmdObj));
  276
+
275 277
         intrusive_ptr<ExpressionContext> pCtx(
276  
-            ExpressionContext::create(&InterruptStatusMongod::status));
  278
+            ExpressionContext::create((BSONObj*)NULL, &InterruptStatusMongod::status));
277 279
 
278 280
         /* try to parse the command; if this fails, then we didn't run */
279 281
         intrusive_ptr<Pipeline> pPipeline(
@@ -281,8 +283,6 @@ namespace mongo {
281 283
         if (!pPipeline.get())
282 284
             return false;
283 285
 
284  
-        string ns(parseNs(db, cmdObj));
285  
-
286 286
         if (pPipeline->isExplain())
287 287
             return runExplain(result, errmsg, ns, db, pPipeline, pCtx);
288 288
         else
43  src/mongo/db/pipeline/document_source_group.cpp
@@ -24,6 +24,7 @@
24 24
 #include "db/pipeline/expression.h"
25 25
 #include "db/pipeline/expression_context.h"
26 26
 #include "db/pipeline/value.h"
  27
+#include "mongo/s/grid.h"
27 28
 
28 29
 namespace mongo {
29 30
     const char DocumentSourceGroup::groupName[] = "$group";
@@ -202,6 +203,7 @@ namespace mongo {
202 203
                         Expression::removeFieldPrefix(groupString));
203 204
                     intrusive_ptr<ExpressionFieldPath> pFieldPath(
204 205
                         ExpressionFieldPath::create(pathString));
  206
+
205 207
                     pGroup->setIdExpression(pFieldPath);
206 208
                     idSet = true;
207 209
                 }
@@ -394,6 +396,47 @@ namespace mongo {
394 396
     }
395 397
 
396 398
     intrusive_ptr<DocumentSource> DocumentSourceGroup::getRouterSource() {
  399
+    	BSONObj* shardKey(pExpCtx->getShardKey());
  400
+
  401
+    	if(shardKey) {
  402
+    		ExpressionObject* objExpression(dynamic_cast<ExpressionObject *>(pIdExpression.get()));
  403
+
  404
+    		if(objExpression) {
  405
+    			BSONObjBuilder objBuilder;
  406
+    			objExpression->documentToBson(&objBuilder, true);
  407
+    			BSONObj groupId = objBuilder.done();
  408
+
  409
+    			//group _id must meet ((shardKey UNION groupId) EQ groupId) so, if groupId has lower fields than the shardKey
  410
+    			//we already know that groupId doesn't meet the requirements.
  411
+    			if(groupId.nFields() >= shardKey->nFields()) {
  412
+					BSONObjIterator iter(groupId);
  413
+					int fieldCount = 0;
  414
+					//group key must be equal that shard key or be a super set which contains the shard key, never can be a subset of it
  415
+					while(iter.more() && fieldCount < shardKey->nFields()) {
  416
+						BSONElement fieldElement(iter.next());
  417
+
  418
+						//there is no need to validate type, since its already done by the Expression parsing process.
  419
+						if(shardKey->hasField(Expression::removeFieldPrefix(fieldElement.String()).c_str())) {
  420
+							fieldCount++;
  421
+						}
  422
+					}
  423
+
  424
+					if(fieldCount == shardKey->nFields()) {
  425
+						return NULL;
  426
+					}
  427
+    			}
  428
+    		}
  429
+    		else {
  430
+    			ExpressionFieldPath* fieldExpression(dynamic_cast<ExpressionFieldPath *>(pIdExpression.get()));
  431
+    			//field path expression can only references one field.
  432
+    			//group _id must meet ((shardKey UNION groupId) EQ groupId)
  433
+    			if(fieldExpression && shardKey->nFields() == 1 &&
  434
+    					shardKey->hasField(fieldExpression->getFieldPath(false).c_str())) {
  435
+					return NULL;
  436
+    			}
  437
+    		}
  438
+    	}
  439
+
397 440
         intrusive_ptr<ExpressionContext> pMergerExpCtx = pExpCtx->clone();
398 441
         pMergerExpCtx->setDoingMerge(true);
399 442
         intrusive_ptr<DocumentSourceGroup> pMerger(DocumentSourceGroup::create(pMergerExpCtx));
9  src/mongo/db/pipeline/expression_context.cpp
@@ -24,11 +24,12 @@ namespace mongo {
24 24
     ExpressionContext::~ExpressionContext() {
25 25
     }
26 26
 
27  
-    inline ExpressionContext::ExpressionContext(InterruptStatus *pS):
  27
+    inline ExpressionContext::ExpressionContext(BSONObj* shardKeyObj, InterruptStatus *pS):
28 28
         doingMerge(false),
29 29
         inShard(false),
30 30
         inRouter(false),
31 31
         intCheckCounter(1),
  32
+        shardKey(shardKeyObj),
32 33
         pStatus(pS) {
33 34
     }
34 35
 
@@ -43,15 +44,15 @@ namespace mongo {
43 44
     }
44 45
 
45 46
     ExpressionContext* ExpressionContext::clone() {
46  
-        ExpressionContext* newContext = create(pStatus);
  47
+        ExpressionContext* newContext = create(getShardKey(), pStatus);
47 48
         newContext->setDoingMerge(getDoingMerge());
48 49
         newContext->setInShard(getInShard());
49 50
         newContext->setInRouter(getInRouter());
50 51
         return newContext;
51 52
     }
52 53
 
53  
-    ExpressionContext *ExpressionContext::create(InterruptStatus *pStatus) {
54  
-        return new ExpressionContext(pStatus);
  54
+    ExpressionContext *ExpressionContext::create(BSONObj* shardKey, InterruptStatus *pStatus) {
  55
+        return new ExpressionContext(shardKey, pStatus);
55 56
     }
56 57
 
57 58
 }
13  src/mongo/db/pipeline/expression_context.h
@@ -17,8 +17,8 @@
17 17
 #pragma once
18 18
 
19 19
 #include "mongo/pch.h"
20  
-
21 20
 #include "util/intrusive_counter.h"
  21
+#include "bson/bsonobj.h"
22 22
 
23 23
 namespace mongo {
24 24
 
@@ -32,10 +32,12 @@ namespace mongo {
32 32
         void setDoingMerge(bool b);
33 33
         void setInShard(bool b);
34 34
         void setInRouter(bool b);
  35
+        void setShardKey(BSONObj* shardKey);
35 36
 
36 37
         bool getDoingMerge() const;
37 38
         bool getInShard() const;
38 39
         bool getInRouter() const;
  40
+        BSONObj* getShardKey();
39 41
 
40 42
         /**
41 43
            Used by a pipeline to check for interrupts so that killOp() works.
@@ -46,15 +48,16 @@ namespace mongo {
46 48
 
47 49
         ExpressionContext* clone();
48 50
 
49  
-        static ExpressionContext *create(InterruptStatus *pStatus);
  51
+        static ExpressionContext *create(BSONObj* shardKey, InterruptStatus *pStatus);
50 52
 
51 53
     private:
52  
-        ExpressionContext(InterruptStatus *pStatus);
  54
+        ExpressionContext(BSONObj* shardKey, InterruptStatus *pStatus);
53 55
         
54 56
         bool doingMerge;
55 57
         bool inShard;
56 58
         bool inRouter;
57 59
         unsigned intCheckCounter; // interrupt check counter
  60
+        BSONObj* shardKey;
58 61
         InterruptStatus *const pStatus;
59 62
     };
60 63
 }
@@ -88,4 +91,8 @@ namespace mongo {
88 91
         return inRouter;
89 92
     }
90 93
 
  94
+    inline BSONObj* ExpressionContext::getShardKey() {
  95
+		return shardKey;
  96
+	}
  97
+
91 98
 };
9  src/mongo/dbtests/accumulatortests.cpp
@@ -28,12 +28,15 @@
28 28
 
29 29
 namespace AccumulatorTests {
30 30
 
  31
+	static const char* const ns = "unittests";
  32
+	static BSONObj* shardKey = NULL;
  33
+
31 34
     class Base {
32 35
     public:
33 36
         Base() :
34  
-            _standalone( ExpressionContext::create( &InterruptStatusMongod::status ) ),
35  
-            _shard( ExpressionContext::create( &InterruptStatusMongod::status ) ),
36  
-            _router( ExpressionContext::create( &InterruptStatusMongod::status ) ) {
  37
+            _standalone( ExpressionContext::create(shardKey, &InterruptStatusMongod::status ) ),
  38
+            _shard( ExpressionContext::create(shardKey, &InterruptStatusMongod::status ) ),
  39
+            _router( ExpressionContext::create(shardKey, &InterruptStatusMongod::status ) ) {
37 40
                 _standalone->setInShard( false );
38 41
                 _standalone->setDoingMerge( false );
39 42
                 _shard->setInShard( true );
175  src/mongo/dbtests/documentsourcetests.cpp
@@ -29,6 +29,7 @@
29 29
 namespace DocumentSourceTests {
30 30
 
31 31
     static const char* const ns = "unittests.documentsourcetests";
  32
+    static BSONObj* shardKey = NULL;
32 33
     static DBDirectClient client;
33 34
 
34 35
     BSONObj toBson( const intrusive_ptr<DocumentSource>& source ) {
@@ -52,7 +53,7 @@ namespace DocumentSourceTests {
52 53
         public:
53 54
             Base() :
54 55
                 CollectionBase(),
55  
-                _ctx( ExpressionContext::create( &InterruptStatusMongod::status ) ) {
  56
+                _ctx( ExpressionContext::create(shardKey, &InterruptStatusMongod::status ) ) {
56 57
             }
57 58
         protected:
58 59
             void createSource() {
@@ -322,16 +323,19 @@ namespace DocumentSourceTests {
322 323
         class Base : public DocumentSourceCursor::Base {
323 324
         protected:
324 325
             void createGroup( const BSONObj &spec, bool inShard = false ) {
325  
-                BSONObj namedSpec = BSON( "$group" << spec );
326  
-                BSONElement specElement = namedSpec.firstElement();
327  
-                intrusive_ptr<ExpressionContext> expressionContext =
328  
-                        ExpressionContext::create( &InterruptStatusMongod::status );
329  
-                if ( inShard ) {
330  
-                    expressionContext->setInShard( true );
331  
-                }
332  
-                _group = DocumentSourceGroup::createFromBson( &specElement, expressionContext );
333  
-                assertRoundTrips( _group );
334  
-                _group->setSource( source() );
  326
+                createGroup(spec, (BSONObj *) NULL, inShard);
  327
+            }
  328
+            void createGroup(const BSONObj &spec, BSONObj* shardKey, bool inShard = false) {
  329
+            	BSONObj namedSpec = BSON( "$group" << spec );
  330
+				BSONElement specElement = namedSpec.firstElement();
  331
+				intrusive_ptr<ExpressionContext> expressionContext =
  332
+						ExpressionContext::create(shardKey, &InterruptStatusMongod::status );
  333
+				if ( inShard ) {
  334
+					expressionContext->setInShard( true );
  335
+				}
  336
+				_group = DocumentSourceGroup::createFromBson( &specElement, expressionContext );
  337
+				assertRoundTrips( _group );
  338
+				_group->setSource( source() );
335 339
             }
336 340
             DocumentSource* group() { return _group.get(); }
337 341
             /** Assert that iterator state accessors consistently report the source is exhausted. */
@@ -767,7 +771,148 @@ namespace DocumentSourceTests {
767 771
             virtual string expectedResultSetString() { return "[{_id:0}]"; }            
768 772
         };
769 773
 
770  
-        /** Simulate merging sharded results in the router. */ 
  774
+		class CheckSplittingBase : public Base {
  775
+		public:
  776
+			virtual ~CheckSplittingBase() {
  777
+			}
  778
+
  779
+			void run() {
  780
+				BSONObj shardingKey(shardKey());
  781
+				createGroup(groupSpec(), &shardingKey, true);
  782
+
  783
+				 // Check if this source is splittable
  784
+				SplittableDocumentSource* splittable =
  785
+					dynamic_cast<SplittableDocumentSource *>(group());
  786
+
  787
+				ASSERT_EQUALS((bool)splittable->getShardSource(), shouldSplitMongod());
  788
+				ASSERT_EQUALS((bool)splittable->getRouterSource(), shouldSplitRouter());
  789
+			}
  790
+		protected:
  791
+			virtual BSONObj shardKey() = 0;
  792
+
  793
+			virtual BSONObj groupSpec() = 0;
  794
+
  795
+			virtual bool shouldSplitMongod() = 0;
  796
+
  797
+			virtual bool shouldSplitRouter() = 0;
  798
+		};
  799
+
  800
+		class FieldPathEqualsShardKey : public CheckSplittingBase {
  801
+		protected:
  802
+			virtual BSONObj shardKey() {
  803
+				return fromjson("{'x.y': 1}");
  804
+			}
  805
+
  806
+			virtual BSONObj groupSpec() {
  807
+				return fromjson( "{ _id: '$x.y', count:{'$sum':1}}");
  808
+			}
  809
+
  810
+			virtual bool shouldSplitMongod() {
  811
+				return true;
  812
+			}
  813
+
  814
+			virtual bool shouldSplitRouter() {
  815
+				return false;
  816
+			}
  817
+ 		};
  818
+
  819
+		class FieldPathNotEqualsShardKey : public CheckSplittingBase {
  820
+		protected:
  821
+			virtual BSONObj shardKey() {
  822
+				return fromjson("{'x.y': 1}");
  823
+			}
  824
+
  825
+			virtual BSONObj groupSpec() {
  826
+				return fromjson( "{ _id: '$x.z', count:{'$sum':1}}");
  827
+			}
  828
+
  829
+			virtual bool shouldSplitMongod() {
  830
+				return true;
  831
+			}
  832
+
  833
+			virtual bool shouldSplitRouter() {
  834
+				return true;
  835
+			}
  836
+		};
  837
+
  838
+		class FieldPathShardKeyMultiple : public CheckSplittingBase {
  839
+		protected:
  840
+			virtual BSONObj shardKey() {
  841
+				return fromjson("{'x.y': 1, 'x.z': 1}");
  842
+			}
  843
+
  844
+			virtual BSONObj groupSpec() {
  845
+				return fromjson( "{ _id: '$x.z', count:{'$sum':1}}");
  846
+			}
  847
+
  848
+			virtual bool shouldSplitMongod() {
  849
+				return true;
  850
+			}
  851
+
  852
+			virtual bool shouldSplitRouter() {
  853
+				return true;
  854
+			}
  855
+		};
  856
+
  857
+		class NullGroupId: public CheckSplittingBase {
  858
+		protected:
  859
+			virtual BSONObj shardKey() {
  860
+				return fromjson("{'x.y': 1}");
  861
+			}
  862
+
  863
+			virtual BSONObj groupSpec() {
  864
+				return fromjson( "{ _id: null, count:{'$sum':1}}");
  865
+			}
  866
+
  867
+			virtual bool shouldSplitMongod() {
  868
+				return true;
  869
+			}
  870
+
  871
+			virtual bool shouldSplitRouter() {
  872
+				return true;
  873
+			}
  874
+		};
  875
+
  876
+		class ObjectGroupIdEqualsShardKey : public CheckSplittingBase {
  877
+		protected:
  878
+			virtual BSONObj shardKey() {
  879
+				return fromjson("{'x': 1, 'y': 1}");
  880
+			}
  881
+
  882
+			virtual BSONObj groupSpec() {
  883
+				return fromjson( "{ _id: {'key1':'$x', 'key2': '$y'}, count:{'$sum':1}}");
  884
+			}
  885
+
  886
+			virtual bool shouldSplitMongod() {
  887
+				return true;
  888
+			}
  889
+
  890
+			virtual bool shouldSplitRouter() {
  891
+				return false;
  892
+			}
  893
+		};
  894
+
  895
+		class ObjectGroupIdNotEqualsShardKey : public CheckSplittingBase {
  896
+		protected:
  897
+			virtual BSONObj shardKey() {
  898
+				return fromjson("{'z': 1, 'y': 1}");
  899
+			}
  900
+
  901
+			virtual BSONObj groupSpec() {
  902
+				return fromjson( "{ _id: {'key1':'$x', 'key2': '$y'}, count:{'$sum':1}}");
  903
+			}
  904
+
  905
+			virtual bool shouldSplitMongod() {
  906
+				return true;
  907
+			}
  908
+
  909
+			virtual bool shouldSplitRouter() {
  910
+				return true;
  911
+			}
  912
+		};
  913
+
  914
+
  915
+        /** Simulate merging sharded results in the router. */
771 916
         class RouterMerger : public CheckResultsBase {
772 917
         public:
773 918
             void run() {
@@ -1734,6 +1879,12 @@ namespace DocumentSourceTests {
1734 1879
             add<DocumentSourceGroup::GroupNullUndefinedIds>();
1735 1880
             //add<DocumentSourceGroup::ComplexId>(); uncomment after 6195
1736 1881
             add<DocumentSourceGroup::UndefinedAccumulatorValue>();
  1882
+            add<DocumentSourceGroup::FieldPathEqualsShardKey>();
  1883
+            add<DocumentSourceGroup::FieldPathNotEqualsShardKey>();
  1884
+            add<DocumentSourceGroup::FieldPathShardKeyMultiple>();
  1885
+            add<DocumentSourceGroup::NullGroupId>();
  1886
+            add<DocumentSourceGroup::ObjectGroupIdEqualsShardKey>();
  1887
+            add<DocumentSourceGroup::ObjectGroupIdNotEqualsShardKey>();
1737 1888
             add<DocumentSourceGroup::RouterMerger>();
1738 1889
             add<DocumentSourceGroup::Dependencies>();
1739 1890
             add<DocumentSourceGroup::StringConstantIdAndAccumulatorExpressions>();
20  src/mongo/s/commands_public.cpp
@@ -1564,8 +1564,21 @@ namespace mongo {
1564 1564
                                   BSONObjBuilder &result, bool fromRepl) {
1565 1565
             //const string shardedOutputCollection = getTmpName( collection );
1566 1566
 
1567  
-            intrusive_ptr<ExpressionContext> pExpCtx(
1568  
-                ExpressionContext::create(&InterruptStatusMongos::status));
  1567
+        	string fullns(parseNs(dbName, cmdObj));
  1568
+        	intrusive_ptr<ExpressionContext> pExpCtx;
  1569
+
  1570
+        	DBConfigPtr conf(grid.getDBConfig(dbName, false));
  1571
+        	ChunkManagerPtr chunkManager(conf->getChunkManagerIfExists(fullns, false, false));
  1572
+        	BSONObj shardKey;
  1573
+
  1574
+        	if(conf->isSharded(fullns) && chunkManager) {
  1575
+        		shardKey = chunkManager->getShardKey().key();
  1576
+        		pExpCtx = ExpressionContext::create(&shardKey, &InterruptStatusMongos::status);
  1577
+        	}
  1578
+        	else {
  1579
+        		pExpCtx = ExpressionContext::create((BSONObj*) NULL, &InterruptStatusMongos::status);
  1580
+        	}
  1581
+
1569 1582
             pExpCtx->setInRouter(true);
1570 1583
 
1571 1584
             /* parse the pipeline specification */
@@ -1574,13 +1587,10 @@ namespace mongo {
1574 1587
             if (!pPipeline.get())
1575 1588
                 return false; // there was some parsing error
1576 1589
 
1577  
-            string fullns(dbName + "." + pPipeline->getCollectionName());
1578  
-
1579 1590
             /*
1580 1591
               If the system isn't running sharded, or the target collection
1581 1592
               isn't sharded, pass this on to a mongod.
1582 1593
             */
1583  
-            DBConfigPtr conf(grid.getDBConfig(dbName , false));
1584 1594
             if (!conf || !conf->isShardingEnabled() || !conf->isSharded(fullns))
1585 1595
                 return passthrough(conf, cmdObj, result);
1586 1596
 
Commit_comment_tip

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note

Something went wrong with that request. Please try again.