### $match

In [1]:
# Connection string for the MongoDB server on localhost, port 27017.
mongo_url = "mongodb://localhost:27017/"



In [2]:
from pymongo import MongoClient
# Open a client against the local MongoDB server, then take handles to the
# database and the collection that the aggregation examples query.
mongo_url = "mongodb://localhost:27017/"
client = MongoClient(mongo_url)
# Dictionary-style access is equivalent to attribute access in PyMongo.
db = client["cloud_resources"]  # database
inventory = db["inventory"]     # collection

In [5]:
# $match stage: let through only documents whose provider_code is "aws".
match_stage = {
    "$match": {
        "provider_code": "aws",
    },
}

# $group stage: the constant _id puts every incoming document into a single
# group, and {"$sum": 1} adds 1 per document, so "sum" is a document count.
group_by_stage = {
    "$group": {
        "_id": "all",
        "sum": {"$sum": 1},
    },
}

In [6]:
import pprint

# Filter first, then count whatever passed the filter.
pipeline = [match_stage, group_by_stage]
# aggregate() returns a cursor; list() drains it so the full result prints.
matched_counts = list(db.inventory.aggregate(pipeline))
pprint.pprint(matched_counts)

[{'_id': 'all', 'sum': 800000}]


### Compound match 

In [11]:
# Several fields inside one $match are combined with an implicit AND:
# a document must be an "aws" resource and have status "Active" to pass.
match_stage = {
    "$match": {
        "provider_code": "aws",
        "status": "Active",
    },
}

In [12]:
# Counting stage: one group for all documents, +1 per document.
group_by_stage = {
    "$group": {"_id": "all", "sum": {"$sum": 1}},
}
# Apply the compound filter, then count the survivors.
pipeline = [match_stage, group_by_stage]
matched_counts = list(db.inventory.aggregate(pipeline))
pprint.pprint(matched_counts)

[{'_id': 'all', 'sum': 800000}]


In [13]:
# Another example: an equality condition combined with an $in condition

In [14]:
# Implicit AND again: provider_code must equal "aws" and the value of
# "manage" must be one of the entries listed under $in.
match_stage = {
    "$match": {
        "provider_code": "aws",
        "manage": {"$in": ["ibm_managed"]},
    },
}

In [15]:
# Counting stage: one group for all documents, +1 per document.
group_by_stage = {
    "$group": {"_id": "all", "sum": {"$sum": 1}},
}
# Apply the $in-based filter, then count the survivors.
pipeline = [match_stage, group_by_stage]
matched_counts = list(db.inventory.aggregate(pipeline))
pprint.pprint(matched_counts)

[{'_id': 'all', 'sum': 199776}]


```
$match is a pipeline operator (stage),
so it can be used any number of times within a single pipeline.
```

### Control the order of documents passed through the pipeline

In [16]:
# Restrict the pipeline to the documents of a single provider account.
match_stage = {"$match": {"provider_account": "5432"}}

# Order by provider_resource_name; 1 means ascending.
sort_stage = {"$sort": {"provider_resource_name": 1}}

# Let at most two documents continue down the pipeline.
limit_stage = {"$limit": 2}

# Drop the first document that reaches this stage.
skip_stage = {"$skip": 1}

In [17]:
import pprint

# Stage order matters: filter, sort, keep the first two documents, then
# skip the first of those two, which leaves a single document.
pipeline = [match_stage, sort_stage, limit_stage, skip_stage]
# allowDiskUse=True is forwarded to the server's aggregate command; per the
# MongoDB docs it permits stages to write temporary files to disk.
page = list(db.inventory.aggregate(pipeline, allowDiskUse=True))
pprint.pprint(page)

[{'_id': ObjectId('5d021544f2e62711cbbd9dbe'),
  'context': {'application': ['cam'], 'environment': ['qa']},
  'location': {'availability_zone_code': ['ap-southeast-2a',
                                          'ap-southeast-2b',
                                          'ap-southeast-2c'],
               'region_code': 'ap-southeast-2'},
  'manage': 'ibm_managed',
  'provider_account': '5432',
  'provider_account_name': 'test_account_5',
  'provider_code': 'aws',
  'provider_resource_id': {'name': '0EILXU8H7E8U2IKC8BY8NSWKM0HX7CZD2YUABI85HJ3HNYYITDNXIZ3PMLRUHLFQ7',
                           'value': 'I8EN9HWFEL6HCB1XFWDUL5817MI4TNBYBXM99NFJ1RZZY'},
  'provider_resource_name': 'Batch EC2 - 0005d01a-aae0-4c17-8360-0be2a7d96844',
  'service_category_type': 'compute',
  'status': 'Active',
  'type': 'service_instance'}]


In [6]:
### Sort performance depends on indexing

```If $sort is used after $group, $unwind, or $project, indexes won't be used, because stages such as $group and $project generate new documents and those documents have no indexes. Hence it is recommended to place $sort before these pipeline operators.```

``` Place $match before $sort, so that fewer documents have to be sorted. ```

```$skip is useful when you want to exclude outliers from your calculation.
Another use is pagination. ```