## Import the pymongo package and connect to the cluster

In [None]:
import pymongo
from pymongo import MongoClient
# Open a client connection to the MongoDB server on localhost, port 27017.
client = MongoClient(host='localhost', port=27017)

## Get a reference to a database with either of these syntax options

In [None]:
# Look the database up by name using dictionary-style indexing on the client.
db = client['productdb']
print(db)

In [None]:
# Look the same database up using attribute-style access on the client.
db = client.productdb
print(db)

## Examine the reference to the collection

In [None]:
# Attribute access on the database yields a collection reference. The collection
# used throughout this notebook is "products" ("productdb" is the database name).
print(db.products)

## Show the collections in the database

In [None]:
# Ask the database for the names of its collections.
db.list_collection_names()

## Let's get rid of the collection and see how it is dynamically created when we first use it

In [None]:
# Drop the products collection; it is created again on first use in the cells below.
db.products.drop()

## There are functions to insert one or many records

In [None]:
# Insert a single document. The legacy Collection.insert() method was deprecated
# and then removed in PyMongo 4, so insert_one() is used instead.
db.products.insert_one({"manufacturer":"KitchenAid", 'model':1})

# Materialize the cursor to show what is now stored for this manufacturer.
list(db.products.find({"manufacturer":"KitchenAid"}))

In [None]:
# insert_one takes a single document; insert_many takes a list of documents.
db.products.insert_one({"manufacturer": "KitchenAid", 'model': 2})
db.products.insert_many(
    [
        {"manufacturer": "KitchenAid", 'model': 3},
        {"manufacturer": "KitchenAid", 'model': 4},
    ]
)

# List the KitchenAid documents now in the collection.
list(db.products.find({"manufacturer": "KitchenAid"}))

## Don't use deprecated functions, favor the newer ones

## The find_one function will find and return a Python dictionary of the document

In [None]:
# With no filter, find_one() returns a single document from the collection as a dict.
db.products.find_one()

## All the same search parameters apply here as they do for the shell

In [None]:
# Same call, restricted to documents whose manufacturer is KitchenAid.
db.products.find_one({"manufacturer":"KitchenAid"})

## The find function returns a cursor object, which you can materialize by converting it to a list, or iterate through with a loop

In [None]:
# Evaluates to a cursor object rather than to the documents themselves.
db.products.find()


In [None]:
# Wrapping the cursor in list() pulls every document into a Python list.
list(db.products.find())

In [None]:
# Walk the cursor and print the documents one at a time.
cursor = db.products.find()
for doc in cursor:
    print(doc)

## You need to quote the special operators like $gt

In [None]:
# Operators such as $gt are written as quoted string keys; this matches model > 2.
list(db.products.find({"model": {"$gt" : 2}}))

## There are several methods for counting documents

In [None]:
# Collection.count() is deprecated; the two calls below are used instead.
total = db.products.estimated_document_count()
print(total)

# Count only the KitchenAid documents whose model is greater than 2.
matching = db.products.count_documents(
    {'manufacturer': 'KitchenAid', "model": {"$gt": 2}}
)
print(matching)



## Delete has functions for one or many documents, similar to insert

In [None]:
# Remove a single matching KitchenAid document, then list the ones that remain.
db.products.delete_one(
    {"manufacturer": "KitchenAid"},
)
list(
    db.products.find({"manufacturer": "KitchenAid"})
)

In [None]:
# Remove every KitchenAid document, then list what is left for that manufacturer.
db.products.delete_many(
    {"manufacturer": "KitchenAid"},
)
list(
    db.products.find({"manufacturer": "KitchenAid"})
)

## And of course updating

In [None]:
# Seed two documents so there is something to update.
db.products.insert_many(
    [
        {"manufacturer": "KitchenAid", 'model': 5},
        {"manufacturer": "KitchenAid", 'model': 6},
    ]
)

# update_one: set price to 100 via a filter that matches on manufacturer.
db.products.update_one(
    {"manufacturer": "KitchenAid"},
    {"$set": {"price": 100}},
)
print(list(db.products.find({"manufacturer": "KitchenAid"})))

# update_many: same filter, price set to 200.
db.products.update_many(
    {"manufacturer": "KitchenAid"},
    {"$set": {"price": 200}},
)
print(list(db.products.find({"manufacturer": "KitchenAid"})))

## Upsert is more Pythonic as a parameter instead of another dictionary

In [None]:
# First attempt: target model 7 with upsert disabled.
db.products.update_one(
    {"manufacturer": "KitchenAid", "model": 7},
    {"$set": {"price": 100}},
    upsert=False,
)
print(list(db.products.find({"manufacturer": "KitchenAid"})))

print('-' * 20)

# Second attempt: target model 8 with upsert enabled.
db.products.update_one(
    {"manufacturer": "KitchenAid", "model": 8},
    {"$set": {"price": 100}},
    upsert=True,
)
print(list(db.products.find({"manufacturer": "KitchenAid"})))




## replace_one will use the search condition to find the document and replace it with new values, but preserving the _id

In [None]:
# Re-apply the model 8 upsert so the document is in a known state before replacing it.
db.products.update_one(
    {"manufacturer": "KitchenAid", "model": 8},
    {"$set": {"price": 100}},
    upsert=True,
)
print(list(db.products.find({"manufacturer": "KitchenAid"})))
print('----')

# Replace the model 8 document with a new body that carries no price field.
db.products.replace_one(
    {"manufacturer": "KitchenAid", "model": 8},
    {"manufacturer": "KitchenAid", "model": 9},
)
print(list(db.products.find({"manufacturer": "KitchenAid"})))

# Note how the price disappeared. It's usually safer to use update methods than replace


## The find_and functions will return the original object in addition to modifying it



In [None]:
# Delete the model 9 document and keep what the call returns for inspection.
p1 = db.products.find_one_and_delete(
    {"manufacturer": "KitchenAid", "model": 9},
)
print(p1)
print('----')
# Show the KitchenAid documents still in the collection.
print(list(db.products.find({"manufacturer": "KitchenAid"})))



In [None]:
# Set price to 500 on model 5 and keep what the call returns for inspection.
p1 = db.products.find_one_and_update(
    {"manufacturer": "KitchenAid", "model": 5},
    {"$set": {"price": 500}},
)
print(p1)
print('----')
# Show the collection contents after the update.
print(list(db.products.find({"manufacturer": "KitchenAid"})))



In [None]:
# Replace the model 5 document with a model 8 body and keep what the call returns.
p1 = db.products.find_one_and_replace(
    {"manufacturer": "KitchenAid", "model": 5},
    {"manufacturer": "KitchenAid", "model": 8},
)
print(p1)
print('----')
# Show the collection contents after the replacement.
print(list(db.products.find({"manufacturer": "KitchenAid"})))


## Indexes can be used to speed up searches

In [None]:
# Materialize the index listing for the products collection.
list(db.products.list_indexes())

In [None]:
# Create an index on the manufacturer field, then run a query that filters on it.
db.products.create_index("manufacturer")
found = db.products.find_one({"manufacturer": "KitchenAid"})
print(found)
# To remove the index again: db.products.drop_index("manufacturer_1")

In [None]:
# Create a compound index over manufacturer and model, both with direction 1.
db.products.create_index(
    [
        ("manufacturer", 1),
        ("model", 1),
    ]
)
found = db.products.find_one({"manufacturer": "KitchenAid"})
print(found)
# To remove the index again: db.products.drop_index("manufacturer_1_model_1")

In [None]:
# Create the manufacturer/model compound index, query, then drop it by name.
db.products.create_index(
    [
        ("manufacturer", 1),
        ("model", 1),
    ]
)
found = db.products.find_one({"manufacturer": "KitchenAid"})
print(found)
drop_result = db.products.drop_index("manufacturer_1_model_1")
print(drop_result)

In [None]:
# Same compound index, but with direction -1 on model; the index name reflects that.
db.products.create_index(
    [
        ("manufacturer", 1),
        ("model", -1),
    ]
)
found = db.products.find_one({"manufacturer": "KitchenAid"})
print(found)
db.products.drop_index("manufacturer_1_model_-1")

In [None]:
# List the KitchenAid documents as a Python list.
list(db.products.find({"manufacturer":"KitchenAid"}))

##  Northwind exercises

In [None]:
# Rebind db to the Northwind database; the cells that follow query it.
db = client.Northwind
print(db)

In [None]:
# Ask the Northwind database for the names of its collections.
db.list_collection_names()

In [None]:
# Pull every document of the regions collection into a list.
list(db.regions.find())

In [None]:
# NOTE(review): when run as one notebook cell, presumably only the last expression
# (find_one) is displayed and the collection-name listing is discarded; confirm intent.
db.list_collection_names()
db.products.find_one()

In [None]:

# One-stage aggregation: keep only documents whose ProductID equals 1.
list(
    db.products.aggregate(
        [
            {"$match": {"ProductID": 1}},
        ]
    )
)

## Aggregation pipelines are a way to aggregate data, but also to filter it, sort it, and more. You build a series of commands you want to run and then submit that list, or pipeline, to the aggregate method.

In [None]:
# The following is a template of what a pipeline would look like.
# It is illustrative pseudo-code rather than runnable Python, so it is kept
# entirely in comments to keep this cell executable.
#
# pipeline = [
#   { "$match" : { ... } },
#   { "$group" : { ... } },
#   { "$sort" : { ... } },
#   { "$unwind" : { ... } },
#   { "$lookup" : { ... } },
#   ...
# ]
# db.collectionName.aggregate(pipeline, **options)

## The pipeline must be a list of dictionaries that have the operations you want to perform. Here is a simple filter using the match operator.

In [None]:
# A pipeline with a single $match stage that filters on ProductName.
pipeline = [
    {"$match": {"ProductName": "Chang"}},
]
list(db.products.aggregate(pipeline))

## This example combines a filter and a sort together

In [None]:
# Stage 1 filters on CategoryID; stage 2 sorts on ProductID with direction 1.
pipeline = [
    {"$match": {"CategoryID": 1}},
    {"$sort": {"ProductID": 1}},
]
list(db.products.aggregate(pipeline))

## Adding a project operator allows you to include/exclude and rename fields. Note the use of the $ in front of the field name.

In [None]:
pipeline = [
    {"$match": {"CategoryID": 1}},
    {"$sort": {"ProductID": 1}},
    # Drop _id, keep ProductID, and expose ProductName under the new name "Name".
    {"$project": {"_id": 0, "ProductID": 1, "Name": "$ProductName"}},
]
list(db.products.aggregate(pipeline))

## Add a limit operator to return a fixed number of documents. 

In [None]:
pipeline = [
    {"$match": {"CategoryID": 1}},
    # Sort on UnitPrice with direction -1 before trimming the result.
    {"$sort": {"UnitPrice": -1}},
    {"$project": {"_id": 0, "ProductID": 1, "ProductName": 1, "UnitPrice": 1}},
    # Keep only the first five documents that reach this stage.
    {"$limit": 5},
]
list(db.products.aggregate(pipeline))

## The group operator is really like a distinct clause in SQL to find all the distinct values for the grouping columns. You must call the column you want to group on _id, but you can rename it later with a project.

In [None]:
# Group on CategoryID; the grouping key has to be supplied under the name _id.
pipeline = [
    {"$group": {"_id": "$CategoryID"}},
]
list(db.products.aggregate(pipeline))


## To add aggregates to the mix, create the alias you want the column to have and the operation to perform. In this example, summing up a one for each document gives a count.

In [None]:
# Summing the constant 1 per document yields a count for each CategoryID.
pipeline = [
    {
        "$group": {
            "_id": "$CategoryID",
            "product_count": {"$sum": 1},
        }
    },
]
list(db.products.aggregate(pipeline))


## Could use \$sum and \$avg and other aggregate operators.

In [None]:
# Per CategoryID: a document count plus the average of UnitPrice.
pipeline = [
    {
        "$group": {
            "_id": "$CategoryID",
            "product_count": {"$sum": 1},
            "product_avg": {"$avg": "$UnitPrice"},
        }
    },
]
list(db.products.aggregate(pipeline))


## the \$push operator can be used to create a list of nested children items to get a nested repeating effect similar to the Hive COLLECT_LIST function

In [None]:
# Per CategoryID: count, average UnitPrice, and the ProductName values gathered with $push.
pipeline = [
    {
        "$group": {
            "_id": "$CategoryID",
            "product_count": {"$sum": 1},
            "product_avg": {"$avg": "$UnitPrice"},
            "product_list": {"$push": "$ProductName"},
        }
    },
]

list(db.products.aggregate(pipeline))



## You can also collect a list of multiple fields together to create a list of structures.

In [None]:
# Per CategoryID: push a small sub-document (id, name, price) for every product.
pipeline = [
    {
        "$group": {
            "_id": "$CategoryID",
            "Products": {
                "$push": {
                    "ProductID": "$ProductID",
                    "ProductName": "$ProductName",
                    "UnitPrice": "$UnitPrice",
                }
            },
        }
    },
]

list(db.products.aggregate(pipeline))





## Can use the \$project to rename the _id column

In [None]:
pipeline = [
    # Collect a sub-document per product under each CategoryID.
    {
        "$group": {
            "_id": "$CategoryID",
            "Products": {
                "$push": {
                    "ProductID": "$ProductID",
                    "ProductName": "$ProductName",
                    "UnitPrice": "$UnitPrice",
                }
            },
        }
    },
    # Re-expose the grouping key as CategoryID and hide the _id field.
    {"$project": {"CategoryID": "$_id", "Products": 1, "_id": 0}},
]

list(db.products.aggregate(pipeline))


## Here we combine many elements together.

In [None]:
pipeline = [
    # Group by CategoryID: nested product list, count, and average price.
    {
        "$group": {
            "_id": "$CategoryID",
            "product_list": {
                "$push": {
                    "ProductID": "$ProductID",
                    "ProductName": "$ProductName",
                    "UnitPrice": "$UnitPrice",
                }
            },
            "product_count": {"$sum": 1},
            "product_avg": {"$avg": "$UnitPrice"},
        }
    },
    # Rename _id to CategoryID and keep the computed fields.
    {
        "$project": {
            "CategoryID": "$_id",
            "product_count": 1,
            "product_avg": 1,
            "product_list": 1,
            "_id": 0,
        }
    },
    # Keep only groups with at least 10 products.
    {"$match": {"product_count": {"$gte": 10}}},
    # Sort keys in order: product_count (-1), then product_avg (1).
    {"$sort": {"product_count": -1, "product_avg": 1}},
]

x = db.products.aggregate(pipeline)
list(x)


## Let's save the results of the nested outcome to a collection.

In [None]:
pipeline = [
    # Group by CategoryID: nested product list, count, and average price.
    {
        "$group": {
            "_id": "$CategoryID",
            "product_list": {
                "$push": {
                    "ProductID": "$ProductID",
                    "ProductName": "$ProductName",
                    "UnitPrice": "$UnitPrice",
                }
            },
            "product_count": {"$sum": 1},
            "product_avg": {"$avg": "$UnitPrice"},
        }
    },
    # Rename _id to CategoryID and keep the computed fields.
    {
        "$project": {
            "CategoryID": "$_id",
            "product_count": 1,
            "product_avg": 1,
            "product_list": 1,
            "_id": 0,
        }
    },
    # Keep only groups with at least 10 products.
    {"$match": {"product_count": {"$gte": 10}}},
    # Sort keys in order: product_count (-1), then product_avg (1).
    {"$sort": {"product_count": -1, "product_avg": 1}},
]

# Empty the target collection first so re-running the cell does not pile up copies.
db.products_nested.delete_many({})
# Feed the aggregation cursor straight into insert_many on the target collection.
x = db.products.aggregate(pipeline)
db.products_nested.insert_many(x)


## Now if you have some nested data and want to flatten or unnest it back to a traditional denormalized format, use the \$unwind operator.

In [None]:
# Unwind the product_list field of the nested collection.
pipeline = [
    {"$unwind": "$product_list"},
]
x = db.products_nested.aggregate(pipeline)
list(x)


## The results are a bit funny looking so use \$project to fix them up.

In [None]:
pipeline = [
    {"$unwind": "$product_list"},
    # Lift the nested product fields to the top level and hide _id.
    {
        "$project": {
            "CategoryID": 1,
            "product_avg": 1,
            "product_count": 1,
            "ProductID": "$product_list.ProductID",
            "ProductName": "$product_list.ProductName",
            "UnitPrice": "$product_list.UnitPrice",
            "_id": 0,
        }
    },
]
x = db.products_nested.aggregate(pipeline)
list(x)


## Generally a MongoDB database should not rely on relational structure but instead each document should contain everything it needs, but if you need to you can use a \$lookup operator which is sort of like using a JOIN or correlated subquery in SQL.

In [None]:
# Look up matching documents in "categories" by CategoryID and attach them as "Category".
pipeline = [
    {
        "$lookup": {
            "from": "categories",
            "localField": "CategoryID",
            "foreignField": "CategoryID",
            "as": "Category",
        }
    },
]
x = db.products_nested.aggregate(pipeline)
list(x)


## It inserts the whole of the looked up document into the output so use a \$project to fix it up.

In [None]:
pipeline = [
    # Attach the matching "categories" documents under the field "Category".
    {
        "$lookup": {
            "from": "categories",
            "localField": "CategoryID",
            "foreignField": "CategoryID",
            "as": "Category",
        }
    },
    # Keep CategoryID and product_list, and surface CategoryName from the looked-up data.
    {
        "$project": {
            "CategoryID": 1,
            "CategoryName": "$Category.CategoryName",
            "product_list": 1,
        }
    },
]
x = db.products_nested.aggregate(pipeline)
list(x)
