# Mongo

# Lab: MongoDB

In [1]:
import pymongo

import json

import pprint

## Connect to the mongo database; mongodb is the protocol; mongo is the hostname, which for us is the container name; 27017 is the TCP port number

In [2]:
# Create a client for the MongoDB server: scheme mongodb://, host "mongo"
# (the container name), TCP port 27017.
mongo = pymongo.MongoClient("mongodb://mongo:27017/")

## In case the database acme_gourmet_meals already exists from prior runs, we will delete it; if it does not exist, it will not hurt anything

In [3]:
# Remove any acme_gourmet_meals database left over from a prior run so the
# lab starts from a clean state; harmless if the database does not exist.
mongo.drop_database("acme_gourmet_meals")

## List the mongo databases

In [4]:
# Names of the databases that currently exist on the server.
db_list = mongo.list_database_names()
db_list  # last expression in the cell, so the notebook echoes its value

['admin', 'config', 'local']

## Create a new database called acme_gourmet_meals; the new database will not actually get created until we add documents to it

In [5]:
# Handle to the acme_gourmet_meals database. Nothing is created on the server
# yet; the database only materializes once a document is inserted.
db = mongo["acme_gourmet_meals"]

In [6]:
# Re-list the databases: acme_gourmet_meals is still absent because it does
# not hold any documents yet.
db_list = mongo.list_database_names()
db_list

['admin', 'config', 'local']

## Create a collection called stores; a collection is a collection of documents, similar to a list of JSON objects; like the database, the new collection will not actually get created until we add documents to it

In [7]:
# Handle to the stores collection. Like the database, the collection is not
# created on the server until a document is inserted into it.
stores_collection = db["stores"]

In [8]:
# Obtaining a collection handle does not create the database either, so the
# list of databases is unchanged.
db_list = mongo.list_database_names()
db_list

['admin', 'config', 'local']

In [9]:
# Collections that actually exist in the database; empty until a document
# has been inserted.
collection_list = db.list_collection_names()
collection_list

[]

## Read in the JSON file temp_stores_nested.json, which we previously created back in Data Wrangling; insert the first JSON store object as a document in the stores collection in the acme_gourmet_meals database; mongo assigns a unique inserted_id to each document added

In [10]:
# Load the nested stores JSON file. The with-block guarantees the file handle
# is closed even if json.load() raises on malformed input; the encoding is
# spelled out so the read does not depend on the platform default.
with open("temp_stores_nested.json", encoding="utf-8") as f:
    file_json = json.load(f)

# The top-level "stores" key holds the list of store objects.
stores_json_list = file_json["stores"]

In [11]:
# Take the first store object from the loaded list.
first_store_json = stores_json_list[0]

# insert_one() returns a result object (exposing inserted_id), not the stored
# document itself, despite the variable name.
# NOTE(review): pymongo appears to add the generated _id to first_store_json in
# place -- the same id shows up again when the full list is inserted later in
# this notebook. Confirm against the pymongo documentation.
store_document = stores_collection.insert_one(first_store_json)

In [12]:
# inserted_id is the unique id MongoDB assigned to the newly added document.
print(store_document.inserted_id)

67cbd961da592b0c7bacbf53


## Now that we have actually inserted a document, the database and the collection we previously specified have now been created

In [13]:
# The database is now listed because it contains at least one document.
db_list = mongo.list_database_names()
db_list

['acme_gourmet_meals', 'admin', 'config', 'local']

In [14]:
# The stores collection is now listed because a document has been inserted
# into it.
collection_list = db.list_collection_names()
collection_list

['stores']

## Query the document we just inserted; find_one() will find the first document in the collection; from Python, we always insert a dictionary, and find_one() returns a dictionary

In [15]:
# With no filter argument, find_one() returns the first document in the
# collection. This rebinds store_document to that document.
store_document = stores_collection.find_one()

In [16]:
# Show that the queried document comes back as a plain Python dict.
print(type(store_document))

<class 'dict'>


In [17]:
# Plain print: the whole nested document is emitted on a single line.
print(store_document)

{'_id': ObjectId('67cbd961da592b0c7bacbf53'), 'store_id': 1, 'street': '3000 Telegraph Ave', 'city': 'Berkeley', 'state': 'CA', 'zip': '94705', 'latitude': 37.8555, 'longitude': -122.2604, 'sales': [{'sale_id': 128112, 'sale_date': '2020-04-30', 'total_amount': 24, 'customer': {'customer_id': 3491, 'first_name': 'Siouxie', 'last_name': "M'Quharge", 'street': '747 Westridge Center', 'city': 'Alameda', 'state': 'CA', 'zip': '94501', 'closest_store_id': 1, 'distance': 6}, 'line_items': [{'line_item_id': 1, 'quantity': 1, 'product': {'product_id': 1, 'description': 'Pistachio Salmon'}}, {'line_item_id': 2, 'quantity': 1, 'product': {'product_id': 8, 'description': 'Brocolli Stir Fry'}}]}, {'sale_id': 144249, 'sale_date': '2020-05-16', 'total_amount': 84, 'customer': {'customer_id': 1597, 'first_name': 'Norry', 'last_name': 'Macauley', 'street': '654 Sommers Plaza', 'city': 'Oakland', 'state': 'CA', 'zip': '94612', 'closest_store_id': 1, 'distance': 3}, 'line_items': [{'line_item_id': 1, 'q

In [18]:
# pprint spreads the nested document over multiple indented lines, with dict
# keys in sorted order, which is far easier to read than the raw print.
pprint.pprint(store_document)

{'_id': ObjectId('67cbd961da592b0c7bacbf53'),
 'city': 'Berkeley',
 'latitude': 37.8555,
 'longitude': -122.2604,
 'sales': [{'customer': {'city': 'Alameda',
                         'closest_store_id': 1,
                         'customer_id': 3491,
                         'distance': 6,
                         'first_name': 'Siouxie',
                         'last_name': "M'Quharge",
                         'state': 'CA',
                         'street': '747 Westridge Center',
                         'zip': '94501'},
            'line_items': [{'line_item_id': 1,
                            'product': {'description': 'Pistachio Salmon',
                                        'product_id': 1},
                            'quantity': 1},
                           {'line_item_id': 2,
                            'product': {'description': 'Brocolli Stir Fry',
                                        'product_id': 8},
                            'quantity': 1}],
            'sal

## Delete the stores collection; recreate the stores collection; load all the stores JSON objects into the collection; print the inserted_ids; 

In [19]:
# Drop the stores collection together with the document it contains.
stores_collection.drop()

In [20]:
# Fresh handle to the stores collection; it is re-created on the server when
# the next document is inserted.
stores_collection = db["stores"]

In [21]:
# Insert every store object from the JSON list in a single call. The return
# value is a result object, not the documents themselves.
inserted_documents = stores_collection.insert_many(stores_json_list)

In [22]:
# inserted_ids holds one id per document that was inserted.
print(inserted_documents.inserted_ids)

[ObjectId('67cbd961da592b0c7bacbf53'), ObjectId('67cbdb92da592b0c7bacbf54'), ObjectId('67cbdb92da592b0c7bacbf55'), ObjectId('67cbdb92da592b0c7bacbf56'), ObjectId('67cbdb92da592b0c7bacbf57')]


## Query all the documents in the stores collection, loop through them, and print some info about each document 

In [23]:
# find() with no arguments yields every document in the collection; report
# the id and city of each one.
for store_document in stores_collection.find():
    store_id = store_document["store_id"]
    city = store_document["city"]
    print("store_id:", store_id, "city:", city)

store_id: 1 city: Berkeley
store_id: 2 city: Seattle
store_id: 3 city: Dallas
store_id: 4 city: Miami
store_id: 5 city: Nashville


## Query using a filter object in the find() method

In [24]:
# Equality filter: match documents whose "city" field is exactly "Berkeley".
# Named city_filter rather than filter so the built-in filter() is not
# shadowed for the rest of the notebook session.
city_filter = {"city": "Berkeley"}

for store_document in stores_collection.find(city_filter):
    print("store_id:", store_document["store_id"], "city:", store_document["city"])

store_id: 1 city: Berkeley


In [25]:
# Equality filter: match documents whose "city" field is exactly "Nashville".
# Named city_filter rather than filter so the built-in filter() is not
# shadowed for the rest of the notebook session.
city_filter = {"city": "Nashville"}

for store_document in stores_collection.find(city_filter):
    print("store_id:", store_document["store_id"], "city:", store_document["city"])

store_id: 5 city: Nashville


In [26]:
# "$gt" is strictly greater-than (use "$gte" for greater-than-or-equal).
# Strings are compared lexicographically, so "Miami" still matches: it extends
# the prefix "M" and therefore sorts after it.
# Named city_filter rather than filter so the built-in filter() is not shadowed.
city_filter = {"city": {"$gt": "M"}}

for store_document in stores_collection.find(city_filter):
    print("store_id:", store_document["store_id"], "city:", store_document["city"])

store_id: 2 city: Seattle
store_id: 4 city: Miami
store_id: 5 city: Nashville


## Query on a nested field

In [27]:
# Dot notation reaches into the nested "sales" array: a store matches when at
# least one of its sales has total_amount equal to 60.
# Named sales_filter rather than filter so the built-in filter() is not shadowed.
sales_filter = {"sales.total_amount": 60}

for store_document in stores_collection.find(sales_filter):
    print("store_id:", store_document["store_id"], "city:", store_document["city"])

store_id: 1 city: Berkeley
store_id: 4 city: Miami


In [28]:
# Dot notation can descend several levels: a store matches when any line item
# of any of its sales has quantity equal to 4.
# Named quantity_filter rather than filter so the built-in filter() is not
# shadowed.
quantity_filter = {"sales.line_items.quantity": 4}

for store_document in stores_collection.find(quantity_filter):
    print("store_id:", store_document["store_id"], "city:", store_document["city"])

store_id: 1 city: Berkeley
store_id: 2 city: Seattle
store_id: 3 city: Dallas
store_id: 4 city: Miami


## Sort the results of a query in ascending order

In [29]:
# Cities that sort strictly after "M", returned in ascending city order
# (sort() defaults to ascending when no direction is given).
# Named city_filter rather than filter so the built-in filter() is not shadowed.
city_filter = {"city": {"$gt": "M"}}

for store_document in stores_collection.find(city_filter).sort("city"):
    print("store_id:", store_document["store_id"], "city:", store_document["city"])

store_id: 4 city: Miami
store_id: 5 city: Nashville
store_id: 2 city: Seattle


## Sort the results of a query in descending order

In [30]:
# Cities that sort strictly after "M", returned in descending city order.
# pymongo.DESCENDING is the named constant for the sort direction -1.
# Named city_filter rather than filter so the built-in filter() is not shadowed.
city_filter = {"city": {"$gt": "M"}}

for store_document in stores_collection.find(city_filter).sort("city", pymongo.DESCENDING):
    print("store_id:", store_document["store_id"], "city:", store_document["city"])

store_id: 2 city: Seattle
store_id: 5 city: Nashville
store_id: 4 city: Miami


## Delete one document matching a filter

In [31]:
# Delete a single document whose city is "Berkeley".
# Named city_filter rather than filter so the built-in filter() is not shadowed.
city_filter = {"city": "Berkeley"}

# Left as the cell's last expression so the notebook echoes the DeleteResult;
# its deleted_count attribute reports how many documents were removed.
stores_collection.delete_one(city_filter)


<pymongo.results.DeleteResult at 0x7f7691eaf340>

In [32]:
# List the documents that remain in the collection after the delete.
for store_document in stores_collection.find():
    store_id = store_document["store_id"]
    city = store_document["city"]
    print("store_id:", store_id, "city:", city)

store_id: 2 city: Seattle
store_id: 3 city: Dallas
store_id: 4 city: Miami
store_id: 5 city: Nashville


## Delete many documents matching a filter

In [33]:
# Delete every document whose city sorts strictly after "N" ("$gt" is a
# lexicographic greater-than on strings).
# Named city_filter rather than filter so the built-in filter() is not shadowed.
city_filter = {"city": {"$gt": "N"}}

# Left as the cell's last expression so the notebook echoes the DeleteResult.
stores_collection.delete_many(city_filter)

<pymongo.results.DeleteResult at 0x7f7691ec8200>

In [34]:
# List the documents that survived the bulk delete.
remaining_stores = stores_collection.find()
for store_document in remaining_stores:
    print("store_id:", store_document["store_id"],
          "city:", store_document["city"])

store_id: 3 city: Dallas
store_id: 4 city: Miami


## Delete all documents in a collection

In [35]:
# An empty filter matches every document, so delete_many() empties the
# collection.
# Named match_all rather than filter so the built-in filter() is not shadowed.
match_all = {}

# Left as the cell's last expression so the notebook echoes the DeleteResult.
stores_collection.delete_many(match_all)

<pymongo.results.DeleteResult at 0x7f7693d56ac0>

In [36]:
# The collection is empty at this point, so this loop prints nothing.
for store_document in stores_collection.find():
    store_id = store_document["store_id"]
    city = store_document["city"]
    print("store_id:", store_id, "city:", city)

## You try it

## Create a collection called sales; load the file temp_sales_nested.json into the collection; run some queries on the collection

## Create a collection called customers; load the file temp_customers_nested.json into the collection; run some queries on the collection

In [38]:
# Handle to the sales collection; it is not created on the server until a
# document is inserted.
sales_collection = db["sales"]

In [42]:
# sales is not listed yet because nothing has been inserted into it.
collection_list = db.list_collection_names()
collection_list

['stores']

In [45]:
# Load the nested sales JSON file. The with-block guarantees the file handle
# is closed even if json.load() raises on malformed input; the encoding is
# spelled out so the read does not depend on the platform default.
with open("temp_sales_nested.json", encoding="utf-8") as f2:
    file_json2 = json.load(f2)

# The top-level "sales" key holds the list of sale objects.
sales_json_list = file_json2["sales"]

In [47]:
# Take the first sale object from the loaded list.
first_sales_json = sales_json_list[0]

# insert_one() returns a result object (exposing inserted_id), not the stored
# document.
# NOTE(review): the exercise asks to load the file into the collection, but
# only the first sale is inserted here; insert_many(sales_json_list) would
# load them all.
sale_document = sales_collection.insert_one(first_sales_json)

In [48]:
# inserted_id is the unique id MongoDB assigned to the newly added sale.
print(sale_document.inserted_id)

67cbdf20da592b0c7bacbf58


In [54]:
# Fetch the first document in the sales collection; this rebinds
# sale_document from the insert result to the document itself.
sale_document = sales_collection.find_one()

In [57]:
# Pretty-print the nested sale document (customer, line_items, ...).
pprint.pprint(sale_document)

{'_id': ObjectId('67cbdf20da592b0c7bacbf58'),
 'customer': {'area': 8.3977,
              'city': 'Alameda',
              'closest_store_id': 1,
              'customer_id': 3491,
              'density': 7602.4,
              'distance': 6,
              'first_last_name': "Siouxie M'Quharge",
              'first_name': 'Siouxie',
              'last_first_name': "M'Quharge, Siouxie",
              'last_name': "M'Quharge",
              'population': 63843,
              'state': 'CA',
              'street': '747 Westridge Center',
              'zip': '94501'},
 'line_items': [{'description': 'Pistachio Salmon',
                 'line_total': 12,
                 'price': 12,
                 'product_id': 1,
                 'quantity': 1},
                {'description': 'Brocolli Stir Fry',
                 'line_total': 12,
                 'price': 12,
                 'product_id': 8,
                 'quantity': 1}],
 'receipt_number': '001-000128112',
 'sale_date': '2020-

In [75]:
# Report the receipt number and date of every document in the sales collection.
for sale_document in sales_collection.find():
    receipt_number = sale_document["receipt_number"]
    sale_date = sale_document["sale_date"]
    print("receipt_number:", receipt_number, "sale_date:", sale_date)

receipt_number: 001-000128112 sale_date: 2020-04-30


In [66]:
# Handle to the customers collection; it is not created on the server until a
# document is inserted.
customers_collection = db["customers"]

In [67]:
# customers is not listed yet because nothing has been inserted into it.
collection_list = db.list_collection_names()
collection_list

['stores', 'sales']

In [68]:
# Load the nested customers JSON file. The with-block guarantees the file
# handle is closed even if json.load() raises on malformed input; the encoding
# is spelled out so the read does not depend on the platform default.
with open("temp_customers_nested.json", encoding="utf-8") as f3:
    file_json3 = json.load(f3)

# The top-level "customers" key holds the list of customer objects.
customers_json_list = file_json3["customers"]

In [69]:
# Take the first customer object from the loaded list.
first_customer_json = customers_json_list[0]

# insert_one() returns a result object (exposing inserted_id), not the stored
# document.
# NOTE(review): the exercise asks to load the file into the collection, but
# only the first customer is inserted here; insert_many(customers_json_list)
# would load them all.
customer_document = customers_collection.insert_one(first_customer_json)

In [72]:
# Fetch the first document in the customers collection; this rebinds
# customer_document from the insert result to the document itself.
customer_document = customers_collection.find_one()

In [73]:
# Pretty-print the nested customer document, including its embedded sales.
pprint.pprint(customer_document)

{'_id': ObjectId('67cbe0b1da592b0c7bacbf59'),
 'area': 1.2177,
 'city': 'Berkeley',
 'closest_store_id': 1,
 'customer_id': 563,
 'density': 23972.16,
 'distance': 1,
 'first_last_name': 'Rose Slimings',
 'first_name': 'Rose',
 'last_first_name': 'Slimings, Rose',
 'last_name': 'Slimings',
 'population': 29190,
 'sales': [{'line_items': [{'description': 'Teriyaki Chicken',
                            'line_total': 12,
                            'price': 12,
                            'product_id': 2,
                            'quantity': 1},
                           {'description': 'Eggplant Lasagna',
                            'line_total': 24,
                            'price': 12,
                            'product_id': 4,
                            'quantity': 2}],
            'receipt_number': '001-000255285',
            'sale_date': '2020-08-29',
            'store': {'city': 'Berkeley',
                      'latitude': 37.8555,
                      'longitude': -1

In [77]:
# Report the id and city of every document in the customers collection.
for customer_document in customers_collection.find():
    customer_id = customer_document["customer_id"]
    city = customer_document["city"]
    print("customer_id:", customer_id, "city:", city)

customer_id: 563 city: Berkeley
