# This tutorial is intended as an introduction to working with MongoDB and PyMongo.
### https://pymongo.readthedocs.io/en/stable/tutorial.html

### Install MongoDB Community Edition Ubuntu
#### https://www.mongodb.com/docs/manual/tutorial/install-mongodb-on-ubuntu/#std-label-install-mdb-community-ubuntu
sudo apt-get install gnupg curl

curl -fsSL https://www.mongodb.org/static/pgp/server-7.0.asc | \
   sudo gpg -o /usr/share/keyrings/mongodb-server-7.0.gpg \
   --dearmor

echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-7.0.gpg ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/7.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-7.0.list

sudo apt-get update
sudo apt-get install -y mongodb-org

ps --no-headers -o comm 1

sudo apt-get install -y mongodb-atlas


sudo systemctl enable mongod

sudo systemctl daemon-reload

sudo systemctl start mongod

sudo systemctl status mongod

sudo systemctl stop mongod

sudo systemctl restart mongod

sudo gedit /etc/mongod.conf

bind_ip = 0.0.0.0

In [1]:
import pymongo

In [21]:
%matplotlib inline
import numpy as np
#import pandas as pd
import matplotlib.pyplot as plt
#from pymongo import mongo_
from pymongo import MongoClient
#from pandas.io.json import json_normalize
import json
import warnings
warnings.filterwarnings('ignore')

In [22]:
host = '192.168.48.131'

In [23]:
from pymongo import MongoClient
client = MongoClient()

In [24]:
client = MongoClient(host, 27017)

In [25]:
db = client.test_database

In [26]:
client.list_database_names()

['admin', 'config', 'local']

In [27]:
db = client["test-database"]

In [28]:
collection = db.test_collection

In [29]:
import datetime

In [30]:
post = {
    "author": "Mike",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    "date": datetime.datetime.now(tz=datetime.timezone.utc),
}

In [31]:
posts = db.posts

In [32]:
post_id = posts.insert_one(post).inserted_id

In [34]:
post_id

ObjectId('66d50598c550f01a2a4b93a2')

In [35]:
db.list_collection_names()

['posts']

In [37]:
import pprint
pprint.pprint(posts.find_one())

{'_id': ObjectId('66d50598c550f01a2a4b93a2'),
 'author': 'Mike',
 'date': datetime.datetime(2024, 9, 2, 0, 23, 50, 711000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [38]:
pprint.pprint(posts.find_one({"author": "Mike"}))

{'_id': ObjectId('66d50598c550f01a2a4b93a2'),
 'author': 'Mike',
 'date': datetime.datetime(2024, 9, 2, 0, 23, 50, 711000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [39]:
posts.find_one({"author": "Eliot"})

In [40]:
post_id

ObjectId('66d50598c550f01a2a4b93a2')

In [41]:
pprint.pprint(posts.find_one({"_id": post_id}))

{'_id': ObjectId('66d50598c550f01a2a4b93a2'),
 'author': 'Mike',
 'date': datetime.datetime(2024, 9, 2, 0, 23, 50, 711000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [42]:
post_id_as_str = str(post_id)
posts.find_one({"_id": post_id_as_str})  # No result

In [43]:
from bson.objectid import ObjectId

# The web framework gets post_id from the URL and passes it as a string
def get(post_id):
    # Convert from string to ObjectId:
    document = client.db.collection.find_one({'_id': ObjectId(post_id)})

In [44]:
new_posts = [
    {
        "author": "Mike",
        "text": "Another post!",
        "tags": ["bulk", "insert"],
        "date": datetime.datetime(2009, 11, 12, 11, 14),
    },
    {
        "author": "Eliot",
        "title": "MongoDB is fun",
        "text": "and pretty easy too!",
        "date": datetime.datetime(2009, 11, 10, 10, 45),
    },
]

In [45]:
result = posts.insert_many(new_posts)

In [46]:
result.inserted_ids

[ObjectId('66d506eec550f01a2a4b93a3'), ObjectId('66d506eec550f01a2a4b93a4')]

In [47]:
for post in posts.find():
    pprint.pprint(post)

{'_id': ObjectId('66d50598c550f01a2a4b93a2'),
 'author': 'Mike',
 'date': datetime.datetime(2024, 9, 2, 0, 23, 50, 711000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('66d506eec550f01a2a4b93a3'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}
{'_id': ObjectId('66d506eec550f01a2a4b93a4'),
 'author': 'Eliot',
 'date': datetime.datetime(2009, 11, 10, 10, 45),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}


In [48]:
for post in posts.find({"author": "Mike"}):
    pprint.pprint(post)

{'_id': ObjectId('66d50598c550f01a2a4b93a2'),
 'author': 'Mike',
 'date': datetime.datetime(2024, 9, 2, 0, 23, 50, 711000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('66d506eec550f01a2a4b93a3'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}


In [49]:
posts.count_documents({})

3

In [52]:
d = datetime.datetime(2009, 11, 12, 12)
for post in posts.find({"date": {"$lt": d}}).sort("author"):
    pprint.pprint(post)

{'_id': ObjectId('66d506eec550f01a2a4b93a4'),
 'author': 'Eliot',
 'date': datetime.datetime(2009, 11, 10, 10, 45),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}
{'_id': ObjectId('66d506eec550f01a2a4b93a3'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}


In [53]:
result = db.profiles.create_index([("user_id", pymongo.ASCENDING)], unique=True)
sorted(list(db.profiles.index_information()))

['_id_', 'user_id_1']

In [54]:
user_profiles = [{"user_id": 211, "name": "Luke"}, {"user_id": 212, "name": "Ziltoid"}]
result = db.profiles.insert_many(user_profiles)

In [None]:
new_profile = {"user_id": 213, "name": "Drew"}
duplicate_profile = {"user_id": 212, "name": "Tommy"}
result = db.profiles.insert_one(new_profile)  # This is fine.
result = db.profiles.insert_one(duplicate_profile)

## Aggregation Examples
### https://pymongo.readthedocs.io/en/stable/examples/aggregation.html

In [None]:
client = MongoClient(host, 27017)

In [57]:
from pymongo import MongoClient
db = MongoClient(host, 27017).aggregation_example
result = db.things.insert_many(
    [
        {"x": 1, "tags": ["dog", "cat"]},
        {"x": 2, "tags": ["cat"]},
        {"x": 2, "tags": ["mouse", "cat", "dog"]},
        {"x": 3, "tags": []},
    ]
)
result.inserted_ids

[ObjectId('66d50bb0c550f01a2a4b93af'),
 ObjectId('66d50bb0c550f01a2a4b93b0'),
 ObjectId('66d50bb0c550f01a2a4b93b1'),
 ObjectId('66d50bb0c550f01a2a4b93b2')]

In [58]:
from bson.son import SON
pipeline = [
    {"$unwind": "$tags"},
    {"$group": {"_id": "$tags", "count": {"$sum": 1}}},
    {"$sort": SON([("count", -1), ("_id", -1)])},
]
import pprint
pprint.pprint(list(db.things.aggregate(pipeline)))

[{'_id': 'cat', 'count': 3},
 {'_id': 'dog', 'count': 2},
 {'_id': 'mouse', 'count': 1}]


In [60]:
from pymongoexplain import ExplainableCollection
ExplainableCollection(collection).aggregate(pipeline)

{'explainVersion': '1',
 'stages': [{'$cursor': {'queryPlanner': {'namespace': 'test-database.test_collection',
     'indexFilterSet': False,
     'parsedQuery': {},
     'maxIndexedOrSolutionsReached': False,
     'maxIndexedAndSolutionsReached': False,
     'maxScansToExplodeReached': False,
     'winningPlan': {'stage': 'EOF'},
     'rejectedPlans': []}}},
  {'$unwind': {'path': '$tags'}},
  {'$group': {'_id': '$tags', 'count': {'$sum': {'$const': 1}}}},
  {'$sort': {'sortKey': {'count': -1, '_id': -1}}}],
 'serverInfo': {'host': 'pharmapp',
  'port': 27017,
  'version': '7.0.14',
  'gitVersion': 'ce59cfc6a3c5e5c067dca0d30697edd68d4f5188'},
 'serverParameters': {'internalQueryFacetBufferSizeBytes': 104857600,
  'internalQueryFacetMaxOutputDocSizeBytes': 104857600,
  'internalLookupStageIntermediateDocumentMaxSizeBytes': 104857600,
  'internalDocumentSourceGroupMaxMemoryBytes': 104857600,
  'internalQueryMaxBlockingSortMemoryUsageBytes': 104857600,
  'internalQueryProhibitBlockingMer

In [61]:
db.command('aggregate', 'things', pipeline=pipeline, explain=True)

{'explainVersion': '1',
 'stages': [{'$cursor': {'queryPlanner': {'namespace': 'aggregation_example.things',
     'indexFilterSet': False,
     'parsedQuery': {},
     'queryHash': 'B55D7837',
     'planCacheKey': 'B55D7837',
     'maxIndexedOrSolutionsReached': False,
     'maxIndexedAndSolutionsReached': False,
     'maxScansToExplodeReached': False,
     'winningPlan': {'stage': 'PROJECTION_SIMPLE',
      'transformBy': {'tags': 1, '_id': 0},
      'inputStage': {'stage': 'COLLSCAN', 'direction': 'forward'}},
     'rejectedPlans': []}}},
  {'$unwind': {'path': '$tags'}},
  {'$group': {'_id': '$tags', 'count': {'$sum': {'$const': 1}}}},
  {'$sort': {'sortKey': {'count': -1, '_id': -1}}}],
 'serverInfo': {'host': 'pharmapp',
  'port': 27017,
  'version': '7.0.14',
  'gitVersion': 'ce59cfc6a3c5e5c067dca0d30697edd68d4f5188'},
 'serverParameters': {'internalQueryFacetBufferSizeBytes': 104857600,
  'internalQueryFacetMaxOutputDocSizeBytes': 104857600,
  'internalLookupStageIntermediateDoc

In [64]:
db.orders.aggregate( [

   # Stage 1: Filter pizza order documents by pizza size
   {
      $match: { size: "medium" }
   },

   # Stage 2: Group remaining documents by pizza name and calculate total quantity
   {
      $group: { _id: "$name", totalQuantity: { $sum: "$quantity" } }
   }

] )

SyntaxError: invalid syntax (489312058.py, line 5)

In [62]:
db.orders.insertMany( [
   { _id: 0, name: "Pepperoni", size: "small", price: 19,
     quantity: 10, date: ISODate( "2021-03-13T08:14:30Z" ) },
   { _id: 1, name: "Pepperoni", size: "medium", price: 20,
     quantity: 20, date : ISODate( "2021-03-13T09:13:24Z" ) },
   { _id: 2, name: "Pepperoni", size: "large", price: 21,
     quantity: 30, date : ISODate( "2021-03-17T09:22:12Z" ) },
   { _id: 3, name: "Cheese", size: "small", price: 12,
     quantity: 15, date : ISODate( "2021-03-13T11:21:39.736Z" ) },
   { _id: 4, name: "Cheese", size: "medium", price: 13,
     quantity:50, date : ISODate( "2022-01-12T21:23:13.331Z" ) },
   { _id: 5, name: "Cheese", size: "large", price: 14,
     quantity: 10, date : ISODate( "2022-01-12T05:08:13Z" ) },
   { _id: 6, name: "Vegan", size: "small", price: 17,
     quantity: 10, date : ISODate( "2021-01-13T05:08:13Z" ) },
   { _id: 7, name: "Vegan", size: "medium", price: 18,
     quantity: 10, date : ISODate( "2021-01-13T05:10:13Z" ) }
] )

NameError: name '_id' is not defined