# Basic PyMongo guide


*   CRUD part 2
*   Query operators
>-  Count
>-  Maximum and Minimum
>-  Inclusion, exclusion operators IN and NIN
>-  Relational operators "greater than or equal", "greater than", "equal", "less than", "less than or equal"
>-  Logical Query Operators
>-  Exists & not exists
*   List items that belong to a list or not
*   Indexes


## 1. Import PyMongo and set database

In [None]:
import datetime                           # Imports datetime library
import pymongo                            # Imports PyMongo library
from pymongo import MongoClient           # Imports MongoClient 

import os                                 # Reads the optional MONGODB_URI override

# The URI (uniform resource identifier) defines the connection parameters:
#   single server   : 'mongodb://USER:PASSWORD@SERVER_NAME:PORT/DATABASE_NAME'
#   several servers : 'mongodb://USER:PASSWORD@SERVER_1:PORT,SERVER_2:PORT,SERVER_3:PORT/DATABASE_NAME'
#   local server    : 'localhost:27017'
#
# SECURITY: credentials written in source end up in version control and in every
# shared copy of this notebook. Export MONGODB_URI to use your own connection
# string; the literal below is only the tutorial fallback.
# NOTE(review): rotate the embedded password and drop the fallback once MONGODB_URI is configured.
uri = os.environ.get(
    'MONGODB_URI',
    'mongodb://u1kkdrchfjim80tclysv:FeesC2ACNmI7be61RTst@brny4kjelauboxl-mongodb.services.clever-cloud.com:27017/brny4kjelauboxl',
)

# Start the client that connects to the MongoDB server
client = MongoClient( uri )

In [97]:
client.list_database_names()                        # Checks the database name

['brny4kjelauboxl']

In [32]:
# Select the working database, then start from a clean slate by removing any
# 'addressbook' collection left over from a previous module.
db = client['brny4kjelauboxl']
existing_collections = db.list_collection_names()
if 'addressbook' not in existing_collections:
    print('Not previous collection found')
else:
    print('Deleting "Addressbook" collection')
    db.drop_collection('addressbook')

Not previous collection found


In [101]:
db.list_collection_names()

['addressbook']

## 1.1 Download database and import dataset into mongodb

In [34]:
# Downloading JSON with Agenda
!wget https://raw.githubusercontent.com/Giffy/Personal_dataset_repository/master/contacts.json
  
# Uploading data to Mongo Database
!mongodb-linux-x86_64-debian71-3.0.15/bin/mongoimport --host brny4kjelauboxl-mongodb.services.clever-cloud.com \
                                                      --port 27017 \
                                                      --username='u1kkdrchfjim80tclysv' \
                                                      --password='FeesC2ACNmI7be61RTst' \
                                                      --db brny4kjelauboxl \
                                                      --collection addressbook \
                                                      --jsonArray /content/contacts.json

# Reference https://www.kenwalger.com/blog/nosql/mongodb/importing-data-mongoimport/

--2020-03-20 22:21:38--  https://raw.githubusercontent.com/Giffy/Personal_dataset_repository/master/contacts.json
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 549538 (537K) [text/plain]
Saving to: ‘contacts.json.1’


2020-03-20 22:21:39 (8.50 MB/s) - ‘contacts.json.1’ saved [549538/549538]

2020-03-20T22:21:43.254+0000	connected to: brny4kjelauboxl-mongodb.services.clever-cloud.com:27017
2020-03-20T22:21:44.193+0000	[########################] brny4kjelauboxl.addressbook	536.7 KB/536.7 KB (100.0%)
2020-03-20T22:21:44.895+0000	imported 1000 documents


# 2. Database overview

In [36]:
db = client.brny4kjelauboxl               # Set the database to work on
db.list_collection_names()                # List the collections available

['addressbook']

In [None]:
collection = db.addressbook               # Set the collection to work on

## 2.1 Dataset size and attributes

In [38]:
## Dataset content summary
# An empty filter matches every document in the collection.
num_documents = collection.count_documents({})

# find_one() returns the first document (or None when the collection is empty).
# Indexing a cursor with [1] would instead skip to the SECOND document and raise
# IndexError when fewer than two documents exist.
## WARNING use as reference only, a NoSQL db can have different attributes per document
first_document = collection.find_one()
attributes = list(first_document) if first_document is not None else []

print ('Number of documents : %d' % num_documents)
print ('Attributes names : %s' % attributes)

Number of documents : 1000
Attributes names : ['_id', 'index', 'name', 'isActive', 'registered', 'age', 'gender', 'eyeColor', 'favoriteFruit', 'company', 'tags']


### Warning: in a NoSQL database, each document can have a different set of attributes

In [39]:
## Dataset content summary
def content_attribute(attribute_name):
    """Return the distinct values stored under *attribute_name* in the collection."""
    unique_values = collection.distinct(attribute_name)
    return unique_values

# For every attribute print its name, the number of distinct values it holds
# and a preview limited to the first 20 of those values.
for attribute_name in attributes:
    content = content_attribute(attribute_name)
    sample = content[:20] if len(content) > 20 else content
    report_lines = [
        'Item name : ' + attribute_name,
        '   Unique content : ' + str(len(content)),
        '   Content : ' + str(sample),
    ]
    print('\n'.join(report_lines))

Item name : _id
   Unique content : 1000
   Content : [ObjectId('5e7541f88307b5e3d4a15b2e'), ObjectId('5e7541f88307b5e3d4a15b2f'), ObjectId('5e7541f88307b5e3d4a15b30'), ObjectId('5e7541f88307b5e3d4a15b31'), ObjectId('5e7541f88307b5e3d4a15b32'), ObjectId('5e7541f88307b5e3d4a15b33'), ObjectId('5e7541f88307b5e3d4a15b34'), ObjectId('5e7541f88307b5e3d4a15b35'), ObjectId('5e7541f88307b5e3d4a15b36'), ObjectId('5e7541f88307b5e3d4a15b37'), ObjectId('5e7541f88307b5e3d4a15b38'), ObjectId('5e7541f88307b5e3d4a15b39'), ObjectId('5e7541f88307b5e3d4a15b3a'), ObjectId('5e7541f88307b5e3d4a15b3b'), ObjectId('5e7541f88307b5e3d4a15b3c'), ObjectId('5e7541f88307b5e3d4a15b3d'), ObjectId('5e7541f88307b5e3d4a15b3e'), ObjectId('5e7541f88307b5e3d4a15b3f'), ObjectId('5e7541f88307b5e3d4a15b40'), ObjectId('5e7541f88307b5e3d4a15b41')]
Item name : index
   Unique content : 1000
   Content : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Item name : name
   Unique content : 1000
   Content : ['A

In [40]:
## We can list one document
list ( collection.find().limit(1) )

[{'_id': ObjectId('5e7541f88307b5e3d4a15b2e'),
  'age': 20,
  'company': {'email': 'aureliagonzales@yurture.com',
   'location': {'address': '694 Hewes Street', 'country': 'USA'},
   'phone': '+1 (940) 501-3963',
   'title': 'YURTURE'},
  'eyeColor': 'green',
  'favoriteFruit': 'banana',
  'gender': 'female',
  'index': 0,
  'isActive': False,
  'name': 'Aurelia Gonzales',
  'registered': datetime.datetime(2015, 2, 11, 4, 22, 39),
  'tags': ['enim', 'id', 'velit', 'ad', 'consequat']}]

# 3. Create Read Update Delete - Part 2

## 3.1 Adding and deleting attributes

### Add a new attribute to your collection

In [41]:
# $set adds "favoriteColor": "red" to every document whose "age" is greater than or equal to 0
collection.update_many( {"age" :{ "$gte" :0 }}, {"$set" : { "favoriteColor" : "red" }})

<pymongo.results.UpdateResult at 0x7fec7951c788>

In [42]:
# Same update with another filter: "age" not in [""] is used here as a match-all condition, then $set adds "favoriteBook"
collection.update_many( {"age" :{ "$nin" : [""] }}, {"$set" : { "favoriteBook" : "Harry Potter" }})

<pymongo.results.UpdateResult at 0x7fec7951c5c8>

In [43]:
# Show one 20-year-old document, returning only the listed fields (plus _id)
list(
    collection.find(
        {"age": 20},
        ["favoriteColor", "favoriteBook", "name", "age"],
    ).limit(1)
)

[{'_id': ObjectId('5e7541f88307b5e3d4a15b2e'),
  'age': 20,
  'favoriteBook': 'Harry Potter',
  'favoriteColor': 'red',
  'name': 'Aurelia Gonzales'}]

### Delete attribute and data

In [44]:
## Deprecated form:   collection.update( {"age" :{ "$gte" :0 }}, {"$unset" :{ "favoriteColor" :1 }}, {multi :1})
# $unset removes the listed attributes (and their data) from every document matched by the filter
collection.update_many( {"age" :{ "$gte" :0 }}, {"$unset" :{ "favoriteColor" :1 , "favoriteBook" :1}})

<pymongo.results.UpdateResult at 0x7fec7952d188>

In [45]:
# Same query as before the $unset: the removed attributes no longer appear in the result
list(
    collection.find(
        {"age": 20},
        ["favoriteColor", "favoriteBook", "name", "age"],
    ).limit(1)
)

[{'_id': ObjectId('5e7541f88307b5e3d4a15b2e'),
  'age': 20,
  'name': 'Aurelia Gonzales'}]

## 3.2 Data visualization and Queries

### 3.2.1 Visualization of attribute names  -  first level

In [46]:
# find_one() returns the first matching document (or None when nothing matches).
# Indexing the cursor with [1] would skip to the SECOND match and raise
# IndexError when only one document satisfies the filter.
matching_document = collection.find_one({"age": 38, "gender": 'female'})
print (list (matching_document or {}))       # first-level attribute names

['_id', 'index', 'name', 'isActive', 'registered', 'age', 'gender', 'eyeColor', 'favoriteFruit', 'company', 'tags']


### 3.2.2 Visualization of attribute unique content (sorted)

In [47]:
# Print the sorted unique values of each attribute, prefixed by its label
for label, attribute in (("Age :", "age"), ("Gender :", "gender")):
    query = collection.distinct(attribute)
    print(label + str(sorted(query)))

Age :[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
Gender :['female', 'male']


- Exercise:  Check the unique eye colors and favorite fruit

      hint: attributes =  eyeColor and favoriteFruit

In [None]:
## Answer here

### 3.2.3 Find document by id

In [51]:
# Fetch a single existing document and keep its id as a string

documents = collection.find({"_id": {"$exists": True}}, ['name', 'age']).limit(1)

# itemId stays "" when the query returns nothing
item = next(iter(documents), None)
itemId = "" if item is None else str(item['_id'])

print(itemId)

5e7541f88307b5e3d4a15b2e


In [None]:
# First import objectid object
from bson.objectid import ObjectId

In [53]:
list(collection.find({"_id": ObjectId( itemId )} , ['name','age','favoriteFruit','company.email']))   ##  reading second level attribute 'email'

[{'_id': ObjectId('5e7541f88307b5e3d4a15b2e'),
  'age': 20,
  'company': {'email': 'aureliagonzales@yurture.com'},
  'favoriteFruit': 'banana',
  'name': 'Aurelia Gonzales'}]

### 3.2.4 Filter by fields

In [54]:
# collection.find(  FILTER_CONDITION   ,  FIELDS_to_retrieve )
# FIELDS : a list [] of field names, as used below; a dotted name such as
#          'company.email' selects an attribute nested inside a sub-document
# NOTE(review): an earlier note said a single field may be passed as a plain string -- confirm against the PyMongo docs before relying on it

filters = {"isActive": True}
fields = ['name','age', 'isActive','company.email']

list(collection.find( filters , fields ).limit(1))

[{'_id': ObjectId('5e7541f88307b5e3d4a15b31'),
  'age': 39,
  'company': {'email': 'karynrhodes@rodemco.com'},
  'isActive': True,
  'name': 'Karyn Rhodes'}]

In [55]:
print( collection.count_documents( filters ) )

516


In [56]:
# Multiple filters

filters = {"$or": [{"age" : 28}, {"age" : 29}] , "gender" : 'female'}

print( collection.count_documents ( filters ) ) # count in Mongo the found documents 
print( len (list (collection.find( filters ))) ) # count in Python the found documents

36
36


### 3.2.5 Find by regex ( name starts with ' Ki* ')

In [57]:
import re
regex = re.compile('^Ki', re.IGNORECASE)     # '^Ki' anchors the match at the start; IGNORECASE also accepts 'ki', 'KI', ...

# find documents whose name starts with Ki

filters = { 'name' : regex }
fields = { '_id' : 0, 'name' : 1, 'isActive' : 1, 'age' : 1 }     #  Hide _id in reply  

list ( collection.find( filters , fields ) )

[{'age': 38, 'isActive': False, 'name': 'Kitty Snow'},
 {'age': 20, 'isActive': True, 'name': 'Kimberly House'},
 {'age': 34, 'isActive': False, 'name': 'Kirk Walsh'},
 {'age': 21, 'isActive': True, 'name': 'Kirby Buckley'},
 {'age': 22, 'isActive': True, 'name': 'Kinney Wynn'},
 {'age': 38, 'isActive': True, 'name': 'Kirkland Buckner'},
 {'age': 40, 'isActive': True, 'name': 'Kidd Arnold'},
 {'age': 22, 'isActive': False, 'name': 'Kimberley Chase'}]

### 3.2.6 Sort query ascending and descending

In [58]:
# Ascending
list ( collection.find( filters , fields ).sort('age', pymongo.ASCENDING) )

[{'age': 20, 'isActive': True, 'name': 'Kimberly House'},
 {'age': 21, 'isActive': True, 'name': 'Kirby Buckley'},
 {'age': 22, 'isActive': True, 'name': 'Kinney Wynn'},
 {'age': 22, 'isActive': False, 'name': 'Kimberley Chase'},
 {'age': 34, 'isActive': False, 'name': 'Kirk Walsh'},
 {'age': 38, 'isActive': False, 'name': 'Kitty Snow'},
 {'age': 38, 'isActive': True, 'name': 'Kirkland Buckner'},
 {'age': 40, 'isActive': True, 'name': 'Kidd Arnold'}]

In [59]:
# Descending 
list ( collection.find( filters , fields ).sort('age', pymongo.DESCENDING) )

[{'age': 40, 'isActive': True, 'name': 'Kidd Arnold'},
 {'age': 38, 'isActive': False, 'name': 'Kitty Snow'},
 {'age': 38, 'isActive': True, 'name': 'Kirkland Buckner'},
 {'age': 34, 'isActive': False, 'name': 'Kirk Walsh'},
 {'age': 22, 'isActive': True, 'name': 'Kinney Wynn'},
 {'age': 22, 'isActive': False, 'name': 'Kimberley Chase'},
 {'age': 21, 'isActive': True, 'name': 'Kirby Buckley'},
 {'age': 20, 'isActive': True, 'name': 'Kimberly House'}]

# 4. Query operators

## 4.1 Count

In [60]:
# Count documents with "age" equal to 38
collection.count_documents({"age": 38})
# the previous method was : collection.find({"age": 38}).count()  (deprecated)

49

## 4.2 Maximum and Minimum

In [61]:
#Maximum
list( collection.find({},{"_id": 0, "age": 1}).sort('age', pymongo.DESCENDING).limit(1))  # gets maximum age from MongoDB

[{'age': 40}]

In [62]:
max( collection.distinct( "age" ))   # gets all ages range from MongoDB and python gets the maximum

40

In [63]:
#Minimum
list( collection.find({},{"_id": 0, "age": 1}).sort('age', pymongo.ASCENDING).limit(1))   # gets minimum age from MongoDB

[{'age': 20}]

In [64]:
min( collection.distinct( "age" ))   # gets all ages range from MongoDB and python gets the minimum

20

In [65]:
# Distinct ages per gender are computed by MongoDB; min and max are then taken in Python
agemale = collection.distinct("age", {"gender": 'male'})
agefemale = collection.distinct("age", {"gender": 'female'})

print('Male -  Min age: %s and Max age: %s' % (min(agemale), max(agemale)))
print('Female -  Min age: %s and Max age: %s' % (min(agefemale), max(agefemale)))

Male -  Min age: 20 and Max age: 40
Female -  Min age: 20 and Max age: 40


## 4.3 Inclusion, exclusion operators  IN and  NIN

In [66]:
print( collection.count_documents( { "name" : { "$in": [ "Kimberley Chase", "Kinney Wynn" ] }} ))    # includes the names in count
print( collection.count_documents( { "name" : { "$nin": [ "Kimberley Chase", "Kinney Wynn" ] }} ))   # excludes the names in count

2
998


In [67]:
# Request the _id explicitly. An empty projection ('' previously) is replaced by
# {"_id": 1} in PyMongo 3 but forwarded unchanged by PyMongo 4, which then
# returns whole documents instead of only the ids.
list( collection.find( { "name" : { "$in": ["Kimberley Chase", "Kinney Wynn"] }}, {'_id': 1}))      # includes the names in find

[{'_id': ObjectId('5e7541f88307b5e3d4a15d76')},
 {'_id': ObjectId('5e7541f88307b5e3d4a15eff')}]

In [68]:
collection.count_documents({"age" : { "$nin" : [""] } })     # all documents

1000

## 4.4 Relational operators

### 4.4.1 Relational operators with numbers :  "greater than or equal", "greater than",  "equal", "less than", "less than or equal"

In [69]:
collection.count_documents({"age": {"$gte" : 38}})      # greater than or equal

152

In [70]:
collection.count_documents({"age": {"$gt" : 38}})       # greater than

103

In [71]:
collection.count_documents({"age": {"$eq" : 38}})       # equal

49

In [72]:
collection.count_documents({"age": {"$lt" : 38}})       # lower than

848

In [73]:
collection.count_documents({"age": {"$lte" : 38}})      # lower than or equal

897

### 4.4.2 Relational operators with letters :  "greater than or equal"  or  "less than or equal"

Be careful: capital letters come before lowercase letters in MongoDB

In [74]:
collection.count_documents({"name": {"$lt" : "B" }})   # Names which start with A

56

In [75]:
# Temporarily insert a document named exactly "B" so that $lte / $gte can be told apart from $lt / $gt
collection.insert_one({'name' : "B"})
collection.count_documents({"name": {"$lte" : "B" }})   # Names which start with A, or are exactly "B"    ## WARNING: uppercase the names before applying the filter, because capital letters sort before lowercase ones

57

In [76]:
collection.count_documents({"name": {"$gte" : "B" }})   # Names which start with letters B to Z 

945

In [77]:
collection.count_documents({"name": {"$gt" : "B" }})   # Names which are not "B" and start with letters B to Z 

944

In [78]:
collection.delete_one({'name' : "B"})

<pymongo.results.DeleteResult at 0x7fec793e5448>

## 4.5 Logical Query Operators

### 4.5.1 AND

In [79]:
# $and: every listed condition must hold for a document to match
filters = { "$and":[ {"name" : "Kinney Wynn"}, {"age": 22} ]}
# Request only the id explicitly: an empty projection ({}) yields just the _id
# in PyMongo 3, but PyMongo 4 forwards it unchanged and returns whole documents.
fields = {'_id': 1}

list ( collection.find( filters , fields ))

[{'_id': ObjectId('5e7541f88307b5e3d4a15d76')}]

### 4.5.2 OR

In [80]:
# $or: a document matches when at least one of the listed conditions holds
filters = {"$or":[ {"age" : 28}, {"age" : 29} ]}
fields = {}      # not used by the count below

# count_documents() takes no projection -- its second positional parameter is
# `session` -- so only the filter is passed.
collection.count_documents( filters )

88

### 4.5.3 AND & OR

In [81]:
# (name is "Kinney Wynn" OR name is "Kimberley Chase") AND age is 22
name_is_either = {"$or": [{"name": "Kinney Wynn"}, {"name": "Kimberley Chase"}]}
age_is_22 = {"age": 22}
filters = {"$and": [name_is_either, age_is_22]}
fields = ['name', 'age']      # return only these attributes; the _id comes along by default

list(collection.find(filters, fields))

[{'_id': ObjectId('5e7541f88307b5e3d4a15d76'),
  'age': 22,
  'name': 'Kinney Wynn'},
 {'_id': ObjectId('5e7541f88307b5e3d4a15eff'),
  'age': 22,
  'name': 'Kimberley Chase'}]

## 4.6 Exists & not exists

In [82]:
print(collection.count_documents({'_id' : {'$exists' : 1}}))  # Counts all documents with attribute '_id'

1000


In [83]:
# Counts documents without attribute 'age'
print(collection.count_documents({'age' : {'$exists' : 0}}))

0


# 5. List items that belong to a list or not

In [84]:
# Count documents with age equal to 28, 29 or 30
print(collection.count_documents({'age' : {'$in': [ 28, 29, 30]}}))

126


In [85]:
# Count documents with age different to 28, 29 or 30
print(collection.count_documents({'age' : {'$nin': [ 28, 29, 30]}}))

874


In [86]:
# Count documents with favorite fruit different to banana and apple
print(collection.count_documents({'favoriteFruit' : {'$nin': [ 'banana', 'apple']}}))


323


# 6. Indexes
Adding indexes can help accelerate certain queries and can also add additional functionality to querying and storing documents.

## 6.1 Index information

In [87]:
collection.index_information()                            # Shows the existing indexes

{'_id_': {'key': [('_id', 1)], 'ns': 'brny4kjelauboxl.addressbook', 'v': 2}}

## 6.2 Create index 

In [88]:
collection.create_index([( "age" , pymongo.ASCENDING)])   # Creates an index for age and Returns the name of the index

'age_1'

## 6.3 Create index with unique records

In [121]:
# Select the working database, then remove any 'profiles' collection left over
# from a previous run so the unique-index example starts empty.
db = client['brny4kjelauboxl']
existing_collections = db.list_collection_names()
if 'profiles' not in existing_collections:
    print('Not previous collection found')
else:
    print('Deleting "profiles" collection')
    db.drop_collection('profiles')

Deleting "profiles" collection


In [122]:
# A unique index rejects any document whose 'user_id' value already exists in the index
result = db['profiles'].create_index(
    [('user_id', pymongo.ASCENDING)],
    unique=True,
)
sorted(db['profiles'].index_information())     # names of the indexes now defined

['_id_', 'user_id_1']

In [123]:
user_profiles = [ {'user_id': 211, 'name': 'Luke'}, {'user_id': 212, 'name': 'Ziltoid'}]
result = db.profiles.insert_many(user_profiles)
print( result.acknowledged )

True


In [124]:
list( db.profiles.find() )

[{'_id': ObjectId('5e754509b396a6007b0b5fa5'), 'name': 'Luke', 'user_id': 211},
 {'_id': ObjectId('5e754509b396a6007b0b5fa6'),
  'name': 'Ziltoid',
  'user_id': 212}]

In [None]:
new_profile = {'user_id': 213, 'name': 'Drew'}
result = db.profiles.insert_one(new_profile)  # This is fine.

In [126]:
# user_id 212 was already inserted above, so the unique index on 'user_id' rejects this document
duplicate_profile = {'user_id': 212, 'name': 'Tommy'}
result = db.profiles.insert_one(duplicate_profile)      # raises DuplicateKeyError (intentional, for demonstration)

## Expected error :  DuplicateKeyError: E11000 duplicate key error index: people.profiles.$user_id_1 dup key: { : 212 }

DuplicateKeyError: ignored