In [None]:
import pymongo

In [None]:
# Connect to the local MongoDB server and get a handle to the "multiverse"
# database. Nothing is created on the server at this point.
uri = "mongodb://localhost:27017"
myclient = pymongo.MongoClient(host=uri)
mydb = myclient.get_database("multiverse")

In [None]:
# A database is NOT created until it holds data, so "multiverse" should
# not be in the server's database list yet.
dblist = myclient.list_database_names()
print("The database exists." if "multiverse" in dblist else "DB absent")

DB absent


In [None]:
# show every database name the server currently reports
print(myclient.list_database_names())
# multiverse not present in db name list yet!

['universe']


In [None]:
# Get a handle to the "earth" collection. As with the database, neither the
# database nor the collection exists on the server until data is written.
myplanet = mydb.get_collection("earth")

In [None]:
# Two sample documents; note that they do not share the same set of fields.
country1 = dict(name="Honolulu", population="unknown")
country2 = dict(name="Pikaland", capital="Townsville", potatoLover=True)

# Insert only the first document for now; the cell displays the result object.
myplanet.insert_one(country1)


<pymongo.results.InsertOneResult at 0x7f3bb88b89b0>

In [None]:
# insert_one() returns an InsertOneResult object; the ID of the inserted
# document is available on it (see inserted_id below). For example,
x = myplanet.insert_one(country2)
x

<pymongo.results.InsertOneResult at 0x7f3bb887abe0>

In [None]:
# if you want to see all attributes of an object, you can use the dir() command
print(dir(x))
# from here, we can find the inserted_id field (last in the list)

['_InsertOneResult__acknowledged', '_InsertOneResult__inserted_id', '_WriteResult__acknowledged', '__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '_raise_if_unacknowledged', 'acknowledged', 'inserted_id']


In [None]:
x.inserted_id
# this is an ObjectId, the value stored in the document's _id field
# (MongoDB indexes _id automatically, which makes lookups by _id fast)

ObjectId('60cd8505b92a1f0bdc7b680e')

In [None]:
# insert_many() writes several documents in a single call
countries = [
    dict(name="Tonalulu", population=100, currency="dollar"),
    dict(name="Gagacity", area=1000, potatoLover=True),
]
x = myplanet.insert_many(countries)

In [None]:
# the insert_many() result exposes a list with one ObjectId per inserted document
x.inserted_ids

[ObjectId('60cd8509b92a1f0bdc7b680f'), ObjectId('60cd8509b92a1f0bdc7b6810')]

In [None]:
# get first record: find_one() with no filter returns a single document
x = myplanet.find_one()

In [None]:
# x is NOT a cursor: find_one() returns the document itself as a plain dict
# (or None when nothing matches). Cursors are what find() returns.
x

{'_id': ObjectId('60cd8504b92a1f0bdc7b680d'),
 'name': 'Honolulu',
 'population': 'unknown'}

In [None]:
# calling find_one() again with no filter returns the same first document
x = myplanet.find_one()
x

{'_id': ObjectId('60cd8504b92a1f0bdc7b680d'),
 'name': 'Honolulu',
 'population': 'unknown'}

In [None]:
# to print all records: find() with no arguments yields every document in the collection
for x in myplanet.find():
  print(x)

{'_id': ObjectId('60cd8504b92a1f0bdc7b680d'), 'name': 'Honolulu', 'population': 'unknown'}
{'_id': ObjectId('60cd8505b92a1f0bdc7b680e'), 'name': 'Pikaland', 'capital': 'Townsville', 'potatoLover': True}
{'_id': ObjectId('60cd8509b92a1f0bdc7b680f'), 'name': 'Tonalulu', 'population': 100, 'currency': 'dollar'}
{'_id': ObjectId('60cd8509b92a1f0bdc7b6810'), 'name': 'Gagacity', 'area': 1000, 'potatoLover': True}


In [None]:
# The second argument to find() is a projection. Setting a field to 0
# excludes it from the results; here we hide _id and keep every other field.
for x in myplanet.find({},{ "_id": 0}):
  print(x)

{'name': 'Honolulu', 'population': 'unknown'}
{'name': 'Pikaland', 'capital': 'Townsville', 'potatoLover': True}
{'name': 'Tonalulu', 'population': 100, 'currency': 'dollar'}
{'name': 'Gagacity', 'area': 1000, 'potatoLover': True}


In [None]:
# or we can select certain fields too: 1 means "return only these fields"
# (a document that lacks a requested field simply omits it)
for x in myplanet.find({},{ "_id": 0,"name":1,"potatoLover": 1}):
  print(x)

{'name': 'Honolulu'}
{'name': 'Pikaland', 'potatoLover': True}
{'name': 'Tonalulu'}
{'name': 'Gagacity', 'potatoLover': True}


In [None]:
# BUT ONE limitation - we cannot combine 0 and 1 in the same projection, except for the _id field.
# What does this mean?
# Either provide the list of what to show, or provide the list of what not to show - DO NOT MIX THEM UP.
# This will work (inclusions only, plus the permitted _id exclusion):
for x in myplanet.find({},{ "_id": 0,"name":1,"population": 1}):
  print(x)

{'name': 'Honolulu', 'population': 'unknown'}
{'name': 'Pikaland'}
{'name': 'Tonalulu', 'population': 100}
{'name': 'Gagacity'}


In [None]:
# This will not work: a PROJECTION cannot contain both an inclusion rule
# ("name": 1) and an exclusion rule ("population": 0), apart from _id.
# The query fails with OperationFailure once the cursor is iterated, so the
# loop is wrapped in try/except: the error is displayed as the demonstration
# without aborting a Restart & Run All of the notebook.
try:
  for x in myplanet.find({},{ "_id": 0,"name":1,"population": 0}):
    print(x)
except pymongo.errors.OperationFailure as err:
  print("OperationFailure:", err)

OperationFailure: ignored

In [None]:
# you can build exact search filters also: the first argument to find()
# is a document of field/value pairs that results must match
search_doc = {'population': 'unknown'}
for x in myplanet.find(search_doc):
  print(x)

{'_id': ObjectId('60cd8504b92a1f0bdc7b680d'), 'name': 'Honolulu', 'population': 'unknown'}


In [None]:
# you can search less than, greater than, and other comparative operations
# ($lt = "less than"; documents whose population is missing or is the
# string 'unknown' are not matched by this numeric comparison)
search_doc ={ "population": { "$lt": 1000 } }
for x in myplanet.find(search_doc):
  print(x)

{'_id': ObjectId('60cd8509b92a1f0bdc7b680f'), 'name': 'Tonalulu', 'population': 100, 'currency': 'dollar'}


In [None]:
# greater than, less than of course works on numbers, but you can even use it on strings
# (here: every name that sorts after the string "P")
search_doc ={ "name": { "$gt": "P" } }
for x in myplanet.find(search_doc):
  print(x)

{'_id': ObjectId('60cd8505b92a1f0bdc7b680e'), 'name': 'Pikaland', 'capital': 'Townsville', 'potatoLover': True}
{'_id': ObjectId('60cd8509b92a1f0bdc7b680f'), 'name': 'Tonalulu', 'population': 100, 'currency': 'dollar'}


In [None]:
# but strings are much better searched through regular expressions, rather than gt or lt
# here is a regular expression to find all name values starting with T
# (^ anchors the pattern to the start of the string)
myquery = { "name": { "$regex": "^T" } }
for x in myplanet.find(myquery):
  print(x)

{'_id': ObjectId('60cd8509b92a1f0bdc7b680f'), 'name': 'Tonalulu', 'population': 100, 'currency': 'dollar'}


In [None]:
# update_one() finds a document matching the filter and applies the update
# to it; $set writes the listed fields onto that document.
searchfor = dict(name="Pikaland")
updatefor = {"$set": dict(population=500, drama=True)}
myplanet.update_one(searchfor, updatefor)

# print the whole collection to confirm the change
for place in myplanet.find():
  print(place)

{'_id': ObjectId('60cd8504b92a1f0bdc7b680d'), 'name': 'Honolulu', 'population': 'unknown'}
{'_id': ObjectId('60cd8505b92a1f0bdc7b680e'), 'name': 'Pikaland', 'capital': 'Townsville', 'potatoLover': True, 'population': 500, 'drama': True}
{'_id': ObjectId('60cd8509b92a1f0bdc7b680f'), 'name': 'Tonalulu', 'population': 100, 'currency': 'dollar'}
{'_id': ObjectId('60cd8509b92a1f0bdc7b6810'), 'name': 'Gagacity', 'area': 1000, 'potatoLover': True}


In [None]:
# update_many() applies the same update to every document matching the filter.
# The regular expression selects names starting with H, P, T or G.
searchfor = {"name": {"$regex": "^[HPTG]"}}
updatefor = {"$set": {"lovesKetchup": False}}
x = myplanet.update_many(searchfor, updatefor)
print(f"{x.modified_count} documents updated.")

# print the whole collection to confirm the change
for place in myplanet.find():
  print(place)

4 documents updated.
{'_id': ObjectId('60cd8504b92a1f0bdc7b680d'), 'name': 'Honolulu', 'population': 'unknown', 'lovesKetchup': False}
{'_id': ObjectId('60cd8505b92a1f0bdc7b680e'), 'name': 'Pikaland', 'capital': 'Townsville', 'potatoLover': True, 'population': 500, 'drama': True, 'lovesKetchup': False}
{'_id': ObjectId('60cd8509b92a1f0bdc7b680f'), 'name': 'Tonalulu', 'population': 100, 'currency': 'dollar', 'lovesKetchup': False}
{'_id': ObjectId('60cd8509b92a1f0bdc7b6810'), 'name': 'Gagacity', 'area': 1000, 'potatoLover': True, 'lovesKetchup': False}


In [None]:
# limit can be used to show only limited records (here: at most 2 documents)
for place in myplanet.find().limit(2):
  print(place)

{'_id': ObjectId('60cd8504b92a1f0bdc7b680d'), 'name': 'Honolulu', 'population': 'unknown', 'lovesKetchup': False}
{'_id': ObjectId('60cd8505b92a1f0bdc7b680e'), 'name': 'Pikaland', 'capital': 'Townsville', 'potatoLover': True, 'population': 500, 'drama': True, 'lovesKetchup': False}


In [None]:
# we could also write a function that generates random data and then use it to update records
import random
def sendRandomNumber():
  """Return a pseudo-random integer n with 0 <= n < 1000."""
  # random.random() is a float in [0, 1); scale it up, then truncate to an int
  scaled = random.random() * 1000
  return int(scaled)


# Collect the _id and name of every document in the collection whose name
# is greater than or equal to "P"; the projection keeps just those two fields.
all_ids = list(myplanet.find({ "name": { "$gte": "P" } },{ "_id": 1, "name": 1}))
print(all_ids)

[{'_id': ObjectId('60cd8505b92a1f0bdc7b680e'), 'name': 'Pikaland'}, {'_id': ObjectId('60cd8509b92a1f0bdc7b680f'), 'name': 'Tonalulu'}]


In [None]:
# Now loop over the collected records and give each one a random population.
# Filter on _id rather than name: _id is unique, so each update_one() hits
# exactly the document that was collected, even if another document happens
# to share the same name.
for record in all_ids:
  searchfor = { "_id": record["_id"] }
  updatefor = { "$set": { "population": sendRandomNumber() } }
  print(searchfor)
  res = myplanet.update_one(searchfor, updatefor)
  # report counts instead of the opaque UpdateResult object repr
  print("matched:", res.matched_count, "modified:", res.modified_count)

{'name': 'Pikaland'}
<pymongo.results.UpdateResult object at 0x7f3bb475caf0>
{'name': 'Tonalulu'}
<pymongo.results.UpdateResult object at 0x7f3bb475cb90>


In [None]:
# print the whole collection to check the updated population values
for item in myplanet.find():
  print(item)

{'_id': ObjectId('60cd8504b92a1f0bdc7b680d'), 'name': 'Honolulu', 'population': 'unknown', 'lovesKetchup': False}
{'_id': ObjectId('60cd8505b92a1f0bdc7b680e'), 'name': 'Pikaland', 'capital': 'Townsville', 'potatoLover': True, 'population': 818, 'drama': True, 'lovesKetchup': False}
{'_id': ObjectId('60cd8509b92a1f0bdc7b680f'), 'name': 'Tonalulu', 'population': 999, 'currency': 'dollar', 'lovesKetchup': False}
{'_id': ObjectId('60cd8509b92a1f0bdc7b6810'), 'name': 'Gagacity', 'area': 1000, 'potatoLover': True, 'lovesKetchup': False}


  This is separate from the ipykernel package so we can avoid doing imports until


4

In [None]:
# ok. Let's look at our data. Let's say we wanted to do some analysis on population
# but our problem is that not all records even have population.
# Query the collection directly here: `mydata` is only assigned in a later
# cell (NameError on a fresh kernel), and a cursor can be iterated only once,
# so reusing an already-consumed cursor would print nothing.
for x in myplanet.find():
  print(x)

In [None]:


# let's delete all records where population is unknown.
# Population must be a positive number (you cannot have a city with -32
# inhabitants), so every record whose population is NOT "> 0" is dirty;
# this also catches records with no population field or a non-numeric one.
dirty_filter = {'population': { "$not": { "$gt": 0 } }}
# select only the _id of every such record
dirty_ids = myplanet.find(dirty_filter, {"_id":1})
# Cursor.count() is deprecated since PyMongo 3.7 and removed in PyMongo 4;
# count_documents() on the collection is the supported way to count matches.
myplanet.count_documents(dirty_filter)

  import sys


2

In [None]:

# iterate the cursor to list the _id of each record flagged as dirty
for dirtyid in dirty_ids:
  print(dirtyid)


{'_id': ObjectId('60cd8504b92a1f0bdc7b680d')}
{'_id': ObjectId('60cd8509b92a1f0bdc7b6810')}


In [None]:
# filter: every document whose population is not a number greater than 0
to_del = {'population': { "$not": { "$gt": 0 } }}
# delete_many() removes every document matching the filter
result = myplanet.delete_many( to_del )

In [None]:
# print what is left in the collection after the delete
for x in myplanet.find():
  print(x)

{'_id': ObjectId('60cd8505b92a1f0bdc7b680e'), 'name': 'Pikaland', 'capital': 'Townsville', 'potatoLover': True, 'population': 818, 'drama': True, 'lovesKetchup': False}
{'_id': ObjectId('60cd8509b92a1f0bdc7b680f'), 'name': 'Tonalulu', 'population': 999, 'currency': 'dollar', 'lovesKetchup': False}


In [None]:
# you can also sort the data: sort('_id') orders the results by the _id field
mydata = myplanet.find().sort('_id')
# Cursor.count() is deprecated since PyMongo 3.7 and removed in PyMongo 4;
# count the documents through the collection instead.
myplanet.count_documents({})