# MongoDB

> humongous: of an extremely large size. https://en.wiktionary.org/wiki/humongous



### Characteristics

- databases -> collections -> documents
- document = JSON-like (nested) dictionaries
- no schema required
- easy to use
- made for processing tons of data
- built-in support for distributed architecture (replication, sharding)

In [None]:
!pip install pymongo

In [1]:
import pymongo

In [3]:
# is my mongodb-container running?
!docker-compose ps

         Name                   Command           State            Ports        
--------------------------------------------------------------------------------
06_data_pipeline_etl_1   python ./src/app.py      Exit 0                        
06_data_pipeline_mongo   docker-entrypoint.sh     Up       0.0.0.0:27017->27017/
db_1                     mongod                            tcp,:::27017->27017/t
                                                           cp                   
06_data_pipeline_tweet   python ./src/app.py      Exit 0                        
_collector_1                                                                    


## Connect to the DB Server

In [5]:
# connect to the database inside the container
client = pymongo.MongoClient(host='localhost', port=27017)

# connect to the database from inside another container
# client = pymongo.MongoClient('mongodb', 27017)

### List databases

In [17]:
# list available databases
client.list_database_names()

['admin', 'config', 'local', 'spiced']

### List collections

In [18]:
# use the spiced database
db = client.spiced

In [19]:
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'spiced')

In [20]:
# list collections
db.list_collection_names()

['students']

In [22]:
# count documents
db.students.count_documents({})

1

In [29]:
db.students.find_one({'name':'Malte'})

{'_id': ObjectId('6141b3e46c8c2336c7878e6b'),
 'name': 'Malte',
 'faculty': 'Data science',
 'shoe_size': 45,
 'colors': ['red', 'grean', 'blue']}

## Create

In [40]:
# insert a document

doc = {
    'name':'suba',
    'faculty':'Data science',
    'shoe_size':48,
    'colors': ['red','grean','blue'],
    'twitter_account':{'username': 'subha',
    'followers':1200
    }
}

# If the document does not have an _id field one will be added automatically!
db.students.insert_one(doc)

<pymongo.results.InsertOneResult at 0x7fa4a79375c0>

In [51]:
# insert several documents

docs = [...]

db.students.insert_many()

TypeError: insert_many() missing 1 required positional argument: 'documents'

## Read

In [42]:
# most recent document
db.students.find_one()

{'_id': ObjectId('6141b2c86c8c2336c7878e6a'),
 'name': 'Tianjun',
 'faculty': 'Data science',
 'shoe_size': 38}

In [43]:
# filter
db.students.find_one({"twitter_account.username":"subha"})

{'_id': ObjectId('6141ba476c8c2336c7878e6d'),
 'name': 'suba',
 'faculty': 'Data science',
 'shoe_size': 48,
 'colors': ['red', 'grean', 'blue'],
 'twitter_account': {'username': 'subha', 'followers': 1200}}

In [44]:
# find several documents
for doc in db.students.find():
    print(doc)

{'_id': ObjectId('6141b2c86c8c2336c7878e6a'), 'name': 'Tianjun', 'faculty': 'Data science', 'shoe_size': 38}
{'_id': ObjectId('6141b3e46c8c2336c7878e6b'), 'name': 'Malte', 'faculty': 'Data science', 'shoe_size': 45, 'colors': ['red', 'grean', 'blue']}
{'_id': ObjectId('6141b4df6c8c2336c7878e6c'), 'name': 'suba', 'faculty': 'Data science', 'shoe_size': 48, 'colors': ['red', 'grean', 'blue'], 'twitter_account': {'usernaame': 'subha', 'followers': 1200}}
{'_id': ObjectId('6141ba476c8c2336c7878e6d'), 'name': 'suba', 'faculty': 'Data science', 'shoe_size': 48, 'colors': ['red', 'grean', 'blue'], 'twitter_account': {'username': 'subha', 'followers': 1200}}


In [45]:
# find all documents where shoe size>40is greater than 1977
# https://docs.mongodb.com/manual/reference/operator/query/

db.students.find_one({"shoe_size": {'$gt': 40}})

{'_id': ObjectId('6141b3e46c8c2336c7878e6b'),
 'name': 'Malte',
 'faculty': 'Data science',
 'shoe_size': 45,
 'colors': ['red', 'grean', 'blue']}

## Delete

In [47]:
db.students.delete_one({"name":"subha"})

<pymongo.results.DeleteResult at 0x7fa4a7ed0fc0>

In [50]:
db.students.count_documents({"name":"subha"})

0

In [None]:
# drop the collection
db.students.???