## Wrangling Data with MongoDB

Goals:
- Connect to a MongoDB Server
- Explore the database and locate PM2.5 readings
- Import database query results into a DataFrame

In [1]:
from pprint import PrettyPrinter

import pandas as pd
from pymongo import MongoClient

In [3]:
# Shared pretty-printer used by later cells to show nested query results.
PP_INDENT = 2
pp = PrettyPrinter(indent=PP_INDENT)

### Connect

In [5]:
# Address of the MongoDB server this notebook talks to.
MONGO_HOST = "localhost"
MONGO_PORT = 27017

# Build the client; the bare expression on the last line displays its repr.
client = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
client

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

### Explore

In [None]:
# Materialise the database listing first, then pretty-print it.
databases = list(client.list_databases())
pp.pprint(databases)

In [None]:
# [ {'empty': False, 'name': 'admin', 'sizeOnDisk': 40960},
#   {'empty': False, 'name': 'air-quality', 'sizeOnDisk': 4190208},
#   {'empty': False, 'name': 'config', 'sizeOnDisk': 61440},
#   {'empty': False, 'name': 'local', 'sizeOnDisk': 73728},
#   {'empty': False, 'name': 'wqu-abtest', 'sizeOnDisk': 585728}]

Assign the "air-quality" database to the variable `db`

In [None]:
# Index the client by database name to get a handle on "air-quality"
# (one of the databases listed by `client.list_databases()` above).
db = client["air-quality"]

List Collections

In [None]:
# Collect every collection description, then display only the first one
# to see what fields such a description carries.
collection_specs = list(db.list_collections())
collection_specs[0]

In [9]:
# {'name': 'lagos',
#  'type': 'timeseries',
#  'options': {'timeseries': {'timeField': 'timestamp',
#    'metaField': 'metadata',
#    'granularity': 'seconds',
#    'bucketMaxSpanSeconds': 3600}},
#  'info': {'readOnly': False}}

In [None]:
# Print the name of every collection in the database, one per line.
# `list_collection_names()` returns the names directly, so there is no need
# to iterate the full collection descriptions just to read their "name" key.
for collection_name in db.list_collection_names():
    print(collection_name)

In [None]:
# lagos
# system.buckets.lagos
# system.views
# dar-es-salaam
# system.buckets.dar-es-salaam
# nairobi
# system.buckets.nairobi

Assign the "nairobi" collection to a variable

In [None]:
# Handle on the "nairobi" collection of `db`; the cells below query it.
nairobi = db["nairobi"]

Count Documents

In [None]:
# Count the documents in the collection; the empty filter `{}` matches all.
nairobi.count_documents({})
# 202212  (presumably the count returned when this cell was last run)

Find One Document

In [None]:
# Fetch a single document (empty filter, so no restriction on which one)
# and pretty-print it to inspect the fields a reading carries.
result = nairobi.find_one({})
pp.pprint(result)

In [None]:
# { '_id': ObjectId('6525d772f44bfedd842a6fcc'),
#   'metadata': { 'lat': -1.3,
#                 'lon': 36.785,
#                 'measurement': 'temperature',
#                 'sensor_id': 58,
#                 'sensor_type': 'DHT22',
#                 'site': 29},
#   'temperature': 16.5,
#   'timestamp': datetime.datetime(2018, 9, 1, 0, 0, 4, 301000)}

Distinct Sensor Sites

# Unique values of the nested "site" field inside each document's "metadata".
nairobi.distinct("metadata.site")
[6, 29]

### Split