# MongoDB custom module example usage

#### Install python mongo module

In [None]:
!pip install pymongo==3.12

#### Check python version

In [27]:
import sys
print(sys.version)

3.10.9 | packaged by Anaconda, Inc. | (main, Mar  1 2023, 18:18:15) [MSC v.1916 64 bit (AMD64)]


### Test mongo connection

In [None]:
from pymongo.mongo_client import MongoClient

uri = "mongodb+srv://luisresende13:<Gaia0333>@pluvia-cluster.ea8fb4s.mongodb.net/?retryWrites=true&w=majority"

# Create a new client and connect to the server
client = MongoClient(uri)

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

### Mongo methods

In [None]:
import pymongo, requests, pandas as pd
from urllib.parse import urlencode

# ---
# MongoDB connection string

conn_str = "mongodb+srv://luisresende13:Gaia0333@pluvia-cluster.ea8fb4s.mongodb.net/?retryWrites=true&w=majority"

client = pymongo.MongoClient(conn_str, serverSelectionTimeoutMS=10*1e3)

def post_to_mongo(records, db, coll):
    try:
        ids_posted = client[db][coll].insert_many(records).inserted_ids
        return 'POST TO MONGO SUCCESS'
    except Exception as err:
        print(f'POST TO MONGO FAILED. DATABASE: {db}, COLLECTION: {coll}. ERROR TRACEBACK:')
        print(err)
        return 'POST TO MONGO FAILED'

def mongo_overwrite(records, db, coll):
    delete = client[db][coll].delete_many({})
    return client[db][coll].insert_many(records)

## Example usage

### Overwrite database collection

In [236]:
api_root = 'https://bolsao-api-j2fefywbia-rj.a.run.app'
# api_root = 'http://127.0.0.1:5000/'
subpath = 'ipp/polygons'

records = requests.get(f'{api_root}/{subpath}').json()
db = 'Waterbag'
coll = 'Polygons IPP'

# overwrite = mongo_overwrite(records, db, coll)

print('Inserted documents:', len(overwrite.inserted_ids))

Inserted documents: 968


### Group by first query

In [89]:
db = 'Waterbag'
coll = 'Polygons'

n_distinct = len(client[db][coll].distinct('cluster_id'))
records = client[db][coll].find({}).sort([('timestamp', -1)]).limit(n_distinct)

df = pd.DataFrame(list(records))

print('Records shape:', df.shape)
df.head()

Records shape: (1, 36)


Unnamed: 0,_id,status_code,status_name,cluster_id,main_neighborhood,main_route,main_street_number_range,lat_centroid,lng_centroid,label_count,...,enchente_count,enchente_status,enchente_ids,vazamento_count,vazamento_status,vazamento_ids,sirene_count,sirene_status,sirene_ids,timestamp
0,63e111442379e13df85751fa,1,ATENCAO,-1,Barra da Tijuca,Avenida Brasil,35025 - 14,-22.910743,-43.303404,1624,...,0,0,[],2,1,"[90184, 90187]",0,0,[],2023-02-06 14:40:00.052


In [238]:
coll = 'Polygons'
df = pd.DataFrame(requests.get(f'{api_root}/mongo/{coll}').json())

In [205]:
# api_root = 'https://bolsao-api-j2fefywbia-rj.a.run.app'
api_root = 'http://127.0.0.1:5000/'

#### '/mongo/' endpoint

In [206]:
colls = [
    'Prediction', 'City',
    'Polygons', 'Polygons Overview',
    'Polygons IPP', 'Polygons IPP Overview',
    'Polygons AlertaRio', 'Cameras AlertaRio'
]

# for i in range(len(colls) - 1):
#     colls[i] += '?as_str=timestamp'

dfs = []
for coll in colls:
    print(coll)
#     try:
    dfs.append(requests.get(f'{api_root}/mongo/{coll}').json()) #.find({}).sort([('timestamp', -1)]).limit(n_distinct)
#     except:
#         dfs.append(requests.get(f'{api_root}/mongo/{coll}?as_str=timestamp').json()) #.find({}).sort([('timestamp', -1)]).limit(n_distinct)

[len(df) for df in dfs]

Prediction
City
Polygons
Polygons Overview
Polygons IPP
Polygons IPP Overview
Polygons AlertaRio
Cameras AlertaRio


[109404, 1, 79, 1, 968, 1, 79, 2842]

#### '/mongo/gbf/' endpoint

In [207]:
colls_gbf = {
    'Prediction': {},
    'City': {},
    'Polygons':{},
    'Polygons Overview': {},
    'Polygons IPP': {},
    'Polygons IPP Overview': {},
    'Polygons AlertaRio': {},
    'Cameras AlertaRio': {},
}

dfs = []
for coll, params in colls_gbf.items():
    try:
        dfs.append(requests.get(f'{api_root}/mongo/gbf/{coll}?{urlencode(params)}').json()) #.find({}).sort([('timestamp', -1)]).limit(n_distinct)
        print(coll)
    except:
        pass

[len(df) for df in dfs]

Prediction
City
Polygons
Polygons Overview
Polygons IPP
Polygons IPP Overview
Polygons AlertaRio
Cameras AlertaRio


[79, 1, 79, 1, 968, 1, 79, 41]

### '/geojson/' endpoint

In [208]:
colls_geojson = {
    'City': {'obj_type': 'Polygon'},
    'gbf/Polygons':{'obj_type': 'Polygon'},
    'Polygons Overview': {'obj_type': 'Polygon', 'coords': 'coordinates', 'geometry_type': 'MultiPolygon'},
    'Polygons IPP': {'obj_type': 'Polygon', 'coords': 'coordinates', 'as_str': 'timestamp'},
    'Polygons IPP Overview': {'obj_type': 'Polygon', 'coords': 'coordinates', 'geometry_type': 'MultiPolygon'},
    'Polygons AlertaRio': {'obj_type': 'Polygon'},
    'Cameras AlertaRio': {'obj_type': 'Point', 'coords': 'Longitude,Latitude'},
}
dfs = []
for coll, info in colls_geojson.items():
    try:
        dfs.append(requests.get(f'{api_root}/geojson/mongo/{coll}?{urlencode(info)}').json()) #.find({}).sort([('timestamp', -1)]).limit(n_distinct)
        print(coll)
    except:
        pass

[len(df['features']) for df in dfs]

City
gbf/Polygons
Polygons Overview
Polygons IPP
Polygons IPP Overview
Polygons AlertaRio
Cameras AlertaRio


[1, 79, 1, 968, 1, 79, 2842]

#### Obs: geojson urls

In [204]:
[f'{api_root}/geojson/mongo/{coll}?{urlencode(info)}' for coll, info in colls_geojson.items()]

['https://bolsao-api-j2fefywbia-rj.a.run.app/geojson/mongo/City?obj_type=Polygon',
 'https://bolsao-api-j2fefywbia-rj.a.run.app/geojson/mongo/gbf/Polygons?obj_type=Polygon',
 'https://bolsao-api-j2fefywbia-rj.a.run.app/geojson/mongo/Polygons Overview?obj_type=Polygon&coords=coordinates&geometry_type=MultiPolygon',
 'https://bolsao-api-j2fefywbia-rj.a.run.app/geojson/mongo/Polygons IPP?obj_type=Polygon&coords=coordinates&as_str=timestamp',
 'https://bolsao-api-j2fefywbia-rj.a.run.app/geojson/mongo/Polygons IPP Overview?obj_type=Polygon&coords=coordinates&geometry_type=MultiPolygon',
 'https://bolsao-api-j2fefywbia-rj.a.run.app/geojson/mongo/Polygons AlertaRio?obj_type=Polygon',
 'https://bolsao-api-j2fefywbia-rj.a.run.app/geojson/mongo/Cameras AlertaRio?obj_type=Point&coords=Longitude%2CLatitude']

### Save mongo database collections locally using octa-api

In [35]:
# from pymongo import MongoClient
# from pymongo.server_api import ServerApi
# import os
# import json
# from bson import ObjectId

import os, requests, json
from tqdm import tqdm

In [None]:
# Base data directory
base_directory = 'data'

# MongoDB database name
database_name = 'Waterbag'

# Base octa-api root
api_root = 'http://127.0.0.1:5000'

# Get all collection names in the database
collection_names = [
    'Prediction', 'City',
    'Polygons', 'Polygons Overview',
    'Polygons IPP', 'Polygons IPP Overview',
    'Polygons AlertaRio', 'Cameras AlertaRio'
]

# Create a directory to save the collections
output_directory = f'{base_directory}/{database_name}'
os.makedirs(output_directory, exist_ok=True)

# Custom JSON encoder to handle ObjectId serialization
class CustomEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, ObjectId):
            return str(obj)
        return super().default(obj)

# Export each collection as a separate JSON file with progress tracking
for collection_name in tqdm(collection_names, desc='Exporting collections', unit='collection'):
    # Retrieve the collection data
    collection_data = requests.get(f'{api_root}/mongo/{collection_name}').json()

    # Generate the output file path
    output_file = os.path.join(output_directory, f'{collection_name}.json')

    # Save the collection data as JSON using the custom encoder
    with open(output_file, 'w') as file:
        json.dump(collection_data, file, indent=4) # , cls=CustomEncoder)

# Disconnect from MongoDB
client.close()

Exporting collections:  38%|███████████████████▌                                | 3/8 [05:55<13:12, 158.49s/collection]

### Save mongo database collections locally using pymongo

In [13]:
from pymongo import MongoClient
from pymongo.server_api import ServerApi
import os
import json
from bson import ObjectId
from tqdm import tqdm

# Base data directory
base_directory = 'data'

# MongoDB database name
database_name = 'Waterbag'

# MongoDB connection string
uri = "mongodb+srv://luisresende13:<Gaia0333>@pluvia-cluster.ea8fb4s.mongodb.net/?retryWrites=true&w=majority"

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)
    exit()

# Get the specified database
database = client.get_database(database_name)

# Get all collection names in the database
collection_names = database.list_collection_names()

# Create a directory to save the collections
output_directory = f'{base_directory}/{database_name}'
os.makedirs(output_directory, exist_ok=True)

# Custom JSON encoder to handle ObjectId serialization
class CustomEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, ObjectId):
            return str(obj)
        return super().default(obj)

# Export each collection as a separate JSON file with progress tracking
for collection_name in tqdm(collection_names, desc='Exporting collections', unit='collection'):
    # Retrieve the collection data
    collection_data = list(database[collection_name].find())

    # Generate the output file path
    output_file = os.path.join(output_directory, f'{collection_name}.json')

    # Save the collection data as JSON using the custom encoder
    with open(output_file, 'w') as file:
        json.dump(collection_data, file, indent=4, cls=CustomEncoder)

# Disconnect from MongoDB
client.close()


AttributeError: module 'pymongo.common' has no attribute 'clean_node'