To backup database:  
`mongodump -d=ThreatenedFlora -o=20200713 --gzip`

In [1]:
import pandas as pd

In [2]:
import re

In [3]:
from pymongo import MongoClient
from bson import ObjectId

In [4]:
def mystring(item , depth = 0):
    out = ''
    mydepth = depth
    
    if type(item) == list:
        for it in item:
            out += '\n'
            newdepth = mydepth + 1
            out += mystring(it, newdepth)
    
    elif type(item) == dict:
        for key in item:
            out += '  ' * mydepth + key + ': '
            newdepth = mydepth + 1
            out += mystring(item[key], newdepth)

    else:
        out += '{0}'.format(item) + '\n'
    
    return out

In [5]:
client = MongoClient()

tflora = client.ThreatenedFlora
taxo = tflora.Taxonomy

samples = tflora.Samples

In [78]:
qu = taxo.aggregate([
    {"$match": { "$and":[
        {"genus": "Magnolia"},
        {"specificEpithet": "hernandezii"}
        ]}
    },
    {"$lookup":
        {"localField": "_id",
        "from" : "Samples",
        "foreignField": "taxonID",
        "as": "record"}
    },
    {"$project": {
        "_id": 1,
        "family": 1,
        "genus": 1,
        "specificEpithet": 1,
        "TaxonID": 1,
        "record._id": 1,
        "record.recordedBy": 1,
        "record.recordNumber": 1,
        "record.decimalLatitude": 1,
        "record.decimalLongitude": 1,
        "record.country" : 1,
        "record.stateProvince": 1,
        "record.activated": 1
        }
    }
])

for row in qu:
    print(mystring(row))

_id: 5eff663d7b5e9c69d6a8df61
genus: Magnolia
specificEpithet: hernandezii
family: Magnoliaceae
record: 
    _id: 5eff66707b5e9c69d6a8fbb3
    recordNumber: 4419
    recordedBy: Vargas W.
    country: Colombia
    stateProvince: Risaralda
    decimalLatitude: None
    decimalLongitude: None
    activated: 1

    _id: 5eff66707b5e9c69d6a8fbb4
    recordNumber: 4014
    recordedBy: Vargas W.
    country: Colombia
    stateProvince: Risaralda
    decimalLatitude: None
    decimalLongitude: None
    activated: 1

    _id: 5eff66707b5e9c69d6a8fbbd
    recordNumber: 48
    recordedBy: Gómez C.
    country: Colombia
    stateProvince: Antioquia
    decimalLatitude: 6.3
    decimalLongitude: -75.7333333
    activated: 1

    _id: 5eff66707b5e9c69d6a8fbfe
    recordNumber: 28
    recordedBy: Mariño Y.
    country: Colombia
    stateProvince: Quindío
    decimalLatitude: 4.67759
    decimalLongitude: -75.621417
    activated: 1

    _id: 5eff66707b5e9c69d6a8fc00
    recordNumber: 857
    recorde

In [81]:
samples.update({"_id": ObjectId('5eff66707b5e9c69d6a8fd27')},
    {"$set": {"comment": "Georreferenciado por fuera del pais.", "activated": 0}}
)

  


{'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}

In [80]:
for i in samples.find({"_id": ObjectId('5eff66707b5e9c69d6a8fd27')}):
    print(mystring(i))

_id: 5eff66707b5e9c69d6a8fd27
myindex: 7016
source: Magnolias
occurrenceID: None
basisOfRecord: Espécimen Preservado
institutionCode: JBOTANICOMED
collectionCode: JAUM
catalogNumber: 38834
institutionID: 890980756-2
occurrenceRemarks: Árbol de 30 m de alto, tronco recto. DAP 33 cm, hojas verde oscuro, frutos verde claro
recordNumber: 10227
recordedBy: Devia W.
reproductiveCondition: None
occurrenceStatus: None
disposition: Duplicado en otra colección biológica 
otherCatalogNumbers: TULV 16539
eventDate: 2004-07-01/2004-07-31
eventRemarks: None
habitat: None
continent: SA
country: Colombia
countryCode: CO
stateProvince: Valle del Cauca
county: Calima
municipality: Darién
locality: Vereda San José, finca El Carmen a 4 km perímetro urbano
verbatimLocality: None
minimumElevationInMeters: 2000.0
maximumElevationInMeters: 2000.0
verbatimLatitude: 3°57'31"N
verbatimLongitude: 78°53'34"W
verbatimCoordinateSystem: grados minutos segundos
decimalLatitude: 3.9589826
decimalLongitude: -78.4896024


In [50]:
qu = samples.aggregate([
    {"$match": { "$and": [
        {"decimalLatitude": {"$gt": 13.3, "$lt": 13.4}},
        {"decimalLongitude": {"$gt": -81.3, "$lt": -81.2}}
        ]},
    },
    {"$lookup":
        {"localField": "taxonID",
        "from" : "Taxonomy",
        "foreignField": "_id",
        "as": "name"}
    },
    {"$project": {
        "name._id": 1,
        "name.family": 1,
        "name.genus": 1,
        "name.specificEpithet": 1,
        "TaxonID": 1,
        "recordedBy": 1,
        "recordNumber": 1,
        "decimalLatitude": 1,
        "decimalLongitude": 1,
        "country" : 1,
        "stateProvince": 1,
        "activated": 1
        }
    },
    {"$match": {"name.family": "Arecaceae"}},
    {"$limit": 5}
])

for spp in qu:
    print(mystring(spp))

_id: 5eff66707b5e9c69d6a8f443
recordNumber: 79718
recordedBy: Gentry Alwyn; R. Ortiz
country: Colombia
stateProvince: Archipiélago de San Andrés, Providencia y Santa Catalina
decimalLatitude: 13.3561556
decimalLongitude: -81.221566
activated: 0
name: 
    _id: 5eff663d7b5e9c69d6a8ddb5
    genus: Coccothrinax
    specificEpithet: argentata
    family: Arecaceae



In [39]:
qu = samples.aggregate([
    {"$match": 
        {"taxonID": ObjectId('5eff663d7b5e9c69d6a8e0ad')},
    },
    {"$project": {
        "TaxonID": 1,
        "recordedBy": 1,
        "recordNumber": 1,
        "decimalLatitude": 1,
        "decimalLongitude": 1,
        "country" : 1,
        "stateProvince": 1,
        "activated": 1
        }
    }
])

for spp in qu:
    print(mystring(spp))

_id: 5eff66707b5e9c69d6a8f451
recordNumber: 1159
recordedBy: Barbosa, C.
country: Colombia
stateProvince: Chocó
decimalLatitude: 7.813
decimalLongitude: -77.201
activated: 1

_id: 5eff66707b5e9c69d6a8f60f
recordNumber: 18707
recordedBy: Schultes, R. E.; Cabrera, I.
country: Colombia
stateProvince: Antioquia
decimalLatitude: 7.4108
decimalLongitude: -76.5762
activated: 1

_id: 5eff66707b5e9c69d6a8f610
recordNumber: 9995
recordedBy: Duke James A.
country: Colombia
stateProvince: Antioquia
decimalLatitude: 8.1
decimalLongitude: -76.73
activated: 1

_id: 5eff66707b5e9c69d6a8f611
recordNumber: 582
recordedBy: Jiménez-E. Néstor David; Cristina Estupiñan; Pedro Hernández; Rosalba Ruíz
country: Colombia
stateProvince: Córdoba
decimalLatitude: 7.6658611
decimalLongitude: -76.1020556
activated: 1

_id: 5eff66707b5e9c69d6a8f612
recordNumber: 406
recordedBy: Estupiñán-G. Ana Cristina; David Jiménez-E.; Pedro Hernández; Rosalba Ruíz; Mario Molina
country: Colombia
stateProvince: Córdoba
decimalLati

## Export csv table for analysis

In [25]:
qu = samples.aggregate([
    {"$match": {"$and": [
        {"activated": 1},
        {"decimalLongitude": {"$exists": True, "$ne": None}},
        {"decimalLatitude": {"$exists": True, "$ne": None}}
        ]}
    },
    {"$lookup":
        {"localField": "taxonID",
        "from" : "Taxonomy",
        "foreignField": "_id",
        "as": "name"}
    },
    {"$project": 
        {"name._id": 1,
        "name.family": 1,
        "name.genus": 1,
        "name.specificEpithet": 1,
        "name.project": 1,
        "name.distribution": 1,
        "name.threatStatusScope": 1,
        "TaxonID": 1,
        "recordedBy": 1,
        "recordNumber": 1,
        "decimalLatitude": 1,
        "decimalLongitude": 1}
    },
    {"$match":
        {"$and": [
            {"name.distribution": 'Endemica'},
            {"name.project": "Bosque_seco"}
        ]}
    }#,
    #{"$limit": 5}
])

dat2pan = {'Taxon': [], 'Latitude' : [], 'Longitude': [], 'Collector': [], 'CollectionNumber': []}

for row in qu:
    dat2pan['Latitude'].append(row['decimalLatitude'])
    dat2pan['Longitude'].append(row['decimalLongitude'])
    dat2pan['Collector'].append(row['recordedBy'])
    dat2pan['CollectionNumber'].append(row['recordNumber'])
    species = row['name'][0]['genus'] + ' ' + row['name'][0]['specificEpithet']
    #print(species)
    dat2pan['Taxon'].append(species)
    '''
    for na in spp['name']:
        #print(species, sam['decimalLongitude'], sam['decimalLatitude'])
        species = na['genus'] + " " + na['specificEpithet']
        dat2pan['Taxon'].append(species)
    
    #print(mystring(spp))    
    '''
colls = pd.DataFrame.from_dict(dat2pan)
colls.to_csv('bosque_seco.csv', index=False)

In [20]:
colls.shape

(1861, 5)

In [93]:
qu = taxo.aggregate([
    {"$match": {"family": "Arecaceae"}},
    {"$project": 
        {"_id": 1,
        "genus": 1,
        "specificEpithet": 1,
        'threatStatus': 1,
        'threatCriteria': 1}
    }#,
    #{"$limit": 5}
])

dat2pan = {'Taxon': [], 'Category' : [], 'Criteria': []}

for spp in qu:
    species = spp['genus'] + " " + spp['specificEpithet']
    dat2pan['Taxon'].append(species)
    dat2pan['Category'].append(spp['threatStatus'])
    dat2pan['Criteria'].append(spp['threatCriteria'])

    #print(species, spp['threatCriteria'])
    #print(species, spp['threatStatus'], spp['threatCriteria'])
    #print(mystring(spp))
    
pd.DataFrame.from_dict(dat2pan).to_csv('Arecaceae_categories.csv', index=False)

----

In [38]:
for i in samples.find({
    'taxonID': ObjectId('5eff663d7b5e9c69d6a8e198')}):
    print(i['recordedBy'])

W. Devia; J. Adarve
M. Heredia
W. Vargas; R. Gutiérrez
W. Vargas
W. Vargas; E. Mendez
W. Devia; J. Adarve
A. Gentry
C. Hughes
G. Reina; O. Meneses; J. Perafán
P. Silverstone; N. Paz; M. Heredia
J. E. Ramos; L. H. Ramos
J. E. Ramos; L. H. Ramos
J. E. Ramos; L. H. Ramos
P. Hutchinson; J. Idrobo; J. Kenneth
W. Devia
A. Gentry; P. Keating; M. Heredia; M. Monsalve
C. Ruiz; O. Meneses; G. Reina
J. E. Ramos; C. Reyna
G. Reina
D. L. Hurtado; D. Múnar; C. L. Sandoval; L. Piedrahíta
W. Vargas
None


In [49]:
for i in samples.find({
    'recordedBy': {'$regex': 'Espinal'},
    'recordNumber': {'$regex': '639'}}):
    print(mystring(i))

_id: 5eff66707b5e9c69d6a8e536
myindex: 887
source: Bosque_seco
occurrenceID: None
basisOfRecord: Espécimen preservado
institutionCode: UNAL
collectionCode: COL
catalogNumber: COL98652
institutionID: None
occurrenceRemarks: None
recordNumber: 639
recordedBy: S. Espinal
reproductiveCondition: None
occurrenceStatus: None
disposition: None
otherCatalogNumbers: None
eventDate: None
eventRemarks: None
habitat: None
continent: SA
country: Colombia
countryCode: CO
stateProvince: Antioquia
county: Santa Fé de Antioquia
municipality: None
locality: None
verbatimLocality: None
minimumElevationInMeters: 650.0
maximumElevationInMeters: 750.0
verbatimLatitude: 6º 33' 47" N
verbatimLongitude: 75º 49' 12" W
verbatimCoordinateSystem: grados, minutos, segundos
decimalLatitude: 7.0
decimalLongitude: -75.82
geodeticDatum: WGS84
coordinateUncertaintyInMeters: 220
georeferencedBy: Universidad Nacional de Colombia
georeferencedDate: 2007-02-14
georeferenceProtocol: Método radio-punto; Dead Reckoning; CHAPMAN

In [47]:
samples.update_many({'_id': ObjectId('5eff66707b5e9c69d6a8e4f5')}, 
    {"$set": {"activated": 0, "comment": "Probablemente cultivada."}})

<pymongo.results.UpdateResult at 0x7f343d62eaa0>

In [50]:
taxo.update_one({"_id": ObjectId('5eff663d7b5e9c69d6a8df6a')}, 
    {"$set": {"distribution": "Nativa"}}
)

<pymongo.results.UpdateResult at 0x7fc7a5900500>

In [58]:
taxo.update_many({"$and": [{'project': 'Frailejones'}, {'distribution': 'Nativa'}]}, 
    {"$set": {"threatStatusScope": "National"}}
)

<pymongo.results.UpdateResult at 0x7fc7a58f8780>

In [46]:
for t in taxo.find({'_id': ObjectId('5eff663d7b5e9c69d6a8dd79')}):
    print(mystring(t))

_id: 5eff663d7b5e9c69d6a8dd79
genus: Bulnesia
specificEpithet: carrapo
project: Bosque_seco
kingdom: Plantae
phylum: Tracheophyta
family: Zygophyllaceae
scientificName: Bulnesia carrapo
infraspecificEpithet: None
taxonRank: Especie
scientificNameAuthorship: Killip & Dugand
threatStatus: LC
threatCriteria: None
threatStatusScope: Global
distribution: Endemica
comments: None



In [83]:
taxo.distinct('project')

[None,
 'Bosque_seco',
 'Brom-Ericad',
 'Frailejones',
 'Magnolias',
 'Palmas',
 'Paramo',
 'Vedas',
 'Zamias']

In [19]:
samples.find_one()

{'_id': ObjectId('5eff66707b5e9c69d6a8e1bf'),
 'myindex': 0,
 'source': 'Bosque_seco',
 'occurrenceID': None,
 'basisOfRecord': 'Espécimen preservado',
 'institutionCode': 'UCALDAS',
 'collectionCode': 'FAUC',
 'catalogNumber': 'FAUC6108',
 'institutionID': None,
 'occurrenceRemarks': 'Arbusto de 3-4 m. Flores azules, fruto con semillas rojas. Cultivado como ornamental en el CIAT',
 'recordNumber': None,
 'recordedBy': 'M. De Fraume',
 'reproductiveCondition': 'Floración y Fructificación',
 'occurrenceStatus': None,
 'disposition': None,
 'otherCatalogNumbers': None,
 'eventDate': '1990-06-06',
 'eventRemarks': None,
 'habitat': None,
 'continent': 'SA',
 'country': 'Colombia',
 'countryCode': 'CO',
 'stateProvince': 'Valle del Cauca',
 'county': 'Palmira',
 'municipality': 'Palmira',
 'locality': 'CIAT',
 'verbatimLocality': None,
 'minimumElevationInMeters': 980.0,
 'maximumElevationInMeters': 980.0,
 'verbatimLatitude': None,
 'verbatimLongitude': None,
 'verbatimCoordinateSystem': 

In [102]:
client.close()