In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pymongo

## Extract Victorian Biodiversity Atlas (VBA) fauna data

In [2]:
# Read just the column names in fauna data csv
col_names = pd.read_csv("../data/VBA_2015_2020.csv", nrows = 0).columns
col_names

Index(['RECORD_ID', 'SITE_ID', 'SURVEY_ID', 'PROJECT_ID', 'TAXON_ID',
       'SCI_NAME', 'COMM_NAME', 'RECORDTYPE', 'RELIABILTY', 'TOTALCOUNT',
       'STARTDATE', 'START_YEAR', 'START_MTH', 'ENDDATE', 'END_YEAR',
       'END_MTH', 'LOCN_DESC', 'TAXON_TYPE', 'LONG_DD94', 'LAT_DD94'],
      dtype='object')

In [3]:
# Set data types for columns with data types other than strings
dtypes_dict = {
    "TOTALCOUNT": int,
    "START_YEAR": int,
    "START_MTH": int,
    "END_YEAR": int,
    "END_MTH": int,
    "LONG_DD94": float,
    "LAT_DD94": float
}

In [4]:
# Read in vic fauna csv from 2015 to 2020
fauna_data = pd.read_csv(
    "../data/VBA_2015_2020.csv",
    parse_dates = ["STARTDATE", "ENDDATE"],
    dtype = {col: str for col in col_names if col not in dtypes_dict})
fauna_data.head()

Unnamed: 0,RECORD_ID,SITE_ID,SURVEY_ID,PROJECT_ID,TAXON_ID,SCI_NAME,COMM_NAME,RECORDTYPE,RELIABILTY,TOTALCOUNT,STARTDATE,START_YEAR,START_MTH,ENDDATE,END_YEAR,END_MTH,LOCN_DESC,TAXON_TYPE,LONG_DD94,LAT_DD94
0,8597419,947931,1405903,4377,1557,Paratya australiensis,Common Freshwater Shrimp,Observation,Confirmed,0,2016-12-16,2016,12,NaT,0,0,McCallum Creek-4_7-TR-16-333,"Mussels, decopod crustacea",143.649002,-37.283901
1,9067844,1084677,1776514,5326,10408,Colluricincla harmonica,Grey Shrike-thrush,Observation with supporting evidence,Acceptable,0,2018-08-28,2018,8,2018-09-25,2018,9,345-513-0003 FSQ1,Passerine birds,145.768997,-37.782501
2,8218590,771970,1221401,4366,10991,Turdus merula,Common Blackbird,Seen,Acceptable,0,2015-04-12,2015,4,NaT,0,0,Ocean Acres Bush Park Nature Reserve,Passerine birds,144.287399,-38.315601
3,9047388,1070861,1760792,5326,11242,Wallabia bicolor,Black-tailed Wallaby,Observation with supporting evidence,Acceptable,0,2018-11-14,2018,11,2018-12-14,2018,12,833-518-0004 BUQ1,Mammals,148.848099,-37.601601
4,9359539,1116727,1809370,5543,10525,Cisticola exilis,Golden-headed Cisticola,Seen,Acceptable,1,2015-05-07,2015,5,2015-05-07,2015,5,MANNIBADAR (581481),Passerine birds,143.481903,-37.781799


## Transform VBA fauna data

In [5]:
# Column Renaming
fauna_df = fauna_data.rename(columns={
    "RECORD_ID": "record_id",
    "SITE_ID": "site_id",
    "SURVEY_ID": "survey_id",
    "PROJECT_ID": "project_id",
    "TAXON_ID": "taxon_id",
    "SCI_NAME": "sci_name",
    "COMM_NAME": "comm_name",
    "RECORDTYPE": "recordtype",
    "RELIABILTY": "reliability",
    "TOTALCOUNT": "totalcount",
    "STARTDATE": "start_date",
    "START_YEAR": "start_year",
    "START_MTH": "start_mth",
    "ENDDATE": "end_date",
    "END_YEAR": "end_year",
    "END_MTH": "end_mth",
    "LOCN_DESC": "location_desc",
    "TAXON_TYPE": "taxon_type",
    "LONG_DD94": "long",
    "LAT_DD94": "lat"})

In [6]:
# Test record_id uniqueness
fauna_df.record_id.is_unique

True

In [7]:
print(f"Number of unique record ids: {fauna_df.record_id.nunique()}")

Number of unique record ids: 346829


In [8]:
# Test survey_id uniqueness
fauna_df.survey_id.is_unique

False

In [9]:
print(f"Number of unique survey ids: {fauna_df.survey_id.nunique()}")

Number of unique survey ids: 97240


In [10]:
# Test site_id uniqueness
fauna_df.site_id.is_unique

False

In [11]:
print(f"Number of unique site ids: {fauna_df.site_id.nunique()}")

Number of unique site ids: 55164


In [12]:
# Test project_id uniqueness
fauna_df.project_id.is_unique

False

In [13]:
print(f"Number of unique project ids: {fauna_df.project_id.nunique()}")

Number of unique project ids: 522


In [14]:
# Test taxon_id uniqueness
fauna_df.taxon_id.is_unique

False

In [15]:
print(f"Number of unique taxon ids: {fauna_df.taxon_id.nunique()}")

Number of unique taxon ids: 941


In [16]:
print(f"Number of unique taxon types: {fauna_df.taxon_type.nunique()}")

Number of unique taxon types: 14


In [17]:
# Reorder the columns
fauna_df = fauna_df[["record_id", "survey_id", "site_id", "project_id", "taxon_id", "taxon_type"
                     ,"comm_name", "sci_name", "totalcount", "location_desc", "long", "lat"
                     ,"end_year", "end_mth", "end_date", "start_year", "start_mth", "start_date"
                     ,"recordtype", "reliability"]]
fauna_df.head()

Unnamed: 0,record_id,survey_id,site_id,project_id,taxon_id,taxon_type,comm_name,sci_name,totalcount,location_desc,long,lat,end_year,end_mth,end_date,start_year,start_mth,start_date,recordtype,reliability
0,8597419,1405903,947931,4377,1557,"Mussels, decopod crustacea",Common Freshwater Shrimp,Paratya australiensis,0,McCallum Creek-4_7-TR-16-333,143.649002,-37.283901,0,0,NaT,2016,12,2016-12-16,Observation,Confirmed
1,9067844,1776514,1084677,5326,10408,Passerine birds,Grey Shrike-thrush,Colluricincla harmonica,0,345-513-0003 FSQ1,145.768997,-37.782501,2018,9,2018-09-25,2018,8,2018-08-28,Observation with supporting evidence,Acceptable
2,8218590,1221401,771970,4366,10991,Passerine birds,Common Blackbird,Turdus merula,0,Ocean Acres Bush Park Nature Reserve,144.287399,-38.315601,0,0,NaT,2015,4,2015-04-12,Seen,Acceptable
3,9047388,1760792,1070861,5326,11242,Mammals,Black-tailed Wallaby,Wallabia bicolor,0,833-518-0004 BUQ1,148.848099,-37.601601,2018,12,2018-12-14,2018,11,2018-11-14,Observation with supporting evidence,Acceptable
4,9359539,1809370,1116727,5543,10525,Passerine birds,Golden-headed Cisticola,Cisticola exilis,1,MANNIBADAR (581481),143.481903,-37.781799,2015,5,2015-05-07,2015,5,2015-05-07,Seen,Acceptable


In [18]:
# Overview of the fauna data
fauna_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 346829 entries, 0 to 346828
Data columns (total 20 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   record_id      346829 non-null  object        
 1   survey_id      346829 non-null  object        
 2   site_id        346829 non-null  object        
 3   project_id     346829 non-null  object        
 4   taxon_id       346829 non-null  object        
 5   taxon_type     346829 non-null  object        
 6   comm_name      346829 non-null  object        
 7   sci_name       346829 non-null  object        
 8   totalcount     346829 non-null  int64         
 9   location_desc  346829 non-null  object        
 10  long           346829 non-null  float64       
 11  lat            346829 non-null  float64       
 12  end_year       346829 non-null  int64         
 13  end_mth        346829 non-null  int64         
 14  end_date       52465 non-null   datetime64[ns]
 15  

In [19]:
# We can see that the TOTALCOUNT of some records is 0. Let's have an overview of them
zero_totalcount = fauna_df[fauna_df["totalcount"] == 0]
zero_totalcount.head(10)

Unnamed: 0,record_id,survey_id,site_id,project_id,taxon_id,taxon_type,comm_name,sci_name,totalcount,location_desc,long,lat,end_year,end_mth,end_date,start_year,start_mth,start_date,recordtype,reliability
0,8597419,1405903,947931,4377,1557,"Mussels, decopod crustacea",Common Freshwater Shrimp,Paratya australiensis,0,McCallum Creek-4_7-TR-16-333,143.649002,-37.283901,0,0,NaT,2016,12,2016-12-16,Observation,Confirmed
1,9067844,1776514,1084677,5326,10408,Passerine birds,Grey Shrike-thrush,Colluricincla harmonica,0,345-513-0003 FSQ1,145.768997,-37.782501,2018,9,2018-09-25,2018,8,2018-08-28,Observation with supporting evidence,Acceptable
2,8218590,1221401,771970,4366,10991,Passerine birds,Common Blackbird,Turdus merula,0,Ocean Acres Bush Park Nature Reserve,144.287399,-38.315601,0,0,NaT,2015,4,2015-04-12,Seen,Acceptable
3,9047388,1760792,1070861,5326,11242,Mammals,Black-tailed Wallaby,Wallabia bicolor,0,833-518-0004 BUQ1,148.848099,-37.601601,2018,12,2018-12-14,2018,11,2018-11-14,Observation with supporting evidence,Acceptable
6,9067841,1776514,1084677,5326,10488,Passerine birds,White-browed Scrubwren,Sericornis frontalis,0,345-513-0003 FSQ1,145.768997,-37.782501,2018,9,2018-09-25,2018,8,2018-08-28,Observation with supporting evidence,Acceptable
9,8897604,1686135,1000965,4335,528552,Mammals,Red Fox,Vulpes vulpes,0,Basalt 10,144.098206,-37.2705,2018,2,2018-02-07,2018,1,2018-01-17,Observation with supporting evidence,Acceptable
10,9345216,1799930,1107289,2936,5140,Fish,Dry waterbody,Misc Dry,0,Pig and Whistle Creek-2_23-TR-18-421,147.853394,-37.532299,0,0,NaT,2018,11,2018-11-16,Observation,Confirmed
12,8432377,1292811,840246,4551,10364,Passerine birds,Willie Wagtail,Rhipidura leucophrys,0,Bulla Hill and School Hill,144.8022,-37.634102,0,0,NaT,2015,5,2015-05-18,Observation,Acceptable
13,8956790,1715391,1027944,4836,11003,Mammals,Short-beaked Echidna,Tachyglossus aculeatus,0,New Holland Mouse camera survey_Site_NHM423,147.533798,-38.084,2018,3,2018-03-20,2018,3,2018-03-07,Observation with supporting evidence,Acceptable
15,8994856,1741637,1052493,5326,11115,Mammals,Mountain Brush-tailed Possum,Trichosurus cunninghami,0,298-516-0003,145.521393,-37.442902,2018,8,2018-08-23,2018,7,2018-07-27,Observation with supporting evidence,Acceptable


In [20]:
# Percentage of number of rows with totalcount equal to 0 against total number of rows of the dataframe
(zero_totalcount.shape[0]/fauna_df.shape[0])*100

19.936914156544002

## Filter VBA fauna data against scraped data

In [21]:
# Import the webscraped animal data
scraped_df = pd.read_csv("../data/animal_image_to_merge.csv", dtype="str")
scraped_df.head()

Unnamed: 0,animal_name,image_url,image_alternative,introduction,threat_paragraph
0,Alpine She-oak Skink,https://ZoosVic-Endpoint-Blob-Prod.azureedge.n...,Alpine She-oak Skink sunning it self on a rock...,Found in only a few locations in Victoria and ...,Major threats\nFire is a huge danger to the Al...
1,Baw Baw Frog,https://www.zoo.org.au/media/2052/21295_baw_ba...,Baw Baw Frog resting in bright green moss. Loo...,All estimates point to extinction in the wild ...,Major threats\nThe loss of the Baw Baw Frog is...
2,Brush-tailed Rock-wallaby,https://www.zoo.org.au/media/2045/21882_brush-...,Brush Tailed Rock Wallabies resting in the grass.,"In Victoria, the Brush-tailed Rock-wallaby now...",Major threats \nChanges to habitat and the imp...
3,Eastern Barred Bandicoot,https://www.zoo.org.au/media/2053/4376_eastern...,Small Eastern Barred Bandicoot side view forag...,The Eastern Barred Bandicoot is listed as exti...,The plan for recovery\nZoos Victoria has partn...
4,Giant Burrowing Frog,https://www.zoo.org.au/media/2056/23479_giant_...,Giant Burrowing Frog on wet rocks side view. T...,Although we know that populations of the Giant...,"['Over the next five years, Zoos Victoria will..."


In [22]:
# Extract list of unique animals of interest
species = scraped_df["animal_name"].unique().tolist()

In [23]:
# Filter the fauna data with the species of interest
short_fauna_df = fauna_df[fauna_df["comm_name"].isin(species)]
short_fauna_df.head()

Unnamed: 0,record_id,survey_id,site_id,project_id,taxon_id,taxon_type,comm_name,sci_name,totalcount,location_desc,long,lat,end_year,end_mth,end_date,start_year,start_mth,start_date,recordtype,reliability
102,8261909,1225877,833395,4236,11141,Mammals,Leadbeater's Possum,Gymnobelideus leadbeateri,0,DSS3B,145.841904,-37.8456,0,0,NaT,2016,4,2016-04-24,Observation,Confirmed
195,8590335,1401349,943395,4095,11141,Mammals,Leadbeater's Possum,Gymnobelideus leadbeateri,1,DELWP Case Reference number 2017-0060,146.288498,-37.9226,0,0,NaT,2017,8,2017-08-20,Seen,Confirmed
391,6942095,1102090,766516,4078,11141,Mammals,Leadbeater's Possum,Gymnobelideus leadbeateri,1,Mon1_camera1,145.934403,-37.8186,2015,9,2015-09-29,2015,9,2015-09-08,Observation,Confirmed
426,8596613,1405576,947609,1,10309,Non-passerine birds,Swift Parrot,Lathamus discolor,5,"Mclaughlans Lane Pipetrack, Plenty",145.108398,-37.676399,2017,9,2017-09-25,2017,9,2017-09-25,Observation,Acceptable
538,6870927,1085373,716545,4078,11141,Mammals,Leadbeater's Possum,Gymnobelideus leadbeateri,1,40.2_camera2,146.115204,-37.784802,2015,5,2015-05-11,2015,4,2015-04-17,Observation,Confirmed


In [24]:
# Overview of the fauna data after filtering
short_fauna_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2712 entries, 102 to 346720
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   record_id      2712 non-null   object        
 1   survey_id      2712 non-null   object        
 2   site_id        2712 non-null   object        
 3   project_id     2712 non-null   object        
 4   taxon_id       2712 non-null   object        
 5   taxon_type     2712 non-null   object        
 6   comm_name      2712 non-null   object        
 7   sci_name       2712 non-null   object        
 8   totalcount     2712 non-null   int64         
 9   location_desc  2712 non-null   object        
 10  long           2712 non-null   float64       
 11  lat            2712 non-null   float64       
 12  end_year       2712 non-null   int64         
 13  end_mth        2712 non-null   int64         
 14  end_date       899 non-null    datetime64[ns]
 15  start_year     27

In [25]:
# Check for any extreme values
print(f"Maximum total count is: {short_fauna_df.totalcount.max()}")
print(f"Minimum total count is: {short_fauna_df.totalcount.min()}")
print(f"Maximum longitude is: {short_fauna_df.long.max()}")
print(f"Minimum longitude is: {short_fauna_df.long.min()}")
print(f"Maximum latitude is: {short_fauna_df.lat.max()}")
print(f"Minimum latitude is: {short_fauna_df.lat.min()}")
print(f"Maximum end year is: {short_fauna_df.end_year.max()}")
print(f"Minimum end year is: {short_fauna_df.end_year.min()}")
print(f"Maximum end month is: {short_fauna_df.end_mth.max()}")
print(f"Minimum end month is: {short_fauna_df.end_mth.min()}")
print(f"Maximum start year is: {short_fauna_df.start_year.max()}")
print(f"Minimum start year is: {short_fauna_df.start_year.min()}")
print(f"Maximum start month is: {short_fauna_df.start_mth.max()}")
print(f"Minimum start month is: {short_fauna_df.start_mth.min()}")
print(f"Maximum start date is: {short_fauna_df.start_date.max()}")
print(f"Minimum start date is: {short_fauna_df.start_date.min()}")

Maximum total count is: 128
Minimum total count is: 0
Maximum longitude is: 149.9367981
Minimum longitude is: 140.9933014
Maximum latitude is: -34.5940018
Minimum latitude is: -39.0321007
Maximum end year is: 2020
Minimum end year is: 0
Maximum end month is: 12
Minimum end month is: 0
Maximum start year is: 2020
Minimum start year is: 2015
Maximum start month is: 12
Minimum start month is: 1
Maximum start date is: 2020-03-30 00:00:00
Minimum start date is: 2015-01-01 00:00:00


As can be seen, there are a number of records with total count of 0. They are records of surveys with no sightings of a targeted specie. Hence, we'll remove them.

As there are a lot of null end_date values, their extracted end years and end months equal to 0. Hence we might use start date in our time series visualisation. The null end dates might indicate that a survey hasn't ended up to our group's data extraction.

In [26]:
print(f"The number of records with totalcount of zero: {short_fauna_df[short_fauna_df.totalcount == 0].shape[0]}")

The number of records with totalcount of zero: 623


In [27]:
# Values in location description column
short_fauna_df["location_desc"].unique()

array(['DSS3B', 'DELWP Case Reference number 2017-0060', 'Mon1_camera1',
       ..., '316A', '316B', '316C'], dtype=object)

In [28]:
# Values in record types column
short_fauna_df["recordtype"].unique()

array(['Observation', 'Seen', 'Observation with supporting evidence',
       'Captured and released', 'Indirect evidence', 'Heard', 'Captured'],
      dtype=object)

In [29]:
# Values in reliability column
short_fauna_df["reliability"].unique()

array(['Confirmed', 'Acceptable'], dtype=object)

Values in the location description, record types and reliability columns do not seem to be informative enough. Hence we'll remove these columns. We'll also remove columns project_id and site_id as they are not required for our project's purpose.

In [30]:
# Filter out the records with total count of 0 and remove end_year, end_date, and end_mth columns
final_fauna_df = short_fauna_df[short_fauna_df.totalcount > 0].drop([
    'site_id', 'project_id', "location_desc", 'end_year', 'end_mth', 'end_date', 'recordtype', 'reliability'], axis = 1)
final_fauna_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2089 entries, 195 to 346720
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   record_id   2089 non-null   object        
 1   survey_id   2089 non-null   object        
 2   taxon_id    2089 non-null   object        
 3   taxon_type  2089 non-null   object        
 4   comm_name   2089 non-null   object        
 5   sci_name    2089 non-null   object        
 6   totalcount  2089 non-null   int64         
 7   long        2089 non-null   float64       
 8   lat         2089 non-null   float64       
 9   start_year  2089 non-null   int64         
 10  start_mth   2089 non-null   int64         
 11  start_date  2089 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int64(3), object(6)
memory usage: 212.2+ KB


## Filter webscraped animal image data against VBA fauna data 

In [31]:
final_animal_list = final_fauna_df["comm_name"].unique().tolist()
final_animal_list

["Leadbeater's Possum",
 'Swift Parrot',
 'Mountain Pygmy-possum',
 'New Holland Mouse',
 'Mallee Emu-wren',
 'Hooded Plover',
 'Giant Burrowing Frog',
 'Greater Glider',
 'Regent Honeyeater']

In [32]:
# Number of final animals
len(final_animal_list)

9

In [33]:
# Number of taxon ids
taxon_ids = final_fauna_df["taxon_id"].unique().tolist()
len(taxon_ids)

9

The number of animals is equal to the number of taxon ids. For each animal of interest, there is only one corresponding taxon id.

In [34]:
# Filter the webscraped data to have only the above animals
final_scraped_df = scraped_df[scraped_df["animal_name"].isin(final_animal_list)].copy()
final_scraped_df

Unnamed: 0,animal_name,image_url,image_alternative,introduction,threat_paragraph
4,Giant Burrowing Frog,https://www.zoo.org.au/media/2056/23479_giant_...,Giant Burrowing Frog on wet rocks side view. T...,Although we know that populations of the Giant...,"['Over the next five years, Zoos Victoria will..."
11,Leadbeater's Possum,https://www.zoo.org.au/media/2057/22861_leadbe...,Close up view of the face of a Leadbeater Poss...,"Once thought to be extinct, the Leadbeater's P...",The major threats\nThe loss of hollow-bearing ...
13,Mallee Emu-wren,https://www.zoo.org.au/media/1961/23483_mallee...,Mallee Emu-wren in long dry grass looking at t...,The Mallee Emu-wren is particularly vulnerable...,"['In fact, it was a series of fires that cause..."
14,Mountain Pygmy-possum,https://www.zoo.org.au/media/2058/16910_mounta...,Mountain Pygmy Possum standing on its hind leg...,Mountain Pygmy-possums were thought to be exti...,"The major threats \nClimate change, the loss o..."
15,New Holland Mouse,https://www.zoo.org.au/media/1732/new-holland-...,New Holland Mouse getting a health check wrapp...,The New Holland Mouse is classified as extinct...,The major threats\nThe New Holland Mouse is in...
19,Regent Honeyeater,https://www.zoo.org.au/media/2055/22249_regent...,Regent Honeyeater bird on a branch looking dow...,The Regent Honeyeater has been in decline sinc...,The major threats\nThe loss of the Box-Ironbar...
25,Swift Parrot,https://www.zoo.org.au/media/1960/23484_swift_...,Green Swift Parrot perched on a branch looking...,The Swift Parrot is critically endangered.,['Unless we can solve the issues surrounding i...
27,Greater Glider,https://www.environment.vic.gov.au/__data/asse...,Greater Glider,,Species InformationThe Greater Glider is Austr...
28,Hooded Plover,https://www.environment.vic.gov.au/__data/asse...,Hooded Plover,Species Information\n\nHooded Plovers are a re...,Threats\n\nHooded Plovers often share their oc...


In [35]:
# Overview of the scraped data
final_scraped_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9 entries, 4 to 28
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   animal_name        9 non-null      object
 1   image_url          9 non-null      object
 2   image_alternative  9 non-null      object
 3   introduction       8 non-null      object
 4   threat_paragraph   9 non-null      object
dtypes: object(5)
memory usage: 432.0+ bytes


In [36]:
# Fill the NaN values with None values for the json-converted file to work
final_scraped_df = final_scraped_df.where(final_scraped_df.notnull(), None)
final_scraped_df

Unnamed: 0,animal_name,image_url,image_alternative,introduction,threat_paragraph
4,Giant Burrowing Frog,https://www.zoo.org.au/media/2056/23479_giant_...,Giant Burrowing Frog on wet rocks side view. T...,Although we know that populations of the Giant...,"['Over the next five years, Zoos Victoria will..."
11,Leadbeater's Possum,https://www.zoo.org.au/media/2057/22861_leadbe...,Close up view of the face of a Leadbeater Poss...,"Once thought to be extinct, the Leadbeater's P...",The major threats\nThe loss of hollow-bearing ...
13,Mallee Emu-wren,https://www.zoo.org.au/media/1961/23483_mallee...,Mallee Emu-wren in long dry grass looking at t...,The Mallee Emu-wren is particularly vulnerable...,"['In fact, it was a series of fires that cause..."
14,Mountain Pygmy-possum,https://www.zoo.org.au/media/2058/16910_mounta...,Mountain Pygmy Possum standing on its hind leg...,Mountain Pygmy-possums were thought to be exti...,"The major threats \nClimate change, the loss o..."
15,New Holland Mouse,https://www.zoo.org.au/media/1732/new-holland-...,New Holland Mouse getting a health check wrapp...,The New Holland Mouse is classified as extinct...,The major threats\nThe New Holland Mouse is in...
19,Regent Honeyeater,https://www.zoo.org.au/media/2055/22249_regent...,Regent Honeyeater bird on a branch looking dow...,The Regent Honeyeater has been in decline sinc...,The major threats\nThe loss of the Box-Ironbar...
25,Swift Parrot,https://www.zoo.org.au/media/1960/23484_swift_...,Green Swift Parrot perched on a branch looking...,The Swift Parrot is critically endangered.,['Unless we can solve the issues surrounding i...
27,Greater Glider,https://www.environment.vic.gov.au/__data/asse...,Greater Glider,,Species InformationThe Greater Glider is Austr...
28,Hooded Plover,https://www.environment.vic.gov.au/__data/asse...,Hooded Plover,Species Information\n\nHooded Plovers are a re...,Threats\n\nHooded Plovers often share their oc...


## Load

In [37]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Define database and collections
db = client.animal_visual_db
vba_fauna = db.vba_fauna
scraped_fauna = db.scraped_fauna

In [38]:
# Drops collections if available to remove duplicates
vba_fauna.drop()
scraped_fauna.drop()

In [39]:
# Load vba fauna data into the vba_fauna collection
vba_fauna.insert_many(final_fauna_df.to_dict('records'))

<pymongo.results.InsertManyResult at 0x1db11006608>

In [40]:
# Load scraped fauna image and info into the scraped_fauna collection
scraped_fauna.insert_many(final_scraped_df.to_dict('records'))

<pymongo.results.InsertManyResult at 0x1db066ce908>

In [41]:
from pprint import pprint

for record in vba_fauna.find():
    pprint(record)

{'_id': ObjectId('5f67ad47c6206b044a89a4eb'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.92259979999999,
 'long': 146.2884979,
 'record_id': '8590335',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2017, 8, 20, 0, 0),
 'start_mth': 8,
 'start_year': 2017,
 'survey_id': '1401349',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a4ec'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.8185997,
 'long': 145.9344025,
 'record_id': '6942095',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2015, 9, 8, 0, 0),
 'start_mth': 9,
 'start_year': 2015,
 'survey_id': '1102090',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a4ed'),
 'comm_name': 'Swift Parrot',
 'lat': -37.676399200000006,
 'long': 145.10839840000003,
 'record_id': '8596613',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2017, 9, 25, 0, 0)

 'comm_name': "Leadbeater's Possum",
 'lat': -37.8045006,
 'long': 145.892807,
 'record_id': '6942132',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2015, 9, 10, 0, 0),
 'start_mth': 9,
 'start_year': 2015,
 'survey_id': '1102120',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a53c'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.85060120000001,
 'long': 145.85929869999998,
 'record_id': '6942435',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2015, 11, 19, 0, 0),
 'start_mth': 11,
 'start_year': 2015,
 'survey_id': '1102332',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a53d'),
 'comm_name': 'Mountain Pygmy-possum',
 'lat': -37.1511002,
 'long': 146.4402008,
 'record_id': '8490365',
 'sci_name': 'Burramys parvus',
 'start_date': datetime.datetime(2015, 11, 5, 0, 0),
 'start_mth': 11,
 'start_year': 2015,
 'su

 'start_year': 2015,
 'survey_id': '1102201',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a573'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.7540016,
 'long': 146.01220700000002,
 'record_id': '8584381',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2017, 2, 10, 0, 0),
 'start_mth': 2,
 'start_year': 2017,
 'survey_id': '1398048',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a574'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.8260002,
 'long': 145.797699,
 'record_id': '6948980',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2016, 2, 21, 0, 0),
 'start_mth': 2,
 'start_year': 2016,
 'survey_id': '1105996',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a575'),
 'comm_name': 'Mountain Pygmy-possum',
 'lat': -36.9547997,
 'long': 147.1578979,
 're

 'record_id': '6942475',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2015, 11, 24, 0, 0),
 'start_mth': 11,
 'start_year': 2015,
 'survey_id': '1102363',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a5bc'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.8526993,
 'long': 145.8307037,
 'record_id': '8206858',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2016, 1, 18, 0, 0),
 'start_mth': 1,
 'start_year': 2016,
 'survey_id': '1218226',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a5bd'),
 'comm_name': 'Mountain Pygmy-possum',
 'lat': -37.150501299999995,
 'long': 146.4338074,
 'record_id': '8490258',
 'sci_name': 'Burramys parvus',
 'start_date': datetime.datetime(2016, 11, 2, 0, 0),
 'start_mth': 11,
 'start_year': 2016,
 'survey_id': '1342580',
 'taxon_id': '11156',
 'taxon_type': 'Mammals',
 'totalcount': 1

 'start_mth': 5,
 'start_year': 2016,
 'survey_id': '1222743',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a604'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.78599929999999,
 'long': 146.1161041,
 'record_id': '6870924',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2015, 4, 17, 0, 0),
 'start_mth': 4,
 'start_year': 2015,
 'survey_id': '1085372',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a605'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.848800700000005,
 'long': 145.8023071,
 'record_id': '6942397',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2015, 11, 17, 0, 0),
 'start_mth': 11,
 'start_year': 2015,
 'survey_id': '1102306',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a606'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.5942001,


 'long': 145.721405,
 'record_id': '8583973',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2016, 10, 19, 0, 0),
 'start_mth': 10,
 'start_year': 2016,
 'survey_id': '1397760',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a63f'),
 'comm_name': 'Mallee Emu-wren',
 'lat': -35.9796982,
 'long': 141.69970700000002,
 'record_id': '8986186',
 'sci_name': 'Stipiturus mallee',
 'start_date': datetime.datetime(2015, 11, 10, 0, 0),
 'start_mth': 11,
 'start_year': 2015,
 'survey_id': '1734081',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a640'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.8646011,
 'long': 145.8789063,
 'record_id': '8206894',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2016, 1, 21, 0, 0),
 'start_mth': 1,
 'start_year': 2016,
 'survey_id': '1218252',
 'taxon_id': '11141',
 'taxon_type': 'M

 'totalcount': 26}
{'_id': ObjectId('5f67ad47c6206b044a89a686'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.7666016,
 'long': 145.9606018,
 'record_id': '9032547',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2018, 10, 29, 0, 0),
 'start_mth': 10,
 'start_year': 2018,
 'survey_id': '1757286',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a687'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.769699100000004,
 'long': 145.889801,
 'record_id': '6942127',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2015, 9, 10, 0, 0),
 'start_mth': 9,
 'start_year': 2015,
 'survey_id': '1102117',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a688'),
 'comm_name': 'Giant Burrowing Frog',
 'lat': -37.7196007,
 'long': 147.4629059,
 'record_id': '8492998',
 'sci_name': 'Heleioporus australiacus',
 'start_date': datetime.datet

 'survey_id': '1398111',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a6b9'),
 'comm_name': 'New Holland Mouse',
 'lat': -37.933399200000004,
 'long': 147.2807007,
 'record_id': '6918747',
 'sci_name': 'Pseudomys novaehollandiae',
 'start_date': datetime.datetime(2015, 3, 23, 0, 0),
 'start_mth': 3,
 'start_year': 2015,
 'survey_id': '1094510',
 'taxon_id': '11455',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a6ba'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.740798999999996,
 'long': 146.16520690000002,
 'record_id': '8207038',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2016, 3, 3, 0, 0),
 'start_mth': 3,
 'start_year': 2016,
 'survey_id': '1218343',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a6bb'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.6753006,
 'long': 146.5292053,
 'record_id':

 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a706'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.7671013,
 'long': 145.96000669999998,
 'record_id': '9032550',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2018, 10, 29, 0, 0),
 'start_mth': 10,
 'start_year': 2018,
 'survey_id': '1757287',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89a707'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.7055016,
 'long': 146.1417999,
 'record_id': '6942284',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2015, 10, 14, 0, 0),
 'start_mth': 10,
 'start_year': 2015,
 'survey_id': '1102224',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a708'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.851299299999994,
 'long': 146.1036987,
 'record_id': '6870727',
 'sci_name': '

 'start_date': datetime.datetime(2017, 6, 18, 0, 0),
 'start_mth': 6,
 'start_year': 2017,
 'survey_id': '1927609',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a742'),
 'comm_name': 'Hooded Plover',
 'lat': -38.428901700000004,
 'long': 144.18099980000002,
 'record_id': '9989559',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 2, 18, 0, 0),
 'start_mth': 2,
 'start_year': 2018,
 'survey_id': '1929733',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a743'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3552017,
 'long': 142.3347015,
 'record_id': '10052069',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 4, 22, 0, 0),
 'start_mth': 4,
 'start_year': 2018,
 'survey_id': '1954159',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 3}
{'_id': ObjectId('5f67ad47c6206b044a89a744'),
 'comm_name': 'Hoo

{'_id': ObjectId('5f67ad47c6206b044a89a779'),
 'comm_name': 'Hooded Plover',
 'lat': -38.428901700000004,
 'long': 144.18099980000002,
 'record_id': '9985590',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 9, 3, 0, 0),
 'start_mth': 9,
 'start_year': 2017,
 'survey_id': '1928063',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a77a'),
 'comm_name': 'Hooded Plover',
 'lat': -38.428901700000004,
 'long': 144.18099980000002,
 'record_id': '9985594',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 9, 3, 0, 0),
 'start_mth': 9,
 'start_year': 2017,
 'survey_id': '1928064',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a77b'),
 'comm_name': 'Regent Honeyeater',
 'lat': -36.1671982,
 'long': 146.6318054,
 'record_id': '10014718',
 'sci_name': 'Anthochaera phrygia',
 'start_date': datetime.datetime(2017, 4, 12, 0, 0),
 'start

 'survey_id': '1924543',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a7c7'),
 'comm_name': 'Hooded Plover',
 'lat': -38.428901700000004,
 'long': 144.18099980000002,
 'record_id': '9971634',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2016, 12, 4, 0, 0),
 'start_mth': 12,
 'start_year': 2016,
 'survey_id': '1925005',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a7c8'),
 'comm_name': 'Hooded Plover',
 'lat': -38.428901700000004,
 'long': 144.18099980000002,
 'record_id': '9993889',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 10, 10, 0, 0),
 'start_mth': 10,
 'start_year': 2018,
 'survey_id': '1931670',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a7c9'),
 'comm_name': 'Hooded Plover',
 'lat': -38.428901700000004,
 'long': 144.18099980000002,
 'record_id': '9

 'start_mth': 12,
 'start_year': 2018,
 'survey_id': '1964979',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a803'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3917999,
 'long': 142.221405,
 'record_id': '10093407',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2019, 4, 18, 0, 0),
 'start_mth': 4,
 'start_year': 2019,
 'survey_id': '1970703',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 6}
{'_id': ObjectId('5f67ad47c6206b044a89a804'),
 'comm_name': 'Hooded Plover',
 'lat': -37.8005981,
 'long': 148.545105,
 'record_id': '10038713',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 1, 1, 0, 0),
 'start_mth': 1,
 'start_year': 2018,
 'survey_id': '1949018',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a805'),
 'comm_name': 'Hooded Plover',
 'lat': -38.261100799999994,
 'long': 144.57719419999998,
 'record_id

 'start_year': 2019,
 'survey_id': '1974393',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 6}
{'_id': ObjectId('5f67ad47c6206b044a89a845'),
 'comm_name': 'Mallee Emu-wren',
 'lat': -34.694801299999995,
 'long': 142.2705994,
 'record_id': '10101432',
 'sci_name': 'Stipiturus mallee',
 'start_date': datetime.datetime(2019, 7, 30, 0, 0),
 'start_mth': 7,
 'start_year': 2019,
 'survey_id': '1974339',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a846'),
 'comm_name': 'Hooded Plover',
 'lat': -38.408500700000005,
 'long': 144.8108063,
 'record_id': '10073268',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 11, 30, 0, 0),
 'start_mth': 11,
 'start_year': 2018,
 'survey_id': '1963510',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 5}
{'_id': ObjectId('5f67ad47c6206b044a89a847'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3181,
 'long': 144.3480988,
 'record_

 'start_date': datetime.datetime(2018, 4, 9, 0, 0),
 'start_mth': 4,
 'start_year': 2018,
 'survey_id': '1953689',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 35}
{'_id': ObjectId('5f67ad47c6206b044a89a889'),
 'comm_name': 'Swift Parrot',
 'lat': -37.7206001,
 'long': 145.04840090000002,
 'record_id': '10050998',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2018, 4, 11, 0, 0),
 'start_mth': 4,
 'start_year': 2018,
 'survey_id': '1953724',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 10}
{'_id': ObjectId('5f67ad47c6206b044a89a88a'),
 'comm_name': 'Swift Parrot',
 'lat': -37.7206001,
 'long': 145.04840090000002,
 'record_id': '10051000',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2018, 4, 11, 0, 0),
 'start_mth': 4,
 'start_year': 2018,
 'survey_id': '1953725',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a88b'),

 'long': 145.1165924,
 'record_id': '10019833',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2017, 6, 18, 0, 0),
 'start_mth': 6,
 'start_year': 2017,
 'survey_id': '1941597',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 8}
{'_id': ObjectId('5f67ad47c6206b044a89a8d0'),
 'comm_name': 'Swift Parrot',
 'lat': -37.679798100000006,
 'long': 145.1165924,
 'record_id': '10019725',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2017, 6, 17, 0, 0),
 'start_mth': 6,
 'start_year': 2017,
 'survey_id': '1941560',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 15}
{'_id': ObjectId('5f67ad47c6206b044a89a8d1'),
 'comm_name': 'Swift Parrot',
 'lat': -37.679798100000006,
 'long': 145.1165924,
 'record_id': '10020820',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2017, 7, 1, 0, 0),
 'start_mth': 7,
 'start_year': 2017,
 'survey_id': '1941926',
 'taxon_id': '10309',
 'taxon_type': 'No

{'_id': ObjectId('5f67ad47c6206b044a89a90c'),
 'comm_name': 'Hooded Plover',
 'lat': -38.713699299999995,
 'long': 143.7447052,
 'record_id': '9993535',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 10, 2, 0, 0),
 'start_mth': 10,
 'start_year': 2018,
 'survey_id': '1931522',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a90d'),
 'comm_name': 'Hooded Plover',
 'lat': -38.713699299999995,
 'long': 143.7447052,
 'record_id': '9993536',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 10, 2, 0, 0),
 'start_mth': 10,
 'start_year': 2018,
 'survey_id': '1931523',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a90e'),
 'comm_name': 'Swift Parrot',
 'lat': -38.319999700000004,
 'long': 144.3114929,
 'record_id': '10001193',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2019, 4, 26, 0, 0),
 'start_mth': 4,

{'_id': ObjectId('5f67ad47c6206b044a89a94a'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3564987,
 'long': 142.31089780000002,
 'record_id': '10006904',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 1, 21, 0, 0),
 'start_mth': 1,
 'start_year': 2017,
 'survey_id': '1936822',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a94b'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3564987,
 'long': 142.31089780000002,
 'record_id': '10021556',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 7, 8, 0, 0),
 'start_mth': 7,
 'start_year': 2017,
 'survey_id': '1942161',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 8}
{'_id': ObjectId('5f67ad47c6206b044a89a94c'),
 'comm_name': 'Hooded Plover',
 'lat': -38.7773018,
 'long': 143.6645966,
 'record_id': '10002127',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2019, 7, 3, 0, 0),
 'start_mth': 7,
 'start

{'_id': ObjectId('5f67ad47c6206b044a89a998'),
 'comm_name': 'Hooded Plover',
 'lat': -38.7528992,
 'long': 143.6651001,
 'record_id': '10001627',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2019, 5, 23, 0, 0),
 'start_mth': 5,
 'start_year': 2019,
 'survey_id': '1934463',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89a999'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3564987,
 'long': 142.31089780000002,
 'record_id': '10010919',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 3, 4, 0, 0),
 'start_mth': 3,
 'start_year': 2017,
 'survey_id': '1938296',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 5}
{'_id': ObjectId('5f67ad47c6206b044a89a99a'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3564987,
 'long': 142.31089780000002,
 'record_id': '10007969',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 1, 30, 0, 0),
 'start_mth': 1,
 'star

 'start_year': 2016,
 'survey_id': '1925275',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 4}
{'_id': ObjectId('5f67ad47c6206b044a89a9d8'),
 'comm_name': 'Hooded Plover',
 'lat': -38.2837982,
 'long': 144.4264984,
 'record_id': '10000222',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2019, 3, 13, 0, 0),
 'start_mth': 3,
 'start_year': 2019,
 'survey_id': '1933857',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 5}
{'_id': ObjectId('5f67ad47c6206b044a89a9d9'),
 'comm_name': 'Mallee Emu-wren',
 'lat': -34.6911011,
 'long': 142.2727051,
 'record_id': '10016075',
 'sci_name': 'Stipiturus mallee',
 'start_date': datetime.datetime(2017, 4, 24, 0, 0),
 'start_mth': 4,
 'start_year': 2017,
 'survey_id': '1940149',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 15}
{'_id': ObjectId('5f67ad47c6206b044a89a9da'),
 'comm_name': 'Mallee Emu-wren',
 'lat': -34.6911011,
 'long': 142.2727051,
 'record_id': '10023827',
 'sc

 'long': 144.4264984,
 'record_id': '9974605',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2016, 12, 7, 0, 0),
 'start_mth': 12,
 'start_year': 2016,
 'survey_id': '1925239',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 5}
{'_id': ObjectId('5f67ad47c6206b044a89aa1b'),
 'comm_name': 'Hooded Plover',
 'lat': -38.2837982,
 'long': 144.4264984,
 'record_id': '9974701',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2016, 12, 7, 0, 0),
 'start_mth': 12,
 'start_year': 2016,
 'survey_id': '1925251',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 5}
{'_id': ObjectId('5f67ad47c6206b044a89aa1c'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3564987,
 'long': 142.31089780000002,
 'record_id': '10009924',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 2, 20, 0, 0),
 'start_mth': 2,
 'start_year': 2017,
 'survey_id': '1937898',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}

 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89aa52'),
 'comm_name': 'Hooded Plover',
 'lat': -38.2837982,
 'long': 144.4264984,
 'record_id': '9985220',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 7, 31, 0, 0),
 'start_mth': 7,
 'start_year': 2017,
 'survey_id': '1927879',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89aa53'),
 'comm_name': 'Hooded Plover',
 'lat': -37.8795013,
 'long': 147.9911957,
 'record_id': '10005169',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 1, 5, 0, 0),
 'start_mth': 1,
 'start_year': 2017,
 'survey_id': '1936172',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 5}
{'_id': ObjectId('5f67ad47c6206b044a89aa54'),
 'comm_name': 'Hooded Plover',
 'lat': -38.457199100000004,
 'long': 144.10729980000002,
 'record_id': '9833796',
 'sci_name': 'Thinornis cucullatus',
 'start_date

{'_id': ObjectId('5f67ad47c6206b044a89aa8f'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3689003,
 'long': 144.7572937,
 'record_id': '10080944',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2019, 1, 15, 0, 0),
 'start_mth': 1,
 'start_year': 2019,
 'survey_id': '1966224',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89aa90'),
 'comm_name': 'Swift Parrot',
 'lat': -38.044899,
 'long': 144.1750946,
 'record_id': '9799952',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2015, 7, 12, 0, 0),
 'start_mth': 7,
 'start_year': 2015,
 'survey_id': '1911532',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 6}
{'_id': ObjectId('5f67ad47c6206b044a89aa91'),
 'comm_name': 'Swift Parrot',
 'lat': -38.00719829999999,
 'long': 144.52949519999999,
 'record_id': '10001071',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2019, 4, 22, 0, 0),
 'start_mth': 4,
 '

 'comm_name': 'Mallee Emu-wren',
 'lat': -34.686199200000004,
 'long': 142.2709961,
 'record_id': '10030095',
 'sci_name': 'Stipiturus mallee',
 'start_date': datetime.datetime(2017, 10, 23, 0, 0),
 'start_mth': 10,
 'start_year': 2017,
 'survey_id': '1945736',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89aae0'),
 'comm_name': 'Hooded Plover',
 'lat': -38.3973007,
 'long': 144.79620359999998,
 'record_id': '10007663',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2017, 1, 27, 0, 0),
 'start_mth': 1,
 'start_year': 2017,
 'survey_id': '1937067',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 4}
{'_id': ObjectId('5f67ad47c6206b044a89aae1'),
 'comm_name': 'Hooded Plover',
 'lat': -38.4291,
 'long': 144.1802063,
 'record_id': '9975974',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2016, 12, 11, 0, 0),
 'start_mth': 12,
 'start_year': 2016,
 'survey_id': '192537

 'start_date': datetime.datetime(2018, 5, 20, 0, 0),
 'start_mth': 5,
 'start_year': 2018,
 'survey_id': '1955279',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 4}
{'_id': ObjectId('5f67ad47c6206b044a89ab24'),
 'comm_name': 'Swift Parrot',
 'lat': -37.7164001,
 'long': 145.05540469999997,
 'record_id': '10051388',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2018, 4, 15, 0, 0),
 'start_mth': 4,
 'start_year': 2018,
 'survey_id': '1953875',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89ab25'),
 'comm_name': 'Swift Parrot',
 'lat': -37.7164001,
 'long': 145.05540469999997,
 'record_id': '10051390',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2018, 4, 15, 0, 0),
 'start_mth': 4,
 'start_year': 2018,
 'survey_id': '1953876',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89ab26'),


 'survey_id': '1970386',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 6}
{'_id': ObjectId('5f67ad47c6206b044a89ab61'),
 'comm_name': 'Hooded Plover',
 'lat': -38.281299600000004,
 'long': 144.4328003,
 'record_id': '10000438',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2019, 3, 26, 0, 0),
 'start_mth': 3,
 'start_year': 2019,
 'survey_id': '1933952',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 3}
{'_id': ObjectId('5f67ad47c6206b044a89ab62'),
 'comm_name': 'Mallee Emu-wren',
 'lat': -34.7555008,
 'long': 142.33839419999998,
 'record_id': '10052236',
 'sci_name': 'Stipiturus mallee',
 'start_date': datetime.datetime(2018, 4, 22, 0, 0),
 'start_mth': 4,
 'start_year': 2018,
 'survey_id': '1954204',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89ab63'),
 'comm_name': 'Mallee Emu-wren',
 'lat': -34.7555008,
 'long': 142.33839419999998,
 'record_id': '100672

 'start_year': 2017,
 'survey_id': '1941147',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 10}
{'_id': ObjectId('5f67ad47c6206b044a89ab96'),
 'comm_name': 'Mallee Emu-wren',
 'lat': -34.7555008,
 'long': 142.33839419999998,
 'record_id': '10018666',
 'sci_name': 'Stipiturus mallee',
 'start_date': datetime.datetime(2017, 5, 28, 0, 0),
 'start_mth': 5,
 'start_year': 2017,
 'survey_id': '1941148',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 10}
{'_id': ObjectId('5f67ad47c6206b044a89ab97'),
 'comm_name': 'Mallee Emu-wren',
 'lat': -34.7555008,
 'long': 142.33839419999998,
 'record_id': '10060173',
 'sci_name': 'Stipiturus mallee',
 'start_date': datetime.datetime(2018, 8, 4, 0, 0),
 'start_mth': 8,
 'start_year': 2018,
 'survey_id': '1957657',
 'taxon_id': '10527',
 'taxon_type': 'Passerine birds',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89ab98'),
 'comm_name': 'Mallee Emu-wren',
 'lat': -34.7555008,
 'long': 142.33839419

 'totalcount': 4}
{'_id': ObjectId('5f67ad47c6206b044a89abe8'),
 'comm_name': 'Hooded Plover',
 'lat': -38.267601,
 'long': 144.50619509999999,
 'record_id': '9995963',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 12, 4, 0, 0),
 'start_mth': 12,
 'start_year': 2018,
 'survey_id': '1932480',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89abe9'),
 'comm_name': 'Hooded Plover',
 'lat': -38.35670089999999,
 'long': 142.3213959,
 'record_id': '10052018',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 4, 21, 0, 0),
 'start_mth': 4,
 'start_year': 2018,
 'survey_id': '1954120',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89abea'),
 'comm_name': 'Hooded Plover',
 'lat': -38.281299600000004,
 'long': 144.4326935,
 'record_id': '9991018',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 5, 16, 0, 0)

 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2019, 8, 25, 0, 0),
 'start_mth': 8,
 'start_year': 2019,
 'survey_id': '1864333',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89ac2d'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.774601000000004,
 'long': 145.9665985,
 'record_id': '9585640',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2019, 5, 19, 0, 0),
 'start_mth': 5,
 'start_year': 2019,
 'survey_id': '1831551',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89ac2e'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.770401,
 'long': 145.9615021,
 'record_id': '9585649',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2019, 6, 8, 0, 0),
 'start_mth': 6,
 'start_year': 2019,
 'survey_id': '1831560',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 2}
{'_id': ObjectId('5f

 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2019, 12, 9, 0, 0),
 'start_mth': 12,
 'start_year': 2019,
 'survey_id': '1985724',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89ac77'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.7793999,
 'long': 145.9398956,
 'record_id': '10142463',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2019, 12, 17, 0, 0),
 'start_mth': 12,
 'start_year': 2019,
 'survey_id': '1985725',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89ac78'),
 'comm_name': 'Swift Parrot',
 'lat': -36.9902,
 'long': 142.7312012,
 'record_id': '9083089',
 'sci_name': 'Lathamus discolor',
 'start_date': datetime.datetime(2019, 6, 9, 0, 0),
 'start_mth': 6,
 'start_year': 2019,
 'survey_id': '1780930',
 'taxon_id': '10309',
 'taxon_type': 'Non-passerine birds',
 'totalcount': 4}
{'_id': ObjectId('5f67ad47c62

{'_id': ObjectId('5f67ad47c6206b044a89acb8'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.3860016,
 'long': 145.93739319999997,
 'record_id': '10132799',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2019, 10, 31, 0, 0),
 'start_mth': 10,
 'start_year': 2019,
 'survey_id': '1982563',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89acb9'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.3865013,
 'long': 145.93780519999999,
 'record_id': '10132802',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2019, 10, 31, 0, 0),
 'start_mth': 10,
 'start_year': 2019,
 'survey_id': '1982564',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89acba'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.6118011,
 'long': 146.23019409999998,
 'record_id': '10132841',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datet

{'_id': ObjectId('5f67ad47c6206b044a89acfc'),
 'comm_name': "Leadbeater's Possum",
 'lat': -37.7818985,
 'long': 145.9407959,
 'record_id': '10183294',
 'sci_name': 'Gymnobelideus leadbeateri',
 'start_date': datetime.datetime(2019, 12, 24, 0, 0),
 'start_mth': 12,
 'start_year': 2019,
 'survey_id': '2014819',
 'taxon_id': '11141',
 'taxon_type': 'Mammals',
 'totalcount': 1}
{'_id': ObjectId('5f67ad47c6206b044a89acfd'),
 'comm_name': 'Hooded Plover',
 'lat': -38.1132011,
 'long': 141.12049869999998,
 'record_id': '10198952',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 9, 19, 0, 0),
 'start_mth': 9,
 'start_year': 2018,
 'survey_id': '2022955',
 'taxon_id': '10138',
 'taxon_type': 'Waders',
 'totalcount': 2}
{'_id': ObjectId('5f67ad47c6206b044a89acfe'),
 'comm_name': 'Hooded Plover',
 'lat': -38.104599,
 'long': 141.1004028,
 'record_id': '10198955',
 'sci_name': 'Thinornis cucullatus',
 'start_date': datetime.datetime(2018, 9, 19, 0, 0),
 'start_mth': 9,

In [42]:
for record in scraped_fauna.find():
    pprint(record)

{'_id': ObjectId('5f67ad47c6206b044a89ad14'),
 'animal_name': 'Giant Burrowing Frog',
 'image_alternative': 'Giant Burrowing Frog on wet rocks side view. The frog '
                      'is dark brown with yellow lips and spots on its side.',
 'image_url': 'https://www.zoo.org.au/media/2056/23479_giant_burrowing_frog_-_credit_required_offsite1.jpg?anchor=center&mode=crop&quality=75&width=2000&height=570&rnd=132131643480000000',
 'introduction': 'Although we know that populations of the Giant Burrowing '
                 'Frog are in decline, this elusive digger is one of '
                 'Victoria’s most poorly understood species.',
 'threat_paragraph': "['Over the next five years, Zoos Victoria will carry out "
                     'important on-the-ground surveys to discover more about '
                     'the Giant Burrowing Frog and its remote habitat. In the '
                     'meantime, we are securing the wellbeing of the '
                     "individual frogs in our

In [43]:
type(vba_fauna.find())

pymongo.cursor.Cursor

In [44]:
type(scraped_fauna.find())

pymongo.cursor.Cursor

## Test aggregations by animal names

In [45]:
# Aggregate total sightings by each animal (represented in common names, science names, taxon ids and taxon types) over 5 years
metadata = list(
    vba_fauna.aggregate(
    [
          {
                  "$group" : {
                      "_id" :"$comm_name",
                      "science_name": { "$first": "$sci_name" },
                      "taxon_id": { "$first": "$taxon_id" },
                      "taxon_type": { "$first": "$taxon_type" },
                      "totalSightings": { "$sum": "$totalcount" },
                  }
          }
    ]))

metadata

[{'_id': "Leadbeater's Possum",
  'science_name': 'Gymnobelideus leadbeateri',
  'taxon_id': '11141',
  'taxon_type': 'Mammals',
  'totalSightings': 654},
 {'_id': 'Swift Parrot',
  'science_name': 'Lathamus discolor',
  'taxon_id': '10309',
  'taxon_type': 'Non-passerine birds',
  'totalSightings': 2695},
 {'_id': 'Mountain Pygmy-possum',
  'science_name': 'Burramys parvus',
  'taxon_id': '11156',
  'taxon_type': 'Mammals',
  'totalSightings': 823},
 {'_id': 'New Holland Mouse',
  'science_name': 'Pseudomys novaehollandiae',
  'taxon_id': '11455',
  'taxon_type': 'Mammals',
  'totalSightings': 119},
 {'_id': 'Mallee Emu-wren',
  'science_name': 'Stipiturus mallee',
  'taxon_id': '10527',
  'taxon_type': 'Passerine birds',
  'totalSightings': 722},
 {'_id': 'Hooded Plover',
  'science_name': 'Thinornis cucullatus',
  'taxon_id': '10138',
  'taxon_type': 'Waders',
  'totalSightings': 3208},
 {'_id': 'Giant Burrowing Frog',
  'science_name': 'Heleioporus australiacus',
  'taxon_id': '130

In [46]:
# Aggregate records by animal name
records_by_animal = list(vba_fauna.aggregate([
    {
        "$group" : {
            "_id" : "$comm_name",
            "record_id": { "$push": "$record_id" },
            "survey_id": { "$push": "$survey_id" },
            "number_sightings": { "$push": "$totalcount" },
            "long": { "$push": "$long" },
            "lat": { "$push": "$lat" },
            "start_year": { "$push": "$start_year" },
            "start_mth": { "$push": "$start_mth" },
            "start_date": { "$push": "$start_date" }
        }
    }
]))

records_by_animal[2]

{'_id': 'Mountain Pygmy-possum',
 'record_id': ['8490354',
  '8490261',
  '8490379',
  '8490365',
  '6814274',
  '8490248',
  '8490355',
  '8490264',
  '8490362',
  '8490378',
  '8490258',
  '8490359',
  '8490375',
  '8490352',
  '8490255',
  '8490356',
  '8617169',
  '8490369',
  '8490267',
  '8490240',
  '8617165',
  '8490251',
  '8490237',
  '8490242',
  '8617178',
  '8617172',
  '8926477',
  '8926480',
  '8926482',
  '8926485',
  '8926461',
  '8926464',
  '8926467',
  '8926470',
  '8926473',
  '8926475'],
 'survey_id': ['1342588',
  '1342581',
  '1342599',
  '1342593',
  '1078936',
  '1342577',
  '1342589',
  '1342582',
  '1342592',
  '1342598',
  '1342580',
  '1342591',
  '1342597',
  '1342587',
  '1342579',
  '1342590',
  '1410385',
  '1342594',
  '1342583',
  '1342574',
  '1410384',
  '1342578',
  '1342573',
  '1342575',
  '1410388',
  '1410386',
  '1705217',
  '1705218',
  '1705219',
  '1705220',
  '1705211',
  '1705212',
  '1705213',
  '1705214',
  '1705215',
  '1705216'],
 'n