In [1]:
# Module used to connect Python to MongoDB
import pymongo

In [2]:
# Dependencies -cont'd
import pandas as pd

In [3]:
# Load the country reference table (taken from https://countrycode.org/) into a Pandas DataFrame
cntry_code_path = '../data/rawdata/cntry_code.csv'
cntry_code_df = pd.read_csv(cntry_code_path, encoding='UTF-8')

# Preview the first rows of "cntry_code_df"
cntry_code_df.head()

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES
0,Afghanistan,93,AF / AFG
1,Albania,355,AL / ALB
2,Algeria,213,DZ / DZA
3,American Samoa,1-684,AS / ASM
4,Andorra,376,AD / AND


In [4]:
# Connection string for the local MongoDB server (default port 27017)
conn = 'mongodb://localhost:27017'

# Client through which the databases on that server are reached
client = pymongo.MongoClient(host=conn)

In [5]:
# Handle on the "EndangeredAnimalDB" Database ...
db = client['EndangeredAnimalDB']
# ... and on its "animal_facts" Collection
coll = db['animal_facts']

In [6]:
# Retrieve all documents in "animal_facts" Collection
documents = coll.find()

# Animal common names, one per document, in the order the cursor returns them
name_list = []

# Number of documents seen, and how many of those carry no "Common_Name"
docu_ct = 0
missing_ct = 0

# Loop through all documents
for docu in documents:
    docu_ct += 1
    common_name = docu.get('Common_Name')
    if common_name is None:
        # Count the gap so the check below is a real check and reports every
        # affected document, not just the first one via a bare KeyError
        missing_ct += 1
        continue
    name_list.append(common_name)

# Later cells pair "docu_ct" with "name_list" position by position, so stop
# here if any document lacks a common name
if missing_ct:
    raise ValueError(
        f'{missing_ct} of {docu_ct} documents in "animal_facts" have no "Common_Name".'
    )

# Print total document counts
print(f'There are {docu_ct} documents in "animal_facts" Collection.\n')
print('All documents have animal common names.')

There are 69 documents in "animal_facts" Collection.

All documents have animal common names.


In [7]:
# Checklist of country names used in "animal_facts" that do not appear in 'cntry_code_df'.
# It stays a list in first-seen order because a later cell addresses its entries by position.
cntry_chkl = []

# Reference names, built once; a set gives constant-time membership tests
known_cntry = set(cntry_code_df.COUNTRY)

# Mirror of "cntry_chkl" used only to detect duplicates quickly
seen_cntry = set()

# Country arrays to inspect on every document, in the order they are scanned
cntry_fields = ('Native_Extant_Resident_Cntry', 'Native_Extant_Cntry')

# Loop through all documents from "animal_facts"
for common_name in name_list:
    docu = coll.find_one({'Common_Name': common_name})

    for cntry_field in cntry_fields:
        # A document may lack either array; treat that as nothing to check
        for cntry in docu.get(cntry_field, []):
            if cntry in known_cntry or cntry in seen_cntry:
                continue
            seen_cntry.add(cntry)
            cntry_chkl.append(cntry)

# Check the length of "cntry_chkl"
len(cntry_chkl)

107

In [8]:
# Display the whole checklist; the next cell refers to these entries by their list position
cntry_chkl

['Congo',
 'Congo, The Democratic Republic of the',
 "Côte d'Ivoire",
 'Tanzania, United Republic of',
 'Bonaire, Sint Eustatius and Saba (Saba, Sint Eustatius, Bonaire)',
 'Cocos (Keeling) Islands',
 'Curaçao',
 'French Guiana',
 'Guadeloupe',
 "Korea, Democratic People's Republic of",
 'Korea, Republic of',
 'Martinique',
 'Micronesia, Federated States of ',
 'Norfolk Island',
 'Réunion',
 'Saint Barthélemy',
 'Saint Helena, Ascension and Tristan da Cunha',
 'Saint Martin (French part)',
 'Sao Tomé and Principe',
 'Sint Maarten (Dutch part)',
 'Syrian Arab Republic',
 'Taiwan, Province of China',
 'Timor-Leste',
 'United States Minor Outlying Islands',
 'Venezuela, Bolivarian Republic of',
 'Viet Nam',
 'Virgin Islands, British',
 'Virgin Islands, U.S.',
 'Bolivia, Plurinational States of',
 'Russian Federation',
 'United States (Alaska)',
 'Brunei Darussalam',
 'Falkland Islands (Malvinas)',
 'Indonesia (Kalimantan)',
 'Malaysia (Sarawak, Sabah)',
 'Equatorial Guinea (Equatorial Gui

In [9]:
# Replacement names taken from "cntry_code_df", listed in the same order as "cntry_chkl":
# entry N below is the new name for cntry_chkl[N].
cntry_new_names = [
    'Republic of the Congo',                  # 0
    'Democratic Republic of the Congo',       # 1
    'Ivory Coast',                            # 2
    'Tanzania',                               # 3
    'Netherlands Antilles',                   # 4
    'Cocos Islands',                          # 5
    'Curacao',                                # 6
    'French Guiana',                          # 7
    'Saint Martin',                           # 8
    'North Korea',                            # 9
    'South Korea',                            # 10
    'Martinique',                             # 11
    'Micronesia',                             # 12
    'Antarctica',                             # 13
    'Reunion',                                # 14
    'Saint Barthelemy',                       # 15
    'Saint Helena',                           # 16
    'Saint Martin',                           # 17
    'Sao Tome and Principe',                  # 18
    'Sint Maarten',                           # 19
    'Syria',                                  # 20
    'Taiwan',                                 # 21
    'East Timor',                             # 22
    'United States Minor Outlying Islands',   # 23
    'Venezuela',                              # 24
    'Vietnam',                                # 25
    'British Virgin Islands',                 # 26
    'U.S. Virgin Islands',                    # 27
    'Bolivia',                                # 28
    'Russia',                                 # 29
    'United States',                          # 30
    'Brunei',                                 # 31
    'Falkland Islands',                       # 32
    'Indonesia',                              # 33
    'Malaysia',                               # 34
    'Equatorial Guinea',                      # 35
    'Bouvet Island',                          # 36
    'French Southern Territories',            # 37
    'Heard Island and McDonald Islands',      # 38
    'Mauritius',                              # 39
    'Saint Helena',                           # 40
    'Seychelles',                             # 41
    'Falkland Islands',                       # 42
    'Ecuador',                                # 43
    'China',                                  # 44
    'Australia',                              # 45
    'United States',                          # 46
    'New Zealand',                            # 47
    'Laos',                                   # 48
    'Netherlands Antilles',                   # 49
    'Iran',                                   # 50
    'Canada',                                 # 51
    'Russia',                                 # 52
    'Netherlands Antilles',                   # 53
    'China',                                  # 54
    'India',                                  # 55
    'Indonesia',                              # 56
    'Mexico',                                 # 57
    'Angola',                                 # 58
    'Macedonia',                              # 59
    'Canada',                                 # 60
    'Mexico',                                 # 61
    'United States',                          # 62
    'China',                                  # 63
    'Malaysia',                               # 64
    'United States',                          # 65
    'Canada',                                 # 66
    'Canada',                                 # 67
    'United States',                          # 68
    'Indonesia',                              # 69
    'Malaysia',                               # 70
    'Brazil',                                 # 71
    'French Southern Territories',            # 72
    'Portugal',                               # 73
    'Russia',                                 # 74
    'Spain',                                  # 75
    'India',                                  # 76
    'Japan',                                  # 77
    'Papua New Guinea',                       # 78
    'Yemen',                                  # 79
    'Australia',                              # 80
    'Netherlands Antilles',                   # 81
    'Equatorial Guinea',                      # 82
    'French Southern Territories',            # 83
    'India',                                  # 84
    'Indonesia',                              # 85
    'Japan',                                  # 86
    'Malaysia',                               # 87
    'Mexico',                                 # 88
    'New Zealand',                            # 89
    'Saint Helena',                           # 90
    'United States',                          # 91
    'United States Minor Outlying Islands',   # 92
    'Venezuela',                              # 93
    'India',                                  # 94
    'Mexico',                                 # 95
    'United States',                          # 96
    'Australia',                              # 97
    'British Indian Ocean Territory',         # 98
    'Disputed Territory',                     # 99
    'French Polynesia',                       # 100
    'India',                                  # 101
    'Kiribati',                               # 102
    'United States Minor Outlying Islands',   # 103
    'France',                                 # 104
    'Sao Tome and Principe',                  # 105
    'Venezuela',                              # 106
]

# The pairing is purely positional, so it is only meaningful for the exact checklist it was
# written against. If "cntry_chkl" has a different length (e.g. it was rebuilt after the
# collection had already been updated), refuse to build a mapping that would rename the
# wrong countries.
if len(cntry_chkl) != len(cntry_new_names):
    raise ValueError(
        f'"cntry_chkl" has {len(cntry_chkl)} entries but {len(cntry_new_names)} '
        'replacement names are defined; the positional mapping no longer lines up.'
    )

# Dict to store inconsistent cntry names:
# names from "animal_facts" are the keys, the corresponding "cntry_code_df" names the values
cntry_change = dict(zip(cntry_chkl, cntry_new_names))

In [10]:
# Array whose country names are brought in line with "cntry_code_df" in this cell
resident_field = 'Native_Extant_Resident_Cntry'

# Loop through animal list
for animal in name_list:
    docu = coll.find_one({'Common_Name': animal})
    # Nothing to do when no document matches this name
    if docu is None:
        continue

    # Iterate over the array as it was when read; a missing or empty array yields no work
    for cntry in docu.get(resident_field) or []:
        # Only names listed in "cntry_change" are renamed
        new_name = cntry_change.get(cntry)
        if not new_name:
            continue

        # Remove the old spelling from the array ...
        coll.update_many(
            {'Common_Name': animal},
            {'$pull': {resident_field: cntry}}
        )
        # ... then append the replacement unless the array already holds it.
        # "$addToSet" does the "already present?" check on the server, so no extra read
        # is needed. Database errors are deliberately not swallowed: a failure after the
        # "$pull" would otherwise drop a country without any trace.
        coll.update_many(
            {'Common_Name': animal},
            {'$addToSet': {resident_field: new_name}}
        )

In [11]:
# Array whose country names are brought in line with "cntry_code_df" in this cell
extant_field = 'Native_Extant_Cntry'

# Loop through animal list
for animal in name_list:
    docu = coll.find_one({'Common_Name': animal})
    # Nothing to do when no document matches this name
    if docu is None:
        continue

    # Iterate over the array as it was when read; a missing or empty array yields no work
    for cntry in docu.get(extant_field) or []:
        # Only names listed in "cntry_change" are renamed
        new_name = cntry_change.get(cntry)
        if not new_name:
            continue

        # Remove the old spelling from the array ...
        coll.update_many(
            {'Common_Name': animal},
            {'$pull': {extant_field: cntry}}
        )
        # ... then append the replacement unless the array already holds it.
        # "$addToSet" does the "already present?" check on the server, so no extra read
        # is needed. Database errors are deliberately not swallowed: a failure after the
        # "$pull" would otherwise drop a country without any trace.
        coll.update_many(
            {'Common_Name': animal},
            {'$addToSet': {extant_field: new_name}}
        )