In [1]:
# Module used to connect Python to MongoDB
import pymongo

In [2]:
# Dependencies -cont'd
import pandas as pd

In [3]:
# Load the country reference table (provided on https://countrycode.org/) into a Pandas DataFrame
cntry_code_path = '../../data/cntry_code.csv'
cntry_code_df = pd.read_csv(cntry_code_path, encoding='UTF-8')

# Preview the first rows of "cntry_code_df"
cntry_code_df.head()

Unnamed: 0,COUNTRY,COUNTRY CODE,ISO CODES
0,Afghanistan,93,AF / AFG
1,Albania,355,AL / ALB
2,Algeria,213,DZ / DZA
3,American Samoa,1-684,AS / ASM
4,Andorra,376,AD / AND


In [4]:
# Connection string for the local MongoDB server (default port 27017)
conn = 'mongodb://localhost:27017'
# Open the client; the URI is passed through the explicit "host" keyword
client = pymongo.MongoClient(host=conn)

In [5]:
# Handle on the "EndangeredAnimalDB" Database (subscript access is equivalent to attribute access)
db = client['EndangeredAnimalDB']
# Handle on the "animal_facts" Collection inside that Database
coll = db['animal_facts']

In [6]:
# Retrieve all documents in "animal_facts" Collection
documents = coll.find()

# Country names present in 'cntry_code_df', built once as a set so every
# membership test below is O(1) instead of rebuilding a list per candidate
known_cntry = set(cntry_code_df.COUNTRY)

# Checklist of country names from "animal_facts" that do not appear in 'cntry_code_df'.
# Kept as a list in first-seen order because the next cell addresses entries by position.
cntry_chkl = []
# Companion set mirroring "cntry_chkl" for O(1) duplicate detection
seen_cntry = set()

# Loop through all documents
for docu in documents:

    for key, cntry_list in docu.items():
        # Only the keys other than "_id" and "Common_Name" hold country names
        if key in ("_id", "Common_Name"):
            continue

        for cntry in cntry_list:
            # Record each unmatched country name once, the first time it is seen
            if cntry not in known_cntry and cntry not in seen_cntry:
                seen_cntry.add(cntry)
                cntry_chkl.append(cntry)

# Preview "cntry_chkl"
cntry_chkl

['Congo',
 'Congo, The Democratic Republic of the',
 "Côte d'Ivoire",
 'Tanzania, United Republic of',
 'Bonaire, Sint Eustatius and Saba (Saba, Sint Eustatius, Bonaire)',
 'Cocos (Keeling) Islands',
 'Curaçao',
 'French Guiana',
 'Guadeloupe',
 "Korea, Democratic People's Republic of",
 'Korea, Republic of',
 'Martinique',
 'Micronesia, Federated States of ',
 'Norfolk Island',
 'Réunion',
 'Saint Barthélemy',
 'Saint Helena, Ascension and Tristan da Cunha',
 'Saint Martin (French part)',
 'Sao Tomé and Principe',
 'Sint Maarten (Dutch part)',
 'Syrian Arab Republic',
 'Taiwan, Province of China',
 'Timor-Leste',
 'United States Minor Outlying Islands',
 'Venezuela, Bolivarian Republic of',
 'Viet Nam',
 'Virgin Islands, British',
 'Virgin Islands, U.S.',
 'Bolivia, Plurinational States of',
 'Indonesia (Jawa)',
 'Iran, Islamic Republic of',
 'Russian Federation',
 'United States (Alaska)',
 'United States (Aleutian Is.)',
 'Macedonia, the former Yugoslav Republic of',
 'Indonesia (Ka

In [7]:
# Replacement names taken from "cntry_code_df", listed in the same order as "cntry_chkl":
# the entry at position i is the substitute for cntry_chkl[i].
# NOTE(review): the pairing is purely positional, so it is only valid for the
# exact "cntry_chkl" ordering this list was written against.
replacement_names = (
    # 0 - 9
    'Republic of the Congo',
    'Democratic Republic of the Congo',
    'Ivory Coast',
    'Tanzania',
    'Netherlands Antilles',
    'Cocos Islands',
    'Curacao',
    'French Guiana',
    'Saint Martin',
    'North Korea',
    # 10 - 19
    'South Korea',
    'Martinique',
    'Micronesia',
    'Antarctica',
    'Reunion',
    'Saint Barthelemy',
    'Saint Helena',
    'Saint Martin',
    'Sao Tome and Principe',
    'Sint Maarten',
    # 20 - 29
    'Syria',
    'Taiwan',
    'East Timor',
    'United States Minor Outlying Islands',
    'Venezuela',
    'Vietnam',
    'British Virgin Islands',
    'U.S. Virgin Islands',
    'Bolivia',
    'Indonesia',
    # 30 - 39
    'Iran',
    'Russia',
    'United States',
    'United States',
    'Macedonia',
    'Indonesia',
    'Laos',
    'Malaysia',
    'United States',
    'Brunei',
    # 40 - 49
    'Falkland Islands',
    'Brazil',
    'French Southern Territories',
    'Portugal',
    'Russia',
    'Falkland Islands',
    'Spain',
    'Indonesia',
    'Malaysia',
    'Equatorial Guinea',
    # 50 - 59
    'Netherlands Antilles',
    'Canada',
    'India',
    'Japan',
    'Mauritius',
    'Papua New Guinea',
    'Yemen',
    'Brazil',
    'French Southern Territories',
    'Antarctica',
    # 60 - 69
    'Saint Helena',
    'Seychelles',
    'Ecuador',
    'China',
    'Australia',
    'United States',
    'Australia',
    'Netherlands Antilles',
    'Equatorial Guinea',
    'French Southern Territories',
    # 70 - 79
    'India',
    'Indonesia',
    'Japan',
    'Malaysia',
    'Mexico',
    'New Zealand',
    'Saint Helena',
    'United States',
    'United States Minor Outlying Islands',
    'Venezuela',
    # 80 - 89
    'India',
    'Mexico',
    'United States',
    'New Zealand',
    'Australia',
    'British Indian Ocean Territory',
    'Disputed Territory',
    'French Polynesia',
    'India',
    'Kiribati',
    # 90 - 99
    'United States Minor Outlying Islands',
    'France',
    'Netherlands Antilles',
    'United States',
    'Portugal',
    'Spain',
    'United States',
    'Sao Tome and Principe',
    'Venezuela',
    'Canada',
    # 100 - 109
    'United States',
    'Canada',
    'Canada',
    'Russia',
    'Canada',
    'Mexico',
    'United States',
    'Ecuador',
    'Costa Rica',
    'China',
    # 110 - 115
    'India',
    'Indonesia',
    'China',
    'Malaysia',
    'Mexico',
    'Angola',
)

# Dict mapping each inconsistent country name found in "animal_facts" (key)
# to the corresponding name used in "cntry_code_df" (value)
cntry_change = {}

# Index into "cntry_chkl" explicitly so a checklist shorter than the
# replacement list still raises IndexError instead of being silently truncated
for idx, new_name in enumerate(replacement_names):
    cntry_change[cntry_chkl[idx]] = new_name

In [8]:
# Retrieve all documents in "animal_facts" Collection
documents = coll.find()

# Loop through all documents
for docu in documents:

    # Grab id info for current "docu" by name rather than by key position
    key_id = docu["_id"]
    # Keys for country names: everything except "_id" and "Common_Name"
    key_list = [key for key in docu if key not in ("_id", "Common_Name")]

    # Loop through "key_list"
    for key in key_list:

        # Read country list of current key in "docu"
        for cntry in docu[key]:

            # Look the replacement up with .get() so the "no change needed" case is
            # plain control flow; a KeyError raised by the database calls below is
            # no longer swallowed by a blanket except
            new_name = cntry_change.get(cntry)
            if not new_name:
                continue

            # First pull the old country name out of the country list
            coll.update_one(
                {"_id": key_id},
                {
                    '$pull': {key: cntry}
                }
            )
            # Then append the substituted name only if it is not already in the list.
            # $addToSet performs that check server-side, replacing the former
            # find_one + $push pair. It stays a separate update because MongoDB
            # rejects two operators that target the same field in one update.
            coll.update_one(
                {"_id": key_id},
                {
                    '$addToSet': {key: new_name}
                }
            )
