In [1]:
from pymongo import MongoClient
from pymongo import errors
from pprint import pprint
import numpy as np
import regex as re

In [2]:
def connect_database():
    client = MongoClient('localhost', 27017)
    return client

In [3]:
client = connect_database()

In [4]:
client.list_database_names()

['Kitchen_test',
 'Perfume_Database',
 'WorldDevelopmentIndicators',
 'admin',
 'config',
 'local']

In [5]:
perfume_database = client.Perfume_Database

In [6]:
perfume_database.list_collection_names()

['Cleared_Crawled_Perfumes',
 'Fragrances_Backup_02/22/2024',
 'Scraped_Perfumes',
 'Extra01',
 'Fragrances_Backup_02/24/2024',
 'Fragrances_Backup_02/16/2024',
 'Extra03',
 'Metadata',
 'Perfumes',
 'Extra02',
 'Fragrances',
 'Fragrances_Quality',
 'Crawled_Perfumes']

In [7]:
fragrances_collection = perfume_database.Fragrances

## Clearing Fragrances Dataset

trim and lowercase company names

In [35]:
pipeline = []

In [36]:
clean_name = {
    '$set': {
        'name': {
            '$toLower': {
                '$trim': {
                    'input': {
                        '$toString': '$name'
                    }
                }
            }
        }
    }
}

In [37]:
clean_company = {
    '$set': {
        'company': {
            '$toLower': {
                '$trim': {
                    'input': {
                        '$toString': '$company'
                    }
                }
            }
        }
    }
}

In [38]:
merge = {
    "$merge": "Fragrances"
}

In [39]:
pipeline.extend([clean_name, clean_company, merge])

In [40]:
pipeline

[{'$set': {'name': {'$toLower': {'$trim': {'input': {'$toString': '$name'}}}}}},
 {'$set': {'company': {'$toLower': {'$trim': {'input': {'$toString': '$company'}}}}}},
 {'$merge': 'Fragrances'}]

In [41]:
fragrances_collection.count_documents({})

4563

In [42]:
fragrances_collection.aggregate(pipeline)

<pymongo.command_cursor.CommandCursor at 0x273ff8901d0>

In [43]:
fragrances_collection.count_documents({})

4563

## Detect Duplicates

In [8]:
duplicate_detection_pipeline = []

In [9]:
group_by_name_company = {
    '$group': {
        '_id': {
            'name': '$name', 
            'company': '$company'
        }, 
        'ids': {
            '$push': '$_id'
        },
        'count': {
            '$count': {}
        }
    }
}

In [10]:
find_duplicates = {
    '$match': {
        'count': {
            '$gt': 1
        }
    }
}

In [11]:
duplicate_detection_pipeline.extend([group_by_name_company, find_duplicates])

In [27]:
result = fragrances_collection.aggregate(duplicate_detection_pipeline)

In [28]:
to_delete_ids = []

In [29]:
for doc in result:
    to_delete_ids.extend(doc['ids'][1:])

In [30]:
to_delete_ids

[ObjectId('65c3d3942be6a0a6d64f648e'),
 ObjectId('65a6842180d8b716415b35cf'),
 ObjectId('65a6842780d8b716415b35d2'),
 ObjectId('65a6842d80d8b716415b35d5'),
 ObjectId('65c3d3942be6a0a6d64f61d2'),
 ObjectId('65c3d3942be6a0a6d64f6338'),
 ObjectId('65c3d3942be6a0a6d64f618a'),
 ObjectId('65a75c9a96d56388983d0998')]

In [31]:
delete_query = {
    '_id': {
        '$in': to_delete_ids
    }
}

In [32]:
test_res = fragrances_collection.find(delete_query)

In [33]:
for i in test_res:
    pprint(i)

{'_id': ObjectId('65a6842180d8b716415b35cf'),
 'base notes': ['Oakmoss', 'Patchouli', 'Amber'],
 'company': 'frapin',
 'description': 'The Orchid Man by Frapin is a Aromatic fragrance for women '
                'and men. The Orchid Man was launched in 2015. The nose behind '
                'this fragrance is Jérôme Epinette. Top notes are Bergamot and '
                'Black Pepper; middle notes are Leather and Jasmine; base '
                'notes are Oakmoss, Patchouli and Amber. The Orchid Man was a '
                'nickname of the French boxer Georges Carpentier, who was a '
                'multi-talented person. After the boxing ring, he took to the '
                'stage. Carpentier the showman went from Paris to Hollywood. '
                'He experienced the heights of Wall Street and then its '
                'collapse. A modern man who was always ready for new '
                'adventures, the Orchid Man opened one of the first cocktail '
                'bars in 

In [34]:
fragrances_collection.delete_many(delete_query)

DeleteResult({'n': 8, 'ok': 1.0}, acknowledged=True)

deleting was applied. double check duplicates

In [None]:
duplicate_detection_pipeline.append({'$count': 'count'})

In [45]:
try:
    fragrances_collection.aggregate(duplicate_detection_pipeline).next()
except StopIteration:
    print("None")

None


## Address Consistency Issues

### Rating Field 
 There are documents with values of "NA". since they are not available, the field is going to be removed. using update and unset operator.

In [28]:
rating_toBeRemoved = {"rating": "NA"}

In [30]:
remove_rating_field = {'$unset': {"rating": {}}}

In [31]:
fragrances_collection.count_documents(rating_toBeRemoved)

12

In [32]:
result = fragrances_collection.update_many(rating_toBeRemoved, remove_rating_field)

In [33]:
pprint(result)

UpdateResult({'n': 12, 'nModified': 12, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)


In [34]:
fragrances_collection.count_documents(rating_toBeRemoved)

0

### Link and URL
Since they are the same fields, the values of url are added to an array and renamed to 

In [96]:
update_pipeline = []

In [97]:
documents_with_url = {'$match': {'url': {'$exists': True}}}

In [98]:
push_url_to_link = { 
    '$set': {
     'link': {
       '$cond': { 
         'if': { '$isArray': "$link" },
         'then': { '$concatArrays': ["$link", ["$url"]] },
         'else': ["$url"]
       }
     }
    }
}

In [103]:
remove_url = {
    '$unset': 'url'
}

In [107]:
merge = {
    '$merge': {
        'into': 'Fragrances',
        'on': '_id',
        'whenMatched': 'replace'
    }
}

In [117]:
count = {'$count': 'count'}

In [118]:
update_pipeline = [documents_with_url, push_url_to_link, remove_url,count]

In [119]:
update_pipeline

[{'$match': {'url': {'$exists': True}}},
 {'$set': {'link': {'$cond': {'if': {'$isArray': '$link'},
     'then': {'$concatArrays': ['$link', ['$url']]},
     'else': ['$url']}}}},
 {'$unset': 'url'},
 {'$count': 'count'}]

Count the documents that are going to be affected by the pipeline

In [120]:
pprint(fragrances_collection.aggregate(update_pipeline).next())

{'count': 2260}


Create the pipeline to merge and replace with new values

In [121]:
update_pipeline = [documents_with_url, push_url_to_link, remove_url, merge]

In [123]:
res = fragrances_collection.aggregate(update_pipeline)

In [127]:
with_url = {'url': {'$exists': True}}

Check for the number of documents having the url field, after the aggregation applied.

In [136]:
fragrances_collection.count_documents(with_url)

0

### Notes 

A pipeline for counting unique notes:

In [8]:
def count_notes(collection): 
    processed_notes_pipeline_count = [
    {
        '$project': {
            'all_notes': {
                '$concatArrays': [
                    {
                        '$ifNull': [
                            '$top notes', []
                        ]
                    }, {
                        '$ifNull': [
                            '$base notes', []
                        ]
                    }, {
                        '$ifNull': [
                            '$middle notes', []
                        ]
                    }, {
                        '$ifNull': [
                            '$notes', []
                        ]
                    }
                ]
            }
        }
    }, {
        '$set': {
            'length': {
                '$size': '$all_notes'
            }
        }
    }, {
        '$sort': {
            'length': -1
        }
    }, {
        '$unwind': {
            'path': '$all_notes'
        }
    }, {
        '$group': {
            '_id': '$all_notes'
            , 
            'count': {
                '$sum': 1
            }, 
            'variants': {
                '$push': '$all_notes'
            }
        }
    }, {
        '$count': 'count'
    }
]
    print(f"The number of unique lowercased and trimmed notes are: {fragrances_collection.aggregate(processed_notes_pipeline_count).next()['count']}")

In [9]:
count_notes(fragrances_collection)

The number of unique lowercased and trimmed notes are: 3671


A pipeline to trim and lowercase all notes.

In [22]:
def preprocess_notes(field, collection):
    initial_note_cleaning = [
    {
        '$match': {
            field: {
                '$exists': True
            }
        }
    }, {
        '$set': {
            field: {
                '$map': {
                    'input': f'${field}', 
                    'as': 'note', 
                    'in': {
                        '$trim': {
                            'input': {
                                '$toLower': '$$note'
                            }
                        }
                    }
                }
            }
        }
    }, {
        '$merge': {
            'into': 'Fragrances', 
            'on': '_id', 
            'whenMatched': 'replace', 
            'whenNotMatched': 'discard'
        }
    }
]
    collection.aggregate(initial_note_cleaning)

Clean notes field by field

In [23]:
preprocess_notes("top notes", fragrances_collection)

In [24]:
count_notes(fragrances_collection)

The number of unique lowercased and trimmed notes are: 4678


In [25]:
preprocess_notes("middle notes", fragrances_collection)

In [26]:
count_notes(fragrances_collection)

The number of unique lowercased and trimmed notes are: 4600


In [27]:
preprocess_notes("base notes", fragrances_collection)

In [28]:
count_notes(fragrances_collection)

The number of unique lowercased and trimmed notes are: 4427


In [29]:
preprocess_notes("notes", fragrances_collection)

In [30]:
count_notes(fragrances_collection)

The number of unique lowercased and trimmed notes are: 3671


Eavluate notes syntactic accuracy, define pipeline to extract values with charachters other than alphabet, numbers, comma and space

In [32]:
extract_abnormal_notes_pipeline = [
    {
        '$project': {
            'all_notes': {
                '$concatArrays': [
                    {
                        '$ifNull': [
                            '$top notes', []
                        ]
                    }, {
                        '$ifNull': [
                            '$base notes', []
                        ]
                    }, {
                        '$ifNull': [
                            '$middle notes', []
                        ]
                    }, {
                        '$ifNull': [
                            '$notes', []
                        ]
                    }
                ]
            }
        }
    }, {
        '$unwind': {
            'path': '$all_notes'
        }
    }, {
        '$group': {
            '_id': '$all_notes'
        }
    }, {
        '$project': {
            'note': '$_id', 
            '_id': 0
        }
    }, {
        '$match': {
            'note': {
                '$not': {
                    '$regex': re.compile(r"^[a-zA-Z][a-zA-Z ']+[a-zA-Z]$")
                }
            }
        }
    }
]

In [33]:
abnormal_notes = fragrances_collection.aggregate(extract_abnormal_notes_pipeline)

In [34]:
abnormal_notes_list = [doc["note"] for doc in abnormal_notes]

Abnormalities detected:
- Unrelated sentence in note field
- the charachters . ( ) : and some unknown characters probabily resulted from encoding mismatch
- "and" was a seperator of some notes instead of comma

In [36]:
abnormal_notes_list

['matÃ© absolute',
 'jasmin (jasmin absolute',
 'kara-karounde from guinea',
 'rhubarb (wood)',
 'matÃ©.',
 '100% pure oud',
 'madagascan vanilla absolute & white ambergris tincture.',
 'vanilla and labdanum.',
 'guaiac wood)',
 'sandal 100k',
 'chinese osmanthus absolute 1%',
 'castoreum and musk.',
 'musk.',
 'and white musk.',
 'opoponax (sweet myrrh)',
 'spray paint (aerosols)',
 'black suede click here for ingredients Ã\x97close tuscan leather travel atomizer by tom ford private blend ingredients please be aware that ingredient lists may change or vary from time to time.  please refer to the ingredient list on the product package you receive for the most up to date list of ingredients.',
 'taÃ¯f rose essential oil',
 'gardenia (and other lovely things-- the mystery is part of the glamour)',
 'indonesian bouya (oud)',
 'tonka bean and tokaji aszu wine.',
 'amber gold: tangerine',
 'cedar and olibanum.',
 'zdravetz herb.',
 'orris sur cÃ¨dre',
 '7% natural ambergris',
 'accord eudor

#### Fix Encoding

In [42]:
# fixed_encoding_abnormal_notes = [note.encode('latin1', 'ignore').decode('utf-8', 'ignore') for note in abnormal_notes_list]

In [43]:
# fixed_encoding_abnormal_notes

['maté absolute',
 'jasmin (jasmin absolute',
 'kara-karounde from guinea',
 'rhubarb (wood)',
 'maté.',
 '100% pure oud',
 'madagascan vanilla absolute & white ambergris tincture.',
 'vanilla and labdanum.',
 'guaiac wood)',
 'sandal 100k',
 'chinese osmanthus absolute 1%',
 'castoreum and musk.',
 'musk.',
 'and white musk.',
 'opoponax (sweet myrrh)',
 'spray paint (aerosols)',
 'black suede click here for ingredients ×close tuscan leather travel atomizer by tom ford private blend ingredients please be aware that ingredient lists may change or vary from time to time.  please refer to the ingredient list on the product package you receive for the most up to date list of ingredients.',
 'taïf rose essential oil',
 'gardenia (and other lovely things-- the mystery is part of the glamour)',
 'indonesian bouya (oud)',
 'tonka bean and tokaji aszu wine.',
 'amber gold: tangerine',
 'cedar and olibanum.',
 'zdravetz herb.',
 'orris sur cèdre',
 '7% natural ambergris',
 'accord eudora',
 '& 

with encoding fixed, a pipeline for getting notes without abnormal charachters ( ) . : | 

Fix encoding with going over them in annomilies

In [19]:
pipeline = [
    {
        '$match': {
            'notes': {
                '$exists': True
            }
        }
    }, {
        '$set': {
            'unparsed_notes': {
                '$replaceAll': {
                    'input': '$unparsed_notes', 
                    'find': '-', 
                    'replacement': ' '
                }
            }
        }
    }, {
        '$set': {
            'unparsed_notes': {
                '$replaceAll': {
                    'input': '$unparsed_notes', 
                    'find': '.', 
                    'replacement': ''
                }
            }
        }
    }, {
        '$set': {
            'unparsed_notes': {
                '$replaceAll': {
                    'input': '$unparsed_notes', 
                    'find': '(', 
                    'replacement': ' '
                }
            }
        }
    }, {
        '$set': {
            'unparsed_notes': {
                '$replaceAll': {
                    'input': '$unparsed_notes', 
                    'find': ')', 
                    'replacement': ' '
                }
            }
        }
    }, {
        '$match': {
            'unparsed_notes': {
                '$not': {
                    '$regex': re.compile(r"^[a-zA-Z ][a-zA-Z0-9 ,]*[a-zA-Z ]$")
                }
            }
        }
    }
]

In [45]:
cursor = fragrances_collection.aggregate(pipeline)

In [46]:
for perfume in cursor: 
    
    edited_perfume = perfume
    edited_perfume["unparsed_notes"] = perfume["unparsed_notes"].encode('latin1', 'ignore').decode('utf-8', 'ignore')
    update = {"$set": {
        "unparsed_notes": edited_perfume["unparsed_notes"]
    }}

    
    result = fragrances_collection.update_one({"_id": perfume['_id']}, update)
    pprint(f"Updated {perfume['_id']}, result is: {result}")
    

("Updated 65c3d3942be6a0a6d64f5ebc, result is: UpdateResult({'n': 1, "
 "'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)")
("Updated 65c3d3942be6a0a6d64f5eed, result is: UpdateResult({'n': 1, "
 "'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)")
("Updated 65c3d3942be6a0a6d64f5f0d, result is: UpdateResult({'n': 1, "
 "'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)")
("Updated 65c3d3942be6a0a6d64f5f10, result is: UpdateResult({'n': 1, "
 "'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)")
("Updated 65c3d3942be6a0a6d64f5f33, result is: UpdateResult({'n': 1, "
 "'nModified': 0, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)")
("Updated 65c3d3942be6a0a6d64f5f8d, result is: UpdateResult({'n': 1, "
 "'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)")
("Updated 65c3d3942be6a0a6d64f5fbc, result is: UpdateResult({'n': 1, "
 "'nModified': 1, 'ok': 1.0, 'updatedExisting':

#### Remove Extra Charachters and sentences

In [199]:
result = fragrances_collection.find({"unparsed_notes": {"$exists": True}})

In [200]:
target_perfumes = list(result)

In [201]:
pprint(target_perfumes[1:5])

[{'_id': ObjectId('65c3d3942be6a0a6d64f5eb7'),
  'company': 'di ser',
  'description': ' A tribute to the expanse of space extending from the sky to '
                 'the flower and fruit laden earth below, Sola, the newest '
                 'offering from DI SER and perfumer Yasuyuki Shinohara, '
                 'showcases the natural flora of Hokkaido, Japan with locally '
                 'sourced and sustainably harvested ingredients from DI SERs '
                 'own gardens. Starting with aromatic notes of freshly '
                 'harvested Hokkaido lavender, lemongrass and yuzu, the '
                 'uplifting scent moves to a floral middle of Japanese rose, '
                 'magnolia, geranium and jasmine before settling on a '
                 'spiritual bed of frankincense and myrrh. The spiritual and '
                 'meditative scent evokes an imagery of a peaceful universe in '
                 'complete harmony with the bounties of nature and of peace '
   

In [202]:
def fix_encoding(text):
    
    return text.encode('latin1', 'ignore').decode('utf-8', 'ignore')

In [203]:
def clean_nonrelevant_texts(text):
    
    edited_text = text
    
    if re.search(r"click", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"click.*", "", edited_text, flags=re.IGNORECASE)
        
    if re.search(r"recent.*", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"recent.*", "", edited_text, flags=re.IGNORECASE)
        
    if re.search(r"\band\b", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"\band\b", ",", edited_text, flags=re.IGNORECASE)
        
    if re.search(r"&", edited_text):
        edited_text = re.sub(r"&", ",", edited_text)
        
#     Remove Quotes
    if re.search(r"\"[^\"]*\"", edited_text):
        edited_text = re.sub(r"\"[^\"]*\"", "", edited_text)
        
# #     Remove misc texts
    if re.search(r": No animals were.*", edited_text, re.IGNORECASE):
        edited_text = re.sub(r": No animals were.*", "", edited_text, flags=re.IGNORECASE)
        
    if re.search(r"this fragrance contains no animal products", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"this fragrance contains no animal products", "", edited_text, flags=re.IGNORECASE)
        
    if re.search(r", dozens.*", edited_text, re.IGNORECASE):
        edited_text = re.sub(r", dozens.*", "", edited_text, flags=re.IGNORECASE)
        
    if re.search(r"we're.*", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"we're.*", "", edited_text, flags=re.IGNORECASE)
        
    if re.search(r"(top:)|(base:)", edited_text):
        edited_text = re.sub(r"(top:)|(base:)", "", edited_text)
        
    if re.search(r"11 select  but undisclosed!", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"11 select  but undisclosed!", "", edited_text, flags=re.IGNORECASE)
        
    if re.search(r"four kinds!", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"four kinds!", "", edited_text, flags=re.IGNORECASE)
        
    if re.search(r"does not contain any actual animal products", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"does not contain any actual animal products", "", edited_text, flags=re.IGNORECASE)
        
    if re.search(r"\*", edited_text):
        edited_text = re.sub(r"\*", ",", edited_text)  
        
    if re.search(r"\r", edited_text):
        edited_text = re.sub(r"\r", ",", edited_text)
        
    if re.search(r"\s\s+", edited_text):
        edited_text = re.sub(r"\s\s+", " ", edited_text)
        
    return edited_text

In [204]:
def clean_numeric_nonrelevant(text):
    
    edited_text = text
    
    if re.search(r"\d+%", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"\d+%", "", text, flags=re.IGNORECASE)

    return edited_text

In [205]:
def clean_parentheses(text):
    
    edited_text = text
    
    if re.search(r"[()]+", edited_text, re.IGNORECASE):
        edited_text = re.sub(r"\([^()]+\)", "", text, flags=re.IGNORECASE)
        
    return edited_text

In [206]:
def clean_punctuation(text):
    
    edited_text = text
    
    if re.search(r"\.", edited_text):
        edited_text = re.sub(r"\.", "", edited_string)
        
    if re.search(r"-", edited_text):
        edited_text = re.sub(r"-", " ", edited_text)
        
    if re.search(r";", edited_text):
        edited_text = re.sub(r";", ",", edited_text)
        
    if re.search(r":", edited_text):
        edited_text = re.sub(r":", ",", edited_text)
        
    return edited_text

In [207]:
for perfume in target_perfumes:
    unparsed_notes = perfume["unparsed_notes"]
    
    edited_string = fix_encoding(unparsed_notes)
    
    edited_string = clean_nonrelevant_texts(edited_string)
    edited_string = clean_numeric_nonrelevant(edited_string)
    edited_string = clean_parentheses(edited_string)
    edited_string = clean_punctuation(edited_string)

    parsed_notes = edited_string.split(',')
    parsed_notes = [note.strip() for note in parsed_notes if not len(note.strip()) == 0]

    
    perfume["notes"] = parsed_notes
    perfume.pop("unparsed_notes", None)
    
    result = fragrances_collection.replace_one({"_id": perfume["_id"]}, perfume)
    print(result)

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)
UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExi

### Trim and lower case all notes

In [9]:
trim_lowercase_allNotes_pipeline = [
    {
        '$set': {
            'top_notes': {
                '$map': {
                    'input': '$top notes', 
                    'as': 'top_note', 
                    'in': {
                        '$toLower': {
                            '$trim': {
                                'input': '$$top_note'
                            }
                        }
                    }
                }
            }
        }
    }, {
        '$set': {
            'middle notes': {
                '$map': {
                    'input': '$middle notes', 
                    'as': 'middle_note', 
                    'in': {
                        '$toLower': {
                            '$trim': {
                                'input': '$$middle_note'
                            }
                        }
                    }
                }
            }
        }
    }, {
        '$set': {
            'base notes': {
                '$map': {
                    'input': '$base notes', 
                    'as': 'base_note', 
                    'in': {
                        '$toLower': {
                            '$trim': {
                                'input': '$$base_note'
                            }
                        }
                    }
                }
            }
        }
    }, {
        '$set': {
            'notes': {
                '$map': {
                    'input': '$notes', 
                    'as': 'note', 
                    'in': {
                        '$toLower': {
                            '$trim': {
                                'input': '$$note'
                            }
                        }
                    }
                }
            }
        }
    }, {
        '$merge': {
            'into': 'Fragrances', 
            'on': '_id', 
            'whenMatched': 'replace', 
            'whenNotMatched': 'discard'
        }
    }
]