In [1]:
import numpy as np
import pandas as pd
import glob, re, string

# for webscraping from cosdna
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from string_grouper import match_strings, match_most_similar, group_similar_strings, StringGrouper
# from sklearn.feature_extraction.text import TfidfVectorizer
# from scipy.sparse import csr_matrix
# import sparse_dot_topn.sparse_dot_topn as sdt

cosmetic ingredient database: https://public.opendatasoft.com/explore/dataset/cosmetic-ingredient-database-ingredients-and-fragrance-inventory/table/

cosdna: https://cosdna.com/

INCI Decoder: https://incidecoder.com/

# import and quick cleanup

In [2]:
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the listing (and anything that indexes into it) is stable across machines.
file_list = sorted(glob.glob('./data/*.csv'))
file_list

['./data\\brands.csv',
 './data\\recommendations_2020-05-05.csv',
 './data\\responses_2020-05-05.csv',
 './data\\responses_modified_2020-05-05.csv']

In [3]:
# Pick the raw survey export by file name rather than by its position in
# file_list: glob order is not guaranteed, and adding a CSV to ./data would
# shift the index. The pattern matches 'responses_YYYY-MM-DD.csv' only, so
# 'responses_modified_*.csv' and 'recommendations_*.csv' are excluded.
response_files = sorted(
    path for path in file_list
    if re.search(r'[\\/]responses_\d{4}-\d{2}-\d{2}\.csv$', path)
)
if not response_files:
    raise FileNotFoundError('no responses_YYYY-MM-DD.csv export found in ./data')
# ISO dates sort lexicographically, so the last entry is the most recent export.
responses = pd.read_csv(response_files[-1])

In [4]:
new_columns = [
    'timestamp',
    'email_address',
    'name',
    'order_number',
    'skin_type',
    'cannot_contain',
    'skin_concern',
    'am_routine',
    'pm_routine',
    'climate',
    'skin_sensitivities',
    'used_retinoids',
    'used_acids',
    'prone_to_breakouts',
    'miscellaneous',
    'permission'
]

responses.columns = new_columns
responses.columns

Index(['timestamp', 'email_address', 'name', 'order_number', 'skin_type',
       'cannot_contain', 'skin_concern', 'am_routine', 'pm_routine', 'climate',
       'skin_sensitivities', 'used_retinoids', 'used_acids',
       'prone_to_breakouts', 'miscellaneous', 'permission'],
      dtype='object')

In [5]:
# Drop the personally identifying columns. Reassigning instead of mutating in
# place, and ignoring columns that are already gone, keeps this cell safe to
# re-run (the in-place version raises KeyError on a second execution).
responses = responses.drop(columns=['email_address', 'name'], errors='ignore')
responses.head()

Unnamed: 0,timestamp,order_number,skin_type,cannot_contain,skin_concern,am_routine,pm_routine,climate,skin_sensitivities,used_retinoids,used_acids,prone_to_breakouts,miscellaneous,permission
0,4/30/2020 12:43:32,12846,Combination,,"Acne, Pigmentation, Fine lines/wrinkles","Trader Joe’s “all in one” cleanser, Bioderma S...","Trader Joe’s “all in one” cleanser, Bioderma S...","Temperate, a lovely 70F (lucky!)",I broke out from Cosrx snail serum and from Ma...,Differing 0.1% for four years,I prefer toners with acids as masks with AHA s...,I have mild to moderate acne that I have been ...,"I have a super oily forehead, oily nose and c...",Yes
1,4/30/2020 12:45:25,12837,Combination,No allergies,"Acne, Fine lines/wrinkles, Skin Texture (rough...",1. Wash with Biore Charcoal Acne Clearing Clea...,Collapse into bed,"Tropical, humid",No.,No.,No.,Yes. They are usually cystic and around my cyc...,I need help.,Yes
2,4/30/2020 12:50:26,12845,Oily,No.,"Acne, Pigmentation",Cleanser (Drunk Elephant Beste No. 9 Jelly Cle...,Oil cleanser (Dermalogica Precleanse); Cleanse...,Desert dry heat,No.,No.,I've used 2% BHA on and off; and daily I use a...,"Yes, occasional small whiteheads and blackhead...",I'm Black so hyperpigmentation and skin discol...,Yes
3,4/30/2020 13:02:43,12850,Normal,No,"Fine lines/wrinkles, Skin Texture (roughness, ...","Splash of water, occasional Paula's Choice enr...",Occasional Palmer's facial cleansing oil (if w...,"Tropical, humid",No,Almost through the purging/peeling phase with ...,I've tried (and still own) The Ordinary red pe...,"Not really - a very rare pimple, I tend more o...",I turn 43 in a month and am really trying to g...,Yes
4,4/30/2020 13:08:20,12842,Combination,"Ethylhexyl palmitate, strong sulfates, chemica...","Acne, Skin Texture (roughness, dullness), Other","Osea Ocean Cleanser, Renee Rouleau (RR) Elderb...","Renee Rouleau (RR) Luxe Mint Cleanser, Renee R...","Temperate, a lovely 70F (lucky!)",Overall I have fairly sensitive skin with horm...,"No, but I would like to start to address early...",Azelaic acid has been a good acid for me (not ...,"My acne is adult onset, primarily hormonal, an...","I just moved to Chicago, so lots of temperatur...",Yes


In [6]:
# Answers are comma-separated, but one option itself contains a comma
# ("skin texture (roughness, dullness)"). Splitting on every ', ' cut that
# option in two, so only split on commas that are not inside parentheses.
# The multi-character pattern is treated as a regular expression by str.split.
responses['skin_concern'] = (
    responses['skin_concern']
    .str.lower()
    .str.split(r',\s*(?![^()]*\))')
)
responses['skin_concern'][:5]

0            [acne, pigmentation, fine lines/wrinkles]
1    [acne, fine lines/wrinkles, skin texture (roug...
2                                 [acne, pigmentation]
3    [fine lines/wrinkles, skin texture (roughness,...
4    [acne, skin texture (roughness, dullness), other]
Name: skin_concern, dtype: object

In [None]:
%%html
<style>
table {float:left}
</style>

# selenium webscrape

## (for later consideration) local database structure?
products schema

|field|description|dtype|
|---|---|---|
|product_id|primary key|int|
|product_name|name of product|str|
|brand_id|brand of product|int|
|product_category_id|category of product|int|
|discontinued|whether the product has been discontinued|boolean (0 or 1)|

contents schema

|field|description|dtype|
|---|---|---|
|product_id|product id|int|
|compound_id|compound id|int|
|concentration|amount of ingredient found in product|decimal|

compounds schema

|field|description|dtype|
|---|---|---|
|compound_id|compound id|int|
|compound_name|(common) name of compound|str|
|pubchem_cid|pubchem compound identification|int|
|active|whether the compound is an active ingredient|boolean (0 or 1)|

brands schema

|field|description|dtype|
|---|---|---|
|brand_id|brand id|int|
|brand_name|brand name|str|
|brand_address|brand address|str|

In [26]:
# replace with path of chromedriver.exe location
CHROMEDRIVER_PATH = 'C:/chromedriver_win32/chromedriver.exe'

# open chrome
DRIVER = webdriver.Chrome(CHROMEDRIVER_PATH)

In [None]:
# brands = [
#     'La Roche Posay',
#     'Cerave', 
#     'Cetaphil', 
#     'Timeless Skincare', 
#     'Skinceuticals', 
#     'Stratia Skincare', 
#     'Paula’s Choice', 
#     'CosRX', 
#     'The Ordinary', 
#     'Drunk Elephant', 
#     'Klairs', 
#     'Inkey List', 
#     'Farmacy ', 
#     'First Aid Beauty'
# ]

# brands = set([brand.lower() for brand in brands])
# brands

In [8]:
# get search url from product name
def search_url(product_name, sort='featured'):
    """Build the cosdna product-search URL for a product name.

    The raw ``product_name`` is echoed to stdout first. It is then converted
    to ``str``, stripped of ASCII punctuation, lower-cased, and its spaces
    are replaced by ``+`` to form the query.

    Parameters
    ----------
    product_name : object
        Name to search for; converted with ``str()``.
    sort : str, (default='featured')
        Value appended as the ``sort`` query parameter.

    Returns
    -------
    str
        The search URL.
    """
    print(product_name)
    strip_punctuation = str.maketrans('', '', string.punctuation)
    cleaned = str(product_name).translate(strip_punctuation).lower()
    query = '+'.join(cleaned.split(' '))
    # INCI Decoder alternative: 'https://incidecoder.com/search?query=' + query
    return ''.join(('https://cosdna.com/eng/product.php?q=', query, '&sort=', sort))

In [27]:
test = 'la roche posay antihelios mineral'
# Reuse `test` instead of repeating the literal so this search and the later
# full_product_search(test, DRIVER) call cannot drift apart.
url = search_url(test)
print(url)

la roche posay antihelios mineral
https://cosdna.com/eng/product.php?q=la+roche+posay+antihelios+mineral&sort=featured


In [28]:
DRIVER.get(url)

In [11]:
def top_result_url(driver):
    """Return the URL of the first hit on the cosdna results page currently loaded.

    If the first result link cannot be read, the user is prompted for a new
    search term, that search is loaded into ``driver``, and the lookup is
    retried until it succeeds.

    Parameters
    ----------
    driver : selenium.webdriver instance
        Browser already pointed at a cosdna search-results page.

    Returns
    -------
    str
        ``href`` of the first link in the results table.
    """
    # A loop rather than recursion: repeated failed searches no longer grow the call stack.
    while True:
        try:
            top = driver.find_element_by_xpath("//table[@class='table table-hover']/tbody/tr/td[1]/a")
            return top.get_attribute('href')
        except Exception:
            # `except Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate, so interrupting the kernel stops the scrape
            # instead of being mistaken for "no results".
            print('no results on cosdna')
            product_name = input('enter new search: ')
            driver.get(search_url(product_name))

In [31]:
top_url = top_result_url(DRIVER)
top_url

'https://cosdna.com/eng/cosmetic_ea81439268.html'

In [35]:
re.findall(".*cosmetic_(.*).html", top_url)

['ea81439268']

In [12]:
DRIVER.get(top_result_url(DRIVER))

In [13]:
# use string-grouper later

def add_to(example, array):
    """Add ``example`` to ``array`` and report it, unless it is already present.

    ``array`` must support ``in`` and provide an ``.add()`` method (e.g. a set).
    Nothing is returned; a message is printed only when an element is added.
    """
    if example in array:
        return
    array.add(example)
    print(f'{example} was added to array')

In [14]:
def product_name(driver):
    """Print the lower-cased "brand product" name shown on the page loaded in ``driver``.

    Reads the text of the elements with class ``brand-name`` and ``prod-name``
    (in that order), joins them with a single space, strips surrounding
    whitespace and prints the result. Returns ``None``.
    """
    name_parts = [
        driver.find_element_by_class_name(css_class).text.lower()
        for css_class in ('brand-name', 'prod-name')
    ]
    print(' '.join(name_parts).strip())

In [15]:
product_name(DRIVER)

la roche-posay antihelios mineral sunscreen spf 50


In [16]:
def _function_tags(function_cell):
    """Parse one "function" table cell into a list of lower-cased tags.

    The cell text is split on commas and each tag is stripped, so the
    'sunscreen' check works whether or not a space follows the comma.
    For sunscreen ingredients, the UVA/UVB ratings encoded in the ``src`` of
    the cell's first two images (e.g. 'uva2', 'uvb4') are appended, but only
    when both can be read, matching the original all-or-nothing behaviour.
    """
    tags = [tag.strip() for tag in function_cell.text.strip().lower().split(',')]
    if 'sunscreen' in tags:
        ratings = []
        for image in function_cell.find_elements_by_tag_name('img')[:2]:
            # Raw string: "\d" in a normal string literal is an invalid escape sequence.
            match = re.search(r"uv[ab]\d", image.get_attribute('src') or '')
            if match:
                ratings.append(match[0])
        if len(ratings) == 2:
            tags.extend(ratings)
    return tags


def ingredients_table(driver):
    """Scrape the ingredient list of the cosdna product page loaded in ``driver``.

    Parameters
    ----------
    driver : selenium.webdriver instance
        Browser currently showing a cosdna product page.

    Returns
    -------
    pandas.DataFrame
        One row per table row that has at least one ``td`` cell, with columns
        ``ingredient`` (lower-cased text of the first cell) and ``function``
        (list of tags parsed from the second cell, or ``None`` when the row
        has no second cell). The columns exist even when the table is empty.
    """
    table = driver.find_element_by_class_name('chem-list')

    records = []
    for row in table.find_elements_by_tag_name('tr'):
        # Look the cells up once per row instead of once per column.
        cells = row.find_elements_by_tag_name('td')
        if not cells:
            # Rows without data cells used to raise IndexError.
            continue
        records.append({
            'ingredient': cells[0].text.strip().lower(),
            'function': _function_tags(cells[1]) if len(cells) > 1 else None,
        })

    # Explicit columns so callers can index ['ingredient'] on an empty result.
    return pd.DataFrame(records, columns=['ingredient', 'function'])

In [17]:
ingredients_table(DRIVER)

Unnamed: 0,ingredient,function
0,titanium dioxide 6% (sunscreen),"[pigment, sunscreen, uva2, uvb4]"
1,zinc oxide 5% (sunscreen),"[astringent, sunscreen, uva4, uvb4]"
2,water,[solvent]
3,dimethicone,[emollient]
4,isododecane,"[solvent, fragrance]"
5,c12-15 alkyl benzoate,[emollient]
6,undecane,[emollient]
7,triethylhexanoin,"[synthetic ester, emollient]"
8,isohexadecane,"[solvent, emollient]"
9,nylon-12,[viscosity control]


In [18]:
def full_product_search(product_name, driver, sort='featured'):
    """Search cosdna for a product, open its top hit and scrape the ingredients.

    Loads the search page built by ``search_url``, navigates to the URL given
    by ``top_result_url``, prints the lower-cased "brand product" name found
    on that page followed by a blank line, and returns whatever
    ``ingredients_table`` produces for the page.
    """
    results_page = search_url(product_name, sort)
    driver.get(results_page)
    product_page = top_result_url(driver)
    driver.get(product_page)

    # The name is read inline here: inside this function the `product_name`
    # parameter shadows the notebook's product_name() helper.
    brand, product = (
        driver.find_element_by_class_name(css_class).text.lower()
        for css_class in ('brand-name', 'prod-name')
    )
    print(f'{brand} {product}'.strip())
    print()
    return ingredients_table(driver)

In [19]:
full_product_search(test, DRIVER)

la roche posay antihelios mineral
la roche-posay antihelios mineral sunscreen spf 50



Unnamed: 0,ingredient,function
0,titanium dioxide 6% (sunscreen),"[pigment, sunscreen, uva2, uvb4]"
1,zinc oxide 5% (sunscreen),"[astringent, sunscreen, uva4, uvb4]"
2,water,[solvent]
3,dimethicone,[emollient]
4,isododecane,"[solvent, fragrance]"
5,c12-15 alkyl benzoate,[emollient]
6,undecane,[emollient]
7,triethylhexanoin,"[synthetic ester, emollient]"
8,isohexadecane,"[solvent, emollient]"
9,nylon-12,[viscosity control]


In [None]:
class Product():
    """
    Create a product object containing various features scraped from cosdna.com
    
    Parameters
    ----------
    
    driver : current selenium.webdriver instance
    
    name : str, (default=None)
        name of product
        
    cosdna_id : str, (default=None)
        identifier at the end of the cosdna url (e.g. 'ea81439268')
        
        
    Attributes
    ----------
    
    name : str
        user-defined name of Product
        
    cosdna_name : str or None
        full name of product as it appears on cosdna (None until synced)
        
    brand : str or None
        name of brand as it appears on cosdna (None until synced)
        
    product : str or None
        name of product as it appears on cosdna (None until synced)
        
    cosdna_id : str or None
        identifier at the end of the cosdna url
        
    cosdna_url : str or None
        full cosdna_url
        
    ingredients : dict
        maps each ingredient name to its list of listed functions from cosdna
        (or None when no function is listed); empty until synced
        
    linked : bool
        whether Product() has been linked with a cosdna listing
        
    synced : bool
        whether Product() has collected information from linked cosdna listing
    """
    
    def __init__(self, driver, name=None, cosdna_id=None):
        self.driver = driver 
        self.name = name
        self.cosdna_id = cosdna_id
        if cosdna_id is not None:
            self.cosdna_url = 'https://cosdna.com/eng/cosmetic_' + cosdna_id + '.html'
            self.linked = True
        else:
            self.cosdna_url = None
            self.linked = False
        self.synced = False
        # Filled in by sync(); initialised here so reading them on an unsynced
        # Product gives an empty value instead of AttributeError.
        self.brand = None
        self.product = None
        self.cosdna_name = None
        self.ingredients = {}
    
    
    def link(self, name=None, sort='featured'):
        """
        Searches for top match on cosdna and records it in:
        
        cosdna_url
        cosdna_id
        linked
        
        If the search has no readable top result, the user is prompted for a
        new search term; entering 'break' gives up and leaves the instance
        as it was.
        
        Parameters
        ----------
        
        name : str, (default=None)
            full name of product 
            if blank: uses name assigned when creating Product() instance
            if not blank: updates name
            
        sort : str, (default='featured')
            value of the `sort` query parameter, also used for retried searches
            
        Returns
        -------
        
        self : object
        """
        
        # if name=None, use existing name. if not, update name
        if name is not None:
            self.name = name
        
        # Loop instead of recursing so that retries keep using `sort`
        # (the recursive retry silently fell back to 'featured').
        while True:
            # search for name in cosdna.com
            query = str(self.name).translate(str.maketrans('', '', string.punctuation)).lower()
            query = query.replace(' ', '+')
            self.driver.get('https://cosdna.com/eng/product.php?q=' + query + '&sort=' + sort)
            
            # try to get the top result. if successful, change linked to True
            try:
                top = self.driver.find_element_by_xpath("//table[@class='table table-hover']/tbody/tr/td[1]/a")
                found_url = top.get_attribute('href')
                found_id = re.findall(r".*cosmetic_(.*)\.html", found_url)[0]
            except Exception:
                # `except Exception` lets KeyboardInterrupt/SystemExit through.
                print('no results on cosdna')
                retry_name = input('enter new search: ')
                if retry_name == 'break':
                    return self
                self.name = retry_name
                continue
            
            self.cosdna_url = found_url
            self.cosdna_id = found_id
            self.linked = True
            return self
        
        
    def sync(self, cosdna_url=None):
        """
        Collects information from linked cosdna site:
        
        cosdna_url
        cosdna_id
        brand
        product
        cosdna_name
        ingredients
        synced
        
        Parameters
        ----------
        
        cosdna_url : str, (default=None)
            full cosdna url of product. 
            if blank: uses linked cosdna_url
            if not blank: updates cosdna_url
            
        Returns
        -------
        
        self : object
        
        Raises
        ------
        
        ValueError
            if no cosdna_url was given and the instance has none
        """
        # navigate to url and get cosdna_id
        if cosdna_url is not None:
            self.cosdna_url = cosdna_url
        if self.cosdna_url is None:
            # This path used to reference an undefined name and die with NameError.
            raise ValueError('create instance with cosdna_id or .link() instance with cosdna_url')
        self.cosdna_id = re.findall(r".*cosmetic_(.*)\.html", self.cosdna_url)[0]
        self.linked = True
        self.driver.get(self.cosdna_url)

        # get brand and product information from page
        self.brand = self.driver.find_element_by_class_name('brand-name').text.lower()
        self.product = self.driver.find_element_by_class_name('prod-name').text.lower()
        cosdna_name = self.brand + ' ' + self.product
        self.cosdna_name = cosdna_name.strip()

        # get ingredients information from ingredients table
        ingredients = {}
        table = self.driver.find_element_by_class_name('chem-list')
        for row in table.find_elements_by_tag_name('tr'):
            cells = row.find_elements_by_tag_name('td')
            if not cells:
                # rows without data cells carry no ingredient
                continue
            ingredient = cells[0].text.strip().lower()
            if 'no results' in ingredient:
                # the ingredient name is read from the row's 'text-muted' element instead
                ingredient = row.find_elements_by_class_name('text-muted')[0].text.strip().lower()
                function = None
            elif len(cells) > 1:
                function = self._parse_functions(cells[1])
            else:
                function = None
            ingredients[ingredient] = function
        self.ingredients = ingredients
        # change state to True
        self.synced = True
        return self
        
        
    def link_sync(self, sort='featured'):
        """
        Should operate the same way as .link().sync()
        
        If linking was abandoned and the instance has no cosdna listing, the
        sync step is skipped and the unsynced instance is returned.
        """
        self.link(sort=sort)
        if self.linked:
            self.sync()
        return self
    
    
    @staticmethod
    def _parse_functions(function_cell):
        """
        Turns one "function" table cell into a list of lower-cased tags.
        
        Tags are stripped so 'sunscreen' is recognised whether or not a space
        follows the comma. For sunscreens, the UVA/UVB ratings found in the
        `src` of the cell's first two images are appended when both can be
        read; an ingredient without them is still kept (it used to be dropped
        from the ingredients dict altogether).
        """
        functions = [tag.strip() for tag in function_cell.text.strip().lower().split(',')]
        if 'sunscreen' in functions:
            ratings = []
            for image in function_cell.find_elements_by_tag_name('img')[:2]:
                match = re.search(r"uv[ab]\d", image.get_attribute('src') or '')
                if match:
                    ratings.append(match[0])
            if len(ratings) == 2:
                functions.extend(ratings)
        return functions

# raw counter for routine

In [20]:
from collections import Counter

In [21]:
# Products to look up on cosdna, in routine order.
sample_routine = [
    'stratia velvet cleansing milk', 
    'the ordinary natural moisturizing factors', 
    'stratia liquid gold', 
    'chemist confessions better oil',
    'la roche posay antihelios mineral'
]

routine_dict = {}

# Pool the ingredient names of every product, then tally how many products
# each ingredient name appears in.
compounds = []
for i, routine_product in enumerate(sample_routine):
    ingredients = full_product_search(routine_product, DRIVER)
    compounds.extend(ingredients['ingredient'])

counts = Counter(compounds)
print(counts)

stratia velvet cleansing milk
stratia velvet cleansing milk

the ordinary natural moisturizing factors
the ordinary natural moisturizing factors + ha

stratia liquid gold
stratia liquid gold

chemist confessions better oil
chemist confessions the better oil moisture basics nourishing emollients

la roche posay antihelios mineral
la roche-posay antihelios mineral sunscreen spf 50

Counter({'water': 3, 'glycerin': 3, 'propylene glycol': 3, 'tocopherol': 3, 'allantoin': 2, 'disodium edta': 2, 'diazolidinyl urea': 2, 'cetyl alcohol': 2, 'sodium hyaluronate': 2, 'linoleic acid': 2, 'carbomer': 2, 'pentylene glycol': 2, 'phenoxyethanol': 2, 'hippophae rhamnoides seed oil': 2, 'dimethicone': 2, 'squalane': 2, 'olive oil peg-7 esters': 1, 'caprylic/caprictriglycerides': 1, 'oryza sativa bran oil': 1, 'cetearyl alcohol': 1, 'glyceryl stearate': 1, 'polyacrylamide': 1, 'c13-14 isoparaffin': 1, 'laureth-7': 1, 'ceteareth-20': 1, 'matricaria recutita': 1, 'aloe barbadensis leaf juice': 1, 'potassi

## current goals:
- create local database so that we're not scraping all the time?
- find way to group together ingredients that mean the same thing (use string grouper? need master list)

# inspired by countvectorizer

In [22]:
# this definitely needs to be an object

def routine_vectorizer(routine, driver):
    """Look up every product of a routine and encode ingredient membership.

    Parameters
    ----------
    routine : iterable of str
        Product names, each passed to ``full_product_search``.
    driver : selenium.webdriver instance
        Forwarded to ``full_product_search``.

    Returns
    -------
    tuple
        ``(all_ingredients, counts, product_vectors)`` where
        ``all_ingredients`` is the concatenation of every product's ingredient
        list (repeats are kept), ``counts`` is a ``Counter`` over it, and
        ``product_vectors`` holds, per product, a 0/1 list aligned with
        ``all_ingredients`` marking which entries the product contains.
    """
    per_product = [
        list(full_product_search(item, driver)['ingredient'])
        for item in routine
    ]
    all_ingredients = [name for names in per_product for name in names]
    counts = Counter(all_ingredients)

    product_vectors = [
        [1 if name in names else 0 for name in all_ingredients]
        for names in per_product
    ]

    return (all_ingredients, counts, product_vectors)

In [23]:
def product_isolator(ingredient, routine, all_ingredients, product_vectors):
    """Return the products of ``routine`` whose vector flags ``ingredient``.

    ``product_vectors[i]`` is read at the position of the first occurrence of
    ``ingredient`` in ``all_ingredients``; ``routine[i]`` is included when
    that entry equals 1.
    """
    return [
        routine[position]
        for position, vector in enumerate(product_vectors)
        if vector[all_ingredients.index(ingredient)] == 1
    ]

In [45]:
routine = [
    'Biore Charcoal Acne Clearing Cleanser', 
    "kiehl's powerful strength line-reducing concentrate 12.5", 
    'Neutrogena Rapid Clear Spot Gel',
    'Roc Retinol Correxion eye cream', 
    'Body Merry Retinol Surge Moisturizer'
]

all_ingredients, counts, product_vectors = routine_vectorizer(routine, DRIVER)

Biore Charcoal Acne Clearing Cleanser
biore charcoal acne clearing cleanser

kiehl's powerful strength line-reducing concentrate 12.5
kiehl's powerful-strength line-reducing concentrate 12.5% vitamin c

Neutrogena Rapid Clear Spot Gel
neutrogena rapid clear stubborn acne spot gel

Roc Retinol Correxion eye cream
roc retinol correxion® eye cream 0.5oz(15ml)

Body Merry Retinol Surge Moisturizer
body merry retinol surge moisturizer



In [46]:
print(counts)

Counter({'glycerin': 4, 'disodium edta': 3, 'ethylhexylglycerin': 2, 'sodium hydroxide': 2, 'water': 2, 'glyceryl stearate': 2, 'panthenol': 2, 'polysorbate 20': 2, 'retinol': 2, 'phenoxyethanol': 2, 'salicylic acid 1%': 1, 'sodium laureth sulfate': 1, 'cocamidopropyl betaine': 1, 'sorbitol': 1, 'laureth-4 carboxylic acid': 1, 'acrylates/c10-30 alkyl acrylate crosspolymer': 1, 'sodium benzoate': 1, 'fragrance': 1, 'menthol': 1, 'polyquaternium-39': 1, 'charcoal powder': 1, 'propylene glycol': 1, 'dimethicone': 1, 'ascorbic acid': 1, 'ethylhexyl palmitate': 1, 'cetyl peg/ppg-10/1 dimethicone': 1, 'dimethicone crosspolymer': 1, 'ascorbyl glucoside': 1, 'cyclohexasiloxane': 1, 'hydroxyethylpiperazine ethane sulfonic acid': 1, 'lauroyl lysine': 1, 'acrylonitrile/methyl methacrylate/vinylidene chloride copolymer': 1, 'polysilicone-11': 1, 'adenosine': 1, 'hydrolyzed hyaluronic acid': 1, 'limonene': 1, 'isobutane': 1, 'citrus aurantium dulcis peel oil': 1, 'lemon peel oil': 1, 'citral': 1, '

In [50]:
product_isolator('panthenol', routine, all_ingredients, product_vectors)

['Roc Retinol Correxion eye cream', 'Body Merry Retinol Surge Moisturizer']

# fast text matching

In [52]:
all_ingredients_df = pd.DataFrame(all_ingredients)
all_ingredients_df

Unnamed: 0,0
0,salicylic acid 1%
1,sodium laureth sulfate
2,cocamidopropyl betaine
3,sorbitol
4,glycerin
...,...
93,pentylene glycol
94,alcohol
95,lecithin
96,phenoxyethanol


In [57]:
string_grouper = StringGrouper(all_ingredients_df[0])
string_grouper = string_grouper.fit()

In [58]:
string_grouper = string_grouper.add_match('ascorbic acid', 'ascorbyl glucoside')
string_grouper.get_groups()

0          salicylic acid 1%
1     sodium laureth sulfate
2     cocamidopropyl betaine
3                   sorbitol
4                   glycerin
               ...          
93          pentylene glycol
94                   alcohol
95                  lecithin
96            phenoxyethanol
97        ethylhexylglycerin
Name: 0, Length: 98, dtype: object

In [None]:


# PRODUCTS = {}

# brands = pd.read_csv('./data\\brands.csv')
# brands.head()

# matches = match_strings(brands['brand_name'])
# matches[matches.left_side != matches.right_side].head()

In [None]:
# test = 'LA ROCHE-POSAY'

# def ngrams(string, n=3):
#     string = string.encode('ascii', errors='ignore').decode()
#     string = string.lower()
    
#     chars_to_remove = [")","(",".","|","[","]","{","}","'"]
#     rx = '[' + re.escape(''.join(chars_to_remove)) + ']'
#     string = re.sub(rx, '', string)
    
#     string = string.replace('&', 'and')
#     string = string.replace(',', ' ')
#     string = string.replace('-', ' ')
#     string = re.sub(' +',' ',string).strip()
#     string = ' '+ string +' '
#     string = re.sub(r'[,-./]|\sBD',r'', string)
    
#     ngrams = zip(*[string[i:] for i in range(n)])
            
#     return [''.join(ngram) for ngram in ngrams]

In [None]:
# # cosine similarity
# # https://bergvca.github.io/2017/10/14/super-fast-string-matching.html

# def cosine_similarity_top(A, B, ntop, lower_bound=0):
#     A = A.tocsr()
#     B = B.tocsr()
#     M, _ = A.shape
#     _, N = B.shape
    
#     idx.dtype = np.int32
    
#     nnz_max = M * ntop
    
#     indptr = np.zeros(M+1, dtype=idx_dtype)
#     indices = np.zeros(nnz_max, dtype=idx_dtype)
#     data = np.zeros(nnz_max, dtype=A.dtype)
    
#     sdt.sparse_dot_topn(
#         M, N, np.asarray(A.indptr, dtype=idx_dtype),
#         np.asarray(A.indices, dtype=idx_dtype),
#         A.data,
#         np.asarray(B.indptr, dtype=idx_dtype),
#         np.asarray(B.indices, dtype=idx_dtype),
#         B.data,
#         ntop,
#         lower_bound,
#         indptr, indices, data)
    
#     return csr_matrix((data,indices,indptr),shape=(M,N))

# sample routine analysis

In [None]:
products = pd.Series(list(products))
products

In [None]:
sample_routine = pd.Series([
    'Skinscript Green Tea Cleanser', 
    'Skinscript Cucumber Toner', 
    'SkinCeuticals CE Ferulic', 
    'Josie Maran Argan Oil',
    'Armada Face and Body Shield 60 sunblock'
])

# The original cell ended in a `for` loop with no body (a SyntaxError).
# TODO(review): the intended per-ingredient step was never written; for now
# this pools the ingredient names the same way the raw-counter cell does.
compounds = []
for routine_product in sample_routine:
    ingredients = full_product_search(routine_product, DRIVER)
    # Iterate the 'ingredient' column: looping over the DataFrame itself
    # would yield its column labels, not the ingredients.
    compounds.extend(ingredients['ingredient'])

In [None]:
matches = match_strings(sample_routine, products, min_similarity=0.1)
matches
# pd.DataFrame({'sample_routine': sample_routine, 'products': products})

In [None]:
matches = match_most_similar(products, sample_routine, min_similarity=0.1)
pd.DataFrame({'sample_routine': sample_routine, 'matches': matches})

To revisit later: searching cosdna for 'Armada Face and Body Shield 60 sunblock' returns no results, but 'Armada Face and Body Shield 60' does. Would fuzzy text matching help here?

fuzzywuzzy vs. fuzzyset

In [None]:
matches.head()

In [None]:
products

# fuzzy string comparison