In [1]:
import requests
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time
from webdriver_manager.chrome import ChromeDriverManager
import selenium
import json
import flask
from flask import Flask, render_template, request
import random
from werkzeug.datastructures import MultiDict

In [2]:
# JSON cache of processed perfume records; read back with open_cache() to build the Fragrance list.
dataPath = 'data/PerfumesFinal.json'
# JSON cache of the serialized fragrance graph (presumably the file GraphConstruct() is given — TODO confirm).
graphPath = 'data/fragraph.json'
# HTML template file name; presumably rendered by the Flask app elsewhere in the notebook — TODO confirm.
templatePath = 'userinput3.html'

In [3]:
class Graph:
    '''
    A class of objects to represent directed graph data structure.

    Attributes:
    nodes: dict mapping each node key to the value stored at that node
    neighbors: dict mapping each node key to the list of keys it points to
    '''
    def __init__(self):
        self.nodes = {}
        self.neighbors = {}

    def addNode(self, newKey, newVal):
        '''
        Add a node/vertex to the directed graph by assigning the value of the node to its key/name.

        Adding a key that already exists replaces its value and resets its
        outgoing neighbor list to empty.
        '''
        self.nodes[newKey] = newVal
        self.neighbors[newKey] = []

    def removeNode(self, key):
        '''
        Remove a node and all its connections from the directed graph by its key.

        Both the node's own outgoing edges and every edge pointing at it from
        another node are removed, so no neighbor list is left referencing a
        key that no longer exists. A message is printed if the key is unknown.
        '''
        if key not in self.nodes:
            print('The node {} does not exist.'.format(key))
            return

        del self.nodes[key]
        del self.neighbors[key]

        # Drop incoming edges as well; rewrite each list in place so that
        # external references to the neighbor lists stay valid.
        for adjacent in self.neighbors.values():
            if key in adjacent:
                adjacent[:] = [other for other in adjacent if other != key]

    def addEdge(self, key1, key2):
        '''
        Add a neighbor to the list of neighbors of a given node.

        The edge is directed from key1 to key2. Nothing is added (and a message
        is printed) when either node is missing or when key1 == key2; an edge
        that already exists is silently ignored.
        '''
        if key1 not in self.nodes:
            print('The node {} does not exist.'.format(key1))
        elif key2 not in self.nodes:
            print('The node {} does not exist.'.format(key2))
        elif key1 == key2:
            print('A node cannot be its own neighbor.')
        elif key2 in self.neighbors[key1]:
            pass
        else:
            self.neighbors[key1].append(key2)

    def description(self):
        '''
        Print the neighbor list of each node in the directed graph.
        '''
        if len(self.nodes) == 0:
            print('The graph is empty.')
        else:
            print('Node: List of Neighbors')
            for node, neighbors in self.neighbors.items():
                if neighbors == []:
                    print('{}: < No neighbors >'.format(node))
                else:
                    print('{}: {}'.format(node, neighbors))


In [4]:
class Fragrance:
    '''
    A class of objects to represent a perfume/cologne.

    Parameters:
    parse: indexable sequence holding, in order:
           name, brand, gender, accords, top, mid, base, rating, votes,
           longevity, sillage, value, pic, description
    '''
    def __init__(self, parse):
        self.name = parse[0]
        self.brand = parse[1]
        self.gender = parse[2]
        self.accords = parse[3]
        self.top = parse[4]
        self.mid = parse[5]
        self.base = parse[6]
        self.rating = parse[7]
        self.votes = parse[8]
        self.longevity = parse[9]
        self.sillage = parse[10]
        self.value = parse[11]
        self.pic = parse[12]
        self.description = parse[13]

    def popularity(self):
        '''
        A metric to measure the popularity of the fragrance as the total number of 'favorable' votes:

        popularity 'p' = rating (%) * number of votes

        The rating is scaled by 0.2 before multiplying (presumably it is on a
        0-5 scale, so this turns it into a fraction — TODO confirm).

        Returns:
        p: Number of favorable votes (int); 0 when rating/votes are missing
           or not numeric.
        '''
        try:
            return round(self.rating*0.2*self.votes)
        except (TypeError, ValueError, OverflowError):
            # TypeError: rating/votes missing or non-numeric;
            # ValueError/OverflowError: round() of NaN/infinity.
            return 0

    def primaryAccord(self):
        '''
        Ascertain the dominant accord of the fragrance.

        The dominant accord is taken to be the first key of the accords
        mapping, lower-cased.

        Returns:
        acc: primary accord (str), or 'no accords' when none is available
        '''
        try:
            pa = list(self.accords.keys())[0].lower()
        except (AttributeError, IndexError, TypeError):
            # accords is empty, not a mapping, or its first key is not a string.
            pa = 'no accords'

        return pa


In [5]:
class Object:
    def toJSON(self):
        '''
        Serialize this object to a JSON string (4-space indent).

        Any value the encoder cannot handle natively, starting with this
        object itself, is replaced by its attribute dictionary.
        '''
        def attributeDict(unsupported):
            # Fallback used by the encoder for non-JSON-native objects.
            return unsupported.__dict__

        return json.dumps(self, indent=4, default=attributeDict)

In [6]:
def activateBrowser():
    '''
    Start a visible (non-headless) Google Chrome session.

    The Chrome driver binary is obtained through ChromeDriverManager and
    handed to splinter's Browser.

    Parameters:
    None

    Returns:
    Open browser
    '''
    driver_binary = ChromeDriverManager().install()
    return Browser("chrome", executable_path=driver_binary, headless=False)

In [7]:
def pageSetup():
    '''
    Open the Fragrantica search page in a fresh browser so perfume URLs can be collected.

    The search page shows only 30 listings by default. Automated clicking of
    'See more perfumes' was abandoned because the site sometimes redirected to
    a 'Verify you are human' page. Instead, after calling this function the
    user manually clicks 'See more perfumes' in the opened window (each click
    adds 30 listings) before the URLs are harvested.

    Parameters:
    None

    Returns:
    browser: Active Chrome WebDriver
    '''
    search_page = "https://www.fragrantica.com/search/"
    session = activateBrowser()
    session.visit(search_page)
    return session

In [8]:
def getURLs(browser):
    '''
    Collect the perfume page URLs currently listed on the Fragrantica search page.

    Call this after manually selecting 'See more perfumes' in the browser
    window as many times as desired (the page shows 30 listings by default;
    automated clicking was dropped because it could trigger a
    'Verify you are human' redirect).

    Parameters:
    browser: Active Chrome WebDriver

    Returns:
    URLs: list of URLs of perfume pages on Fragrantica
    '''
    page = bs(browser.html, "html.parser")
    cards = page.find_all('div', class_='card-section')

    # Only every second 'card-section' div (odd positions) is read for a link.
    return [card.a['href'] for card in cards[1::2]]

In [9]:
def _noteNames(section):
    '''
    Read the note names out of one notes section of a perfume page.

    One name is read per 'link-span' element in the section; the names are
    taken from the section's div elements at positions 2, 5, 8, ...
    '''
    cells = section.find_all("div")
    count = len(section.find_all("span", class_="link-span"))
    return [cells[2 + 3*k].get_text() for k in range(count)]


def fragScrape(urls):
    '''
    Scrape perfume pages on Fragrantica to obtain fragrance data.

    A fresh browser is opened for every URL and closed again as soon as the
    page source has been captured, so no Chrome instances are left running.
    If a page cannot be visited, scraping stops and the records collected so
    far are returned. The function pauses 5 seconds after each page.

    Parameters:
    1. urls: list of URLs obtained from getURLs()

    Returns:
    perfumesDict: list of dictionaries containing the scraped, unprocessed perfume data
    '''
    perfumesDict = []

    for url in urls:

        browser = activateBrowser()
        try:
            try:
                browser.visit(url)
            except Exception:
                # Stop at the first page that cannot be loaded.
                break
            html = browser.html
        finally:
            # Always release the browser, including on the break above.
            browser.quit()

        soup = bs(html, "html.parser")
        cells = soup.find_all("div", class_="cell small-12")

        #Getting the fragrance name and brand
        bold_parts = cells[3].find_all("b")
        name = bold_parts[0].get_text()
        brand = bold_parts[1].get_text()

        #Getting the targeted/suggested fragrance gender
        gender = soup.find("small").get_text()

        #Getting the fragrance rating and number of votes
        try:
            info_spans = soup.find("p", class_="info-note").find_all("span")
            rating = float(info_spans[0].get_text())
            votes = int(info_spans[2].get_text().replace(',', ''))
        except Exception:
            rating = 0
            votes = 0
            print(f"{name} does not have a ranking")

        #Getting the main accords of the fragrance
        try:
            accords_dict = {}
            for box in soup.find_all("div", class_="cell accord-box"):
                accord_name = box.get_text()
                bar_style = box.find("div", class_="accord-bar")["style"]
                accords_dict[accord_name] = float(bar_style.rsplit("width: ")[1].strip("%;"))
        except Exception:
            # A single malformed accord discards all accords for this perfume.
            accords_dict = {}
            print(f"{name} has no accords.")

        #Getting the fragrance notes
        notes = soup.find_all("div", attrs={"style": "display: flex; justify-content: center; text-align: center; flex-flow: row wrap; align-items: flex-end; padding: 0.5rem;"})

        top_notes_list = []
        middle_notes_list = []
        base_notes_list = []

        if len(notes) == 3:
            top_notes_list = _noteNames(notes[0])
            middle_notes_list = _noteNames(notes[1])
            base_notes_list = _noteNames(notes[2])
        elif len(notes) == 2:
            top_notes_list = _noteNames(notes[0])
            middle_notes_list = _noteNames(notes[1])
        elif len(notes) == 1:
            # A single section is stored as middle notes.
            middle_notes_list = _noteNames(notes[0])

        #Getting the public votes
        voting = soup.find_all("div", class_="cell small-1 medium-1 large-1")

        def ballots(position):
            return int(voting[position].get_text())

        #Votes on longevity (cells 0-4), sillage (cells 5-8) and price value (cells 14-18).
        #Cells 9-13 are not read here.
        longevity_votes = {"very weak": ballots(0),
                           "weak": ballots(1),
                           "moderate": ballots(2),
                           "long lasting": ballots(3),
                           "eternal": ballots(4)}
        sillage_votes = {"intimate": ballots(5),
                         "moderate": ballots(6),
                         "strong": ballots(7),
                         "enormous": ballots(8)}
        value_votes = {"way overpriced": ballots(14),
                       "overpriced": ballots(15),
                       "ok": ballots(16),
                       "good value": ballots(17),
                       "great value": ballots(18)}

        #Getting the link for a picture of the fragrance
        pic = cells[1].find("img")["src"]

        #Getting the description of the fragrance
        try:
            description = cells[3].get_text()
        except Exception:
            description = "NA"
            print(f"{name} does not have a description.")

        #Loading the perfume data into a dictionary
        perfumesDict.append({"name": name,
                             "brand": brand,
                             "gender": gender,
                             "rating": rating,
                             "votes": votes,
                             "accords": accords_dict,
                             "top": top_notes_list,
                             "mid": middle_notes_list,
                             "base": base_notes_list,
                             "longevity": longevity_votes,
                             "sillage": sillage_votes,
                             "value": value_votes,
                             "pic": pic,
                             "description": description})
        time.sleep(5)

    return perfumesDict

In [10]:
def _mostVoted(tally):
    '''
    Return the label with the highest vote count in a {label: votes} dict.

    Ties go to the label that appears first; an empty dict yields None.
    '''
    return max(tally, key=tally.get, default=None)


def FragDict(data):
    '''
    Function to process the scraped perfume data (one data point at a time).

    The longevity, sillage and value vote tallies are each reduced to the
    single label that received the most votes (first label wins ties; None
    if a tally is empty). The description is cut at the first occurrence of
    'Read about this' and its final character is dropped.

    Parameters:
    data: Unprocessed, scraped perfume data (dict)

    Returns:
    fragdict: Processed perfume data ready to be saved in a JSON cache (dict)
    '''
    fragdict = {}

    # Fields copied through unchanged.
    for field in ('name', 'brand', 'gender', 'accords', 'top', 'mid', 'base', 'rating', 'votes'):
        fragdict[field] = data[field]

    # Each tally is resolved independently, so an empty tally can no longer
    # pick up the winner of the previous category.
    for field in ('longevity', 'sillage', 'value'):
        fragdict[field] = _mostVoted(data[field])

    fragdict['pic'] = data['pic']
    fragdict['description'] = data['description'].split(sep = 'Read about this')[0][:-1]

    return fragdict

In [11]:
def notesaccordsLower(data):
    '''
    Lower-case every top, middle and base note of every perfume record.

    The records and their note lists are modified in place; the same list
    object is also returned for convenience.

    Parameters:
    data: list of dictionaries of processed perfume data

    Returns:
    data: Same data with fragrance notes in lower case, to be saved in a JSON cache.
    '''
    for record in data:
        for layer in ('top', 'mid', 'base'):
            notes = record[layer]
            # Slice assignment keeps the original list object.
            notes[:] = [note.lower() for note in notes]

    return data

In [13]:
#Scraping function calls (disabled: the statements below sit inside a string literal and do not run).
#Increase the value of the int 'lim_dat' to scrape more results.
#To run the scrape, remove the enclosing triple quotes and execute the cell.
'''
lim_dat = 1
b1 = pageSetup()
u1 = getURLs(b1)
d1 = fragScrape(u1[:lim_dat])
fdict = []
for i in range(len(d1)):
    fdict.append(FragDict(d1[i]))
d2save = notesaccordsLower(fdict)
'''

'\nlim_dat = 1\nb1 = pageSetup()\nu1 = getURLs(b1)\nd1 = fragScrape(u1[:lim_dat])\nfdict = []\nfor i in range(len(d1)):\n    fdict.append(FragDict(d1[i]))\nd2save = notesaccordsLower(fdict)\n'

In [14]:
def save_cache(dataToSave, fileName):
    '''
    Function to save the scraped data in a JSON cache.

    The data is serialized before the file is opened, so a serialization
    error leaves an existing cache file untouched instead of truncating it.

    Parameters
    ----------
    1. dataToSave: Scraped perfume data, saved in a list of dictionaries

    2. fileName: User defined JSON file name with extension (str)

    Returns
    -------
    None
    '''
    serialized = json.dumps(dataToSave)
    # The context manager closes the file even if the write fails.
    with open(fileName, 'w') as fw:
        fw.write(serialized)
    
#save_cache(d2save, dataPath)

In [15]:
def open_cache(cacheName):
    '''
    Function to open the JSON cache file (if it exists) and load the json file into perfumes_list.
    If the cache file does not exist, cannot be read, or does not contain
    valid JSON, an empty list is returned.

    Parameters
    ----------
    cacheName: file path (str)

    Returns
    -------
    perfumes_list: list of dictionaries (the data of each perfume is saved in a dictionary)
    '''
    try:
        # The context manager closes the file even when parsing fails.
        with open(cacheName, 'r') as cache_file:
            perfumes_list = json.loads(cache_file.read())
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: invalid JSON or undecodable text.
        perfumes_list = []

    return perfumes_list


In [16]:
def load_cache(data, k):
    '''
    Pull the attributes of the k-th perfume record out of the cached data.

    When a perfume's name is identical to its brand, the returned name is the
    brand followed by " (Perfume)".

    Parameters:
    1. data: perfume data loaded from cache (indexable collection of dicts)
    2. k: index of perfume in loaded data (int)

    Returns: A tuple of the following attributes of the perfume, in this order:
    name, brand, gender, accords, top, mid, base, rating, votes, longevity, sillage, value, pic, description
    '''
    field_order = ('name', 'brand', 'gender', 'accords', 'top', 'mid', 'base',
                   'rating', 'votes', 'longevity', 'sillage', 'value', 'pic', 'description')
    fields = [data[k][field] for field in field_order]

    # fields[0] is the name, fields[1] the brand.
    if fields[0] == fields[1]:
        fields[0] = fields[1] + " (Perfume)"

    return tuple(fields)


In [17]:
#Loading data from the cache and creating a list of objects of the class 'Fragrance'
#open_cache() returns an empty list when the cache cannot be read, in which case 'frag' stays empty.

cachedData = open_cache(dataPath)
frag = []
for i in range(len(cachedData)):
    # load_cache() returns the record's fields in the positional order Fragrance.__init__ expects.
    frag.append(Fragrance(load_cache(cachedData, i)))

In [18]:
def dictOfAttributes(frag):
    '''
    Get a dictionary of all attributes in the perfume data.

    Every list holds the distinct values found across the fragrances, in
    order of first appearance. 'allnotes' is the union of the top, middle and
    base notes and is a separate list from 'tops'.

    Parameters:
    frag: a list of objects of the class 'Fragrance'

    Returns:
    dictAtt: a dict of attributes with the keys
             'brands', 'gens', 'accs', 'tops', 'mids', 'bases', 'allnotes',
             'longs', 'sills', 'vals'
    '''
    def addUnique(bucket, value):
        # Append value only if the bucket does not already contain it.
        if value not in bucket:
            bucket.append(value)

    brands = []
    gens = []
    accs = []
    tops = []
    mids = []
    bases = []
    longs = []
    sills = []
    vals = []

    for item in frag:
        addUnique(brands, item.brand)
        addUnique(gens, item.gender)
        addUnique(accs, item.primaryAccord())

        for n in item.top:
            addUnique(tops, n)
        for n in item.mid:
            addUnique(mids, n)
        for n in item.base:
            addUnique(bases, n)

        addUnique(longs, item.longevity)
        addUnique(sills, item.sillage)
        addUnique(vals, item.value)

    #List containing all the fragrance notes, i.e., the union of the top, mid, and base notes.
    #Start from a copy: extending the 'tops' list itself would add the
    #middle and base notes to the reported top notes.
    allnotes = list(tops)
    for n in mids:
        addUnique(allnotes, n)
    for n in bases:
        addUnique(allnotes, n)

    dictAtt = {
        'brands' : brands,
        'gens' : gens,
        'accs' : accs,
        'tops' : tops,
        'mids' : mids,
        'bases' : bases,
        'allnotes' : allnotes,
        'longs' : longs,
        'sills' : sills,
        'vals' : vals
    }

    return dictAtt

In [19]:
# Collect the distinct attribute values found across all loaded fragrances.
dictAtt = dictOfAttributes(frag)

# Unpack each attribute list into its own module-level name for the graph-building cells below.
brands = dictAtt['brands']
gens = dictAtt['gens']
accs = dictAtt['accs']
tops = dictAtt['tops']
mids = dictAtt['mids']
bases = dictAtt['bases']
allnotes = dictAtt['allnotes']
longs = dictAtt['longs']
sills = dictAtt['sills']
vals = dictAtt['vals']

In [20]:
#Creating the graph of fragrances:
fragraph = Graph()

#Adding the attribute nodes to fragraph
#Brand, gender, accord, longevity, sillage and value nodes use the raw value as their key;
#note nodes get a " (TOP)" / " (MID)" / " (BASE)" / " (ALL)" suffix so the same note can exist per layer.
#NOTE(review): addNode() overwrites an existing key, so values shared between unsuffixed
#categories end up on one node (e.g. the scraper uses the label 'moderate' for both
#longevity and sillage) — confirm whether that sharing is intended.

for item in brands:
    fragraph.addNode(item,item)
    
for item in gens:
    fragraph.addNode(item,item)

for item in accs:
    fragraph.addNode(item,item)

for item in tops:
    fragraph.addNode(item + " (TOP)",item)

for item in mids:
    fragraph.addNode(item + " (MID)",item)

for item in bases:
    fragraph.addNode(item + " (BASE)",item)

for item in allnotes:
    fragraph.addNode(item + " (ALL)",item)

for item in longs:
    fragraph.addNode(item,item)

for item in sills:
    fragraph.addNode(item,item)

for item in vals:
    fragraph.addNode(item,item)
    
# Snapshot of the attribute keys, taken before any fragrance node is added.
att_list = list(fragraph.nodes.keys())

#Adding the data point (fragrances) nodes to fragraph
#Each fragrance node is keyed by its name and stores the Fragrance object itself.
#NOTE(review): two fragrances with the same name (or a name equal to an attribute key)
#would overwrite each other here — verify names are unique in the cache.
name_list = []
for item in frag:
    name_list.append(item.name)
    fragraph.addNode(item.name, item)

In [21]:
#Adding the edges to fragraph
#Every edge points from an attribute node to the fragrance that has that attribute.
for item in frag:
    nam = item.name
    bd = item.brand
    gn = item.gender
    ac = item.primaryAccord()
    tp = item.top
    md = item.mid
    # Deliberately not called 'bs': that name is the module-level alias of
    # BeautifulSoup, and rebinding it to a list here would break getURLs()
    # and fragScrape() for the rest of the session.
    base_notes = item.base
    lg = item.longevity
    sl = item.sillage
    vl = item.value
    
    fragraph.addEdge(bd, nam)
    fragraph.addEdge(gn, nam)
    fragraph.addEdge(ac, nam)
    fragraph.addEdge(lg, nam)
    fragraph.addEdge(sl, nam)
    fragraph.addEdge(vl, nam)
    
    # Each note is linked through its layer-specific node and the layer-independent "(ALL)" node.
    for n in tp:
        fragraph.addEdge(n + " (TOP)", nam)
        fragraph.addEdge(n + " (ALL)", nam)
    for n in md:
        fragraph.addEdge(n + " (MID)", nam)
        fragraph.addEdge(n + " (ALL)", nam)
    for n in base_notes:
        fragraph.addEdge(n + " (BASE)", nam)
        fragraph.addEdge(n + " (ALL)", nam)

In [22]:
def FragLoad(data):
    '''
    Flatten a single perfume record into the positional form used to build a Fragrance.

    Parameters:
    data: one perfume record (dict) containing all of the keys listed below

    Returns: A tuple of the following attributes of the perfume, in this order:
    name, brand, gender, accords, top, mid, base, rating, votes, longevity, sillage, value, pic, description
    '''
    field_order = ('name', 'brand', 'gender', 'accords', 'top', 'mid', 'base',
                   'rating', 'votes', 'longevity', 'sillage', 'value', 'pic', 'description')
    return tuple(data[field] for field in field_order)


In [23]:
def GraphDeconstruct(fragraph, att_list, name_list):
    '''
    Break the graph down into plain containers: its node values and its adjacency lists.

    Parameters:
    fragraph: object of the class Graph
    att_list: keys of the attribute nodes
    name_list: keys of the fragrance nodes

    Returns:
    graph: two-element list [nodes, neigh] where
           nodes = {'att': {attribute key: node value}, 'frag': {fragrance key: node value}}
           neigh = {attribute key: list of neighbor keys}
    '''
    attribute_values = {key: fragraph.nodes[key] for key in att_list}
    fragrance_values = {key: fragraph.nodes[key] for key in name_list}

    # Only attribute nodes contribute adjacency lists.
    adjacency = {key: fragraph.neighbors[key] for key in att_list}

    return [{'att': attribute_values, 'frag': fragrance_values}, adjacency]

In [24]:
def GraphConstruct(graphPath):
    '''
    Rebuild a Graph object from the JSON graph cache.

    The cache is expected to evaluate to a two-element sequence:
    [{'att': attribute nodes, 'frag': fragrance records}, adjacency lists].

    Parameters:
    graphPath: Path of the JSON graph cache with extension

    Returns:
    fragraph: Object of the Graph class with the data from the JSON cache
    '''
    # NOTE(review): eval() executes whatever text the cache file holds, so a
    # tampered cache can run arbitrary code. Consider parsing the payload with
    # json.loads instead once the on-disk format is confirmed.
    graphlist = eval(open_cache(graphPath))
    node_groups = graphlist[0]
    attribute_nodes = node_groups['att']
    fragrance_records = node_groups['frag']
    adjacency = graphlist[1]

    rebuilt = Graph()

    # Attribute nodes are stored as-is.
    for key, value in attribute_nodes.items():
        rebuilt.addNode(key, value)

    # Fragrance nodes are stored as Fragrance objects rebuilt from their records.
    for key, record in fragrance_records.items():
        rebuilt.addNode(key, Fragrance(FragLoad(record)))

    # Restore every edge from the adjacency lists.
    for source, targets in adjacency.items():
        for target in targets:
            rebuilt.addEdge(source, target)

    return rebuilt

In [25]:
def _anyOfMatches(graph, attributes, category, plist, everything):
    '''
    Node keys reachable from ANY of the attribute keys selected for a category.

    Selected keys are appended to plist as they are processed. If the category
    is missing, empty, or cannot be processed, 'everything' is returned so the
    category places no restriction on the result.
    '''
    try:
        selected = attributes[category]
        if selected == []:
            return everything
        matches = []
        for item in selected:
            plist.append(item)
            matches.extend(graph.neighbors[item])
        return matches
    except Exception:
        return everything


def _allOfMatches(graph, attributes, category, plist, everything):
    '''
    Node keys reachable from ALL of the attribute keys selected for a category.

    Selected keys are appended to plist as they are processed. If the category
    is missing, empty, or cannot be processed, 'everything' is returned so the
    category places no restriction on the result.
    '''
    try:
        selected = attributes[category]
        if selected == []:
            return everything
        per_item = []
        for item in selected:
            plist.append(item)
            per_item.append(list(graph.neighbors[item]))
        common = per_item[0]
        for other in per_item[1:]:
            common = list(set(other) & set(common))
        return common
    except Exception:
        return everything


def SubGraph(graph, attributes):
    '''
    Get a subgraph of the original graph containing only the attributes listed
    and the fragrances corresponding to them as its nodes.

    Within 'brands', 'gens', 'accs', 'longs', 'sills' and 'vals' a fragrance
    qualifies if it is a neighbor of any selected key; within 'tops', 'mids',
    'bases' and 'allnotes' it must be a neighbor of every selected key. The
    result keeps the fragrances that qualify in all categories. A category
    that is absent or empty does not restrict the result.

    Parameters:
    1. graph: an object of the class Graph
    2. attributes: a dictionary of attributes (category name -> list of node keys)

    Returns:
    subgraph: an object of the class Graph, containing only the attributes listed
    and the corresponding fragrances as its nodes. The original graph itself is
    returned when 'attributes' is empty.

    Raises:
    KeyError: if a selected attribute key is not a node of the graph.
    '''
    if len(attributes) == 0:
        return graph

    everything = list(graph.nodes.keys())
    plist = []      # every selected attribute key, in processing order
    pdict = {}      # category -> candidate node keys

    for category in ('brands', 'gens', 'accs', 'longs', 'sills', 'vals'):
        pdict[category] = _anyOfMatches(graph, attributes, category, plist, everything)

    for category in ('tops', 'mids', 'bases', 'allnotes'):
        pdict[category] = _allOfMatches(graph, attributes, category, plist, everything)

    names = list(set(pdict['brands'])&set(pdict['gens'])&set(pdict['accs'])&
                 set(pdict['longs'])&set(pdict['sills'])&set(pdict['vals'])&
                 set(pdict['tops'])&set(pdict['mids'])&set(pdict['bases'])&set(pdict['allnotes']))

    subgraph = Graph()
    for item in plist:
        subgraph.addNode(item, graph.nodes[item])

    for item in names:
        subgraph.addNode(item, graph.nodes[item])

    # NOTE(review): every selected attribute is linked to every surviving
    # fragrance, even where the original graph has no such edge (e.g. two
    # selected brands) — confirm this is intended.
    for i in plist:
        for j in names:
            subgraph.addEdge(i,j)

    return subgraph

In [26]:
def inputAttributes(frag):
    '''
    Get a dictionary of all input attributes.

    The brand, accord and note lists (top, middle, base and the combined
    'allnotes' list) are sorted; the remaining lists keep the order produced
    by dictOfAttributes().

    Parameters:
    frag: a list of objects of the class 'Fragrance'

    Returns:
    fd: dict of input attributes
    '''
    fd = dictOfAttributes(frag)
    # 'allnotes' is sorted explicitly rather than relying on it sharing a
    # list object with 'tops'.
    for category in ('brands', 'accs', 'tops', 'mids', 'bases', 'allnotes'):
        fd[category].sort()
    return fd

In [27]:
def FragNameNode(graph):
    '''
    List every node that appears as a neighbor somewhere in the graph.

    Each neighbor key is reported once, in order of first appearance while
    walking the adjacency lists.

    Parameters:
    graph: an object of the class Graph

    Returns:
    nodelist: values stored at those neighbor nodes
    namelist: keys of those neighbor nodes (same order as nodelist)
    '''
    found = {}
    for adjacent in graph.neighbors.values():
        for key in adjacent:
            if key not in found:
                found[key] = graph.nodes[key]

    return list(found.values()), list(found.keys())

In [28]:
def FragRecommender(graph, attributes):
    '''
    Get the list of fragrances (objects of the class Fragrance) that satisfy
    every selected attribute.
    
    For 'brands', 'gens', 'accs', 'longs', 'sills' and 'vals' a fragrance
    qualifies when it is a neighbor of ANY selected value. For 'tops', 'mids',
    'bases' and 'allnotes' it must be a neighbor of ALL selected values. The
    per-category results are then intersected.
    
    A category imposes no constraint when it is missing from attributes, has
    nothing selected, or names a value that is not a key of graph.neighbors.
    
    Parameters:
    1. graph: an object of the class Graph
    2. attributes: a dictionary mapping category names to lists of selected node keys
    
    Returns:
    perfRec: list of the values stored in graph.nodes for the matching names,
    in the order FragNameNode reports them. An empty attributes dictionary
    returns every value reported by FragNameNode.
    '''
    nodelist, namelist = FragNameNode(graph)
    
    if len(attributes) == 0:
        return nodelist
    
    # (category, True when a fragrance must match every selected value)
    categories = (
        ('brands', False), ('gens', False), ('accs', False),
        ('longs', False), ('sills', False), ('vals', False),
        ('tops', True), ('mids', True), ('bases', True), ('allnotes', True),
    )
    
    # None means "no category has constrained the result yet".
    matched = None
    for key, requireAll in categories:
        candidates = _selectedNeighbors(graph, attributes, key, requireAll)
        if candidates is None:
            continue
        matched = candidates if matched is None else matched & candidates
        
    if matched is None:
        return list(nodelist)
    
    # Walk namelist rather than the set so the result order is deterministic.
    return [graph.nodes[name] for name in namelist if name in matched]


def _selectedNeighbors(graph, attributes, key, requireAll):
    '''
    Return the set of names allowed by one attribute category, or None when
    that category imposes no constraint.
    '''
    try:
        selected = attributes[key]
        if selected == []:
            return None
        groups = [set(graph.neighbors[item]) for item in selected]
    except (KeyError, TypeError):
        # Missing category, non-iterable selection or unknown node key: the
        # category is ignored rather than failing the whole request.
        return None
    
    if not groups:
        return None
    if requireAll:
        return set.intersection(*groups)
    return set.union(*groups)

In [29]:
def outputHTML(inpDict):
    '''
    Prepare the fragrance recommendations based on the user input.
    
    The graph is loaded from graphPath and queried twice: once with inpDict
    and once with an empty selection to learn the full set of fragrances.
    When the query matches everything or nothing, a shuffled mix of popular
    fragrances (7 'for men', 7 'for women', 6 'for women and men') is offered
    instead; otherwise the matches themselves are returned via sortFrags.
    
    Parameters:
    inpDict: dict of processed user input attributes
    
    Returns:
    a two-element list [heading string, list of fragrances]
    '''
    graph = GraphConstruct(graphPath)
    perf = FragRecommender(graph, inpDict)
    allperf = FragRecommender(graph, {})
    
    hits, total = len(perf), len(allperf)
    if hits != total and hits != 0:
        return ["Your Recommendations", sortFrags(perf, k = None, gen = None)]
    
    # Equality is tested first, so an empty database counts as "no selection".
    if hits == total:
        outstr = "No selection? I got this! Check These Out..."
    else:
        outstr = "No Hits... But Check These Out!"
        
    frags = []
    for quota, gender in ((7, 'for men'), (7, 'for women'), (6, 'for women and men')):
        frags += sortFrags(allperf, quota, gender)
    random.shuffle(frags)
    
    return [outstr, frags[:20]]

In [30]:
def sortFrags(nodes, k = None, gen = None):
    '''
    Obtain the k most popular fragrances, optionally restricted to one gender.
    
    Parameters:
    nodes: list of objects exposing a popularity() method (and a 'gender'
           attribute when gen is given)
    k: maximum number of fragrances to return; None means at most 20
    gen: when given, only fragrances whose gender equals gen are considered
    
    Returns:
    list of at most k fragrances ordered by decreasing popularity; fragrances
    with equal popularity keep their input order.
    
    Side effect:
    when gen is None the list passed as nodes is itself sorted in place, as it
    was before; with a gender filter the caller's list is left untouched.
    '''
    if k is None:
        k = 20
        
    if gen is not None:
        nodes = [item for item in nodes if item.gender == gen]

    # One popularity() call per fragrance instead of one per comparison.
    nodes.sort(key = lambda item: item.popularity(), reverse = True)
                
    return nodes[:k]

In [31]:
def formParse(a):
    '''
    Turn the submitted request form into the attribute dictionary used by the
    recommender.
    
    Every field present in the form is copied with getlist, so fields outside
    the ten known categories are carried over as well. Note names then receive
    the suffix of their category (' (TOP)', ' (MID)', ' (BASE)', ' (ALL)').
    If any 'allnotes' value was submitted, the 'tops', 'mids' and 'bases'
    selections are discarded.
    
    Parameters:
    a: Immutable list of tuples - raw user input attributes
    
    Return:
    inpDict: dict of processed user input attributes
    '''
    categories = ('brands', 'gens', 'accs', 'tops', 'mids',
                  'bases', 'allnotes', 'longs', 'sills', 'vals')
    inpDict = {name: [] for name in categories}
    
    for field in list(a.keys()):
        inpDict[field] = a.getlist(field)
        
    suffixes = (('tops', ' (TOP)'), ('mids', ' (MID)'),
                ('bases', ' (BASE)'), ('allnotes', ' (ALL)'))
    for name, suffix in suffixes:
        notes = inpDict[name]
        for pos, note in enumerate(notes):
            notes[pos] = note + suffix
            
    # A selection made under "all notes" overrides the layer-specific ones.
    if inpDict['allnotes'] != []:
        for name in ('tops', 'mids', 'bases'):
            inpDict[name] = []
            
    return inpDict

In [55]:
# Flask application object; the two routes below are registered on it.
app = Flask(__name__)
# Attribute lists handed to the template as 'fd'.
# NOTE(review): 'frag' is not defined in this cell; it must already exist in
# the kernel when this cell runs.
fd = inputAttributes(frag)
# Same path as the graphPath assigned in the configuration cell near the top.
graphPath = "data/fragraph.json"
# NOTE(review): neither route below reads this module-level graph; outputHTML
# calls GraphConstruct(graphPath) itself on every request.
graph = GraphConstruct(graphPath)

# NOTE(review): no route is registered for '/', so the root URL answers 404.
@app.route('/inputPage')
def inputPage():
    '''
    Render the input form with no results (result_list is None).
    '''
    
    return render_template('userinput3.html', fd = fd, result_list=None) 

@app.route('/outputPage', methods=["POST"])
def outputPage():
    '''
    Parse the submitted form, compute the recommendations and render the same
    template with them passed as result_list.
    '''
    
    rawinp = request.form
    inpDict = formParse(rawinp)
    # outHTML is the two-element list [heading string, list of fragrances].
    outHTML = outputHTML(inpDict)    
    
    return render_template("userinput3.html", fd = fd, result_list=outHTML)

if __name__ == '__main__':
    # Start Flask's built-in server with its default settings.
    app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [21/Apr/2023 12:11:44] "[33mGET / HTTP/1.1[0m" 404 -
127.0.0.1 - - [21/Apr/2023 12:11:48] "GET /inputPage HTTP/1.1" 200 -
127.0.0.1 - - [21/Apr/2023 12:12:35] "POST /outputPage HTTP/1.1" 200 -
127.0.0.1 - - [21/Apr/2023 12:13:37] "POST /outputPage HTTP/1.1" 200 -
