# Imports

In [6]:
import pandas as pd
from tabulate import tabulate

# Experimental Settings

**Datasets**: Yelp reviews (yelp), Indeed answers (indeed), Amazon Electronics (electronics), Amazon Groceries (grocery)

**Embeddings**: Each dataset has three different embeddings: mask_vec, avg_context_vec, avg_concat_mask_vec. These embeddings share the same corpus and keyphrase list. For example, for _indeeda_:
1. **mask_vec**: Has corel embeddings. dataset -> indeeda-corel
2. **avg_context_vec**: Has average sentence embeddings for keywords (where the keyword has been masked in a sentence). dataset -> indeeda-meg-ac
3. **avg_concat_mask_vec**: Has average tokenized embeddings from the last 4 layers for keywords (where the keyword has been masked in a sentence). dataset -> indeeda-meg-pt

**Embedding parameters**:

no. of sentences sampled per keyword: 750

bert-model: 
/home/ubuntu/users/nikita/models/bert_finetuned_lm/indeed_reviews_ques_ans
or,
bert-base-uncased


**Clustering algorithms**:

KMeans: dim = 768 (3072 for avg_concat_mask_vec), cluster size = {800, 1000}, output_filename=kmeans_{size}.csv

KNN: dim = 768 (3072 for avg_concat_mask_vec), neighbors = {5, 10, 15, 20, 25}, output_filename=knn_{neighbors}.csv

# KMeans

In [245]:
def tabulate_results_kmeans(dataset, query, size):
    """Print the KMeans cluster-mates of ``query`` for all three embeddings.

    Loads ``kmeans_{size}.csv`` from the ``-corel``, ``-meg-ac`` and ``-meg-pt``
    data directories of ``dataset`` (relative to ``../../data/``) and hands the
    resulting frames to ``get_cluster_kmeans`` keyed by embedding name.

    Args:
        dataset: Dataset prefix used to build the directory names
            (e.g. ``'indeeda'`` -> ``indeeda-corel``).
        query: Entity string to look up.
        size: Value that selects the result file ``kmeans_{size}.csv``; it is
            also echoed in the header line as "cluster size".

    Returns:
        None. The table is printed by ``get_cluster_kmeans``.
    """
    print('query: {}, cluster size: {}'.format(query, size))
    csv_name = 'kmeans_' + str(size) + '.csv'
    # (embedding label, directory suffix) pairs, in display order.
    variants = (
        ("mask_vec", '-corel'),
        ("avg_context_vec", '-meg-ac'),
        ("avg_concat_mask_vec", '-meg-pt'),
    )
    frames = {}
    for encoding, suffix in variants:
        frames[encoding] = pd.read_csv('../../data/' + dataset + suffix + '/intermediate/' + csv_name)
    get_cluster_kmeans(query, frames)

In [241]:
def get_cluster_kmeans(query, dfs):
    """Print, per embedding, the other entities sharing ``query``'s cluster.

    Args:
        query: Entity string matched exactly against the ``entity`` column.
        dfs: Mapping of embedding name -> DataFrame that has ``entity`` and
            ``clus_id`` columns.

    For each frame, the ``clus_id`` of the first row whose ``entity`` equals
    ``query`` is taken, every ``entity`` in that cluster is collected, and one
    occurrence of ``query`` itself is dropped. An embedding in which the query
    is absent contributes an empty column.

    Returns:
        None. The columns are printed with ``tabulate`` using the mapping keys
        as headers.
    """
    def _cluster_mates(frame):
        hits = frame[frame['entity'] == query]
        if len(hits) == 0:
            return []
        wanted_id = hits.iloc[0]['clus_id']
        members = frame[frame['clus_id'] == wanted_id]['entity'].tolist()
        # The query is a member of its own cluster; drop it from the listing.
        members.remove(query)
        return members

    table = {encoding: _cluster_mates(frame) for encoding, frame in dfs.items()}
    print(tabulate(table, headers='keys'))

## Indeed Answers

In [242]:
dataset = 'indeeda'

### Drug Test

In [243]:
query = 'drug test'

In [246]:
tabulate_results_kmeans(dataset, query, 800)

query: drug test, cluster size: 800
mask_vec                   avg_context_vec             avg_concat_mask_vec
-------------------------  --------------------------  -------------------------
ua                         desecration                 driving test
rotating shift             sex change                  onsite interview
cafeteria                  school district             urine test
sales rep                  criminal background check   473 exam
backround check            final paycheck              teller position
fork lift                  tps                         role play
mystery                    review eligible             math test
half pay                   employment agency           personality test
medical emergency          truck driver                appeal
criminal background check  review ready                puzzle
bee                        sine                        ua
temp service               airline industry            assessment test
dictatorship

In [247]:
tabulate_results_kmeans(dataset, query, 1000)

query: drug test, cluster size: 1000
mask_vec                     avg_context_vec        avg_concat_mask_vec
---------------------------  ---------------------  ------------------------
test                         tps                    dot physical
backround check              test                   performance review
saliva                       5 panel                math test
white paper                  pensacola florida      bg check
commission structure         rectal                 back ground check
retirement plan              finger prints          back round check
hand book                    urination              online application
swab test                    cheek swab             annual review
lunch break                  oral drug test         final interview
uniform policy               oral swab              probation period
cafeteria                    urine test             assessment test
fingerprint                  urine                  grace period
finger pr

### Dress Code

In [248]:
query = 'dress code'

In [249]:
tabulate_results_kmeans(dataset, query, 800)

query: dress code, cluster size: 800
mask_vec                     avg_context_vec        avg_concat_mask_vec
---------------------------  ---------------------  ---------------------------
typing speed                 proper uniform         typing speed
initial training             business casual        kinks
performance review           comfortable clothing   quarterly review
assessment test              suspenders             constitution
exact amount                 tunic                  rehire process
90 day probation period      professional           uniform policy
dress attire                 casual clothes         catapult
goldfish                     dress clothes          maximum age
pat test                     casual attire          open door policy
3rd interview                parrot                 customer base
pension plan                 leopard                solution
overnight shift              plain dress            bureaucracy
friday / saturday            jumpsu

In [250]:
tabulate_results_kmeans(dataset, query, 1000)

query: dress code, cluster size: 1000
mask_vec              avg_context_vec      avg_concat_mask_vec
--------------------  -------------------  ---------------------------
art                   dress clothes        uniform policy
main reason           cap                  exact date
sake                  cart attendants      sales goals
adjectives            deer valley          employee assistance program
adventure             watch                nature
golden rule           hard hat             company policy
attendance policy     uniform policy       pay rate
shopping experience   caps                 working conditions
organization          horace               hourly wages
culture               sps                  hourly rate
actress               car hops             legislation
gravity               uniform shirts       producer
economy               personal appearance  vacation policy
worst place           dress casual         minimum wages
brand extension       cover       

### Hiring Age

In [251]:
query = 'hiring age'

In [252]:
tabulate_results_kmeans(dataset, query, 800)

query: hiring age, cluster size: 800
mask_vec             avg_context_vec       avg_concat_mask_vec
-------------------  --------------------  ---------------------
minimum age          ova                   starting pay
minimal age          minimum age           legal working age
density              youngest age          starting wage
age range            deptford              parade
youngest age         minimal age           minimum age
legal age            age requirement       starting salary
age requirement      17 year olds          decimal
legal working age    hazardous material    starting rate
succession planning  baling                age requirement
                     age limit             starting wages
                     years old             youngest age
                     age range             starting point
                     legal working age     maximum wage
                     legal age             minimal age
                     yrs old               age 

In [253]:
tabulate_results_kmeans(dataset, query, 1000)

query: hiring age, cluster size: 1000
mask_vec             avg_context_vec       avg_concat_mask_vec
-------------------  --------------------  ---------------------
youngest age         years old             minimal age
wild card            brandenburg kentucky  age requirement
rehire process       age requirement       age range
legal age            age limit             legal age
age limit            youngest age          youngest age
age range            minimal age           legal working age
legal working age    legal age             age limit
moreno valley        age range             minimum age
minimum age          legal working age
age requirement      mascot
succession planning  ova
kicker               retirement age
minimal age          minimum age
maximum age          deptford
office lady          shelton washington


### Dental Benefits

In [254]:
query = 'dental benefits'

In [255]:
tabulate_results_kmeans(dataset, query, 800)

query: dental benefits, cluster size: 800
mask_vec                  avg_context_vec            avg_concat_mask_vec
------------------------  -------------------------  ---------------------
adp                       commuter                   carpool
credit history            dental insurance           dental insurance
excellent service         ad&d                       dental / vision
previous experience       domestic partners          mining industry
aflac                     blue shield                psychiatrist
special needs             life insurance             disability insurance
food stamps               united healthcare          savings accounts
managerial positions      blue cross blue shield     401 k
transport                 health care insurance      surrogacy
medical marijuana         dentist                    vision insurance
state law                 profit sharing             stock market
great customer service    dental and vision          dental vision
finger

In [256]:
tabulate_results_kmeans(dataset, query, 1000)

query: dental benefits, cluster size: 1000
mask_vec                     avg_context_vec              avg_concat_mask_vec
---------------------------  ---------------------------  ---------------------
cross training               obama                        thrift savings plan
quality service              arizona state                401 k
design                       blue cross blue shield       ad&d
customer experience          telecommuting                disability insurance
customer service             benefits package             life insurance
customer relations           lifetime                     psychological testing
talent management            ivf                          metlife
medical dental               domestic partners            aflac
basic math                   tuition reimbursement        savings accounts
team building                maternity leave              welding
customer satisfaction        healthcare                   federal records
genesis          

### Company

In [257]:
query = 'company'

In [258]:
tabulate_results_kmeans(dataset, query, 800)

query: company, cluster size: 800
mask_vec                      avg_context_vec               avg_concat_mask_vec
----------------------------  ----------------------------  ----------------------------
city                          door dash                     fair labor standards act
convenience store             jurisdiction                  erc
bones                         corporation                   washington post
department store              meijer                        post office
restaurant                    young age                     private sector
commercial banking            pepsico                       district managers
super center                  distribution centers          lgbt community
local store                   casino                        united states postal service
headquarters                  united states                 eeoc
county                        dc                            home office
channel                       g4s             

In [259]:
tabulate_results_kmeans(dataset, query, 1000)

query: company, cluster size: 1000
mask_vec              avg_context_vec               avg_concat_mask_vec
--------------------  ----------------------------  -------------------------
bakery                amazon fulfillment            middle class
restaurant            hiring practices              cia
commissary            independent                   fair labor standards act
department            aldi                          postal service
house                 insurance company             private sector
city                  mayor                         sweatshops
dressing room         advertising                   team dynamics
garden                capital                       federal government
pharmacy              co   workers                  bottom line
milford connecticut   private                       millennium
warehouse             trump                         stereo
rain                  doral                         economy
department store      amazons        

## Yelp Reviews

In [260]:
dataset = 'yelp'

### Food

In [261]:
query = 'food'

In [262]:
tabulate_results_kmeans(dataset, query, 800)

query: food, cluster size: 800
mask_vec                    avg_context_vec               avg_concat_mask_vec
--------------------------  ----------------------------  ---------------------
nasi kuning                 authentic korean food         wine selection
portion size                southern_comfort food         whiskey selection
concept                     so many choices               d cor
tap beer selection          local bakery                  draft_beer list
packaging                   shitty customer_service       outdoor setting
whiskey list                draft_beer selection          caesar_salad dressing
vocals                      new york bagels               turnover rate
graphic design              lebanese food                 dining environment
cigarette smell             whiskey selection             draft selection
typeface                    decent_sized portions         customer services
salsa music                 hard_working people           ordering syst

In [264]:
tabulate_results_kmeans(dataset, query, 1000)

query: food, cluster size: 1000
mask_vec                    avg_context_vec                  avg_concat_mask_vec
--------------------------  -------------------------------  ------------------------
ambient music               draft_beer list                  monolith
vantage point               waiter andrew                    pharmacy staff
vibes                       first class                      color and texture
view                        symbiotic                        ventilation system
user interface              exceptionally busy               weekend_brunch menu
wine list                   somewhat slow                    atmosphere
stunning view               vandal                           four star rating
window view                 octagon                          value proposition
garden centre               standing_room only               enema
soundproofing               overly attentive                 store layout
diner vibe                  prestige         

### Restaurant (C)

In [408]:
query = 'restaurant'

In [266]:
tabulate_results_kmeans(dataset, query, 800)

query: restaurant, cluster size: 800
mask_vec                 avg_context_vec           avg_concat_mask_vec
-----------------------  ------------------------  ----------------------------
gallery                  luxor                     mammal
wsm                      twilight                  independent business
newest location          beaten path               local dive_bar
shopping centre          gondola                   donut shop
chevron                  rio buffet                bakery
swap meet                palazzo hotel             brazilian steakhouse
meat dept                hell hole                 diner
arcade                   carnival buffet           local sports_bar
general store            music factory             steakhouse
mansion                  hotel                     british pub
vieux montr al           strip hotels              lichen
lobby casino             texas station             style diner
gate                     riviera                   et

In [267]:
tabulate_results_kmeans(dataset, query, 1000)

query: restaurant, cluster size: 1000
mask_vec              avg_context_vec                  avg_concat_mask_vec
--------------------  -------------------------------  ---------------------
grocer                completely different             bakery
college town          steak n shake                    mall
hogwarts              yacht                            bake shop
nightclub             dominoes                         nightclub
pawn shop             authentic_italian pizza          motel
museum                donut shop                       theater
coffee roaster        sushi samba                      movie theatre
retirement community  miyako                           casino resort
taqueria              hangover cure                    italian bistro
music festival        tehran                           casino hotel
cookhouse             hookah                           library
mansion               patchouli                        feline
hyatt hotel           hot_dog joi

### Atmosphere

In [268]:
query = 'atmosphere'

In [269]:
tabulate_results_kmeans(dataset, query, 800)

query: atmosphere, cluster size: 800
mask_vec                    avg_context_vec               avg_concat_mask_vec
--------------------------  ----------------------------  ---------------------
nasi kuning                 gastropub                     wine selection
portion size                quick and painless            whiskey selection
concept                     beautiful space               food
tap beer selection          healthy food choices          d cor
packaging                   decent wine_list              draft_beer list
whiskey list                authentic vietnamese_cuisine  outdoor setting
vocals                      cozy setting                  caesar_salad dressing
graphic design              enjoyable dining_experience   turnover rate
cigarette smell             neighbourhood pub             dining environment
typeface                    an adorable                   draft selection
salsa music                 infrastructure                customer services
op

In [270]:
tabulate_results_kmeans(dataset, query, 1000)

query: atmosphere, cluster size: 1000
mask_vec                    avg_context_vec               avg_concat_mask_vec
--------------------------  ----------------------------  ------------------------
ambient music               minimalism                    monolith
vantage point               free wifi                     pharmacy staff
vibes                       impeccable service            color and texture
view                        lovely ambience               ventilation system
user interface              fun environment               food
wine list                   family_oriented restaurant    weekend_brunch menu
food                        authentic korean food         four star rating
stunning view               simple fare                   value proposition
window view                 vintage decor                 enema
garden centre               unique dining_experience      store layout
soundproofing               stiff drinks                  spp
diner vibe         

### Service (C)

In [271]:
query = 'customer service'

In [272]:
tabulate_results_kmeans(dataset, query, 800)

query: customer service, cluster size: 800
mask_vec                avg_context_vec    avg_concat_mask_vec
----------------------  -----------------  ---------------------
order accuracy
vandalism
enlightenment
digestion
sentience
service recovery
rude behavior
wisdom
intoxication
health standards
sexual orientation
tenure
yelp ratings
moustache
sanitation
cruise control
social media marketing
unprofessionalism
problem solving
exploration
body language
poor attitudes
locality
parenting
jihad
rudeness
communication
breast feeding
programming
laundry service
ethics
friendliness
bad behavior
lack thereof
hygiene
muscle memory
honesty
quality assurance
market share
personal space
critical mass
higher standards
quality control
behavior
diligence
torque
grammar
employment
etiquette
judgement
leadership
customer loyalty
productivity
convenience
turnover
puerto ricans
giant order
economics
gross negligence
product placement
safety
philanthropy
sustainability
discrimination
mastication
employee 

In [273]:
tabulate_results_kmeans(dataset, query, 1000)

query: customer service, cluster size: 1000
mask_vec                   avg_context_vec    avg_concat_mask_vec
-------------------------  -----------------  ---------------------
plumbing
action
marketing
negligence
poor attitudes
productivity
foot traffic
order accuracy
success
vandalism
programming
background noise
convenience
availability
ganja
glitz
customer interaction
intercourse
reservation policy
communication
pacing
innovation
punctuation
sustainability
orientation
parenting
inflated prices
locality
drunk munchies
culture shock
crowds
revenue
cloning
leadership
citizenship
gentrification
laundry service
damage
cruise control
electricity
chrysler
exploration
dated decor
proof
costumer service
customer engagement
yelp ratings
testosterone
greatness
traffic
punishment
nostalgia
late night grub
anesthesia
animation
teamwork
breast feeding
negative space
crowd control
relaxation
profit
turnover
employment
smoke smell
high turnover
street cred
capacity
big business
lowest common deno

### Mexican

In [274]:
query = 'mexican'

In [275]:
tabulate_results_kmeans(dataset, query, 800)

query: mexican, cluster size: 800
mask_vec                avg_context_vec           avg_concat_mask_vec
----------------------  ------------------------  ----------------------
indian                  old fashioned             buddhist
palestinian             savoy                     customer_service 101
isan                    venezuelan                american fusion
deli style              chipotle                  indian
southern soul           fraser                    michelin
traditional japanese    ramen                     caribbean
mexican inspired        omnivore                  americas
tibetan                 oktoberfest               european
pakistani               korean                    americanized chinese
bodybuilding            asian                     industrial
uyghur                  grass                     americanized mexican
thai                    caribbean                 redneck
middle eastern          argentina                 polish
authentic jamai

In [276]:
tabulate_results_kmeans(dataset, query, 1000)

query: mexican, cluster size: 1000
mask_vec             avg_context_vec               avg_concat_mask_vec
-------------------  ----------------------------  ----------------------------
flange               stoner                        pakistan
bellwether           less pretentious              tailgating
southern american    argentine                     lebanese
scandinavian         superlative                   japanese korean
palestinian          galway                        real italian
central american     your typical                  laotian
iranian              usual fare                    piccadilly
balkan               oslo                          ukrainian
salvadoran           traditional irish             peasant
austrian             irish                         authentic hawaiian
slovenian            typical mexican restaurant    mexican american
northern             new age                       guyanese
ole                  british                       french
cana

### Dessert

In [277]:
query = 'dessert'

In [278]:
tabulate_results_kmeans(dataset, query, 800)

query: dessert, cluster size: 800
mask_vec                   avg_context_vec            avg_concat_mask_vec
-------------------------  -------------------------  ---------------------
ny style pizza             an arnold_palmer           turkish tea
lebanese cuisine           cabernet                   chocolate custard
south indian cuisine       honeysuckle                snow cream
eating healthy             soju                       yakiniku
italian american cuisine   jasmine                    barilla
cuisine                    190 octane                 chaat
fake meat                  latte                      aged beef
italian fare               chrysanthemum tea          bottom shelf
middle eastern cuisine     unlimited beer             biosphere
brewed beer                merlot                     polish dog
standard breakfast fare    an iced_americano          nestea
american fare              peach bellini              hitachi
junk                       old fashions      

In [279]:
tabulate_results_kmeans(dataset, query, 1000)

query: dessert, cluster size: 1000
mask_vec                     avg_context_vec            avg_concat_mask_vec
---------------------------  -------------------------  -----------------------------
red velvet cake              clan                       republic ramen
unlimited champagne          extra bonus                french_press coffee
tap water                    chilly day                 pistachio cake
cider                        pizza                      banana tempura
craft beer                   topos                      naan bread
shave ice                    henna                      fresh pressed_juice
secret recipe                old fashioneds             mushroom soup
sangria                      coffee                     tteokbokki
french pressed coffee        some tweaking              roasted green_tea
bubble milk tea              dogs                       butter chicken
bottomless coffee            beer                       bibimbap
cold pizza              

# KNN

In [1]:
def tabulate_results_knn(dataset, query, size, word=None):
    """Print the KNN neighbors of ``query`` for all three embeddings.

    Reads ``knn_{size}.csv`` (or ``knn_{size}.{word}.csv`` when ``word`` is
    given) from the ``-corel``, ``-meg-ac`` and ``-meg-pt`` data directories of
    ``dataset`` under ``../../data/`` and passes the frames to
    ``get_cluster_knn`` keyed by embedding name.

    Args:
        dataset: Dataset prefix used to build the directory names.
        query: Entity string to look up.
        size: Value that selects the result file; echoed in the header line
            under the label "cluster size".
        word: Optional extra filename component inserted before ``.csv``.

    Returns:
        None. The table is printed by ``get_cluster_knn``.
    """
    stem = 'knn_' + str(size)
    if word is not None:
        stem = stem + '.' + str(word)
    filename = stem + '.csv'

    print('query: {}, cluster size: {}'.format(query, size))
    data_root = '../../data/' + dataset
    # Embedding label -> directory suffix, in display order.
    suffix_by_encoding = {
        "mask_vec": '-corel',
        "avg_context_vec": '-meg-ac',
        "avg_concat_mask_vec": '-meg-pt',
    }
    frames = {
        encoding: pd.read_csv(data_root + suffix + '/intermediate/' + filename)
        for encoding, suffix in suffix_by_encoding.items()
    }
    get_cluster_knn(query, frames)

In [341]:
def tabulate_results_knn_meg(dataset, query, size):
    """Print the KNN neighbors of ``query`` for the two ``meg`` embeddings only.

    Reads ``knn_{size}.csv`` from the ``-meg-ac`` and ``-meg-pt`` directories of
    ``dataset`` and passes the frames to ``get_cluster_knn``.

    Args:
        dataset: Dataset prefix used to build the directory names.
        query: Entity string to look up.
        size: Value that selects the result file; echoed in the header line
            under the label "cluster size".

    Returns:
        None. The table is printed by ``get_cluster_knn``.
    """
    print('query: {}, cluster size: {}'.format(query, size))
    # NOTE(review): absolute, machine-specific data root; unlike
    # tabulate_results_knn this only works on the original host.
    base = '/home/ubuntu/users/nikita/src/meg-kb/data/' + dataset
    tail = '/intermediate/knn_' + str(size) + '.csv'
    meg_frames = {
        "avg_context_vec": pd.read_csv(base + '-meg-ac' + tail),
        "avg_concat_mask_vec": pd.read_csv(base + '-meg-pt' + tail),
    }
    get_cluster_knn(query, meg_frames)

In [342]:
def tabulate_results_knn_meg_word(dataset, query, size, word):
    """Print the KNN neighbors of ``query`` for the two ``meg`` embeddings,
    reading the ``word``-specific result files.

    Reads ``knn_{size}.{word}.csv`` from the ``-meg-ac`` and ``-meg-pt``
    directories of ``dataset`` and passes the frames to ``get_cluster_knn``.

    Args:
        dataset: Dataset prefix used to build the directory names.
        query: Entity string to look up.
        size: Value that forms part of the result filename; echoed in the
            header line under the label "cluster size".
        word: Extra filename component inserted before ``.csv``.

    Returns:
        None. The table is printed by ``get_cluster_knn``.
    """
    print('query: {}, cluster size: {}'.format(query, size))
    # NOTE(review): absolute, machine-specific data root; only works on the
    # original host.
    base = '/home/ubuntu/users/nikita/src/meg-kb/data/' + dataset
    tail = '/intermediate/knn_' + str(size) + '.' + str(word) + '.csv'
    meg_frames = {
        "avg_context_vec": pd.read_csv(base + '-meg-ac' + tail),
        "avg_concat_mask_vec": pd.read_csv(base + '-meg-pt' + tail),
    }
    get_cluster_knn(query, meg_frames)

In [2]:
def get_cluster_knn(query, dfs):
    """Print one table column of neighbours per embedding for `query`.

    Args:
        query: Value compared for exact equality against each frame's
            'entity' column.
        dfs: Mapping of embedding name -> DataFrame with 'entity' and
            'neighbor' columns.

    For every embedding, the 'neighbor' values of the rows whose 'entity'
    equals `query` are collected in frame order; an embedding with no matching
    row contributes an empty column. The result is printed with `tabulate`,
    using the mapping keys as headers. Nothing is returned.
    """
    def _neighbors_of(frame):
        hits = frame[frame['entity'] == query]
        # Only touch the 'neighbor' column when at least one row matched.
        return hits['neighbor'].tolist() if len(hits) > 0 else []

    neighbors_by_encoding = {name: _neighbors_of(frame) for name, frame in dfs.items()}
    print(tabulate(neighbors_by_encoding, headers='keys'))

## Indeed Answers

In [51]:
dataset = 'indeeda'

### Drug Test

In [45]:
query = 'drug test'

In [46]:
tabulate_results_knn(dataset, query, 5)

query: drug test, cluster size: 5
mask_vec           avg_context_vec    avg_concat_mask_vec
-----------------  -----------------  ---------------------
back ground check  background check   back ground check
background check   back ground check  background check
random drug test   test               urine test
credit check       urine test         backround check
backround check    credit check       test


In [47]:
tabulate_results_knn(dataset, query, 10)

query: drug test, cluster size: 10
mask_vec           avg_context_vec    avg_concat_mask_vec
-----------------  -----------------  ---------------------
back ground check  background check   back ground check
background check   back ground check  background check
random drug test   test               urine test
credit check       urine test         backround check
backround check    credit check       test
swab test          back round check   random drug test
math test          drug tests         urinalysis
ua                 urine drug test    swab test
job offer          urinalysis         ua
back round check   random drug test   math test


In [48]:
tabulate_results_knn(dataset, query, 15)

query: drug test, cluster size: 15
mask_vec           avg_context_vec    avg_concat_mask_vec
-----------------  -----------------  ---------------------
back ground check  background check   back ground check
background check   back ground check  background check
random drug test   test               urine test
credit check       urine test         backround check
backround check    credit check       test
swab test          back round check   random drug test
math test          drug tests         urinalysis
ua                 urine drug test    swab test
job offer          urinalysis         ua
back round check   random drug test   math test
typing test        backround check    saliva test
pension            ua                 back round check
lunch break        swab test          credit check
drug tests         assessment test    assessment test
401k plan          pass               urine drug test


In [49]:
tabulate_results_knn(dataset, query, 20)

query: drug test, cluster size: 20
mask_vec           avg_context_vec      avg_concat_mask_vec
-----------------  -------------------  ---------------------
back ground check  background check     back ground check
background check   back ground check    background check
random drug test   test                 urine test
credit check       urine test           backround check
backround check    credit check         test
swab test          back round check     random drug test
math test          drug tests           urinalysis
ua                 urine drug test      swab test
job offer          urinalysis           ua
back round check   random drug test     math test
typing test        backround check      saliva test
pension            ua                   back round check
group interview    swab test            credit check
lunch break        assessment test      assessment test
drug tests         pass                 urine drug test
401k plan          random drug testing  personality

In [50]:
tabulate_results_knn(dataset, query, 25)

query: drug test, cluster size: 25
mask_vec           avg_context_vec      avg_concat_mask_vec
-----------------  -------------------  ---------------------
back ground check  background check     back ground check
background check   back ground check    background check
random drug test   test                 urine test
credit check       urine test           backround check
backround check    credit check         test
swab test          back round check     random drug test
math test          drug tests           urinalysis
ua                 urine drug test      swab test
job offer          urinalysis           ua
back round check   random drug test     math test
typing test        backround check      saliva test
pension            ua                   back round check
group interview    swab test            credit check
lunch break        assessment test      assessment test
drug tests         pass                 urine drug test
401k plan          random drug testing  personality

### Dress Code

In [52]:
query = 'dress code'

In [53]:
tabulate_results_knn(dataset, query, 5)

query: dress code, cluster size: 5
mask_vec             avg_context_vec    avg_concat_mask_vec
-------------------  -----------------  ---------------------
pay scale            uniform policy     uniform policy
uniform policy       casual wear        pay scale
pay rate             clothing           attendance policy
work / life balance  professional       culture
attendance policy    strict dress code  dress attire


In [54]:
tabulate_results_knn(dataset, query, 10)

query: dress code, cluster size: 10
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy


In [21]:
tabulate_results_knn(dataset, query, 15)

query: dress code, cluster size: 15
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy
training program      cap                pay structure
work life balance     watch              point system
contract              casual dress       interview process
union                 caps               union
atmosphere            sport              dress codes


In [55]:
tabulate_results_knn(dataset, query, 20)

query: dress code, cluster size: 20
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy
training program      cap                pay structure
work life balance     watch              point system
contract              casual dress       interview process
union                 caps               union
atmosphere            sport              dress codes
base salary       

In [56]:
tabulate_results_knn(dataset, query, 25)

query: dress code, cluster size: 25
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
culture               professional       culture
work / life balance   strict dress code  dress attire
attendance policy     footwear           bonus structure
pay structure         dress codes        commission structure
commission structure  business casual    pay rate
point system          brand              training program
dress attire          gym                open door policy
bonus structure       cap                pay structure
training program      watch              point system
work life balance     casual dress       interview process
contract              caps               union
union                 sport              dress codes
atmosphere        

### Hiring Age

In [24]:
query = 'hiring age'

In [57]:
tabulate_results_knn(dataset, query, 5)

query: dress code, cluster size: 5
mask_vec             avg_context_vec    avg_concat_mask_vec
-------------------  -----------------  ---------------------
pay scale            uniform policy     uniform policy
uniform policy       casual wear        pay scale
pay rate             clothing           attendance policy
work / life balance  professional       culture
attendance policy    strict dress code  dress attire


In [58]:
tabulate_results_knn(dataset, query, 10)

query: dress code, cluster size: 10
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy


In [59]:
tabulate_results_knn(dataset, query, 15)

query: dress code, cluster size: 15
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy
training program      cap                pay structure
work life balance     watch              point system
contract              casual dress       interview process
union                 caps               union
atmosphere            sport              dress codes


In [28]:
tabulate_results_knn(dataset, query, 20)

query: hiring age, cluster size: 20
mask_vec           avg_context_vec    avg_concat_mask_vec
-----------------  -----------------  ---------------------
age requirement    age requirement    age requirement
minimum age        minimum age        age range
age range          youngest age       minimum age
legal working age  minimal age        age limit
youngest age       legal working age  youngest age
starting wage      age range          starting wage
pay rate           legal age          legal working age
starting pay       age limit          starting pay
age limit          years old          starting rate
legal age          17 year olds       starting salary
pay scale          starting wage      pay rate
starting salary    starting pay       legal age
maximum age        starting salary    hourly rate
hourly rate        starting rate      minimal age
starting rate      average            pay scale
hourly pay         retirement age     hourly pay
base pay           pay rate           

In [60]:
tabulate_results_knn(dataset, query, 25)

query: dress code, cluster size: 25
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
culture               professional       culture
work / life balance   strict dress code  dress attire
attendance policy     footwear           bonus structure
pay structure         dress codes        commission structure
commission structure  business casual    pay rate
point system          brand              training program
dress attire          gym                open door policy
bonus structure       cap                pay structure
training program      watch              point system
work life balance     casual dress       interview process
contract              caps               union
union                 sport              dress codes
atmosphere        

### Dental Benefits

In [30]:
query = 'dental benefits'

In [61]:
tabulate_results_knn(dataset, query, 5)

query: dress code, cluster size: 5
mask_vec             avg_context_vec    avg_concat_mask_vec
-------------------  -----------------  ---------------------
pay scale            uniform policy     uniform policy
uniform policy       casual wear        pay scale
pay rate             clothing           attendance policy
work / life balance  professional       culture
attendance policy    strict dress code  dress attire


In [62]:
tabulate_results_knn(dataset, query, 10)

query: dress code, cluster size: 10
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy


In [63]:
tabulate_results_knn(dataset, query, 15)

query: dress code, cluster size: 15
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy
training program      cap                pay structure
work life balance     watch              point system
contract              casual dress       interview process
union                 caps               union
atmosphere            sport              dress codes


In [64]:
tabulate_results_knn(dataset, query, 20)

query: dress code, cluster size: 20
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy
training program      cap                pay structure
work life balance     watch              point system
contract              casual dress       interview process
union                 caps               union
atmosphere            sport              dress codes
base salary       

In [65]:
tabulate_results_knn(dataset, query, 25)

query: dress code, cluster size: 25
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
culture               professional       culture
work / life balance   strict dress code  dress attire
attendance policy     footwear           bonus structure
pay structure         dress codes        commission structure
commission structure  business casual    pay rate
point system          brand              training program
dress attire          gym                open door policy
bonus structure       cap                pay structure
training program      watch              point system
work life balance     casual dress       interview process
contract              caps               union
union                 sport              dress codes
atmosphere        

### Company

In [36]:
query = 'company'

In [66]:
tabulate_results_knn(dataset, query, 5)

query: dress code, cluster size: 5
mask_vec             avg_context_vec    avg_concat_mask_vec
-------------------  -----------------  ---------------------
pay scale            uniform policy     uniform policy
uniform policy       casual wear        pay scale
pay rate             clothing           attendance policy
work / life balance  professional       culture
attendance policy    strict dress code  dress attire


In [67]:
tabulate_results_knn(dataset, query, 10)

query: dress code, cluster size: 10
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy


In [39]:
tabulate_results_knn(dataset, query, 15)

query: company, cluster size: 15
mask_vec        avg_context_vec    avg_concat_mask_vec
--------------  -----------------  ----------------------------
corporation     usps               post office
organization    union              organization
post office     management         postal service
usps            publix             usps
postal service  walgreens          corporation
restaurant      government         restaurant
ceo             private            corporate office
union           starbucks          community
warehouse       macy 's            united states postal service
gm              public             hr department
plant           store level        call center
store manager   apple              home office
department      security           ceo
manager         jcp                union
community       retail             franchise owner
                community


In [68]:
tabulate_results_knn(dataset, query, 20)

query: dress code, cluster size: 20
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
work / life balance   professional       culture
attendance policy     strict dress code  dress attire
pay structure         footwear           bonus structure
commission structure  dress codes        commission structure
point system          business casual    pay rate
dress attire          brand              training program
bonus structure       gym                open door policy
training program      cap                pay structure
work life balance     watch              point system
contract              casual dress       interview process
union                 caps               union
atmosphere            sport              dress codes
base salary       

In [69]:
tabulate_results_knn(dataset, query, 25)

query: dress code, cluster size: 25
mask_vec              avg_context_vec    avg_concat_mask_vec
--------------------  -----------------  ---------------------
pay scale             uniform policy     uniform policy
uniform policy        casual wear        pay scale
pay rate              clothing           attendance policy
culture               professional       culture
work / life balance   strict dress code  dress attire
attendance policy     footwear           bonus structure
pay structure         dress codes        commission structure
commission structure  business casual    pay rate
point system          brand              training program
dress attire          gym                open door policy
bonus structure       cap                pay structure
training program      watch              point system
work life balance     casual dress       interview process
contract              caps               union
union                 sport              dress codes
atmosphere        

## Yelp Reviews

In [3]:
dataset = 'yelp'

### Restaurant (C)

In [4]:
query = 'restaurant'

In [7]:
tabulate_results_knn(dataset, query, 15)

query: restaurant, cluster size: 15
mask_vec       avg_context_vec     avg_concat_mask_vec
-------------  ------------------  ---------------------
resturant      hotel               buffet
restaraunt     buffet              hotel
restraunt      keg                 brewery
resteraunt     diner               bakery
store          chinese restaurant  donut shop
establishment  theater             hotel casino
resturaunt     brewery             truck
location       pub                 steakhouse
joint          area                resort
buffet         steak house         property
hotel          steakhouse          ice_cream shop
bakery         starbucks           japanese restaurant
cafe           neighbourhood       steak house
brewery        company             coffeehouse
salon          mexican restaurant  theater


In [94]:
tabulate_results_knn(dataset, query, 20)

query: restaurant, cluster size: 20
mask_vec       avg_context_vec        avg_concat_mask_vec
-------------  ---------------------  ---------------------
resturant      hotel                  buffet
restaraunt     buffet                 hotel
restraunt      keg                    brewery
resteraunt     diner                  bakery
store          chinese restaurant     donut shop
establishment  theater                hotel casino
resturaunt     brewery                truck
location       pub                    steakhouse
joint          area                   resort
buffet         steak house            property
hotel          steakhouse             ice_cream shop
bakery         starbucks              japanese restaurant
cafe           neighbourhood          steak house
brewery        company                coffeehouse
salon          mexican restaurant     theater
pizzeria       japanese restaurant    irish pub
food truck     vietnamese restaurant  casino
wine bar       downtown        

#### Mexican

In [120]:
query = 'mexican'

In [123]:
tabulate_results_knn(dataset, query, 15)

query: mexican, cluster size: 15
mask_vec        avg_context_vec    avg_concat_mask_vec
--------------  -----------------  ---------------------
chinese         greek              chinese
japanese        italian            french
korean          southern           indian
vietnamese      korean             peruvian
italian         asian              japanese
brazilian       brazilian          asian
asian           chinese            italian
filipino        vietnamese         greek
kosher          caribbean          kosher
hakka           island             filipino
thai            thai               brazilian
tex mex         barbecue           hawaiian
french          polish             lebanese
middle eastern  filipino           korean
colombian       mediterranean      thai


In [124]:
tabulate_results_knn(dataset, query, 20)

query: mexican, cluster size: 20
mask_vec        avg_context_vec    avg_concat_mask_vec
--------------  -----------------  ---------------------
chinese         greek              chinese
japanese        italian            french
korean          southern           indian
vietnamese      korean             peruvian
italian         asian              japanese
brazilian       brazilian          asian
asian           chinese            italian
filipino        vietnamese         greek
kosher          caribbean          kosher
hakka           island             filipino
thai            thai               brazilian
tex mex         barbecue           hawaiian
french          polish             lebanese
middle eastern  filipino           korean
colombian       mediterranean      thai
caribbean       gourmet            polish
hawaiian        hawaiian           vietnamese
indian          american           ethiopian
peruvian        french             persian
western         persian            rus

In [417]:
query = 'mexican restaurant'

In [418]:
tabulate_results_knn(dataset, query, 15)

query: mexican restaurant, cluster size: 15
mask_vec    avg_context_vec        avg_concat_mask_vec
----------  ---------------------  ---------------------
            vietnamese restaurant  vietnamese restaurant
            italian restaurant     chinese restaurant
            donut shop             japanese restaurant
            indian restaurant      filipino restaurant
            steak house            donut shop
            filipino restaurant    boba shop
            chinese restaurant     steak house
            japanese restaurant    sub shop
            steakhouse             italian restaurant
            sub shop               steakhouse
            brazilian steakhouse   chinese buffet
            chinese buffet         indian restaurant
            starbucks              brazilian steakhouse
            boba shop              french bakery
            keg                    irish pub


#### Bistro

In [409]:
query = 'bistro'

In [410]:
tabulate_results_knn(dataset, query, 15)

query: bistro, cluster size: 15
mask_vec     avg_context_vec    avg_concat_mask_vec
-----------  -----------------  ---------------------
pub          tavern             tavern
grille       pub                brasserie
deli         village            caf
hut          diner              pub
village      inn                noodle house
shack        steakhouse         garden
izakaya      steak house        izakaya
smokehouse   izakaya            village
supermarket  bakery             inn
legend       gastropub          brewery
snack bar    urban              paradise
garden       villa              factory
paradise     paris              diner
bay          restaurant         creamery
cottage      brewery            winery


#### Steakhouse

In [411]:
query = 'steakhouse'

In [412]:
tabulate_results_knn(dataset, query, 15)

query: steakhouse, cluster size: 15
mask_vec       avg_context_vec       avg_concat_mask_vec
-------------  --------------------  ---------------------
pizzeria       steak house           steak house
diner          mexican restaurant    japanese restaurant
pizza joint    buffet                chinese restaurant
burger joint   chinese restaurant    brewery
bbq joint      italian restaurant    irish pub
buffet         bistro                mexican restaurant
brewery        diner                 donut shop
wine bar       pub                   coffeehouse
pub            keg                   italian restaurant
mexican joint  japanese restaurant   chinese buffet
taqueria       tavern                french bakery
food truck     indian restaurant     diner
sushi joint    restaurant            buffet
gastropub      brazilian steakhouse  sub shop
bakery         chinese buffet        vietnamese restaurant


In [415]:
query = 'brazilian steakhouse'

In [416]:
tabulate_results_knn(dataset, query, 15)

query: brazilian steakhouse, cluster size: 15
mask_vec    avg_context_vec         avg_concat_mask_vec
----------  ----------------------  ---------------------
            mexican restaurant      mexican restaurant
            steakhouse              japanese restaurant
            steak house             chinese restaurant
            vietnamese restaurant   vietnamese restaurant
            indian restaurant       chinese buffet
            japanese restaurant     filipino restaurant
            filipino restaurant     steak house
            keg                     steakhouse
            italian restaurant      boba shop
            chinese restaurant      donut shop
            buffet                  french bakery
            restaurant              sub shop
            benihana                brewery
            fine_dining experience  indian restaurant
            ethiopian restaurant    italian restaurant


### Food (C)

In [427]:
query = 'food'

In [428]:
tabulate_results_knn(dataset, query, 15)

query: food, cluster size: 15
mask_vec          avg_context_vec       avg_concat_mask_vec
----------------  --------------------  ---------------------
wine list         sushi                 thin crust pizza
product           beer                  oxtail soup
pricing           coffee                pizza
beer list         buffet                table side guacamole
portion size      happy_hour prices     atmosphere
food quality      hookah                patio seating
pizza             energy                sangria
cocktail list     table side guacamole  customer services
atmosphere        diner                 deep_dish pizza
ambiance          indian food           coffee
sushi             entertainment         katsu sauce
salad bar         draft selection       almond croissant
coffee            saki                  music
hookah            cocktails             banchan
wood fired pizza  middle_eastern food   latte art


In [429]:
tabulate_results_knn(dataset, query, 20)

query: food, cluster size: 20
mask_vec          avg_context_vec       avg_concat_mask_vec
----------------  --------------------  ---------------------
wine list         sushi                 thin crust pizza
product           beer                  oxtail soup
pricing           coffee                pizza
beer list         buffet                table side guacamole
portion size      happy_hour prices     atmosphere
food quality      hookah                patio seating
pizza             energy                sangria
cocktail list     table side guacamole  customer services
atmosphere        diner                 deep_dish pizza
ambiance          indian food           coffee
sushi             entertainment         katsu sauce
salad bar         draft selection       almond croissant
coffee            saki                  music
hookah            cocktails             banchan
wood fired pizza  middle_eastern food   latte art
plating           restaurant            d cor
serving size      p

#### buffet

In [431]:
query = 'buffet'

In [432]:
tabulate_results_knn(dataset, query, 15)

query: buffet, cluster size: 15
mask_vec          avg_context_vec     avg_concat_mask_vec
----------------  ------------------  ---------------------
buffett           keg                 bakery
bakery            restaurant          restaurant
food truck        steakhouse          seafood buffet
club              steak house         brewery
resturant         chinese buffet      steakhouse
steakhouse        chinese restaurant  steak house
restaurant        diner               supermarket
deli              sushi               pub
diner             bakery              diner
cafe              pub                 truck
breakfast buffet  seafood buffet      donut shop
pizzeria          subway              bar lounge
juice bar         taco truck          co op
brewery           outback             irish pub
franchise         company             nightclub


#### sushi

In [434]:
query = 'sushi'

In [435]:
tabulate_results_knn(dataset, query, 15)

query: sushi, cluster size: 15
mask_vec        avg_context_vec    avg_concat_mask_vec
--------------  -----------------  ---------------------
poke            shawarma           shawarma
dim sum         barbecue           barbecue
pho             coffee             ramen
bubble tea      tapas              hookah
frozen yogurt   sushi sashimi      sushi sashimi
boba tea        buffet             tapas
soul food       teppanyaki         pizza
shabu shabu     beer               japanese curry
pizza           food               coffee
coffee          sake               deep_dish pizza
fro yo          sea food           dosa
hookah          island             beer
frozen custard  bakery             japanese cheesecake
ayce            polish             spanish tapas
chinese food    boat               desert


#### hookah

In [437]:
query = 'hookah'

In [438]:
tabulate_results_knn(dataset, query, 15)

query: hookah, cluster size: 15
mask_vec        avg_context_vec     avg_concat_mask_vec
--------------  ------------------  ---------------------
bubble tea      beer                sushi
coffee          coffee              coffee
sushi           saki                ramen
boba tea        entertainment       beer
beer            food                pizza
dim sum         karaoke             deep_dish pizza
poke            sushi               german beer
booze           buffet              japanese curry
frozen yogurt   pub                 absinthe
fro yo          live entertainment  desert
pho             keg                 sangria
frozen custard  brunch              japanese cheesecake
pizza           dogs                dosa
gaming          energy              poutine
saki            bear                thin crust pizza


#### beer

In [439]:
query = 'beer'

In [440]:
tabulate_results_knn(dataset, query, 15)

query: beer, cluster size: 15
mask_vec      avg_context_vec    avg_concat_mask_vec
------------  -----------------  ---------------------
draft beer    wine               wine
coffee        coffee             coffee
craft beer    sake               tea
wine          hookah             hookah
bubble tea    sushi              gelato
hookah        saki               pizza
saki          cocktails          saki
cold brew     food               sangria
pizza         buffet             cocktail
sake          specialty drinks   ramen
beverage      dogs               juice
drip coffee   pub                mimosa
bottled beer  top shelf          pepsi
booze         java               poutine
sushi         bakery             ice tea


#### price (query: happy_hour prices)

In [445]:
query = 'happy_hour prices'

In [446]:
tabulate_results_knn(dataset, query, 15)

query: happy_hour prices, cluster size: 15
mask_vec    avg_context_vec       avg_concat_mask_vec
----------  --------------------  -------------------------
            food                  hh prices
            cocktails             cocktails
            draft selection       late_night specials
            specialty drinks      happy_hour drink_specials
            hookah                customer services
            dogs                  low prices
            craft_beer selection  entertainment
            draft_beer selection  room rates
            table side guacamole  kids meals
            sushi                 specialty drinks
            so many options       results
            so many choices       late_night hours
            coffee                dogs
            free popcorn          latte art
            late_night specials   veggie options


#### Very Large K

In [450]:
query = 'food'

In [451]:
# NOTE(review): this call passes a fourth positional argument (1) that the
# three-argument calls elsewhere in this notebook omit; its meaning is set by
# tabulate_results_knn's signature — TODO confirm and pass it by keyword.
tabulate_results_knn(dataset, query, 100, 1)

query: food, cluster size: 100
mask_vec              avg_context_vec         avg_concat_mask_vec
--------------------  ----------------------  -----------------------------
wine list             sushi                   thin crust pizza
product               beer                    oxtail soup
pricing               coffee                  pizza
beer list             buffet                  table side guacamole
portion size          happy_hour prices       atmosphere
food quality          hookah                  patio seating
customer service      energy                  sangria
pizza                 table side guacamole    customer services
cocktail list         pizza                   deep_dish pizza
atmosphere            diner                   coffee
ambiance              indian food             katsu sauce
sushi                 entertainment           almond croissant
salad bar             desert                  music
coffee                draft selection         banchan
hookah    

In [452]:
# NOTE(review): this call passes a fourth positional argument (1) that the
# three-argument calls elsewhere in this notebook omit; its meaning is set by
# tabulate_results_knn's signature — TODO confirm and pass it by keyword.
tabulate_results_knn(dataset, query, 500, 1)

query: food, cluster size: 500
mask_vec                     avg_context_vec                  avg_concat_mask_vec
---------------------------  -------------------------------  -----------------------------
wine list                    sushi                            thin crust pizza
product                      beer                             oxtail soup
pricing                      coffee                           pizza
beer list                    buffet                           table side guacamole
portion size                 happy_hour prices                atmosphere
food quality                 hookah                           patio seating
customer service             energy                           sangria
pizza                        table side guacamole             customer services
cocktail list                pizza                            deep_dish pizza
atmosphere                   diner                            coffee
ambiance                     indian food     

In [None]:
# NOTE(review): no execution count is recorded for this cell and the saved
# output stops after the "query: ..." line, so no neighbours are shown for
# K=1000 — re-run to completion or drop the cell.
# The fourth positional argument (1) is not passed by the three-argument calls
# in this notebook; TODO confirm its meaning.
tabulate_results_knn(dataset, query, 1000, 1)

query: food, cluster size: 1000


### Dessert (C) [not derived from Food]

In [419]:
query = 'dessert'

In [420]:
tabulate_results_knn(dataset, query, 15)

query: dessert, cluster size: 15
mask_vec        avg_context_vec    avg_concat_mask_vec
--------------  -----------------  ---------------------
desert          desert             desert
breakfast       cannoli            gelato
pho             baklava            ramen
dim sum         popcorn            sake
bubble tea      mimosa             tea
frozen custard  pizza              poutine
takeaway        coconut cake       pizza
afternoon tea   smoothie           coffee
coffee          milkshake          an appetizer
halo halo       tea                dosa
sake            gelato             soft_serve ice_cream
pizza           cappuccino         tapas
sushi           butter cake        appetizer
takeout         champagne          sicilian pizza
brunch          sake               baklava


In [421]:
tabulate_results_knn(dataset, query, 20)

query: dessert, cluster size: 20
mask_vec        avg_context_vec      avg_concat_mask_vec
--------------  -------------------  ---------------------
desert          desert               desert
breakfast       cannoli              brunch
pho             baklava              gelato
dim sum         popcorn              ramen
bubble tea      mimosa               sake
frozen custard  pizza                tea
takeaway        coconut cake         poutine
afternoon tea   smoothie             pizza
coffee          milkshake            coffee
halo halo       tea                  an appetizer
sake            gelato               dosa
pizza           cappuccino           soft_serve ice_cream
sushi           butter cake          tapas
takeout         champagne            appetizer
brunch          sake                 sicilian pizza
frozen yogurt   banana split         baklava
happy hour      an app               gluten_free pasta
dosa            coffee               deep_dish pizza
shave ice       

### Employee (C)

In [375]:
# Related keyphrases noted by the author: 'staff', 'staff member'.
# NOTE(review): in the saved output below only the mask_vec column has
# neighbours for this query; the other two columns are empty.
query = 'employee'

In [376]:
tabulate_results_knn(dataset, query, 15)

query: employee, cluster size: 15
mask_vec           avg_context_vec    avg_concat_mask_vec
-----------------  -----------------  ---------------------
older lady
guy
girl
lady
older man
woman
gal
staff member
barista
attendant
clerk
gentleman
young man
young lady
assistant manager


#### Manager

In [366]:
query = 'manager'

In [367]:
tabulate_results_knn(dataset, query, 15)

query: manager, cluster size: 15
mask_vec           avg_context_vec    avg_concat_mask_vec
-----------------  -----------------  ---------------------
cashier            mgr                mgr
store manager      gm                 bar tender
mgr                bar tender         bartender
general manager    female server      sushi chef
gm                 male server        barista
owner              an employee        gm
hostess            female owner       busboy
bartender          security guard     pharmacist
delivery driver    front desk         sommelier
barista            cashier lady       chef
shift manager      bartender          female owner
clerk              busboy             security guard
assistant manager  barista            district manager
bus boy            delivery man       delivery man
receptionist       sushi chef         cashier lady


In [368]:
tabulate_results_knn(dataset, query, 20)

query: manager, cluster size: 20
mask_vec           avg_context_vec    avg_concat_mask_vec
-----------------  -----------------  ---------------------
cashier            mgr                mgr
store manager      gm                 bar tender
mgr                bar tender         bartender
general manager    female server      sushi chef
gm                 male server        barista
owner              an employee        gm
hostess            female owner       busboy
bartender          security guard     pharmacist
delivery driver    front desk         sommelier
barista            cashier lady       chef
shift manager      bartender          female owner
clerk              busboy             security guard
assistant manager  barista            district manager
bus boy            delivery man       delivery man
receptionist       sushi chef         cashier lady
greeter            front counter      concierge
pharmacist         young guy          male server
driver             security   

#### An Employee

In [377]:
query = 'an employee'

In [378]:
tabulate_results_knn(dataset, query, 15)

query: an employee, cluster size: 15
mask_vec    avg_context_vec       avg_concat_mask_vec
----------  --------------------  ---------------------
            another employee      another employee
            manager               another patron
            another patron        another staff_member
            security guard        an older_woman
            security              an older_gentleman
            female server         his boss
            mgr                   multiple employees
            an attitude           an older_lady
            an older_woman        her boss
            poor girl             an older_man
            bar tender            my entire_family
            an older_gentleman    security
            young guy             my aunt
            another staff_member  my grandfather
            front desk            jesus


In [379]:
tabulate_results_knn(dataset, query, 20)

query: an employee, cluster size: 20
mask_vec    avg_context_vec       avg_concat_mask_vec
----------  --------------------  ---------------------
            another employee      another employee
            manager               another patron
            another patron        another staff_member
            security guard        an older_woman
            security              an older_gentleman
            female server         his boss
            mgr                   multiple employees
            an attitude           an older_lady
            an older_woman        her boss
            poor girl             an older_man
            bar tender            my entire_family
            an older_gentleman    security
            young guy             my aunt
            another staff_member  more than one person
            front desk            my grandfather
            his attention         jesus
            front counter         kerry
            speaker               my cousi

### Atmosphere (C)

In [96]:
query = 'atmosphere'

In [99]:
tabulate_results_knn(dataset, query, 15)

query: atmosphere, cluster size: 15
mask_vec             avg_context_vec       avg_concat_mask_vec
-------------------  --------------------  ---------------------
ambiance             laid back atmosphere  d cor
interior design      ambient               interior decor
scenery              interior decor        craft_beer selection
vibe                 cool decor            patio seating
decore               d cor                 food
wine list            fun vibe              interior decoration
deco                 fun atmosphere        draft selection
food                 patio seating         draft_beer selection
music choice         design                beverage selection
beer list            craft_beer selection  music
cocktail list        romantic atmosphere   design
seating arrangement  retro decor           architecture
concept              chill environment     outdoor_seating area
outdoor space        fun environment       store layout
interior             modern decor    

In [100]:
tabulate_results_knn(dataset, query, 20)

query: atmosphere, cluster size: 20
mask_vec             avg_context_vec        avg_concat_mask_vec
-------------------  ---------------------  ---------------------
ambiance             laid back atmosphere   d cor
interior design      ambient                interior decor
scenery              interior decor         craft_beer selection
vibe                 cool decor             patio seating
decore               d cor                  food
wine list            fun vibe               interior decoration
deco                 fun atmosphere         draft selection
food                 patio seating          draft_beer selection
music choice         design                 beverage selection
beer list            craft_beer selection   music
cocktail list        romantic atmosphere    design
seating arrangement  retro decor            architecture
concept              chill environment      outdoor_seating area
outdoor space        fun environment        store layout
interior             

#### Fun Atmosphere

In [380]:
query = 'fun atmosphere'

In [381]:
tabulate_results_knn(dataset, query, 15)

query: fun atmosphere, cluster size: 15
mask_vec    avg_context_vec             avg_concat_mask_vec
----------  --------------------------  ---------------------
            fun environment             fun environment
            cool atmosphere             charming atmosphere
            laid back atmosphere        fun vibe
            chill environment           chill environment
            excellent customer_service  cool atmosphere
            impeccable service          welcoming atmosphere
            fast and friendly service   beautiful space
            fast friendly service       clean environment
            clean environment           pleasant environment
            casual environment          laid back atmosphere
            ice_cold beer               casual environment
            decent prices               romantic atmosphere
            atmosphere                  comfortable ambience
            charming atmosphere         fun concept
            craft_beer selecti

In [382]:
tabulate_results_knn(dataset, query, 20)

query: fun atmosphere, cluster size: 20
mask_vec    avg_context_vec             avg_concat_mask_vec
----------  --------------------------  ------------------------
            fun environment             fun environment
            cool atmosphere             charming atmosphere
            laid back atmosphere        fun vibe
            chill environment           chill environment
            excellent customer_service  cool atmosphere
            impeccable service          welcoming atmosphere
            fast and friendly service   beautiful space
            fast friendly service       clean environment
            clean environment           pleasant environment
            casual environment          laid back atmosphere
            ice_cold beer               casual environment
            decent prices               romantic atmosphere
            atmosphere                  comfortable ambience
            charming atmosphere         fun concept
            craft_beer sele

#### Poor Atmosphere

In [391]:
# Phrasings noted by the author: 'bad atmosphere', 'poor atmosphere', 'poor vibe'.
# NOTE(review): the saved output below is an empty table for 'poor vibe' in
# all three embeddings. This cell's execution count (391) is also later than
# that of the call that follows it (390) — re-run top to bottom to confirm.
query = 'poor vibe'

In [390]:
tabulate_results_knn(dataset, query, 15)

query: poor vibe, cluster size: 15
mask_vec    avg_context_vec    avg_concat_mask_vec
----------  -----------------  ---------------------


### Service (C)

In [392]:
# Related keyphrases noted by the author: 'service', 'experience'.
# NOTE(review): this cell's execution count (392) is much later than the
# calls below it (117, 118), and their saved output has neighbours only in
# the mask_vec column — re-run top to bottom to confirm the results.
query = 'customer service'

In [117]:
tabulate_results_knn(dataset, query, 15)

query: customer service, cluster size: 15
mask_vec             avg_context_vec    avg_concat_mask_vec
-------------------  -----------------  ---------------------
costumer service
hospitality
pricing
food quality
customer experience
happy hour pricing
value
atmosphere
ambiance
music choice
quality control
hygiene
vibes
organization
teamwork


In [118]:
tabulate_results_knn(dataset, query, 20)

query: customer service, cluster size: 20
mask_vec             avg_context_vec    avg_concat_mask_vec
-------------------  -----------------  ---------------------
costumer service
hospitality
pricing
food quality
customer experience
happy hour pricing
eye candy
value
atmosphere
ambiance
music choice
wine list
quality control
hygiene
vibes
organization
teamwork
service recovery
energy
product


#### Great Service

In [422]:
# Other qualifiers noted by the author for "<qualifier> service":
# great / good / poor.
query = 'impeccable service'

In [423]:
tabulate_results_knn(dataset, query, 15)

query: impeccable service, cluster size: 15
mask_vec    avg_context_vec             avg_concat_mask_vec
----------  --------------------------  ----------------------------
            excellent customer_service  excellent customer_service
            fun atmosphere              extremely_friendly staff
            personalized service        cool decor
            authentic_italian food      big portions
            ice_cold beer               ice_cold beer
            quick and friendly service  exceptional customer_service
            fun environment             free wifi
            huge portions               fast friendly service
            fast and friendly service   large portion_sizes
            food                        stiff drinks
            stellar                     low prices
            first class                 cute decor
            atmosphere                  extremely_slow service
            cocktails                   personalized service
            low p

In [424]:
tabulate_results_knn(dataset, query, 20)

query: impeccable service, cluster size: 20
mask_vec    avg_context_vec             avg_concat_mask_vec
----------  --------------------------  ----------------------------
            excellent customer_service  excellent customer_service
            fun atmosphere              extremely_friendly staff
            personalized service        cool decor
            authentic_italian food      big portions
            ice_cold beer               ice_cold beer
            quick and friendly service  exceptional customer_service
            fun environment             free wifi
            huge portions               fast friendly service
            fast and friendly service   large portion_sizes
            food                        stiff drinks
            stellar                     low prices
            first class                 cute decor
            atmosphere                  extremely_slow service
            cocktails                   personalized service
            low p

In [425]:
# Other qualifiers noted by the author for "<qualifier> service":
# great / good / poor.
query = 'excellent service'

In [426]:
tabulate_results_knn(dataset, query, 15)

query: excellent service, cluster size: 15
mask_vec    avg_context_vec    avg_concat_mask_vec
----------  -----------------  ---------------------
